In [1]:
from langchain_openai import OpenAIEmbeddings
from graphdatascience import GraphDataScience
from getpass import getpass
import pandas as pd
import os

  from .autonotebook import tqdm as notebook_tqdm


# Set up connections

In [136]:
openai_api_key = getpass()

 ········


In [3]:
neo4j_password = getpass()

 ········


In [4]:
neo4j_uri = "neo4j+s://bc9f751a.databases.neo4j.io"
neo4j_user = "neo4j"
gds = GraphDataScience(neo4j_uri, auth=(neo4j_user, neo4j_password))

In [5]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", openai_api_key=openai_api_key)

# Embed topic descriptions

In [6]:
topics = gds.run_cypher("""
                        MATCH (t:Topic)
                        WHERE t.embedding IS NULL
                        RETURN t.description AS description""")

In [7]:
topics['embedding'] = embeddings.embed_documents(topics['description'].to_list())



In [8]:
topics.head()

Unnamed: 0,description,embedding
0,,"[0.015375844493788012, -0.034777941596249584, ..."
1,! — NOT expression in Cypher,"[-0.001502172470261095, 0.03528362585172272, 0..."
2,!RATED relationship,"[0.01292134665386383, 0.02400438344185642, -0...."
3,!tables command,"[0.0033105522452637954, 0.0042203221288704, 0...."
4,"""(RELATIONSHIP)"" suffix","[0.019056276849615902, 0.0011400599897678637, ..."


In [9]:
gds.run_cypher("""CREATE VECTOR INDEX topic_vectors IF NOT EXISTS 
                  FOR (t:Topic)
                  ON (t.embedding)
                  OPTIONS {indexConfig: 
                      {`vector.dimensions`: 1536,
                       `vector.similarity_function`: 'cosine'
                       }}
                       """)

In [12]:
for i in range(0, int(topics.shape[0]/1000) + 1):
    gds.run_cypher("""
        UNWIND $topicData AS d
        MATCH (t:Topic {description:trim(d['description'])})
        CALL db.create.setNodeVectorProperty(t, 'embedding', d['embedding'])""",
                   {'topicData': topics.iloc[i*1000:(i+1)*1000,:].to_dict('records')})
    if (i+1)%5 == 0:
        print(f"Finished row {(i+1)*1000}")
    

Finished row 5000
Finished row 10000
Finished row 15000
Finished row 20000
Finished row 25000
Finished row 30000
Finished row 35000
Finished row 40000
Finished row 45000
Finished row 50000
Finished row 55000
Finished row 60000
Finished row 65000
Finished row 70000


# Calculate nearest neighbors

In [149]:
g_topics, result = gds.graph.project("topics", {"Topic":{"properties": "embedding"}}, "*")

In [150]:
result

nodeProjection                        {'Topic': {'label': 'Topic', 'properties': {'embedding': {'property': 'embedding', 'defaultValue': None}}}}
relationshipProjection    {'__ALL__': {'aggregation': 'DEFAULT', 'orientation': 'NATURAL', 'indexInverse': False, 'properties': {}, 'type': '*'}}
graphName                                                                                                                                  topics
nodeCount                                                                                                                                   70954
relationshipCount                                                                                                                               0
projectMillis                                                                                                                                1745
Name: 0, dtype: object

## Check similiarty scores for some sample document pairs

In [95]:
gds.run_cypher("""
UNWIND
[
    ['Cypher MATCH clause', 'Cypher MATCH statement'],
    ['Cypher MATCH clause', 'Inline MATCH clause'],
    ['MATCH (n)-[rs*]-() syntax deprecated', 'Cypher MATCH clause'],
    ['MATCH in Cypher', 'Cypher MATCH clause'],
    ['MATCH query', 'MATCH in Cypher'],
    ['MATCH with LIMIT in Cypher', 'MATCH in Cypher']
] AS themes
MATCH (n1), (n2) WHERE n1.description = themes[0] and n2.description = themes[1] 
RETURN 
n1.description AS theme1,
n2.description AS theme2,
gds.similarity.cosine(n1.embedding, n2.embedding) AS sim""")

Unnamed: 0,theme1,theme2,sim
0,Cypher MATCH clause,Cypher MATCH statement,0.946668
1,Cypher MATCH clause,Inline MATCH clause,0.708626
2,MATCH (n)-[rs*]-() syntax deprecated,Cypher MATCH clause,0.52579
3,MATCH in Cypher,Cypher MATCH clause,0.784752
4,MATCH query,MATCH in Cypher,0.547737
5,MATCH with LIMIT in Cypher,MATCH in Cypher,0.741662


## Mutate KNN at a level below which we don't think there will be many meaningful pairs

In [151]:
knn_result = gds.knn.mutate(g_topics,
                            mutateRelationshipType = "IS_SIMILAR",
                            mutateProperty = "similarity",
                            nodeProperties = "embedding",
                            topK = 5,
                            similarityCutoff = 0.85)

In [140]:
similarity_result = gds.run_cypher("""
    CALL gds.graph.relationshipProperty.stream("topics", "similarity", ["IS_SIMILAR"])
    YIELD sourceNodeId, targetNodeId, propertyValue AS similarity
    WITH gds.util.asNode(sourceNodeId) AS source, gds.util.asNode(targetNodeId) AS target, similarity
    RETURN source.description AS source, target.description AS target, similarity
    ORDER BY similarity""")
                    

In [141]:
similarity_result[similarity_result['similarity']>0.87].head(20)

Unnamed: 0,source,target,similarity
26807,Operators in Cypher,Expressions in Cypher,0.870002
26808,Classic algorithms in Neo4j,Available procedures in Neo4j,0.870003
26809,Graph-based predictions,Predictive graph-based features,0.870004
26810,Predictive graph-based features,Graph-based predictions,0.870004
26811,localdatetime.truncate in Cypher,LOCAL DATETIME in Cypher,0.870004
26812,GDS library configuration,GDS version 2.2+,0.870004
26813,Neo4j Single Sign-On (SSO),Single Sign-On (SSO),0.870006
26814,Single Sign-On (SSO),Neo4j Single Sign-On (SSO),0.870006
26815,Analytical databases,Analytic queries,0.870006
26816,Analytic queries,Analytical databases,0.870006


# Check WCC at a reasonable similarity cutoff

In [152]:
knn_result['relationshipsWritten']

169653

In [153]:
knn_result['similarityDistribution']

{'min': 0.8499984741210938,
 'p5': 0.8546295166015625,
 'max': 0.9985771179199219,
 'p99': 0.9849662780761719,
 'p1': 0.8509101867675781,
 'p10': 0.8593254089355469,
 'p90': 0.95648193359375,
 'p50': 0.8990097045898438,
 'p25': 0.8736991882324219,
 'p75': 0.9295692443847656,
 'p95': 0.9695320129394531,
 'mean': 0.9037189434961921,
 'p100': 0.9985771179199219,
 'stdDev': 0.03569805808001403}

In [154]:
gds.wcc.write(g_topics,
              writeProperty = "wccId",
              relationshipTypes = ["IS_SIMILAR"],
              relationshipWeightProperty = "similarity",
              threshold = 0.87,
              minComponentSize=2)

writeMillis                                                                                                                                                                                                                                                                                                                                                                 1287
nodePropertiesWritten                                                                                                                                                                                                                                                                                                                                                      37971
componentCount                                                                                                                                                                                                                                                        

In [155]:
gds.run_cypher("""
MATCH (t:Topic)
WHERE t.wccId IS NOT NULL
WITH t.wccId AS id, collect(t.description) AS themes
RETURN id, size(themes) AS themeCount, themes
ORDER BY themeCount DESC
LIMIT 10""")

Unnamed: 0,id,themeCount,themes
0,12,27048,"[""+"" operator in Cypher, "":Person"" label, "":`ACTED_IN`"" in Cypher, "">"" operator in Cypher, ""ACTED_IN in Cypher"", ""BOOLEAN"", ""INTEGER"" in Cypher, ""KNOWS"" relationship, ""LIST OF STRING"", ""LOAD CSV"" in Cypher, ""Movie"" node label, ""Movie"" node type, ""Neo4j Aura"", ""Nodes with label Person"", ""Non-null values"", ""Person"" label in Cypher, ""Person"" node, ""Query The Movie node"", ""RELATED_TO"" relationship, ""RELATIONSHIP"", ""RELATIONSHIP"" in Cypher, ""SIMILAR"" relationship, ""SIMILAR"" relationship type, ""STRING"", ""STRING"" in Cypher, ""T"" shaped pattern in Cypher, ""Valid"" node label, ""WITH in Cypher"", ""_labels"" parameter, ""_type"" property in Cypher, ""abs in Cypher"", ""acted_in.roles"" key, ""admin"" role, ""all in Cypher"", ""any Predicate in Cypher"", ""any in Cypher"", ""avg function in Cypher"", ""count() function in Cypher"", ""count(expression) in Cypher"", ""editor"" role, ""file"" in Cypher, ""file"" parameter, ""graphml"" in Cypher, ""id"" key in Cypher, ""key String"" in Cypher, ""localhost:7687"", ""localhost:7687"" connection, ""my-model"" in GDS Model Catalog, ""nEO4J"", ""name"" property, ""neo4j"", ""neo4j"" default database, ""node"" in Cypher, ""path"" parameter, ""read-write"", ""read-write"" ""primary"", ""read-write"" mode, ""refers to"" relationship in Neo4j, ""relationships"", ""roles"" property, ""system"" database, $NEO4J_HOME directory, $config in Cypher, $deletedNodes in Cypher, $parameter in Cypher, & — AND expression in Cypher, '<' for incoming relationships, '>' for outgoing relationships, 'City' node, 'Continuous' checkpoint policy, 'IN' type in Cypher, 'LINKS_TO' relationship, 'OVERLAP' metric, 'Page' node label, 'REDIRECTS' relationship, 'Volumetric' checkpoint policy, 'embedding' property, 'gds.graph.project' function, 'hour' in Cypher, 'k' nodes, 'millisecond' in Cypher, 'minute' in Cypher, 'month' in Cypher, 'week' in Cypher, 'weekYear' in Cypher, 'year' in Cypher, (:A) in Cypher, (:A:C) in Cypher, (:B) in Cypher, (:B:C) in Cypher, (:C) in Cypher, (:Keyword) node label, (:Page) node label, (:Patient) node, (:Person) label in Cypher, (:Person) node, (:Person) node label, (:Person:Engineering), (:Person:Engineering) in Cypher, (:Person:Engineering) node, ...]"
1,0,6294,"[! — NOT expression in Cypher, !tables command, ""(RELATIONSHIP)"" suffix, ""+Person"" in Cypher, ""-"" operator in Cypher, ""/Romance"" in Cypher, "":param"" command in Cypher, "":use system"" in Cypher, "":use"" command in Cypher, ""A"" ""B"" ""C"" nodes, ""Bob"" in Cypher, ""Broader than"" relationship in Neo4j, ""Charlie"" in Cypher, ""HAS A"" relationship, ""IN"" in Cypher, ""Job"" and ""Person"" nodes, ""KNOWS in Cypher"", ""KNOWS"" relationship type, ""NODE"", ""Neo4j<3GRANDstack"", ""Person"" node type, ""Something"" node label, ""Test"" node label, ""Transaction terminated."", ""_text"" property in Cypher, ""all.cypher"", ""asin function in Cypher"", ""classLabel"" property on nodes, ""d"" in Cypher, ""gds"" naming convention, ""h"" in Cypher, ""min"" value, ""motion pictures"" dataset, ""name"" key in Cypher, ""neo4j-data"" storage class, ""s"" in Cypher, ""system"" keyword in Cypher, ""title"" key, ""title"" property, ""value AnyValue"" in Cypher, $relationshipQuery in Cypher, $relationshipQuery parameter, ${config:<setting>} in Cypher, 'OUT' type in Cypher, 'Person' node, 'day' in Cypher, 'file' parameter in Cypher, 'myGraph' in Cypher, 'quarter' in Cypher, (:A:B) in Cypher, (:A:B:C) in Cypher, (:Company {name: ""Neo4j""}), (:Engineer) nodes, (:M)→[:R] syntax in Cypher, (:Person)-[:ACTED_IN]->(:Movie) in Cypher, (:Person:DevRel)-[:FOLLOWS]-(:Person:Field), (:Person:DevRel)-[:KNOWS]->(:Person:Product), (:Person:Engineering {name: ""Martin""}), (:Person:Engineering {name: ""Zhen""}), (:Person:Engineering) label, (:Person:Engineering) syntax, (:Person:Product), (:Person:Product) node, (:Person:Product) syntax in Cypher, (:Person:Product)-[:KNOWS]-(:Person:Product), (:Person:Sales), (:Person:Sales) node, (Person)-[KNOWS]→(Person), +NODES, --from-pagecache option, --pagecache parameter, --temp-path option, --temp-path parameter, --to-path-txn parameter, -12:00 timezone, -[:GENDER]-> in Cypher, -additional-config in Cypher, -additional-config=<file> option, -archive-cluster-state[=true, -archive-path parameter, -array-delimiter=<char>, -to-format parameter, -to-path-data parameter, .Net Driver version 4.3, .cypher file extension, .execute_query() keyword argument, /Engineering node filter, /backups mount, /etc/default/neo4j path, 0 or NaN, 0.75 percentile in Cypher, 0B default value, 1 Quadrillion Nodes & Relationships, 10,000 nodes per transaction, 12 nodes graph, 26 transactions, 2D point in Cypher, 3-core subgraph, 34 billion relationships, 50 graph algorithms, ...]"
2,672,213,"[gds.allShortestPaths.delta.mutate, gds.allShortestPaths.delta.stats, gds.allShortestPaths.delta.stats.estimate, gds.allShortestPaths.delta.stream, gds.allShortestPaths.delta.stream.estimate, gds.allShortestPaths.delta.write, gds.allShortestPaths.delta.write.estimate, gds.allShortestPaths.dijkstra.stream, gds.allShortestPaths.dijkstra.stream.estimate, gds.alpha.allShortestPaths.stream, gds.alpha.hits.mutate, gds.alpha.hits.mutate.estimate, gds.alpha.hits.stream.estimate, gds.alpha.hits.write.estimate, gds.alpha.knn.filtered.stats, gds.alpha.knn.filtered.stream, gds.alpha.maxkcut.mutate, gds.alpha.maxkcut.stream, gds.alpha.ml.linkPrediction.predict.stream, gds.alpha.ml.nodeClassification.predict.mutate, gds.alpha.nodeSimilarity.filtered.mutate, gds.alpha.nodeSimilarity.filtered.mutate.estimate, gds.alpha.nodeSimilarity.filtered.stats, gds.alpha.nodeSimilarity.filtered.stats.estimate, gds.alpha.nodeSimilarity.filtered.stream, gds.alpha.nodeSimilarity.filtered.write.estimate, gds.alpha.pipeline.nodeClassification.addRandomForest (deprecated), gds.alpha.pipeline.nodeRegression.addLinearRegression, gds.alpha.pipeline.nodeRegression.addRandomForest, gds.alpha.pipeline.nodeRegression.configureSplit, gds.alpha.pipeline.nodeRegression.create, gds.alpha.pipeline.nodeRegression.predict.mutate, gds.alpha.pipeline.nodeRegression.predict.stream, gds.alpha.pipeline.nodeRegression.selectFeatures, gds.alpha.pipeline.nodeRegression.train, gds.alpha.scc.stream, gds.alpha.scc.write, gds.alpha.sllpa.mutate, gds.alpha.sllpa.mutate.estimate, gds.alpha.sllpa.stats, gds.alpha.sllpa.stats.estimate, gds.alpha.sllpa.stream.estimate, gds.alpha.sllpa.write, gds.alpha.sllpa.write.estimate, gds.bellmanFord.mutate, gds.bellmanFord.mutate.estimate, gds.bellmanFord.stats.estimate, gds.bellmanFord.stream, gds.bellmanFord.stream.estimate, gds.bellmanFord.write.estimate, gds.beta.closeness.mutate, gds.beta.closeness.stream, gds.beta.collapsePath.mutate, gds.beta.graph.export.csv.estimate procedure, gds.beta.graphSage.mutate, gds.beta.graphSage.mutate.estimate, gds.beta.graphSage.stream.estimate, gds.beta.graphSage.train, gds.beta.graphSage.train procedure, gds.beta.graphSage.train.estimate, gds.beta.graphSage.write, gds.beta.graphSage.write.estimate, gds.beta.hashgnn.mutate, gds.beta.hashgnn.mutate.estimate, gds.beta.hashgnn.stream.estimate, gds.beta.influenceMaximization.celf.mutate, gds.beta.influenceMaximization.celf.mutate.estimate, gds.beta.influenceMaximization.celf.stream.estimate, gds.beta.k1coloring.mutate, gds.beta.k1coloring.mutate.estimate, gds.beta.k1coloring.stats, gds.beta.k1coloring.stats.estimate, gds.beta.k1coloring.stream.estimate, gds.beta.k1coloring.write.estimate, gds.beta.kmeans.mutate.estimate, gds.beta.kmeans.stats, gds.beta.kmeans.stats.estimate, gds.beta.kmeans.stream, gds.beta.kmeans.stream.estimate, gds.beta.kmeans.write.estimate, gds.beta.leiden.mutate, gds.beta.leiden.mutate.estimate, gds.beta.leiden.stats.estimate, gds.beta.leiden.stream, gds.beta.leiden.stream.estimate, gds.beta.leiden.write, gds.beta.leiden.write.estimate, gds.beta.modularityOptimization.mutate, gds.beta.modularityOptimization.mutate.estimate, gds.beta.modularityOptimization.stream.estimate, gds.beta.modularityOptimization.write, gds.beta.modularityOptimization.write.estimate, gds.beta.node2vec.mutate.estimate, gds.beta.node2vec.stream procedure, gds.beta.node2vec.stream.estimate, gds.beta.node2vec.write, gds.beta.node2vec.write.estimate, gds.beta.pipeline.linkPrediction.addFeature, gds.beta.pipeline.linkPrediction.addLogisticRegression, gds.beta.pipeline.linkPrediction.addRandomForest, ...]"
3,324,161,"[I'm sorry, but I can't access external content such as GitHub repositories or any other URLs. Therefore, I can't provide a summary or extract information from the document you're referring to. If you have specific content or questions about Neo4j, Cypher syntax, database administration, graphs, or data science, feel free to share the text or ask directly!, I'm sorry, but I cannot assist with decoding or interpreting encoded text without knowing its content or context. My capabilities are focused on providing information and answering questions based on clear, unencoded text, especially related to software like Neo4j, Cypher syntax, and related topics. If you have any questions or need information on those subjects, feel free to ask!, I'm sorry, but I need more context or content related to Neo4j, Cypher syntax, database administration, graphs, or data science to provide a list of phrases. Could you please provide more details or specify the document's content?, I'm sorry, but I need more context or content related to Neo4j, Cypher syntax, database administration, graphs, or data science to provide relevant phrases., I'm sorry, but I need more context or content to provide a summary or extract relevant phrases related to Neo4j, Cypher syntax, database administration, graphs, or data science., I'm sorry, but it seems there might have been a misunderstanding. Without a specific text or document related to Neo4j, Cypher syntax, database administration, graphs, or data science, I can't generate a list of phrases. Could you please provide more details or clarify your request?, I'm sorry, but it seems there might have been a misunderstanding. Without specific text or context related to Neo4j, Cypher syntax, database administration, graphs, or data science, I can't generate a list of phrases. Could you please provide more detailed information or clarify your request?, I'm sorry, but it seems there was a misunderstanding in your request. Could you please provide more details or clarify the text or topics you want summarized into phrases related to Neo4j software, Cypher syntax, database administration, graphs, or data science?, I'm sorry, but it seems there was a misunderstanding. Without a specific text or document related to Neo4j, Cypher syntax, database administration, graphs, or data science, I can't generate a summary or extract key phrases. Please provide the text or specify the topics you're interested in within these areas., I'm sorry, but the text you've provided appears to be an encoded certificate or a similar cryptographic text, not a document containing readable information about Neo4j, Cypher syntax, database administration, graphs, or data science. Therefore, I can't extract any relevant phrases related to these topics from this text. If you have any other text or questions related to Neo4j or related topics, feel free to share!, I'm sorry, but the text you've provided appears to be encoded or scrambled, and does not contain readable content related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Please provide a text related to these topics for analysis., I'm sorry, but the text you've provided appears to be encoded or scrambled, and does not contain readable content related to Neo4j, Cypher syntax, database administration, graphs, or data science. To provide a summary or extract topics, I would need a text that contains clear and direct information on these subjects. Please provide a text related to Neo4j or any of the mentioned topics for analysis., I'm sorry, but the text you've provided seems to be encoded or scrambled, and it doesn't contain any readable information or specific topics related to Neo4j, Cypher syntax, database administration, graphs, or data science. Could you please provide a text that contains specific information or questions about these topics?, I'm sorry, but without specific content related to Neo4j, Cypher syntax, database administration, graphs, or data science, I cannot generate a list of phrases. Please provide relevant text or details for analysis., I'm sorry, but you haven't provided a specific document or text for me to summarize. Could you please provide the text or details about the Neo4j software, Cypher syntax, database administration, graphs, or data science that you would like summarized?, I'm sorry, but you haven't provided a specific text or document for me to summarize. Could you please provide the text or specify the topics you're interested in related to Neo4j, Cypher syntax, database administration, graphs, or data science?, I'm sorry, but you haven't provided any specific examples or content related to Neo4j, Cypher syntax, database administration, graphs, or data science for me to summarize. Could you please provide more details or examples?, I'm sorry, but you haven't provided any specific text or details related to Neo4j, Cypher syntax, database administration, graphs, or data science for me to summarize. Could you please provide more information or a specific document related to these topics?, I'm sorry, but you haven't provided any specific text or document for me to summarize. Could you please provide the text or details about the document related to Neo4j, Cypher syntax, database administration, graphs, or data science?, I'm sorry, but you haven't provided any text or document for me to summarize. Could you please provide the text or specify the topics you're interested in related to Neo4j, Cypher syntax, database administration, graphs, or data science?, I'm sorry, but you haven't provided any text related to Neo4j, Cypher syntax, database administration, graphs, or data science for me to summarize. Could you please provide the relevant text or details?, Insufficient data provided for content analysis. Please provide more detailed text related to Neo4j software, Cypher syntax, database administration, graphs, or data science., No relevant information for Neo4j, Cypher syntax, database administration, graphs, or data science., No relevant information provided regarding Neo4j, Cypher syntax, database administration, graphs, or data science., Not applicable to Neo4j or Cypher topics, Not applicable to Neo4j software or related topics, The provided text appears to be a base64 encoded string rather than a document containing information about Neo4j, Cypher syntax, database administration, graphs, or data science. Without decoding and analyzing the actual content, it's not possible to extract relevant topics or phrases related to Neo4j software or any of the specified areas. Please provide the decoded text or a direct description of the content for analysis., The provided text does not contain enough information related to Neo4j software, Cypher syntax, database administration, graphs, or data science to generate a list of topics. Please provide a more detailed document related to these areas for analysis., The provided text does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be an excerpt of SVG code and a partial introduction to a section discussing the potential enhancements of GDS workflows by LLMs, which does not match the requested topics., The provided text does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Therefore, I cannot generate a list of phrases based on the given instructions., The provided text does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Therefore, I cannot generate relevant phrases based on the given instructions., The provided text does not contain sufficient information related to Neo4j software, Cypher syntax, database administration, graphs, or data science to extract relevant phrases. It appears to be a snippet of a URL and some JSON metadata, likely from a GitHub repository related to Neo4j but without specific details on the topics requested. Please provide a text with explicit content on Neo4j or related topics for analysis., This document does not contain information directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j or graph databases., This document does not contain information directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science., This document does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j., This document does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science., This document does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be SVG (Scalable Vector Graphics) code, which is unrelated to the requested topics., This document does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be about Azure Kubernetes Service (AKS) cluster creation commands., This document does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be an SVG path data, which is unrelated to the requested topics., This document does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be an SVG path data, which is used for defining the shape of an element in SVG (Scalable Vector Graphics)., This document does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be an SVG path data, which is used for defining the shape of an element in vector graphics., This document does not contain relevant information about Neo4j software, Cypher syntax, database administration, graphs, or data science., This document does not pertain to Neo4j, Cypher syntax, database administration, graphs, or data science directly. It appears to describe JSONPath syntax, which is used for querying JSON documents, not related to Neo4j or Cypher query language., This input does not contain any information related to Neo4j software, Cypher syntax, database administration, graphs, or data science., This input does not contain enough context or information related to Neo4j, Cypher syntax, database administration, graphs, or data science to generate a relevant response. Please provide more detailed content or a specific question related to these topics., This input does not contain enough information related to Neo4j software, Cypher syntax, database administration, graphs, or data science to generate a list of topics., This input does not contain enough information to extract topics related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Please provide more detailed content for analysis., This input does not contain information directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science in a manner that allows for the extraction of specific phrases or topics. Please provide a text that directly relates to these areas for analysis., This input does not contain information directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context required. Please provide text relevant to these topics for analysis., This input does not contain information directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be a data snippet possibly from a database or dataset, not specific to Neo4j or its query language, Cypher., This input does not contain information directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be a snippet of data possibly related to book information, which does not align with the requested topics for summarization. Please provide content relevant to Neo4j or related technical topics for an appropriate summary., This input does not contain information directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be an HTML snippet with SVG content, which is not relevant to the requested topics., This input does not contain information directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Therefore, I cannot generate a list of phrases based on the provided text., This input does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science., This input does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be a list of UUIDs., This input does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be an SVG path data and HTML anchor tags, which are not relevant to the requested topics., This input does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be an SVG path data, which is unrelated to the requested topics. Please provide a text related to Neo4j or the specified areas for an appropriate summary., This input does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be an SVG path definition, which is unrelated to the requested topics. Please provide relevant text for analysis., This input does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be an SVG path element, which is used in HTML and SVG graphics, not related to the requested topics., This input does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Please provide relevant content for analysis., This input does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Therefore, I cannot generate a list of phrases based on the provided text., This input does not contain relevant information about Neo4j software, Cypher syntax, database administration, graphs, or data science., This input does not contain relevant information about Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be a fragment of SVG code and HTML structure, which is not related to the requested topics. Please provide a text related to Neo4j or the specified areas for an appropriate response., This input does not contain text related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be a JSON object with numerical values, which does not provide information relevant to the requested topics., This input does not contain text related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be a list of numerical arrays and does not provide information relevant to generating a list of phrases on the requested topics., This input does not provide enough context or content related to Neo4j, Cypher syntax, database administration, graphs, or data science to generate a relevant response., This input does not provide enough context or information related to Neo4j, Cypher syntax, database administration, graphs, or data science to generate a relevant response. Please provide more detailed content or a specific question related to these topics., This prompt does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Therefore, I cannot generate a list of phrases based on the provided text., This prompt does not contain information related to Neo4j, Cypher syntax, database administration, graphs, or data science., This prompt does not contain technical information or specific details related to Neo4j software, Cypher syntax, database administration, graphs, or data science that can be summarized into phrases as requested., This prompt does not provide enough information to generate a list of phrases related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Please provide more detailed content or specific topics for analysis., This request does not contain content directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j. Therefore, I cannot generate a list of phrases based on the provided text., This request does not contain information directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j. Therefore, I cannot generate a list of phrases based on the provided text., This request does not contain information directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be a data structure or document describing books, including attributes like author, title, genre, price, publish date, and description. Therefore, I cannot provide phrases based on the provided text that would relate to Neo4j or the specified topics., This request does not contain information directly related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Therefore, I cannot generate a list of phrases based on the provided text., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in a direct manner that allows for the extraction of specific topics or phrases related to technical aspects of the software or query language., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in a direct manner to extract relevant phrases., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in a direct manner. It appears to be a list of articles from a publication. Please provide content directly related to Neo4j or the specified topics for an accurate response., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in a direct manner. Therefore, I cannot generate a list of phrases based on the provided text., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in a technical context that would allow for the extraction of relevant phrases., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j or graph databases. Therefore, I cannot generate a list of phrases based on the provided text., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j. Please provide a text related to these topics for an accurate response., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j. Please provide content relevant to these topics for an appropriate response., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j. Please provide relevant content for analysis., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j. Please provide relevant text for analysis., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j. Please provide text related to these topics for an appropriate response., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j. Please provide text related to these topics for an appropriate summary., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j. Therefore, I cannot generate a list of phrases based on the provided text., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of Neo4j. Therefore, I cannot provide relevant phrases based on the provided text., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science in the context of graph databases. Therefore, I cannot provide relevant phrases based on the provided text., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be about Azure network commands for creating subnets., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be data related to movies. Please provide content relevant to Neo4j or related topics for assistance., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It appears to be related to AWS credentials and Kubernetes commands, which are outside the specified topics of interest., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It focuses on Google Cloud Platform (GCP) resources and deployment configurations. Please provide content relevant to Neo4j or related topics for an appropriate response., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. It is focused on Azure command-line instructions for creating subnets within a virtual network. Please provide content relevant to Neo4j or related topics for an appropriate response., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Please provide a document or text related to these topics for an appropriate response., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Please provide a document or text related to these topics for an appropriate summary., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Please provide a document or text related to these topics for analysis., This request does not contain information related to Neo4j software, Cypher syntax, database administration, graphs, or data science. Please provide a document or text relevant to these topics for an appropriate response., ...]"
4,4304,111,"['wgs-84-3d' name, 2D CRS, 2D and 3D points, 2D and 3D spatial POINT types, 2D or 3D points, 2D point in Cartesian, 2D point in Cartesian CRS, 2D point object, 2D point representation, 2D point return type, 2D points calculation, 2D/3D Cartesian space, 3D CRS, 3D Cartesian CRS, 3D Cartesian points, 3D and 2D points comparison, 3D point comparison, 3D point object, :Product in Cypher, Assumed CRS WGS-84-3D, Assumed Cartesian, Assumed Cartesian-3D CRS, CRS (Coordinate Reference System), CRS assumed WGS-84, CRS support in Cypher, CRS: 'wgs-84' in Cypher, CRS: wgs-84, Cartesian 2D, Cartesian 2D point, Cartesian 2D point creation, Cartesian 2D points, Cartesian 3D point creation, Cartesian 3D point in Cypher, Cartesian 3D points, Cartesian CRS, Cartesian and geographic systems, Cartesian coordinate reference systems, Cartesian coordinate system, Cartesian coordinates, Cartesian coordinates acceptance, Cartesian product, Cartesian product in Cypher, Cartesian-3D, CartesianPoint in Cypher, CartesianProduct, CartesianProduct operator, CartesianProductWarning, Cartesian_3D coordinates, Coordinate Reference System, Coordinate Reference System (CRS), Coordinate Reference Systems, Coordinate Reference Systems (CRS), Coordinate reference system, Coordinate system, Coordinate system identification, Coordinate systems incomparability, Default Cartesian CRS, Geographic CRS, Geographic CRS requirement, Geographic CRSs, Geographic coordinate reference systems, Geographical information in graphs, Location data, Location data in graphs, Location metadata, Optional string 'cartesian-3D', Same Coordinate Reference System, Spatial data, Spatial data analysis, Spatial data handling, Spatial data in graphs, Spatial point assignment, Spatial point types, Spatial points comparison, Spatial type POINT, Spatial values, Spatial values comparison, WGS 84 2D, WGS 84 2D Spatial, WGS 84 2D point, WGS 84 2D point creation, WGS 84 2D points, WGS 84 3D, WGS 84 3D Spatial, WGS 84 3D point, WGS 84 CRS, WGS 84 geographic coordinate system, WGS-84, WGS-84 3D point, WGS-84 CRS, WGS-84 as optional string, WGS-84-3D, WGS-84-3D for 3D spatial data, WGS_84 coordinates, WGS_84_3D coordinates, cartesian, cartesian coordinate system, cartesian product, coordinate reference system, crs: 'wgs-84' in Cypher, ...]"
5,6906,104,"[Analyze fraud patterns, Bank fraud detection, Community-based fraud detection, Connected Fraud Data, Credit card fraud detection, Criminal fraud detection, Data science in fraud prevention, Expanding Fraud Communities, Exploring Connected Fraud Data, Exploring Fraud Detection, Exploring Fraud Detection series, Exploring connected fraud data, Fraud Detection, Fraud Detection Techniques, Fraud Detection with Graph Data Science, Fraud Detection with Neo4j, Fraud Indicators, Fraud Risk Labeling, Fraud accounts, Fraud classification, Fraud classification use cases, Fraud communities, Fraud detection, Fraud detection challenges, Fraud detection example, Fraud detection in Neo4j, Fraud detection in financial networks, Fraud detection in transactions, Fraud detection methodologies, Fraud detection methods, Fraud detection patterns, Fraud detection series, Fraud detection use case, Fraud detection workflow, Fraud patterns, Fraud risk accounts, Fraud risk communities, Fraud risk evaluation, Fraud risk identification, Fraud risk labeling, Fraud risk labels, Fraud risk ranking, Fraud risk ratio, Fraud use case, FraudUser accounts analysis, Fraudulent activity identification, Good for fraud detection, Graph analysis for fraud detection, Graph analysis for fraud prevention, Graph databases for fraud analysis, High probability fraud risks, High risk fraud communities, Identify fraud patterns, Identifying Fraud Accounts, Identifying fraud patterns, Identifying fraud risks, Identifying fraudulent activities, Identifying fraudulent transactions, Labeled fraud accounts, Labeled fraud risks, Labeling Fraud Risk User Accounts, Labeling fraud risk accounts, Labeling user accounts, Machine learning for fraud detection, Machine learning for fraud prediction, Machine learning in fraud detection, Neo4j for Fraud Detection, Newly identified fraud risks, Predict fraud patterns, Predict fraud risk, Predict fraud risks, Predicting Fraud Risk, Predicting Fraud Risk Accounts, Predicting fraud risk, Predicting fraud risk label, Predictive analytics in fraud detection, Proactive fraud detection, Proactively predict fraud, Stringent rules for fraud detection, Sum of fraudRiskRatios, Understanding fraud patterns, Unlabeled high-probability fraud risk predictions, average fraud risk, credit card fraud patterns, detecting fraud in networks, device fraud patterns, explore graph for fraud patterns, fraud activity detection, fraud detection, fraud detection analysis, fraud detection improvement, fraud detection scenario, fraud detection series, fraud detection use case, fraud detection with graphs, fraud risk accounts, fraud risk label, fraud risk users, fraudulent user accounts, labeled fraud communities, ...]"
6,3887,96,"[Applications of LLMs, Build LLM App, Build Your First LLM App, Building LLM Apps, Building LLM applications, Building LLM-powered applications, Choosing LLM models, Defining LLM, Enhancements with LLM, Enhancing LLM accuracy, Evaluating LLMs, Experimental application of LLM, Fine-Tuning LLMs, Fine-tune open-source LLM, Fine-tuning LLM, Fine-tuning LLM model, Fine-tuning LLM models, Fine-tuning LLMs, Finetuning LLM with examples, Generalization capabilities of LLMs, Improving LLM results, Instruction following in LLM, Integrate LLM workflows, Interacting with LLM, LLM, LLM Provider integration, LLM applications, LLM backed applications, LLM call for answers, LLM capabilities, LLM context enhancement, LLM enhanced applications, LLM enhancement, LLM evaluation, LLM fine-tuning, LLM for model explanation, LLM input combination, LLM integrations in Neo4j, LLM limitations, LLM model, LLM model fine-tuning, LLM model interaction, LLM models, LLM parameter, LLM prompt strategies, LLM prompts, LLM prompts integration, LLM providers, LLM reasoning, LLM response, LLM responses column, LLM space, LLM syntax, LLM usage, LLM-Powered Applications, LLM-powered applications, LLMs, LLMs Fine-Tuning, LLMs Integration, LLMs Limitations, LLMs and GDS integration, LLMs as enhancements, LLMs assistance, LLMs capabilities, LLMs complementarity, LLMs definition, LLMs enhance recommendation systems, LLMs in recommendation algorithms, LLMs in research, LLMs integration, LLMs issues, LLMs limitations, Lack of open-source LLM models, Limitations of LLMs, Limited knowledge in LLMs, Local LLM hosting, Local LLMs, Open Source LLMs, Open-source LLM models, Overcoming LLM limitations, Overcoming LLMs Limitations, Practical uses of LLMs, Providing instructions to LLM, Quantized LLM applications, Quantized Open-Source LLM Applications, Quantized open-source LLM applications, Retrieval-augmented LLM applications, Suitable for LLM applications, Tutorial on LLM App, Use cases for LLM, Use cases in LLM applications, Use in LLM applications, finetuning open-source LLMs, future of LLM-based applications, potential LLM integration, retrieval-augmented LLM workflow]"
7,1811,64,"[Access cloud provider bucket, Access cloud resources, Aura cloud optimization, Aura cloud service, Aura fully managed cloud service, Aura managed cloud service, Azure console, Azure deployment, Azure region deployment, Azure resources configuration, Azure resources deployment, Azure storage configuration, Clone To Existing instance, Clone to Existing, Cloning an instance, Cloning instances, Cloud bucket storage, Cloud computing, Cloud computing impact, Cloud deployment, Cloud deployment (GKE, AWS, AKS), Cloud deployment options, Cloud deployments, Cloud environment limitations, Cloud environment resources, Cloud environment specifications, Cloud environments, Cloud environments support, Cloud infrastructure, Cloud instances, Cloud object storage, Cloud object storage access, Cloud provider bucket backup, Cloud provider buckets, Cloud provider console, Cloud provider marketplaces, Cloud provider: AWS, Cloud resources, Cloud resources deployment, Cloud services integration, Cloud storage, Cloud storage backup, Cloud storage uploads, Cloud-based instance, Cloud-based version, Cluster instance configuration, Cluster instance location changes, Cluster instances, Database instance in the cloud, Dedicated cloud infrastructure, Deploy cloud resources, Deployed cloud resources, Deploys cloud resources, Fully managed cloud service, Integration with cloud services, Local and cloud instances, Local or cloud instance, Managed cloud service, Other cloud deployment options, Shift to cloud computing, cloud environment, cloud environment considerations, cloud environment limitations, free cloud instance]"
8,535,61,"[Build Docker image, Configure settings in Docker, Consult Docker documentation, Custom container image, Docker, Docker Compose, Docker Compose installation, Docker Compose service configuration, Docker Compose setup, Docker best practices, Docker compose, Docker compose setup, Docker configuration, Docker container, Docker container configuration, Docker container image, Docker container integration, Docker container networking, Docker container setup, Docker containers, Docker containers management, Docker deployment, Docker documentation, Docker environment variables, Docker host deployment, Docker hosts, Docker image, Docker image registry, Docker images, Docker integration, Docker maintenance operations, Docker network orchestration, Docker official documentation, Docker orchestration, Docker registry configuration, Docker security practices, Docker settings, Docker specific operations, Docker usage, Docker user permissions, Docker-compose, Docker-compose command, Docker-compose.yml preparation, Docker-specific configuration, Docker-specific operations, Docker-specific settings, Dockerfile, Dockerfile example, Introduction to Docker, Load docker image, Local deployment via Docker, Multiple Docker hosts, Naming Docker container, Official Docker documentation, Run Docker container, Run docker image locally, Security settings in Docker, Software deployment with Docker, User and group settings in Docker, Uses Docker, Using Docker services]"
9,279,60,"[""randomWalk"" input, ""randomWalk"" sampling method, Biased random walk, Classic random walk, Generating random walks, Number of random walks, Personalized random walk, Probability of walking, RWR (Random Walk with Restarts), Random Walk, Random Walk Restart sampling, Random Walk Sampling, Random Walk With Restarts, Random Walk algorithm, Random Walks, Random Walks comparison, Random Walks in graphs, Random Walks with Restarts, Random walk from nodes, Random walk sampling, Random walk simulation, Random walk with restarts, Random walk with restarts sampling, Random walk with restarts sampling algorithm, Random walking, Random walks, Random walks computation, Random walks generation, Random walks in graph, Random walks in graphs, Random walks sampling, Random walks with restarts, RandomWalk algorithm, RandomWalk stats, RandomWalksWithRestarts algorithm, Run RandomWalk in stats mode, Second order random walks, Second-order random walks, Weighted random sampling, Weighted random walks, Weights in random walks, adjusting random walk behavior, probabilities in random walks, random walk, random walk in graphs, random walk method, random walk probabilities, random walk restarts, random walk sampling, random walk simulation, random walk tendency, random walk with restarts, random walk with restarts sampling, random walks, random walks in training, random walks per node, random walks training, randomWalk sampling, simple random walks, steps in random walk]"


# Refactor similarity relationships for use with Leiden

In [156]:
gds.graph.relationships.toUndirected(g_topics, 
                                     relationship_type = "IS_SIMILAR", 
                                     mutate_relationship_type = "UNDIRECTED_SIMILAR", 
                                     aggregation = "MAX")

inputRelationships                                                                                                                                                                                                               169653
relationshipsWritten                                                                                                                                                                                                             245224
mutateMillis                                                                                                                                                                                                                          0
postProcessingMillis                                                                                                                                                                                                                  0
preProcessingMillis                                                     

In [157]:
gds.set_database("neo4j")
relationships = gds.graph.relationshipProperties.stream(g_topics, "similarity", ["UNDIRECTED_SIMILAR"])

Undirected relationships stream once in each direction. Drop the half of relationships where targetNodeId < sourceNodeId

In [158]:
relationships = relationships[relationships['sourceNodeId']<relationships['targetNodeId']]

The relationship weights are bunched between 0.8 and 1.0. Transform them so that they range from 0.0 to 1.0.

In [159]:
gds.run_cypher("""
UNWIND $rows AS row
WITH gds.util.asNode(row['sourceNodeId']) AS source,
gds.util.asNode(row['targetNodeId']) AS target,
row['propertyValue'] AS similarity
WITH gds.graph.project("reweight_topics",
source,
target,
{relationshipType: "IS_SIMILAR",
relationshipProperties: {similarity: (similarity-0.8)/0.2}},
{undirectedRelationshipTypes: ["IS_SIMILAR"]}) AS g
RETURN g.graphName AS graphName, g.relationshipCount AS relationshipCount, g.nodeCount AS nodeCount""",
               {"rows": relationships.to_dict("records")})

ClientError: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke function `gds.graph.project`: Caused by: java.lang.IllegalArgumentException: Graph reweight_topics already exists}

In [None]:
g_topics2 = gds.graph.get("reweight_topics")

# Test Leiden at different values of gamma

In [161]:
def test_leiden_gamma(gamma):
    property_name = f"leidenGamma{gamma}"
    try:
        gds.graph.nodeProperties.drop(g_topics2, property_name)
    except:
        pass
    gds.leiden.mutate(g_topics2,
                      mutateProperty = property_name,
                      relationshipTypes = ["IS_SIMILAR"],
                      relationshipWeightProperty = "similarity",
                      gamma=gamma)
    biggest_communities = gds.run_cypher("""
        CALL gds.graph.nodeProperty.stream("reweight_topics", $propName)
        YIELD nodeId, propertyValue
        WITH propertyValue, collect(nodeId) AS ids, count(*) as themeCount
        ORDER BY themeCount desc
        LIMIT 20
        RETURN themeCount, [id in ids | gds.util.asNode(id).description] AS themes""",
                                        {"propName": property_name})
    single_doc_communities = gds.run_cypher("""
        CALL gds.graph.nodeProperty.stream("reweight_topics", $propName)
        YIELD nodeId, propertyValue
        WITH gds.util.asNode(nodeId) as t, propertyValue AS leidenId
        MATCH (d)-[:HAS_TOPIC]->(t)
        WITH leidenId, count(distinct d) AS docCount
        WITH leidenId, CASE WHEN docCount = 1 then 1 else 0 end AS singleDocCommunity
        RETURN count(*) AS communityCount, 
        sum(singleDocCommunity) AS singleDocCommunities, 
        sum(singleDocCommunity) * 1.0 / count(*) AS singleDocCommunityPercent""",
                                           {"propName": property_name})
    return {"biggest_communities": biggest_communities, "single_doc_communities": single_doc_communities}
                                           


In [187]:
gamma_tests = {gamma: test_leiden_gamma(gamma) for gamma in [4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0]}

In [188]:
for gamma, result in gamma_tests.items():
    print(f"Gamma value: {gamma}")
    print(result['single_doc_communities'])

Gamma value: 4.0
   communityCount  singleDocCommunities  singleDocCommunityPercent
0            2696                    92                   0.034125
Gamma value: 8.0
   communityCount  singleDocCommunities  singleDocCommunityPercent
0            2813                    92                   0.032705
Gamma value: 16.0
   communityCount  singleDocCommunities  singleDocCommunityPercent
0            2962                    92                    0.03106
Gamma value: 32.0
   communityCount  singleDocCommunities  singleDocCommunityPercent
0            3181                    92                   0.028922
Gamma value: 64.0
   communityCount  singleDocCommunities  singleDocCommunityPercent
0            3471                    92                   0.026505
Gamma value: 128.0
   communityCount  singleDocCommunities  singleDocCommunityPercent
0            3920                    92                   0.023469
Gamma value: 256.0
   communityCount  singleDocCommunities  singleDocCommunityPercent
0  

In [210]:
for gamma, result in gamma_tests.items():
    print(f"Gamma value: {gamma}")
    print(result['biggest_communities']['themeCount'].head())

Gamma value: 4.0
0    950
1    752
2    624
3    360
4    360
Name: themeCount, dtype: int64
Gamma value: 8.0
0    648
1    571
2    548
3    350
4    332
Name: themeCount, dtype: int64
Gamma value: 16.0
0    518
1    349
2    275
3    268
4    232
Name: themeCount, dtype: int64
Gamma value: 32.0
0    252
1    181
2    179
3    170
4    163
Name: themeCount, dtype: int64
Gamma value: 64.0
0    122
1    119
2    119
3    118
4    116
Name: themeCount, dtype: int64
Gamma value: 128.0
0    86
1    81
2    68
3    68
4    66
Name: themeCount, dtype: int64
Gamma value: 256.0
0    49
1    49
2    47
3    46
4    45
Name: themeCount, dtype: int64
Gamma value: 512.0
0    36
1    33
2    32
3    31
4    30
Name: themeCount, dtype: int64
Gamma value: 1024.0
0    21
1    20
2    20
3    19
4    19
Name: themeCount, dtype: int64


In [211]:
pd.set_option('display.max_colwidth', None)

## Examine the largest clusters at various gamma values

In [212]:
gamma_tests[32.0]['biggest_communities']

Unnamed: 0,themeCount,themes
0,252,"[Role in Cypher, skip in Cypher, sub in Cypher, write in Cypher, in Cypher, jobId in Cypher, job type in Cypher, post-process in Cypher, AND in Cypher, DOWN in Cypher, second in Cypher, WHEN in Cypher, THEN in Cypher, END in Cypher, ""abs in Cypher"", OUT in Cypher, MENTORS in Cypher, Cypher in Cypher, Person in Cypher, Dropped with Cypher, age in Cypher, expression in Cypher, ONLINE in Cypher, Label: Person in Cypher, released in Cypher, direction in Cypher, out-degree in Cypher, Location in Cypher, p90 in Cypher, Replaced in Cypher, CEILING in Cypher, GET in Cypher, division in Cypher, Division in Cypher, collect in Cypher, http in Cypher, Rels in Cypher, Inlined in Cypher statements, Collect in Cypher, asin in Cypher, IP address in Cypher, Prefix in Cypher, Existence in Cypher, SHOW SETTING in Cypher, 0-9 in Cypher, penalty in Cypher, Apply in Cypher, Repeat in Cypher, Side-effects in Cypher, ""Charlie"" in Cypher, writing Cypher, TEXT in Cypher, traverse in Cypher, Log in Cypher, BRIEF in Cypher, road in Cypher, Dashes in Cypher, Cypher result encapsulation, source in Cypher, done in Cypher, Cypher, relationshipsWritten in Cypher, DEP in Cypher, stdev in Cypher, stDevP function in Cypher, Sleep in Cypher, rel in Cypher, title in Cypher, description in Cypher, SET in Cypher, short description in Cypher, year of birth in Cypher, acting roles in Cypher, WITH HEADERS in Cypher, FAMILY in Cypher, best split in Cypher, route in Cypher, CREATED in Cypher, Connect in Cypher, IN TRANSACTIONS in Cypher, Reading patterns in Cypher, AS COPY OF in Cypher, READ in Cypher, INCOMING in Cypher, SemiApply in Cypher, internal activity in Cypher, FLOAT in Cypher, top five ordering in Cypher, FOLLOWS in Cypher, list deconstruction in Cypher, Writing in Cypher, grants in Cypher, WRITE in Cypher, Equality in Cypher, DIRECTED in Cypher, > in Cypher, latitude in Cypher, rightmost characters in Cypher, Called in Cypher, Distance in Cypher, ...]"
1,181,"[properties on nodes, sets node properties, String type node properties, Returning node properties, Calculate node properties, Based on node properties, comparing node properties, algorithms for node properties, Based on other node properties, numeric node properties, Scale node properties, Defining node properties, Unique properties on node, Comparing nodes, Configured node properties, Specifying node properties, compute node properties, Graph node properties, Renaming node properties, Add node properties, Write node properties, Streams node properties, write node properties, written node properties, streaming node properties, Writes node properties, writeNodeProperties config, Normalizing node properties, ignore node properties, Select node properties, Predict node property values, Generate node properties, stream node properties procedure, compress node properties, stream node properties, writeNodeProperties function, writing node properties, Deriving node properties, Index on node property, sets node property, Unique node property, adding node properties, Additional node properties, configurations for node properties, Query node properties, Setting a property on a node, Accessing node property, Added property for nodes, Supported node property types, Optional node properties, Transforming node properties, Adding node properties, Removing node properties, Encoding node properties, binary node properties, Compute node properties, Loading node properties, Set node properties, Patient node properties, Target node properties, Uniform node properties, Drop node properties, Incorporate node properties, project node properties, Incrementing node properties, Scaled node properties, Unspecified node properties, hide properties or nodes, Training without node properties, Set property, virtual node properties, New node properties, real node properties, distinct node property values, Numeric node properties, Set JSON property on node, Remove node properties, Access to node properties, Setting node properties, Focus on node properties, Updating node properties, Incorporates node properties, Removed node properties, modifying node properties, Set node property, add node property, Ensure node property, SetProperty on node or relationship, Stream node properties, Add node property steps, Sets property on node, Update node property, Delete node property, Add node property, Access property node, Convert property to node, Update a node property example, Update a node property, Class node property, Get node property, ...]"
2,179,"[GDS admins, Configuring GDS, GDS enabled feature, GDS configuration, GDS community edition limitations, GDS library limitations, GDS integration, GDS client installation, GDS compatibility, GDS schema, Relationship properties in GDS, GDS implementation, GDS 2.3, GDS Cypher procedure API, GDS procedure API, gds.graph.exists, GDS Graph Exists capabilities, GDS performance optimization, GDS library installation, GDS Algorithm performance, GDS workflow, GDS algorithms, GDS library compatibility, GDS performance tuning, GDS library configuration, GDS logging capabilities, GDS edition information, GDS new approach, GDS workloads, GDS plugin, server-side GDS, GDS beta functions, Install GDS library, GDS algorithms execution modes, GDS Algorithm Families, Using GDS, GDS Node Similarity Algorithms, Benefits of GDS, GDS users, Recommendation for GDS installation, GDS software license, GDS software, GDS license configuration, GDS new versions, GDS administrator, GDS supports multiple users, Users familiar with GDS, GDS Configuration, Running GDS procedures, GDS write procedures, Access GDS resources, GDS CE usage, GDS API, ongoingGdsProcedures, GDS technique, GDS resources, GDS graph algorithms, GDS model discovery, Regression in GDS, Scaling with GDS, using GDS, GDS algorithms completion, Overview of GDS, GDS Fundamentals, GDS representation, Install GDS manually, Projecting into GDS, GDS Flight server, Common usage patterns in GDS, GDS versions 2.0 and later, GDS version 2.0 or higher, GDS version 2.2+, GDS 2.1+, GDS Python client connection, GDS client connection setup, GDS installed, GDS in cluster deployment, GDS cluster configuration, GDS plugin deployment, GDS configuration settings, Getting Started with GDS Client, GDS plugin configuration, GDS Manual, GDS library version 2.0, GDS preserves parallel relationships, GDS Algorithm Performance, GDS client, GDS license key, GDS support, GDS usage, standalone GDS database, GDS Python client syntax, GDS application, GDS export, GDS Python Client manual, Data into GDS server, Enable GDS, GDS, Recent version of GDS, GDS documentation, ...]"
3,170,"[Understanding Neo4j, Running Neo4j, error in neo4j.log, debug information in Neo4j, Default Neo4j user/group, Community influence in Neo4j, Local Neo4j usage, Accessing Neo4j Terminal, TypeError in Neo4j, Running Neo4j with APOC, Playing with Neo4j code, Reproducible Neo4j examples, Writing Neo4j code, Migration in Neo4j, Discord for Neo4j, Degrees in Neo4j, Instantiating Neo4j driver, Error message in Neo4j, Stop Neo4j, Creating Neo4j session, Exposing Neo4j database online, Monitoring Neo4j instance, ""Neo4j<3GRANDstack"", GRANDstack with Neo4j, Standalone Neo4j instance, Restart Neo4j after changes, Introduction to Neo4j, CPU requirements for Neo4j, Local Neo4j instance, Configure Neo4j with scripts, Enjoying Neo4j, New developer blog for Neo4j, Population of Neo4j database, Using Neo4j notebooks, Contact Neo4j, Monitoring Neo4j, Restart Neo4j, Starting Neo4j, Instantiating from Neo4j driver, Ideal for learning Neo4j, run Neo4j, System Requirements for Neo4j, Running Neo4j as console, Configure Neo4j auto-start, Running Neo4j interactively, Become a Neo4j Partner, Technology Partners for Neo4j, Careers at Neo4j, Support for Neo4j, Global Neo4j community, Scrape Neo4j documentation, Install Neo4j as root, Publishing with Neo4j, Get Started with Neo4j for Free, Free sandbox for Neo4j, Free Neo4j start, Invoke as neo4j user, Stop Neo4j command, Point type in Neo4j, Joining Neo4j servers, Start Neo4j, Practitioner's Guide to Neo4j, Starting with Neo4j, Blog articles about Neo4j, ""neo4j"", instantiation of Neo4j, Install Neo4j locally, Python driver for Neo4j, Intro to Neo4j, Writing a Neo4j book, Prioritize development in Neo4j, Classes in Neo4j, Technology stacks in Neo4j, Set of Neo4j classes, Developing with Neo4j, Bite Size Neo4j for Data Scientists, Practical sessions on Neo4j, grounding in Neo4j, Reading Neo4j research, Management Environment for Neo4j, Professional services for Neo4j, Create Neo4j service, Check Neo4j status, Monitor Neo4j, Using Neo4j applications, Solution Partners for Neo4j, Install Neo4j adapter, Running Neo4j examples, Managed Neo4j instances, Stop self-managed Neo4j database, Logging Neo4j Connector, Cypher and Neo4j, Standalone Neo4j DBMS, Feedback on Neo4j usage, Evolution of Neo4j applications, Monthly Neo4j events, Meetup for Neo4j, PropertyNeo4jperson, Questions about Neo4j Dash, Rows in Neo4j, ...]"
4,163,"[deleted nodes, duplicate nodes, Customer nodes, Candidate nodes, weighted nodes, sparse nodes, Two nodes connected, Original nodes, k unique nodes, Uniquely identifiable nodes, Connecting nodes, Connecting two nodes, merge nodes, connecting nodes, Merge nodes, connect nodes, Super nodes, Super Nodes, red nodes, green nodes, green nodes validated, Created nodes, Unique nodes, Filter nodes, relevant nodes, Indexed nodes, real nodes, Unique lists of nodes, Ordered list of nodes, Parent nodes, Root node, list of nodes, path of nodes, Specific nodes, Collection of nodes, List of documents to nodes, See all nodes, list of target nodes, Sets of nodes, Node feature information, node features, Distinct nodes, information about nodes, distinct nodes, Node nodes, Limited information on nodes, connected nodes, Well connected nodes, reachable nodes, Connected nodes, returns connected nodes, Disconnected nodes, Connected nodes issue, disconnected nodes, low-connection nodes, Highly connected nodes, unseen nodes, Finding connected nodes, Matching nodes, super nodes, missing nodes, Returns nodes individually, returns nodes, Merges nodes together, aggregation of nodes, Person nodes, Subset of nodes, clone nodes, clone nodes with properties, Clone nodes, collapse nodes, intermediate nodes, Intermediate nodes, Intermediary node, processed nodes, Tag nodes, classified nodes, DISTINCT Nodes, Green nodes, Stage nodes, 'k' nodes, Standin nodes, non-existent nodes, target nodes, Important nodes, composite nodes, subcategory nodes, Components of nodes, relevant nodes identification, Identifier nodes, Primary nodes, loading nodes, directly connected nodes, closer nodes, Bridge nodes, Connect nodes with label A, Breaking node connections, Stop nodes, node elements, anchoring nodes, ...]"
5,154,"[relationshipType attribute, relationship type properties, RelationshipType parameter, relationshipType String or Integer, Guidelines for relationship property values, Deciding on relationship properties, Relationship direction, Relationship direction pattern, Relationship implications, relationship directions, Relationship direction: in, Relationship directionality, Relationship significance, Relationship types with :TYPE, Relationship import, Relationship Types Count, Relationship types listing, relationshipTypeCount Integer, Relationship specification, relationship assumption, relationship direction, relationship information, Relationship importance, Relationship projections, Relationship type specification, relationship types analysis, relationship type usage, Relationship types exclusion, Relationship types filtering, relationships existence, relationship example, relationship RELATIONSHIP, relationships creation, relationships file, relationship expression, relationship pattern, relationship type NEXT, relationship type for similarity, relationship return type, relationship type column, relationship_types parameter, Relationship type indexing, relationship type predicates, relationship type customization, relationship type precedence, Relationship type PATH, relationship type retrieval, relationship predicates, relationship sequences, relationship predicate rules, relationship management, relationship generation, relationship analysis, relationship types inclusion, relationship_types, relationship existence check, Relationship type definition, Relationship type predicates, Relationship type predicate, relationshipTypes List, relationshipTypes list, RelationshipTypes, Relationship types limit, relationship weight, Relationship orientation, Relationship types, Relationship identification, relationship orientation, real relationship properties, relationship type and property combination, relationship property expression, Relationship operations, Get relationship property, relationshipTypes, relationship types, relationshipTypes List of String, relationshipType key, relationshipTypesAndDirections STRING, Relationship-types, relationship types defined, relationship types conversion, relationshipTypes filter, relationshipType, Relationship type properties, relationship types in database, relationship type READ, relationship patterns, relationship types not mentioned, relationship types mapping, relationship type, relationship specification, relationshipType Integer, Relationship type, relationship type specification, relationship type configuration, relationship type in projected graph, relationship type projections, Relation types, Relationship types count, Relationship type metrics, ...]"
6,153,"[Label every node, Using node labels, Dynamic node labeling, New nodes with labels, Preliminary labels for nodes, Add node labels in-memory, Filter by node labels, returns node labels, Wildcard expression for node labels, Sequences of node labels, Multi-label nodes, Specify node labels in queries, include node labels in result, using node labels, Specifying Sequences of node labels, excluding node labels, Labeled node pairs, nodes with specific label, Various node labels, Disjoint node labels, Source node label, Graph node labels, Number of node labels, Specific node labels, NODES label, Patient node label, Adding node labels dynamically, Fast Node-Counts by Label, map of label to nodes, Mapping labels to nodes, Matching node by labels, List of node labels, preserves node label distribution, return node labels, Movie node label, transform node labels, Single string node label, Rename node labels, remove node labels, Remove Node Labels, CREATE nodes with label, labels node labels, Matching nodes with labels, multi-labeled nodes, Renaming node labels, Adds new node label, adding node labels, Add node label, Optional node label, default all node labels, Labeling nodes, Attach one label per node, One label per node, Labels per node, Engineering label in nodes, retain node labels, Hardcoded node labels, Airport node label, Projected node label, Counter node label, List of labels, Person label nodes, Artist label nodes, remove labels from nodes, Multiple node labels, Projecting multiple node labels, multiple node labels, virtual node labels, Entity node label, End node label >, Create node labels, Associate labels with node, Multiple labels per node, alternating node labels, another-node-label-value, Dedicated node label, Selecting node labels, Assigning second node label, User node label, Creates node labels, multiple labels on nodes, Person node label, Single node label, Multiple node labels and relationships, Change node labels, clones nodes and labels, disjoint node labels, nodeLabels filter, TargetNodeLabels filter, Adds labels to nodes, add labels to nodes, adds labels to nodes, Set labels on a node, Set node labels, Project node labels, Remove node labels, sets labels to nodes, Source node labels, Projecting node labels, Write node labels to database, ...]"
7,152,"[Using functions in Neo4j, WRITE mode in Neo4j, Verbose output in Neo4j, Writing code for Neo4j, Text generation in Neo4j, Subsets in Neo4j, Set in Neo4j, disjoint sets in Neo4j, Collaboration in Neo4j, basic construct in Neo4j, Programmatic use in Neo4j, API usage in Neo4j, Triple stores to Neo4j, Triples loaded in Neo4j, Building pipelines in Neo4j, Building with Neo4j, Automated processing with Neo4j, Queries in Neo4j, Query Neo4j database, Query Neo4j, introduced in Neo4j 5.9, query Neo4j, working with Neo4j, data verification in Neo4j, Validate method in Neo4j, Write results to Neo4j, Using Neo4j, Followers in Neo4j, using Neo4j, Force Neo4j query solving, rapid development with Neo4j, Verify Neo4j operation, Programmatic use of Neo4j, Comparing Neo4j to other technologies, applying Neo4j in various domains, Primary key in Neo4j, Cache Neo4j data, Write to Neo4j from RDF, Writing a book on Neo4j, Annotation in Neo4j, Explicit description in Neo4j, Creative reasoning with Neo4j, Ad hoc logic in Neo4j, heuristic approach in Neo4j, inferencing in Neo4j, Path function in Neo4j, Import function in Neo4j, ML functions in Neo4j, Finding authors in Neo4j, Method returns in Neo4j, Defining classes in Neo4j, Creating expectations in Neo4j, Set definition in Neo4j, Business value of Neo4j, Heavy process in Neo4j, enriching data with Neo4j, Analysis in Neo4j, interest cluster analysis in Neo4j, semantic search, semantic search in Neo4j, Examples of Neo4j, Expressivity in Neo4j, subset in Neo4j, Applications of Neo4j, Standard implementation in Neo4j, aggregation in Neo4j, learning relationships in Neo4j, OWL classes in Neo4j, classes in Neo4j, synonyms in Neo4j, Mathematical approach in Neo4j, Adding elements in Neo4j, implementations in Neo4j, jobId in Neo4j, Retrieved information from Neo4j, Redundant information in Neo4j, Write mode in Neo4j, Deprecations in Neo4j, Access Neo4j graph data, Query Neo4j graph database, Handling JSON data in Neo4j, Storing results in Neo4j, AI integration with Neo4j, JSON processing in Neo4j, Workflow in Neo4j, Without writing to Neo4j, Available in Neo4j 5.2+, embedding in Neo4j, EDA in Neo4j, Specify Neo4j version, Building applications with Neo4j, Publishing in Neo4j, Introduced in Neo4j 5.3, procedure usage in Neo4j, Examples in Neo4j, Data exploration in Neo4j, Custom integrations in Neo4j, on graph neo4j, Introduced in Neo4j 5.7, Introduced in Neo4j 5.9, ...]"
8,145,"[Cypher settings, Cypher Shell credentials, Cypher usage, Cypher overview, Cypher version, Cypher basics, Cypher configuration settings, Cypher configuration, Cypher example, Cypher runtime hints, Cypher® runtimes, Cypher 3.1 fallback, PROFILE in Cypher, cypher. settings, Cypher® usage, Cypher features, Cypher signatures, Cypher usage example, Cypher Reference Card, Cypher error handling, Cypher named graph, Cypher counterparts, Cypher chain functionality, Cypher generating prompt, Cypher examples section, Cypher generation accuracy, Cypher integration, Cypher in Python integration, Cypher matching statement, Cypher mapping, Cypher Search, Cypher creativity appreciated, Cypher function names, Cypher Execution, Cypher pattern performance, cypher, Cypher execution monitoring, Cypher type system, Cypher script file, Cypher instructions generation, Cypher implementation, Cypher-based approach, Cypher selectors, Cypher coding, Syntax Deprecated in Cypher, Cypher popularity, Cypher for validation, Cypher-shell tool, Cypher-shell argument, Expressiveness of Cypher, Cypher feature availability, Cypher commands for security, Cypher version control, Cypher® supports, Cypher injection protection, Cypher data values, Cypher readability improvements, Cypher validation, technical aspects of Cypher, Selective in Cypher, Engineering role in Cypher, Cypher parameters, Cypher on GDS, Cypher list, Cypher syntax documentation, Cypher language enhancements, Cypher best practices, Cypher patterns, Cypher language evolution, Cypher executor selection, Cypher support for data types, Calling Cypher procedures, Cypher USE clause, Cypher Manual reference, Cypher update strategy, Cypher Client, Cypher functions list, Cypher security settings, Cypher endpoint, Cypher Cheat Sheet, Cypher command execution time, Cypher usage examples, Cypher documentation, Cypher transaction, Cypher Manual, BioCypher usage, Cypher planner, Cypher sleep operation, Cypher Manual references, Cypher compatibility, Cypher updates, Cypher manual reference, Cypher procedure API, Cypher API procedures, Cypher syntax not specified, Cypher limitations, Cypher commit operation, Cypher scripts, Cypher Transaction State, Cypher fundamentals, ...]"
9,140,"[Reserved for relationship types, Using relationship types, Specifying relationship types, variable:RELATIONSHIP_TYPE(property), filtering with relationship types, include relationship types, Changing relationship types, remainingRelationshipType parameter, Metadata for relationship types, db.relationshipTypes, Specifying multiple relationship types, Importance of relationship types, semantics of relationship types, target relationship type, excluding relationships by type, copy-only-relationships-with-types, One type per relationship, Single relationship type, Nested relationship type, Use provided relationship types, Indexed relationship type, Additional relationship types, multiple relationship types, retain relationship types, defining relationship types, using relationship types, Custom relationship types in projection, Custom relationship types, Undirected relationship types, project single relationship type, Relationships by type, Relationships by type with label, Number of relationships by type, connects with relationship type, Filtering on relationship types, Target relationship type, Applied to multiple relationship types, custom relationship types, filtering by relationship types, specifying relationship type, List<String> relationship types, Label or relationship type, Invalid for relationship types, Privilege to create relationship types, Enables relationship type creation, Mapping relationship types, Matching relationships by types, Create a new relationship type, Get relationship type, Cannot create relationship types, filter with relationship types, Filter by relationship types, alternate relationship types, :TYPE relationship type, exclude relationship types, exclude existing relationships, Specifying relationships in queries, Specified relationship type, Writes relationships, writing relationships, Write relationship type, Specify targetRelationshipType, testRelationshipType, trainRelationshipType removed, Metadata on relationships, Specifying relationship properties, Specified relationship type and property, Specifying multiple relationships, Alternate relationship types, Specified type relationships, Multiple relationships, Defining relationships, label/relationship type, Multiple relationship types, Use relationship types, match on multiple relationship types, Specify relationship types, List of relationship types, contextRelationshipTypes list, Map for relationship types, Returns distinct relationship types, targetRelationshipType, db.relationshipTypes procedure, Filter predicate for relationships, Disjoint relationship types, Distinct relationship types, ROUTE relationship type, Different relationship types, Specific relationship type, New relationship type, Sequences of relationship types, distinct relationship types, specific relationship types, Output relationship types, CONNECTED to relationship, specific relationship type, DIRECT relationship type, TYPE: relationship type, Fetch relationships by type, Query returns relationship type, ...]"


In [213]:
gamma_tests[64.0]['biggest_communities']

Unnamed: 0,themeCount,themes
0,122,"[Node specification in Cypher, endNode in Cypher, Virtual Nodes in Cypher, Add nodes in Cypher, --nodes in Cypher, Delete nodes in Cypher, node in Cypher, Field nodes in Cypher, Connecting nodes in Cypher, Identify specific unconnected nodes in Cypher, Updating nodes in Cypher, Nodes created in Cypher, Updating nodes in Neo4j, Lists of nodes in Cypher, Merging nodes in Cypher, Extract node in Cypher, Excluding nodes in Cypher, Product nodes in Cypher, target node in Cypher, Node connections in Cypher, Categorize in Cypher, Node classification in Cypher, AllNodesScan Query, AllNodeScan in Cypher, AllNodesScan, Finding nodes in Cypher, Match any node in Cypher, nodeStatements in Cypher, connects nodes in Cypher, node prediction in Cypher, node lookup in Cypher, Nodes in Cypher, nodes in Cypher, NODES in Cypher, sourceNodeId in Cypher, Read nodes in Cypher, Node in Cypher, sourceNodes in Cypher, -nodes parameter in Cypher, Node classification Cypher documentation, Starting node in Cypher, Nodes function in Cypher, targetNode in Cypher, node_property in Cypher, All Nodes Scan in Cypher, Node definition in Cypher, nodes(path) in Cypher, nodes() in Cypher, NODE() function in Cypher, Collapse Nodes in Cypher, Source node in Cypher, nodes(3) in Cypher, Nodes and labels in Cypher, Movie nodes in Cypher, sourceNode in Cypher, FOR nodes in Cypher, nodes() function in Cypher, Person nodes in Cypher, Person node in Cypher, nodes function in Cypher, Leftmost node in Cypher, node weights in Cypher, NODE in Cypher, Related nodes in Cypher, ""node"" in Cypher, virtual node in Cypher, sequence in Cypher, Improved Cypher chain, Selecting Order nodes in Cypher, ANY NODE in Cypher, Sales nodes in Cypher, Engineering nodes in Cypher, missing nodes in Cypher, `nodes()` function in Cypher, Class weights in Cypher, CONNECTED_TO in Cypher, startNode() function in Cypher, NODE UNIQUE in Cypher, Adjacent nodes in Cypher, Adding Class nodes in Cypher, $deletedNodes in Cypher, end node in Cypher, Source node name in Cypher, path of nodes in Cypher, Get all nodes in Cypher, Returning nodes in Cypher, actedIn.nodes.cypher, End nodes in Cypher, NODE? in Cypher, topN in Cypher, source node in Cypher, List Concatenation in Cypher, node creation in Cypher, Reaction nodes in Cypher, Connected nodes in Cypher, endNode() in Cypher, Disconnected nodes in Cypher, AllNodesScan in Cypher, Start and end nodes in Cypher, Linking entities in Cypher, ...]"
1,119,"[Customer nodes, Candidate nodes, Original nodes, k unique nodes, Uniquely identifiable nodes, Super nodes, Super Nodes, red nodes, green nodes, green nodes validated, Unique nodes, Filter nodes, relevant nodes, Indexed nodes, real nodes, Unique lists of nodes, Ordered list of nodes, Parent nodes, Root node, list of nodes, path of nodes, Specific nodes, Collection of nodes, List of documents to nodes, See all nodes, list of target nodes, Sets of nodes, Node feature information, node features, Distinct nodes, information about nodes, distinct nodes, Node nodes, Limited information on nodes, reachable nodes, super nodes, missing nodes, Returns nodes individually, returns nodes, aggregation of nodes, Person nodes, Subset of nodes, Intermediate nodes, processed nodes, Tag nodes, classified nodes, DISTINCT Nodes, Green nodes, Stage nodes, 'k' nodes, Standin nodes, non-existent nodes, target nodes, Important nodes, composite nodes, subcategory nodes, Components of nodes, relevant nodes identification, Identifier nodes, Primary nodes, Stop nodes, node elements, anchoring nodes, neighbor nodes, Adjacent nodes set, targetNodes parameter, Target nodes input, Database nodes, Checkpoint nodes, Anchor nodes, Adjacent nodes, collapse intermediate nodes, Section nodes, Subject nodes, Entity nodes, Entities as nodes, Entity node type, Page nodes, iterate through nodes, visited nodes, restrict nodes returned, Restricting nodes returned, Visited nodes count, Visited nodes order, Leaf nodes, List of target nodes, Groups of nodes, Nodes created, Visited nodes, targetNodes in Neo4j, targetNodes list, Collapse Nodes, Select nodes, Device nodes, Patient nodes, Objects as nodes, yield nodes, Product nodes, Document nodes, database node, ...]"
2,119,"[Node labels in GDS, Node labels usage, Node attribute key labels, Node labels naming, Series mapping node label, Node labels list, Node labels in JSON, Node labels in Camel-case, Node labels retrieval, Node labels property, Passing in contextNodeLabels, nodeLabel, TargetNodeLabel, nodeLabel parameter, nodeLabels, Node label stratification, Node label inspection, Nodes, Properties, Labels, Node label importance, Node label consideration, Node label lookup index, Node label lookup, create a node label lookup index, named node label lookup index, Node label LOOKUP index, node label lookup index, Node label predicate, Solving label predicates, Node label uniqueness, Node label specificity, specified label and properties, Node label properties, nodeLabels List of String, node labels, ContextNodeLabels, nodeLabels String, node labels option, graph labels, Edge labels, Node labels, Graph labels, Node labels and properties, node label mapping, Node label mapping, Node label, Node labels mapping, Nodes label mapping, node label, sourceNodeLabel, node labels inclusion, node label in projected graph, node label projections, Node label A, node label usage, node labeling, node labels creation, node labels in output, Node label constraints, nodeLabels list, Node labels in graphs, node label distribution, targetNodeLabels List, contextNodeLabels list, targetNodeLabels, targetNodeLabels key, Node label name, targetNodeLabel, sourceNodeLabel filter, Node label search, Node labels with :LABEL, Add/remove label, Node label removal, Node labels update, Adjacent labeling, Threshold-based labeling, Node labeling, Node labeling best practices, NodeLabels, NodeLabels example, node labels in all capitals, NODE LABELS, Node labels performance, Node label targeting, node_labels, SourceNodeLabel, NodeLabels parameter, nodeLabelsWritten Integer, Node labeled :A, node labels in database, Node labels in database, Node labels management, node label uniqueness, Node label considerations, node label management, Node label usage, Node label index, Node label filter, Node labels extraction, sourceNodeLabels key, NodeSelector labels, ...]"
3,118,"[GDS admins, GDS enabled feature, GDS community edition limitations, GDS library limitations, GDS project subgraph, GDS integration, GDS client installation, GDS compatibility, GDS schema, Relationship properties in GDS, GDS implementation, GDS Cypher procedure API, GDS procedure API, gds.graph.exists, GDS Graph Exists capabilities, GDS performance optimization, GDS workflow, GDS library compatibility, GDS performance tuning, GDS library configuration, GDS logging capabilities, GDS new approach, GDS workloads, server-side GDS, GDS beta functions, Install GDS library, GDS algorithms execution modes, Using GDS, Benefits of GDS, GDS users, GDS software, GDS administrator, GDS supports multiple users, Users familiar with GDS, Running GDS procedures, GDS write procedures, Access GDS resources, GDS CE usage, ongoingGdsProcedures, GDS technique, GDS resources, GDS graph algorithms, GDS model discovery, Regression in GDS, Scaling with GDS, using GDS, GDS algorithms completion, Overview of GDS, GDS Fundamentals, GDS representation, Projecting into GDS, Common usage patterns in GDS, GDS library version 2.0, GDS preserves parallel relationships, GDS client, GDS license key, GDS support, GDS usage, standalone GDS database, GDS application, GDS export, Data into GDS server, Enable GDS, GDS, Recent version of GDS, GDS documentation, GDS procedure naming, GDS guide, GDS manual, GDS execution control, GDS Guide, GDS export properties, GDS client connection, GDS library write functionality, Include GDS library procedures, Training methods in GDS, GDS procedures, gds-guide, Machine learning in GDS, gds-resources file, gds object, Getting started with GDS, Manager's Guide to GDS, GDS Community Edition, GDS requirements, GDS EE license, Node property in GDS graph, GDS procedure, mapping GDS syntax, Removed in GDS v2, GDS capabilities, GDS availability, GDS support for 4.x and 5.x, Sending messages in GDS, leveraging GDS, GDS numeric properties support, GDS write back, Core operations in GDS, GDS EE features, GDS Community Edition installation, ...]"
4,116,"[Graph Database Administration, Graph database architecture, Graph database implementation, Graph Database Analysis, Graph Database Overview, graph database insights, Graph database exploration, Graph Database Information, graph database fraud analysis, Graph database discovery, Graph database entities, Graph database design, Graph database utilities, Graph database utility, Graph database enrichment, Graph database principles, Graph databases knowledge, graph databases, graph database queries, Graph databases, Graph database, Graph databases overview, Graph databases for complex queries, Graph databases for relational data, Graph databases interaction, Graph database algorithms, Graph databases in healthcare, Graph databases in AI, Graph Databases, Graph database features, Graph Database Use Cases, Graph Database Concepts, Graph database examples, Graph database technology, Graph databases in logistics, Graph database integration, Graph database application in logistics, graph-database technology, Graph database techniques, graph database concepts, graph database, Graph databases in cloud environments, graph database integration, Graph database power, Traditional graph databases, Graph database schema, graph databases in recommendations, Graph-based Recommendation, Graph databases in Neo4j, Graph database deployment, Graph database configuration, Graph database sessions, Graph database properties, Graph database concepts, Graph database elements, Graph database applications, Graph database concept, Graph vs. database distinction, Graph database design considerations, Graph database validation, Graph database structure, database graph relationship, Graph database backup, Graph database service, Graph database services, Graph database creation, Graph Database Applications, Graph database operations, Graph database analysis, Graph database querying, Graph Database, Graph database output, Graph database insights, Graph database indexing, graph database technology, Graph database workspace, Graph database example, Graph database benefits, Graph database interaction, Graph database scoring, Graph database practice, Graph database setup, Graph database nodes, Graph Database Pricing, Graph database use cases, Graph database size, Graph Database Version, Topology graph for databases, Graph-database association, graph database maintenance, database graph, Graph database triggers, graph database updates, GraphDatabase class, Relational database to graph, Graph database for chatbots, Graph Database Implementation, Graph Database Retrieval, Graph database connection, Graph database application, ...]"
5,113,"[Cypher semantics, Cypher without begin, Cypher variable dependency, Cypher sequence patterns, No Cypher knowledge needed, Cypher syntax example, Cypher commit command, Cypher schema definition, apoc.meta.cypher.types, Cypher syntax understanding, Cypher syntax utilization, Cypher® syntax, Cypher syntax not covered, cypher-shell format, Cypher Shell format, Cypher properties setting, Cypher syntax for importing data, Cypher syntax in migration, Cypher syntax for arrays, Cypher syntax basics, Cypher Aggregation syntax, Cypher syntax for creating, apoc.meta.types replaced, Cypher syntax for error handling, Cypher syntax, Cypher RETURN statement, Cypher permissions syntax, Cypher syntax examples, Cypher property syntax, Cypher styleguide, Cypher syntax updates, Cypher syntax for properties, Cypher syntax for constraints, Cypher types and synonyms, Cypher command for constraints, Cypher syntax error, CypherSyntaxError exception, Cypher syntax changes, Cypher syntax features, Cypher syntax readability, Cypher code, Cypher syntax elements, Cypher Literals, Cypher literal inputs, Cypher syntax not applicable, Cypher administrative commands, Cypher commands for configuration, Cypher SET operation, Cypher expressions, Cypher Syntax, Cypher data types, Cypher data structures, Cypher syntax rules, Cypher syntax (deprecated), Cypher parser overview, Cypher property types, Cypher structure, Cypher language styling, Cypher syntax best practices, Cypher query pipelines, Cypher syntax for labels, Cypher label syntax, CypherFormat parameter, Cypher CREATE clause, Cypher index syntax, Cypher syntax hints, Cypher for server tags, Cypher syntax suggestions, Cypher syntax usage, Cypher syntax for security, Cypher syntax for roles, Cypher language evolution, Cypher syntax for server configuration, Cypher syntax for points, Cypher map functions, Cypher syntax for updates, Cypher syntax for updating, Cypher syntax generation, Cypher manual documentation, Cypher Clauses, Cypher instruction format, Cypher syntax for merging, Cypher syntax for pathfinding, Cypher for natural language generation, Cypher label search, Cypher syntax for nodes, Cypher import syntax, Cypher syntax for index management, Cypher keywords glossary, Cypher syntax for permissions, Cypher syntax for database management, Cypher literals construction, Cypher Manual labels, Cypher syntax for location, Cypher language semantics, Cypher syntax for subsets, Basic Cypher tutorial, Cypher syntax for traversal, Cypher types normalization, Cypher statement inference, ...]"
6,112,"[properties on nodes, sets node properties, String type node properties, Returning node properties, Calculate node properties, Based on node properties, comparing node properties, algorithms for node properties, Based on other node properties, numeric node properties, Scale node properties, Defining node properties, Unique properties on node, Comparing nodes, Configured node properties, Specifying node properties, compute node properties, Predicting node properties, Graph node properties, Write node properties, Streams node properties, write node properties, written node properties, streaming node properties, Writes node properties, writeNodeProperties config, Normalizing node properties, ignore node properties, Select node properties, Predict node property values, Generate node properties, stream node properties procedure, compress node properties, stream node properties, writeNodeProperties function, writing node properties, Deriving node properties, configurations for node properties, Query node properties, Supported node property types, Optional node properties, Transforming node properties, Removing node properties, Encoding node properties, binary node properties, Compute node properties, Loading node properties, Set node properties, Patient node properties, Target node properties, Uniform node properties, Drop node properties, Incorporate node properties, project node properties, Incrementing node properties, Scaled node properties, Training without node properties, virtual node properties, real node properties, distinct node property values, Numeric node properties, Remove node properties, Access to node properties, Setting node properties, Focus on node properties, Updating node properties, Incorporates node properties, Removed node properties, modifying node properties, Stream node properties, Delete node property, skip node properties, Transient node properties, copy node properties, Number of node properties written, Combining node properties, Duplicate Node Properties, Target node description, Binarize node properties, store values as node properties, Defining nodes with properties, Accessing node properties, Source node parameter, Generating node properties, Update properties, rename node property, Intermediate node properties, writeNodeProperties in config, Inspecting node properties, Time-based node properties, Copy properties between nodes, Querying with node properties, additionalNodeProperties, retrieve node properties, Selecting node properties, Person node, Patient node, User node, Guidelines for node property values, Property Values in Nodes, ...]"
7,111,"[Reserved for relationship types, Using relationship types, variable:RELATIONSHIP_TYPE(property), RELATIONSHIPS type, include relationship types, Changing relationship types, Metadata for relationship types, db.relationshipTypes, Specifying multiple relationship types, Importance of relationship types, semantics of relationship types, target relationship type, excluding relationships by type, copy-only-relationships-with-types, One type per relationship, Single relationship type, Nested relationship type, Indexed relationship type, Additional relationship types, multiple relationship types, retain relationship types, defining relationship types, using relationship types, Custom relationship types in projection, Custom relationship types, project single relationship type, Relationships by type, Relationships by type with label, Number of relationships by type, connects with relationship type, Target relationship type, Applied to multiple relationship types, custom relationship types, filtering by relationship types, specifying relationship type, List<String> relationship types, Label or relationship type, Invalid for relationship types, Privilege to create relationship types, Enables relationship type creation, Mapping relationship types, Matching relationships by types, Create a new relationship type, Get relationship type, Cannot create relationship types, filter with relationship types, alternate relationship types, :TYPE relationship type, exclude relationship types, exclude existing relationships, RELATIONSHIP PROPERTY TYPE, Writes relationships, writing relationships, Write relationship type, Metadata on relationships, Alternate relationship types, Specified type relationships, Defining relationships, label/relationship type, Multiple relationship types, Use relationship types, match on multiple relationship types, Specify relationship types, List of relationship types, contextRelationshipTypes list, Map for relationship types, Returns distinct relationship types, db.relationshipTypes procedure, Disjoint relationship types, Distinct relationship types, ROUTE relationship type, Different relationship types, Specific relationship type, New relationship type, Sequences of relationship types, distinct relationship types, specific relationship types, Output relationship types, specific relationship type, DIRECT relationship type, TYPE: relationship type, Relationships by multiple types, by relationship types, Change relationship types, Set relationship types, persist relationship types, PATH relationship type, HAS_ROUTE relationship type, Singular relationship type, WROTE relationship type, Filter relationship types, Filtering relationship types, Operations over relationship types, connected by relationship types, Label/relationship type, changes relationship type, READ relationship type, Explicit relationship description, RELATIONSHIP data type, Value for relationshipType, ...]"
8,103,"[Cypher Query, Cypher query result, Cypher query splitting, Cypher result rows, Standard Cypher reading queries, Cypher writing queries, Cypher query enhancements, Cypher query structure, Cypher query component, Cypher search, Cypher query for JSON export, Cypher query results, Cypher Search chain, Cypher queries generation, Cypher CASE statement, Cypher query logic, Cypher query output, Cypher query error, Cypher traversals, Cypher query storage, Cypher DELETE clause, Cypher queries conciseness, Cypher predicates, Cypher query restrictions, Cypher® lookups, Cypher query specificity, Cypher WHERE clause, Cypher query usage, Cypher queries mapping, Cypher query building, Cypher for data retrieval, Cypher query integration, Cypher query variable, Cypher search phrase, Cypher query parameters, Cypher rules in Neo4j, Cypher query writing, Cypher syntax for data integration, Cypher query for similarity, Cypher query demonstration, Cypher query for URIs, Passing URI in Cypher, Cypher query limitations, Cypher query language flexibility, Cypher Aggregation function, Cypher query reuse, Cypher query compilation, Cypher query engine, Cypher filtering, Cypher query, Cypher query for nodes, Cypher queries, Cypher query language, Cypher query export, Cypher query extension, Cypher query syntax, Cypher query example, Cypher query for timestamps, Cypher query options, Cypher queries for data transformation, Cypher Query Language, Cypher subquery syntax, Cypher subqueries, Cypher query in Cypher, Cypher query parameter, Cypher query planner, Cypher query option, Cypher query extraction, Cypher query generation, Cypher graph query language, Cypher query inputs, Cypher-shell queries not case-sensitive, Cypher queries as string arguments, Cypher query flexibility, Cypher queries in sequence, Cypher query performance, Cypher query as string, Cypher search phrases, Cypher temporal functions, Cypher LIST type, Cypher queries return types, Cypher query inclusion, Cypher syntax for data querying, Cypher queries for data science, Cypher CREATE statements, Cypher searching capabilities, Cypher query correction, Cypher query returns, Cypher queries as arguments, Semicolon in Cypher queries, Cypher vs SQL, Cypher queries examples, Cypher query examples, Cypher support for nested documents, Cypher query language extensions, Question to Cypher engine, Cypher statements for data ingestion, Cypher queries inference, Essential for Cypher queries, Cypher subquery, ...]"
9,101,"[labels in list, labelField String type, labelField, Rename labels, types, properties, labels and types, Applicable to multiple labels, Multiple labels, Multiple labels handling, UNIQUE IMPORT LABEL, Unique label, unique label, UNIQUE IMPORT LABEL label, Label assignment, Label names, Using labels, Identifier for labels, Using labels as indexes, Using labels in models, Apply labels later, Overloading labels, Labeling classes, Properties based on labels, Product labels, status labels, Specific label usage, Labels usage, preferred label usage, Indexed label, Indexed labels, Specified label, Custom labels in projection, Custom labels, Set Labels, Remove Labels, CREATE NEW labels, Labels limit, SET any label, missing label-field, Multi-label mode, Multi-label classification model, multi-label mode, dynamic labels, dynamic labels creation, Dynamic labels and properties, yield label, dynamic secondary labels, Dynamic values in labels, Different labels, Dynamic labels, dynamic label removal, Dynamically specified labels, Dynamic label assignment, determining labels, Selection of labels, Specifying labels, Cannot create new labels, Create new labels, set of labels, Explicit types and labels, Specify field for labels, Field label, unique label assignment, Species label, initial numeric label, create a label, Multiple labels support, Labels and properties in indexes, True labels, Combination of labels, Remove labels, create new labels, SetLabels on node, sets given labels, specified label inclusion, Specified labels, Sales label, Escaping dynamic labels, Class labels representation, Specifying terminator labels, Specifying end labels, Create a label, Specified labels in query, includeLabels, New label setting, comma-separated alternating label and relationship, comma-separated labels, :Person:Product label, Overlapping labels issue, contains all labels, Applied to multiple labels, Multiple constraints per label, Label-field name, Labels removed, single label, Distribution of labels, Label management, Multiple Labels/Types handling, Multiple labels projection, Projecting multiple labels, Projecting labels, ...]"


In [214]:
gamma_tests[128.0]['biggest_communities']

Unnamed: 0,themeCount,themes
0,86,"[GDS admins, GDS enabled feature, GDS community edition limitations, GDS library limitations, GDS integration, GDS client installation, GDS compatibility, GDS schema, Relationship properties in GDS, gds.graph.exists, GDS Graph Exists capabilities, GDS logging capabilities, GDS edition information, GDS new approach, GDS workloads, server-side GDS, Using GDS, Benefits of GDS, GDS users, GDS software, GDS administrator, GDS supports multiple users, Users familiar with GDS, Access GDS resources, GDS CE usage, GDS technique, GDS resources, GDS graph algorithms, GDS model discovery, Regression in GDS, Scaling with GDS, using GDS, GDS algorithms completion, Overview of GDS, GDS Fundamentals, GDS representation, Projecting into GDS, Common usage patterns in GDS, GDS library version 2.0, GDS preserves parallel relationships, GDS Algorithm Performance, GDS client, GDS license key, GDS support, GDS usage, standalone GDS database, GDS application, GDS export, Data into GDS server, Enable GDS, GDS, Recent version of GDS, GDS library write functionality, Training methods in GDS, gds-guide, Machine learning in GDS, gds-resources file, gds object, GDS Community Edition, GDS requirements, GDS EE license, gds.version procedure, GDS procedure, mapping GDS syntax, Removed in GDS v2, GDS capabilities, GDS availability, GDS support for 4.x and 5.x, Sending messages in GDS, leveraging GDS, GDS numeric properties support, GDS write back, Core operations in GDS, GDS EE features, GDS Community Edition installation, No GDS algorithms, GDS Model Catalog operations, GDS on Composite database, GDS manual reference, gds prefix, Load data into GDS, Running GDS, GDS Flight Server, GDS aggregate parallel relationships, gds.version() method, Similarity in GDS]"
1,81,"[Customer nodes, Candidate nodes, initial set of nodes, Original nodes, k unique nodes, Uniquely identifiable nodes, Super nodes, Super Nodes, red nodes, green nodes, green nodes validated, Unique nodes, Filter nodes, relevant nodes, Indexed nodes, real nodes, Parent nodes, Root node, Specific nodes, Node feature information, node features, Distinct nodes, information about nodes, distinct nodes, Node nodes, Limited information on nodes, reachable nodes, super nodes, missing nodes, Person nodes, Subset of nodes, intermediate nodes, Intermediate nodes, Intermediary node, processed nodes, Tag nodes, classified nodes, DISTINCT Nodes, Green nodes, 'k' nodes, non-existent nodes, Important nodes, composite nodes, subcategory nodes, Components of nodes, relevant nodes identification, Identifier nodes, Primary nodes, node elements, anchoring nodes, neighbor nodes, Adjacent nodes set, Database nodes, Anchor nodes, Adjacent nodes, collapse intermediate nodes, Section nodes, Subject nodes, Entity nodes, Entities as nodes, Entity node type, Page nodes, List of target nodes, Groups of nodes, Nodes created, Visited nodes, Device nodes, Patient nodes, Objects as nodes, Product nodes, Document nodes, database node, Student nodes, Location nodes, End nodes reachable, Product node, Super node concept, starting node concept, Super node issues, Explicitly created nodes, Airport location nodes]"
2,68,"[labels in list, labelField String type, labelField, Rename labels, types, properties, labels and types, Applicable to multiple labels, Multiple labels, Multiple labels handling, Label assignment, Label names, Using labels, Using labels as indexes, Using labels in models, Apply labels later, Overloading labels, Properties based on labels, Product labels, Specific label usage, Labels usage, preferred label usage, Specified label, Custom labels in projection, Custom labels, Set Labels, Remove Labels, Labels limit, specified label and properties, missing label-field, Multi-label mode, Multi-label classification model, multi-label mode, Different labels, determining labels, Selection of labels, Specifying labels, set of labels, Explicit types and labels, Specify field for labels, Field label, Species label, Labels and properties in indexes, Combination of labels, Remove labels, create new labels, SetLabels on node, sets given labels, specified label inclusion, Specified labels, Sales label, Specifying terminator labels, Specifying end labels, Specified labels in query, includeLabels, comma-separated alternating label and relationship, comma-separated labels, :Person:Product label, Overlapping labels issue, contains all labels, Applied to multiple labels, Multiple constraints per label, Label-field name, Labels removed, Distribution of labels, Label management, Multiple Labels/Types handling, Projecting multiple labels, Projecting labels, Projected labels as keys]"
3,68,"[Multiple roles assignment, Listing roles, Example of listing roles, Multiple roles mapping, user roles, Native users and roles, execution with assigned roles, custom role execution, custom role definition, Server role assignment, Role assignment, Assigning roles, Server roles, user role assignments, Allocator role, built-in roles, using roles, database roles, granting roles, manage roles, re-creating roles, Manage roles, Manage users and roles, Encoding user roles, Custom roles, Modify roles, Assign roles, User roles, Custom role management, remove roles, Managing roles, Setting user roles, Member role, Property: roles, User roles management, assign roles, User-role assignment, Renaming roles, Removing roles, rename roles, list roles, Multiple roles support, Specified roles, List of roles, Show roles, database role, show roles, Granting roles, Node roles identification, Node roles, Built-in roles, custom roles, Field role, Granting roles to users, ""roles"" property, Roles as relationships, Assign product roles, Comparison of roles, Assigning roles to users, Assign role to user, Assign user to admin role, grant role ability, assign doctor role, User roles and flags, Admin role mapping, Role assumption, Native roles overview, Required roles and permissions]"
4,66,"[Page cache configuration, Page cache setting, Page cache memory, page cache settings, Page cache usage, Page cache warmup, page cache, page cache profiling, page cache usage, page cache misses, Query cache size deprecated, Page cache size, page cache size, off-heap memory blocks cache, Page cache memory allocation, server.memory.pagecache.directio, Pagecache direct IO, server.memory.pagecache.size, Memory pagecache size, Query cache size, Page cache overhead, Shared cache, Page cache, Query cache, page cache hit, page cache page faults, Heap and pagecache settings, Size of page cache, Query caches memory consumption, Pagecache flush buffer, -pagecache parameter, increasing page cache size, Page cache metrics, Page cache efficiency, page cache memory usage, Page cache hits, page cache flushes, page cache merges, Page cache flushing, page cache limit, page cache unpins, Page cache page faults, page cache bytes written, page cache pages copied, page cache copies, page cache vs disk, page cache importance, page cache pins, page cache faults, page cache hits, page cache hit ratio, Page cache hit ratio, Page cache usage ratio, page cache usage ratio, Query cache management, page cache bytes read, page cache IO operations, page cache throttled times, Configure caches, query cache size, Exceeding query cache size, Pagecache eviction metric, Clear query caches, Estimating page cache size, default page cache memory, shared cache configuration]"
5,65,"[Turtle RDF format, RDF lib in Cypher, RDF queries, RDF data exchange, RDF data, RDF as a format, RDF format, RDF XML import, RDF store, RDF streaming, RDF data export, RDF schema, RDF input, Using RDFLib, RDF type in property graph, RDF graph operations, using RDFLib, RDFLib, RDF data exchange model, RDF layer addition, RDF community, Modeling with RDF, RDF and Semantic Web, RDF vs property graph, RDF basics, RDF labels, RDF limitations, RDF vs. property graph, RDF extensions for annotations, RDF extensions, import RDF, RDF users, RDF namespace usage, RDF semantics, RDFS vocabulary, RDF XML, RDF for data migration, RDF, RDF complexity, RDF world, Generic nature of RDF, RDF efficiency, RDF-ization, RDF generation, RDF standard, RDFLib extension, RDFLib in Python, RDFLib implementations, Proposal for RDF handling, RDF-izing, Presenting as RDF, RDF interoperability, RDF-Star expansion, Data exchange with RDF, RDF framework, RDF option, RDF persistence, RDF XML serialization, RDF and RDF-Star, Interoperability with RDF, RDF snippets in HTML, RDFs standard, RDF aware applications, Basic configuration for RDF, RDF triple concept]"
6,65,"[Reserved for relationship types, Using relationship types, include relationship types, Metadata for relationship types, Specifying multiple relationship types, Importance of relationship types, semantics of relationship types, target relationship type, excluding relationships by type, copy-only-relationships-with-types, Nested relationship type, Additional relationship types, multiple relationship types, retain relationship types, defining relationship types, using relationship types, project single relationship type, Relationships by type, Relationships by type with label, connects with relationship type, Applied to multiple relationship types, filtering by relationship types, specifying relationship type, List<String> relationship types, Enables relationship type creation, Matching relationships by types, Create a new relationship type, Get relationship type, Cannot create relationship types, filter with relationship types, exclude relationship types, exclude existing relationships, Metadata on relationships, Defining relationships, label/relationship type, Multiple relationship types, Use relationship types, match on multiple relationship types, Specify relationship types, List of relationship types, contextRelationshipTypes list, Map for relationship types, Returns distinct relationship types, ROUTE relationship type, Output relationship types, Relationships by multiple types, by relationship types, Change relationship types, Set relationship types, persist relationship types, Filter relationship types, Filtering relationship types, Operations over relationship types, connected by relationship types, Label/relationship type, changes relationship type, Value for relationshipType, returns list of relationship types, mutateRelationshipType, Associate type with relationship, Impact of relationship types, Filter with relationship types, Default relationship type, projecting multiple relationship types, Projected relationship types]"
7,65,"[Pattern parameter, Number parameter, Pattern input parameter, types parameter, timeout parameter, iterations parameter, id parameter, Rel parameter, -P parameter syntax, -to parameter, Levels parameter, _count parameter, collection parameter, Precision parameter, Offset parameter, properties parameter, predicate parameter, communityProperty parameter, propertiesWritten parameter, copy-only-node-properties parameter, apiKey parameter, prop parameter, expression parameter, range parameter, progress parameter, spread parameter, rel Rel parameter, keys parameter, key parameter, values parameter, value parameter, props parameter, Range in parameter, Values parameter, propertyName parameter, number parameter, to parameter, precision parameter, position parameter, offset parameter, orientation parameter, Orientation parameter, Keep values parameter, from parameter, index Integer parameter, TZ parameter, quote parameter, rel parameter, Properties parameter, Property names parameter, range specification, userId parameter, delimiter parameter, label parameter, TRUE parameter, sequences parameter, length parameter, duration parameter, size parameter, sample parameter, specifying sample parameter, target parameter, sequence parameter, operator parameter, Target parameter]"
8,63,"[String type node properties, Returning node properties, Calculate node properties, Based on node properties, comparing node properties, algorithms for node properties, Based on other node properties, numeric node properties, Scale node properties, Defining node properties, Unique properties on node, Comparing nodes, Configured node properties, Specifying node properties, Predicting node properties, Predicting node classes, Graph node properties, Normalizing node properties, Select node properties, Predict node property values, Generate node properties, Query node properties, Supported node property types, Optional node properties, Transforming node properties, Loading node properties, Patient node properties, Target node properties, Uniform node properties, Drop node properties, Incorporate node properties, project node properties, Incrementing node properties, Training without node properties, distinct node property values, Stream node properties, Number of node properties written, Combining node properties, Duplicate Node Properties, target node, Outcome node, Target node description, Defining nodes with properties, Accessing node properties, Intermediate node properties, Time-based node properties, Copy properties between nodes, Querying with node properties, additionalNodeProperties, retrieve node properties, Selecting node properties, Person node, Patient node, User node, Guidelines for node property values, Property Values in Nodes, access to node properties, Predict classes for nodes, Classify nodes, ID column as node properties, predicting class, Limited node property types, number of node properties exported]"
9,63,"[Add nodes in Cypher, Delete nodes in Cypher, node in Cypher, Field nodes in Cypher, Nodes created in Cypher, NODE_PATH in Cypher, Extract node in Cypher, Excluding nodes in Cypher, Product nodes in Cypher, sourceNodeId in Cypher, Read nodes in Cypher, Node in Cypher, Starting node in Cypher, targetNode in Cypher, node_property in Cypher, All Nodes Scan in Cypher, nodes(path) in Cypher, NODE() function in Cypher, Collapse Nodes in Cypher, Source node in Cypher, sourceNode in Cypher, FOR nodes in Cypher, Person nodes in Cypher, Person node in Cypher, NODE in Cypher, Related nodes in Cypher, ""node"" in Cypher, virtual node in Cypher, Selecting Order nodes in Cypher, ANY NODE in Cypher, Sales nodes in Cypher, Engineering nodes in Cypher, Rightmost node in Cypher, Replacing node properties in Cypher, missing nodes in Cypher, startNode() function in Cypher, Node objects in Cypher, NODE UNIQUE in Cypher, Adding Class nodes in Cypher, $deletedNodes in Cypher, end node in Cypher, Source node name in Cypher, path of nodes in Cypher, Get all nodes in Cypher, Returning nodes in Cypher, actedIn.nodes.cypher, End nodes in Cypher, topN in Cypher, source node in Cypher, sourceNode in Neo4j, targetNodeId in Cypher, Graph nodes in Cypher, Target node name in Cypher, target in Cypher, Node parameter in Cypher, JsonPathNode in Cypher, node parameter in Cypher, collapse nodes in Cypher, Number of nodes in Cypher, Reachable nodes in Cypher, Selecting nodes in Cypher, Start nodes in Cypher, NODE? null in Cypher]"


In [215]:
gamma_tests[256.0]['biggest_communities']

Unnamed: 0,themeCount,themes
0,49,"[distinct nodes and relationships, Query Nodes with Most Relationships, Nodes without relationships, grouping nodes and relationships, Handling nodes and relationships, Limited nodes and relationships, returns all distinct nodes and relationships, Projecting nodes and relationships, Multiple relationships between two nodes, Distinct nodes and relationships, Extracting nodes and relationships, Nodes and relationships export, export nodes and relationships, Nodes with more than 2 relationships, Checking nodes and relationships, Collapse nodes to relationships, Indexed nodes and relationships, Nodes with no incoming relationships, database nodes and relationships, Anonymous nodes and relationships, Alternating nodes and relationships, Separate nodes for relationships, Index nodes and relationships, Indexing nodes and relationships, index nodes and relationships, Virtual nodes and relationships, Query nodes and relationships, exports given nodes and relationships, Extracting data from nodes, Utilizing node and relationship types, Virtual Nodes, Delete nodes and relationships, relationships to missing nodes, importing nodes, Importing nodes with labels, Attributes in relationships, Attributes of nodes and relationships, Utilize node and relationship types, Virtual Nodes & Relationships, Load nodes for relationship, No nodes or relationships reference, collapse nodes into relationships, Limits on node and relationship counts, Split between nodes and relationships, returns distinct nodes and relationships, Import nodes and relationships, 34 Billion Nodes & Relationships, Grouping nodes by attributes, nodes, relationships, and maps]"
1,49,"[apoc.coll.reverse function, apoc.coll methods, apoc.coll, apoc.coll.set, apoc.coll.elements, apoc.coll.disjunction, apoc.coll.different, apoc.coll.set procedure, apoc.coll.avg function, apoc.coll.stdev function, apoc.coll.combinations function, apoc.coll.occurrences function, apoc.coll function, apoc.coll.pairs, apoc.coll.contains function, apoc.coll namespace, apoc.coll.indexOf, apoc.coll.different function, apoc.coll.subtract, apoc.coll.pairs function, apoc.coll.elements function, apoc.path.elements, apoc.coll.elements query, apoc.coll.fill function, apoc.coll.set function, apoc.coll.flatten function, apoc.coll.intersection function, apoc.coll.frequencies function, apoc.coll.frequenciesAsMap, apoc.coll.insert function, apoc.coll.min function, apoc.coll.partition function, apoc.coll.min, apoc.coll.indexOf procedure, apoc.coll.subtract procedure, apoc.coll.toSet function, apoc.coll.zip, apoc.coll.split, apoc.text.indexOf function, apoc.coll.stdev, apoc.coll.fill, apoc.coll.frequencies, apoc.coll.flatten, apoc.coll.sortMulti, apoc.coll.sum, apoc.coll.duplicates, apoc.coll.max, apoc.label.exists procedure, apoc.coll.elements Procedure]"
2,47,"[index updates, cached updates in pagecache, update statement, parallel update statements, Node updates, sequential updates, trigger update, Database updates, database updates, Automatic index updates, Index updates triggering, Update triggers, index updating in background, Syntax update, Index updates percentage for sampling trigger, Index sampling update percentage, Block dynamic updates, Batch updates, Update actions, Large updates handling, Continuous update, Time-based updates, Database updates prevention, Database update validation, Batch large updates, large updates, update nodes, Parallel updates, Single thread updates, Frequency of updates, Concurrent updates, Trigger update, Syntax updates, Prevent concurrent modifications, Concurrent modification issues, Concurrent updates prevention, Writing Update data, Database releases, Executing update commands, Transactional updates, Database modification, Database upgrade, database runtime updates, database changes impact, Updating queries, Transaction updates, Database updates communication]"
3,46,"[Returns Graph object, Retrieve graph information, Restore graph, returned graph, returns graph data, Complete graphs, Selecting and transforming graphs, Using a graph object, The graph object, Explore the graph, Concrete graph illustration, Updating graph data, selecting graph data, Explore graph dataset, Restore backed-up graph, Inspect graph content, original graph, entire graph, Generated graph, Simple graph definition, Simple graph example, Full graph application, Install graph application, Persons graph, imports graph, anonymous graph, graph called graphA, filtered graph, Streaming graph topology, Explore graph, graph called graphB, simple graph, Original graph, Updated graph, Graph 'myGraph', New graph example, State of the graph, Accurate graph representation, Verbose graph data, small graph, predict graph, Drop graph property, realistic graph properties, Navigating the graph, Updating named graph, social-graph]"
4,45,"[Transaction logging enabled, Transaction logging, Transaction log management, Transaction logs, Transaction logs storage location, transaction log files, Transaction log space usage, Transaction log data, Transaction logs for recovery, logical transaction logs, transaction logs data, Transaction log file size, Transaction logs directory, transaction log writer, transaction log purposes, Transaction log rotation, transaction logs backup, transaction logs pruning, Transaction logs recovery, Query transaction logging, Database and transaction logs, Transaction log files, Transaction log location configuration, transaction log rotation, Transaction log pruning, control transaction log pruning, Default transaction log path, huge transaction log issue, transaction log shipping, transaction logs, transaction logs size, transaction log rotations, transaction log flushes, Transaction log, Transaction logs size, Transaction logs replay, pruning transaction logs, Transaction log replay, Transaction logs availability, Local transaction logs, transaction logging configuration, transaction log entry, Transaction log synchronization, database transaction logs, deleting old transaction logs]"
5,44,"[Docker containers, Docker images, Docker usage, Docker, Docker-specific operations, Docker hosts, Docker official documentation, Docker deployment, Docker container, Docker documentation, Docker best practices, Docker for macOS, Windows, Linux, Docker tags, Official Docker documentation, Docker image, Load docker image, Dockerfile example, Docker containers management, Docker settings, Dockerfile, Introduction to Docker, Docker orchestration, Uses Docker, Docker healthcheck, Docker host deployment, Local deployment via Docker, Docker maintenance operations, Software deployment with Docker, Docker Compose setup, Docker compose up, Docker network orchestration, Using Docker services, Naming Docker container, Build Docker image, Run Docker container, Docker container image, Docker run command, Consult Docker documentation, Docker load command, Running Docker on macOS, Docker security practices, Docker container setup, Custom container image, Docker registry configuration]"
6,44,"[Ontologies and domains, Domain-specific ontologies, Matching ontologies, Using ontologies, Department level ontologies, Enterprise level ontologies, Schema and ontologies, Aligning with Ontology, Query Ontology, URL for Ontology, Ontologies usage, Managing ontologies, Combining Ontologies, Extending ontologies, Enriching with ontologies, External ontologies integration, Handling large Ontologies, managing multiple ontologies, Schema and ontology, Using ontologies and taxonomies, Value of ontologies and taxonomies, Combining ontologies, Semantics and ontologies, Defining ontologies, Ontologies and vocabularies, Web Protege for ontologies, Overlaying ontologies, Graph ontology management, Learning ontology from data, Making data smarter with ontology, Adding ontology and taxonomy, Graph ontology changes, Importing RDF ontologies, Ontologies enrichment, Wiki data ontologies, Extending ontology, Loading public ontologies, Matching entities to ontology, overlaying multiple ontologies, Semantic ontologies, Automating tasks with ontologies, Extracting ontology, Semantic definitions in ontology, Formalizing knowledge with ontologies]"
7,44,"[Generating graphs, Loading graphs, Creating sample graph, Creating small graphs, New to graphs, Built-in graph generators, Generate graphs, Generated graphs differ, Drop a graph, Introduction to graphs, saving graphs, Restore graphs and models, Updating graphs, Multi-graph definition, Multi-graph, Hypergraph concept, Large graphs challenge, Massive graphs handling, list all graphs, Graphs, Listing graphs, List graphs, Dropping graphs, Specific graphs, Life Sciences Graphs, Graphs importance, multiple graphs, Drop graphs, Collection of graphs, larger graphs, updating graph properties, Multiple graphs, Managing graphs, Multiple graphs management, Access multiple graphs, Larger graphs, Internally managed graphs, listing graphs, Project graphs, Filtered graphs, Temporary filtered graph, Graphs of the same name, Graphs not directly discussed, different graph application]"
8,43,"[Projecting a graph, Projecting a graph object, Persist graph projections, Projecting named graphs, projected myGraph, Mutate projected graph, Projecting graphs with queries, projecting graph patterns, Projecting virtual graph, Manage projected graphs, projecting full graph, Projecting named graph, projection 'myGraph', Projecting new graph, projecting a graph, projecting graph, projected graph relationships, citation graph projection, manage projected graphs, Projecting new graphs, Projecting subgraph, projecting the graph, projecting new graph, Projecting graph, added to projected graph, Projecting graphs, projecting graphs, Projecting empty graph, Filter projected graphs, projecting a second graph, Storing projected graphs, Access to projected graphs, project graph, Projecting datasets, resizing projected graph, project and store named graph, Projecting results, Anonymous graph projection, Anonymous graphs, Managing projected graphs, Projecting undirected graphs, release projected graph, New relationships in projected graph]"
9,42,"[Data import, Data importation, Data import example, Data import strategies, Data import considerations, Data import automation, Data import validation, Data importer tool, Data import tool, Data importer, Selective data import, Pre-existing data import, data import, data importing, data files imported, Fabric Importing data, Import data tutorial, Data Importing, Unauthorized data import, Data import code, Data Importer usage, Data import tools, Data import examples, Import data set, Data import syntax, Data import methods, Data-importer model, Manual data import, Graph import, Data import options, Two ways to import data, Data ready after import, Data import summary, Data export techniques, Data import query, data import and transformation, Data Importer tool, Graph import procedures, Data import from APIs, Data import techniques, Database import verification, Check imported data]"


In [216]:
gamma_tests[512.0]['biggest_communities']

Unnamed: 0,themeCount,themes
0,36,"[Domain-specific ontologies, Matching ontologies, Using ontologies, Department level ontologies, Enterprise level ontologies, Schema and ontologies, URL for Ontology, Managing ontologies, Combining Ontologies, Extending ontologies, Enriching with ontologies, External ontologies integration, Handling large Ontologies, managing multiple ontologies, Schema and ontology, Using ontologies and taxonomies, Value of ontologies and taxonomies, Combining ontologies, Semantics and ontologies, Defining ontologies, Web Protege for ontologies, Overlaying ontologies, Graph ontology management, Learning ontology from data, Making data smarter with ontology, importing RDF ontologies, Importing RDF ontologies, Extending ontology, Loading public ontologies, Matching entities to ontology, overlaying multiple ontologies, Semantic ontologies, Automating tasks with ontologies, Extracting ontology, Semantic definitions in ontology, Formalizing knowledge with ontologies]"
1,33,"[deprecated procedures, Deprecated procedures, Updated procedures and functions, expand procedures, restore procedures, Predict procedures, import procedure, procedures and functions, Blocking procedures, public procedures, Deprecated procedure, deprecated procedure, Enable procedures, works as procedure, import procedures, Removed procedures, configuration procedures, export procedures, Generate procedures, .write procedures, Generating procedures, built-in procedures, Generated procedure name, Enable necessary procedures, custom procedures, Best practice for procedures, Built-in procedure, Export procedures, List available procedures, Import procedure, SHOW PROCEDURES for available procedures, Officially supported procedures, Restore operations]"
2,32,"[Additional settings, Optional settings, Config settings, Configurable settings, connection settings, configuration settings overview, Configuration settings variation, Configuration settings removed, Configuration settings at runtime, Configuration settings reference, Configure default settings, Configuration settings moved, Upgrade Settings, configuration settings, Configuration settings, parameter configurations, Configuration Settings, effective settings, required settings, directory settings, import configuration settings, Properties setting, other settings, query configuration settings, Configuration settings overview, Configuration, Override settings, Description of settings, Import settings, Configuration settings unordered, configuration settings scopes, Runtime configuration changes]"
3,31,"[remaining relationships, Handling of relationships, Relationships on relationships, Relationships collection, Storing relationships, No matching relationships handling, Adding new relationships, PUT method for relationships, One relationship, maintaining relationships, required for relationships, --relationships option, Relationships file, People relationships, NEXT relationships, Relationships order, missing relationships, relationships written, ""relationships"", Relationships management, iterating over relationships, Define relationships, Relationships results, information about relationships, Relationships between people, positive relationships, Character relationships, context of relationships, relationships, Relationships, Self-relationships]"
4,30,"[Enable metrics, heap metrics, Collecting metrics, Compute metrics, Device metrics collection, Inserting metrics, Continuous metrics collection, Retrieving metrics, Monitoring graph metrics, Define metrics, Extracting metrics, extract metrics, collected metrics, Custom metrics in graphs, Server metrics configuration, Enable metrics logging, Server metrics prefix, Transaction metrics, Checkpoint metrics, Instance metrics, Device metrics, Expose metrics, expose metrics, Calculates specified metrics, Persist logs and metrics, Expose metrics with NOM, Endpoints and metrics, network metrics, Viewing metrics, Output of metrics]"
5,29,"[Backup support, Backup parameters configuration, backup configuration, Backup parameters, backup status, backup data, Backup success, Backup management, Back-up procedure, Backup run, Backup strategies, Backup procedures, Backup planning, Backup failure, Backup command behavior, Backup problems, Backup clients, Backup storage solutions, Backup failure handling, Backup not supported, Backup plans for coding, Backup folder management, Backup configuration, Backup process settings, Backup modes, Backup options, Backup server, Backup client, Backup volume]"
6,28,"[head(null) returns null, size(null) returns null, toFloat(null) returns null, labels(null) returns null, asin(null) returns null, atan(null) returns null, cot(null) returns null, abs(null) returns null, nodes(null) returns null, startNode(null) returns null, date(null) returns null, time(null) returns null, avg(null) returns null, datetime(null) returns null, sqrt(null) returns null, max(null) returns null, min(null) returns null, count(null) returns 0, type(null) returns null, log(null) returns null, sin(null) returns null, sum(null) returns 0, exp(null) returns null, cos(null) returns null, haversin(null) returns null, tan(null) returns null, Expression returns null, last(null) returns null]"
7,28,"[Proxy routing, request routing, Network routing, Configure routing rules, Adding routing rules, Routing applications, Routing information, Routing table, Control routing table, Initial routing table, Routing table error, Request routing, Query routing, Query routing decisions, Querying routes, Route optimization, Path optimization, Flight route optimization, Dynamic routing strategies, Routing flights, Automatic routing, Traffic routing, Proxy setup, Route read queries, Add routing rule, Server-policy routing, Routing purposes, Routing strategies]"
8,28,"[cluster servers, router servers, Cluster servers, Cluster server mapping, Cluster server communication, Cluster server services, Server management, Cluster server management, Cluster server processing, Cluster server roles, Core servers, Stop Core servers, Cluster service location, Cluster servers status, Standalone server management, Cluster servers configuration, Single server management, Server allocation, Cluster server allocation, Cluster server requirements, Drop servers, Separate servers, multi-server setup, Cluster hosting, Cluster server routing, Secondary hosting mode, grouping servers, Cluster servers uninstallation]"
9,28,"[Write operation, Write operation time, Elapsed time, read/view time, write time, Write time, Execution time in milliseconds, Execution time improvement, Returns a Time, Returns Time, execution time in milliseconds, Execution time, execution time, commitTime in milliseconds, Process time estimate, compute time, Compute time, Execution time parameters, estimating processing time, Processing time estimation, Execution time concerns, Execution time tracking, Execution time logging, Track query CPU time, Elapsed time tracking, Execution time in ms, elapsedTimeMs, cpu time]"


In [217]:
gamma_tests[1024.0]['biggest_communities']

Unnamed: 0,themeCount,themes
0,21,"[Hierarchical clustering, Advanced clustering, Clustering, Advanced Clustering, advanced clustering, Clustering documentation, clusterings, clustering algorithm, Clustering environments, Clustering in Enterprise, Clustering architecture, database clustering, Autonomous clustering, clustering documentation, Clustering problems, final clustering, Clustering determination, Flow clustering, node clustering, Clustering stability, DBMS clustering]"
1,20,"[scalability, Vertical scaling, Graph scalability, Scalability improvements, Efficiency and Scalability, Scalability issues, Production scaling challenges, Database scalability issues, Scalability, Scalable architectures, Scalability considerations, Scalability in graphs, Scalable manner, Highly scalable, algorithm scalability, Scalability and efficiency, Scalability solutions, Scalability in data science, scalability in databases, Read scalability]"
2,20,"[Large networks, Networks, network addresses, Network relationships, Network features, network features, Network connections, network connections, Network connectivity, Network connectivity evaluation, network, List all network connections, accepted network connections, Users' network connections, Open network connections, Router connections, Block network connections, Network conditions, establish connections, Station connectivity]"
3,19,"[JSON format data, JSON format for relationships, JSON format in databases, JSON format output, JSON-formatted list, JSON output interpretation, JSON format for data, Json format conversion, JSON formatting, JSON format, JSON representation, JSON structure, JSON document format, JSON result format, JSON format data integration, JSON format for query log, JSONL format, JSON format for nodes, JSON-like dump]"
4,19,"[gds.fastRP.stats.estimate, gds.wcc.stats.estimate, gds.alpha.sllpa.stats.estimate, gds.bellmanFord.stats.estimate, gds.betweenness.stats.estimate, gds.bfs.stats.estimate, gds.knn.stats.estimate, gds.triangleCount.stats.estimate, gds.beta.k1coloring.stats.estimate, gds.kcore.stats, gds.wcc.stats, gds.alpha.sllpa.stats, gds.beta.kmeans.stats, gds.labelPropagation.stats.estimate, gds.randomWalk.stats.estimate, gds.beta.k1coloring.stats, gds.alpha.nodeSimilarity.filtered.stats, gds.alpha.hits.stats, gds.alpha.modularity.stats]"
5,19,"[sum operator, avg operator, boolean operators, DropConstraint operator, Referencing operators, grouping operators, Argument operator, Right-hand side, Left-hand side operator, Left operator, Limit operator, Skip operator, Optional operator, Distinct operator, Distinct operator optimization, Merge operator, Delete operator, Extended set of logical operators, null logical operators]"
6,18,"[Encrypted connections, Connection encryption, Encrypted network communication, TLS encrypted, HTTPS encryption, Network communications encryption, Encryption via StartTLS, Enable encryption for https, SSL enforced, StartTLS encryption, Connection encryption options, Encryption (at rest, in transit), Data encryption, Encryption in security, Setting up encryption, Encrypted at rest, SSL encryption, Server-side encryption]"
7,18,"[Proxy routing, request routing, Configure routing rules, Adding routing rules, Request routing, Configuration for routing, Query routing, Query routing decisions, Querying routes, Dynamic routing strategies, Automatic routing, Traffic routing, Proxy setup, Route read queries, Add routing rule, Server-policy routing, Routing purposes, Routing strategies]"
8,18,"[Persistent Volume Claims (PVC), Persistent volume, Persisting data using Volumes, Persistent Volume, Persistent volume for plugins, persistent volume, persistent volume for backups, Persistent volumes, Persistent Volume Selector, Persistent volume creation, Persistent volume disks, Persistent Volume Claim (PVC), PersistentVolumeClaim object, Persistent Volume Claims in Cypher, Persistent Volume (PV) in Neo4j, configure persistent volume, Persistent volume selector configuration, Persistent volume selector]"
9,18,"[more communities, More communities, Number of communities, Higher resolutions, more communities, Communities, Intermediate communities, identify communities, communities assigned, Connecting communities, Bridge between communities, Connect communities, Well-connected communities, Breaking community, connect to communities, Constructing communities, Meeting local communities, Communities evaluation, Identifying new communities]"


# See what topics drop out from a cluster at various gamma values

In [218]:
def get_cluster_for_theme(property_name, topic):
    topic_df = gds.graph.nodeProperties.stream(g_topics2,
                                               [property_name],
                                               db_node_properties = ["description"],
                                               separate_property_columns=True
                                              )
    topic_cluster_id = topic_df.loc[topic_df['description']==topic, property_name].iloc[0]
    return topic_df.loc[topic_df[property_name]==topic_cluster_id, "description"].to_list()

In [219]:
virtual_relationships_64 = get_cluster_for_theme("leidenGamma64.0", "Virtual relationships")
virtual_relationships_64

['Relationships in Neo4j',
 'Relationship handling in Neo4j',
 'relationship creation in Neo4j',
 'Inverted relationships in Neo4j',
 'Relationship creation in Neo4j',
 'Relationships limitations in Neo4j',
 'existing relationships in Neo4j',
 'nodes and relationships in Neo4j',
 'Node relationships in Neo4j',
 'New relationships in Neo4j',
 'relationship management in Neo4j',
 'Nodes and relationships in Neo4j',
 'Incoming relationships in Neo4j',
 'Outgoing relationships in Neo4j',
 'Connecting relationships in Neo4j',
 'Type relationship in Neo4j',
 'relationship patterns in Neo4j',
 'Friend relationships in Neo4j',
 'Concepts and relationships in Neo4j',
 'Establishing relationships in Neo4j',
 'Contains relationship in Neo4j',
 'Naming relationships in Neo4j',
 'Creator relationship in Neo4j',
 'Authoring relationship in Neo4j',
 'Object-entity relationship in Neo4j',
 'Subcategory relationships in Neo4j',
 'Recreating relationships in Neo4j',
 'Directed relationships in Neo4j',
 

In [220]:
virtual_relationships_128 = get_cluster_for_theme("leidenGamma128.0", "Virtual relationships")
[topic for topic in optional_match_64 if topic not in optional_match_128] 

['subquery execution',
 'Subquery expressions',
 'Top-level subquery',
 'Subquery execution',
 'Subquery evaluation',
 'Subqueries execution order',
 'Subquery executions',
 'Subquery execution failure',
 'Subqueries execution',
 'Execute subqueries',
 'Returning subqueries',
 'unit subqueries',
 'Subquery modification',
 'Subquery variables',
 'Subquery RETURN clauses',
 'CALL subquery differences',
 'COLLECT subqueries']

In [221]:
virtual_relationships_256 = get_cluster_for_theme("leidenGamma256.0", "Virtual relationships")
[topic for topic in virtual_relationships_128 if topic not in virtual_relationships_256] 

['Writing relationships to Neo4j',
 'Neo4j relationship types',
 'Nodes and relationships extraction',
 'Counting relationships in Neo4j',
 'Relationships persisted natively in Neo4j',
 'Relationships in Neo4j',
 'Relationship handling in Neo4j',
 'relationship creation in Neo4j',
 'Inverted relationships in Neo4j',
 'Relationship creation in Neo4j',
 'Relationships limitations in Neo4j',
 'Node relationships in Neo4j',
 'New relationships in Neo4j',
 'Nodes and relationships in Neo4j',
 'Outgoing relationships in Neo4j',
 'Type relationship in Neo4j',
 'relationship patterns in Neo4j',
 'Friend relationships in Neo4j',
 'Concepts and relationships in Neo4j',
 'Contains relationship in Neo4j',
 'Creator relationship in Neo4j',
 'Directed relationships in Neo4j',
 'Describing relationships in Neo4j',
 'Describing relations in Neo4j',
 'Direct relationships in Neo4j',
 '"Broader than" relationship in Neo4j',
 'Interlocking relationships in Neo4j',
 'Association in Neo4j',
 'child relatio

In [222]:
virtual_relationships_512 = get_cluster_for_theme("leidenGamma512.0", "Virtual relationships")
[topic for topic in virtual_relationships_256 if topic not in virtual_relationships_512] 

['relationship management in Neo4j',
 'Directs relationship in Neo4j',
 'relationship file in Neo4j',
 'Actor and movie relationships in Neo4j',
 'Acted in relationship in Neo4j',
 'virtual entities in Neo4j',
 'relationships in Neo4j']

In [223]:
virtual_relationships_1024 = get_cluster_for_theme("leidenGamma1024.0", "Virtual relationships")
[topic for topic in virtual_relationships_512 if topic not in virtual_relationships_1024] 

['Friend relationships in Neo4j', 'Direct relationships in Neo4j']

In [224]:
virtual_relationships_1024

['Acts-In relationship in Neo4j',
 'Relation expression in Neo4j',
 'Virtual relationships',
 'Virtual Relationships in Neo4j',
 'Directed relationships in Neo4j',
 'Counting relationships in Neo4j']

In [225]:
backup_128 = get_cluster_for_theme("leidenGamma128.0", "Backup management")
backup_128

['Backup support',
 'Backup parameters configuration',
 'backup configuration',
 'Backup parameters',
 'backup status',
 'backup data',
 'Backup success',
 'Backup management',
 'Back-up procedure',
 'Backup run',
 'Backup strategies',
 'Backup procedures',
 'Backup planning',
 'Backup not supported',
 'Backup plans for coding',
 'Backup folder management',
 'Backup configuration',
 'Backup process settings',
 'Backup modes',
 'Backup options',
 'Backup server',
 'Backup client',
 'Backup volume',
 'Backup failure',
 'Broken backup risk',
 'Backup command behavior',
 'Backup problems',
 'Backup clients',
 'Backup storage solutions',
 'Backup',
 'Backup failure handling']

In [226]:
backup_256 = get_cluster_for_theme("leidenGamma256.0", "Backup management")
[topic for topic in backup_128 if topic not in backup_256] 

['Broken backup risk']

In [227]:
backup_512 = get_cluster_for_theme("leidenGamma512.0", "Backup management")
[topic for topic in backup_256 if topic not in backup_512] 

['Backup']

In [228]:
backup_1024 = get_cluster_for_theme("leidenGamma1024.0", "Backup management")
[topic for topic in backup_512 if topic not in backup_1024]

['Backup strategies',
 'Backup procedures',
 'Backup planning',
 'Backup not supported',
 'Backup plans for coding',
 'Backup modes',
 'Backup options',
 'Backup support',
 'Backup success',
 'Back-up procedure',
 'Backup run',
 'Backup failure',
 'Backup command behavior',
 'Backup problems']

In [230]:
backup_1024

['Backup folder management',
 'Backup configuration',
 'Backup process settings',
 'Backup server',
 'Backup client',
 'Backup volume',
 'Backup parameters configuration',
 'backup configuration',
 'Backup parameters',
 'backup status',
 'backup data',
 'Backup management',
 'Backup clients',
 'Backup storage solutions',
 'Backup failure handling']

# Write Leiden results to Neo4j

In [231]:
gds.set_database("neo4j")

In [232]:
gds.graph.nodeProperties.write(g_topics2, {"leidenGamma512.0": "leidenId"})

writeMillis                      459
graphName            reweight_topics
nodeProperties            [leidenId]
propertiesWritten              38152
Name: 0, dtype: object

In [233]:
gds.run_cypher("""CREATE CONSTRAINT topic_group_node_key IF NOT EXISTS FOR (n:TopicGroup) REQUIRE n.id IS NODE KEY""")

In [234]:
gds.run_cypher("""
MATCH (t:Topic)
WHERE t.leidenId is not null
WITH t ORDER BY t.description
WITH t.leidenId AS id, collect(t) AS topicNodes, collect(t.description) AS descriptions
MERGE (g:TopicGroup {id:id}) 
SET g.descriptions = descriptions
WITH g, topicNodes
FOREACH (t in topicNodes | MERGE (t)-[:IN_GROUP]->(g))
""")

In [235]:
gds.run_cypher("""
MATCH (g:TopicGroup)
WITH max(g.id) AS maxId
MATCH (t:Topic)
WHERE NOT EXISTS {(t)-[:IN_GROUP]->()}
AND COUNT {(t)<-[:HAS_TOPIC]-()} > 1
WITH maxId, COLLECT(t) AS needsGroup
UNWIND range(0, size(needsGroup)-1) AS idx
WITH maxId, idx, needsGroup[idx] AS t
MERGE (g:TopicGroup {id:maxId + idx + 1})
SET g.descriptions = [t.description]
WITH g, t
MERGE (t)-[:IN_GROUP]->(g)
""")

# Clean up

In [236]:
gds.graph.drop(g_topics)

graphName                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                topics
database                                                                                                                                                                                                                                                                                                                                                                                                                                                                

In [237]:
gds.graph.drop(g_topics2)

graphName                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       reweight_topics
database                                                                