In [18]:
from databaseconnection import DatabaseConnection
# Obtain the client object from the project-local DatabaseConnection helper.
# Every later cell in this notebook talks to the database through `gds`.
gds = DatabaseConnection().get_database_connection()
# Last expression of the cell: displays the version string reported via `gds`
# (presumably the installed GDS library version -- confirm).
gds.version()

'2.3.2'

# Node Similarity Relationship

In [5]:
graph_name = 'studentSimilarity'

# `gds.graph.exists` returns a pandas Series carrying both `graphName` and
# `exists`. Calling `.any()` on it is always truthy (the non-empty graph name
# counts as True), so the previous check attempted the drop even when no such
# projection existed and the cell failed on a fresh database. Read the boolean
# `exists` field instead so the cell is safely re-runnable.
if gds.graph.exists(graph_name)["exists"]:
    gds.graph.drop(gds.graph.get(graph_name))

# Project all `Student` nodes together with the NODESIMILARITY_POS_PROP
# relationships, keeping their `similarity` property so the community
# detection cells below can pass it as `relationshipWeightProperty`.
G, result = gds.graph.project(
    graph_name,
    "Student",
    { "NODESIMILARITY_POS_PROP": { "properties": "similarity" }}
)

# Quick sanity report on the freshly created projection.
print(f"The projection took {result['projectMillis']} ms")
print(f"Graph '{G.name()}' node count: {G.node_count()}")
print(f"Graph '{G.name()}' node labels: {G.node_labels()}")

The projection took 40 ms
Graph 'studentSimilarity' node count: 3039
Graph 'studentSimilarity' node labels: ['Student']


## Louvain

In [7]:
# Stream Louvain community assignments for the projected graph, weighting
# relationships by their `similarity` property. Nothing is written back.
result = gds.louvain.stream(G, relationshipWeightProperty="similarity")
print(result)

# Number of distinct communities found in the streamed assignment.
louvain_community_ids = set(result['communityId'])
print(len(louvain_community_ids))

Louvain:   0%|          | 0/100 [00:00<?, ?%/s]

      nodeId  communityId intermediateCommunityIds
0       7483           46                     None
1       7771           46                     None
2       7829           46                     None
3       7894           46                     None
4       7957           46                     None
...      ...          ...                      ...
3034  231975         1418                     None
3035  232039         1418                     None
3036  232103         1418                     None
3037  232167         1111                     None
3038  232484         1418                     None

[3039 rows x 3 columns]
17


In [8]:
# Run Louvain again in write mode: the community id of every node is stored
# in the node property named by `writeProperty`. The summary returned by the
# call is the cell output.
louvain_write_config = {
    "relationshipWeightProperty": "similarity",
    "writeProperty": "louvainId_similarity_pos_prop",
}
gds.louvain.write(G, **louvain_write_config)

Louvain:   0%|          | 0/100 [00:00<?, ?%/s]

writeMillis                                                            123
nodePropertiesWritten                                                 3039
modularity                                                        0.544842
modularities                       [0.523752960697736, 0.5448421719678359]
ranLevels                                                                2
communityCount                                                          17
communityDistribution    {'p99': 1339, 'min': 1, 'max': 1339, 'mean': 1...
postProcessingMillis                                                     2
preProcessingMillis                                                      0
computeMillis                                                         1337
configuration            {'maxIterations': 10, 'writeConcurrency': 4, '...
Name: 0, dtype: object

In [9]:
# Count students per written Louvain community id, largest community first.
louvain_sizes_query = """
MATCH (s:Student) RETURN s.louvainId_similarity_pos_prop, count(s) as numberOfStudents
ORDER BY numberOfStudents DESC
"""
result = gds.run_cypher(louvain_sizes_query)
print(result)
# Optional diagnostic (disabled): share of students outside the largest community.
# print("Outside majority:", (sum(result['numberOfStudents']) - result['numberOfStudents'][0]) /sum(result['numberOfStudents']))

    s.louvainId_similarity_pos_prop  numberOfStudents
0                              1418              1339
1                               712               977
2                                94               188
3                              1150               139
4                              2232               114
5                              2622               103
6                              2290                98
7                                46                72
8                                70                 1
9                                71                 1
10                              333                 1
11                              657                 1
12                              936                 1
13                             1223                 1
14                             1528                 1
15                             1885                 1
16                             2177                 1


## Label Propagation

In [10]:
# Stream Label Propagation community assignments for the projected graph,
# weighting relationships by their `similarity` property.
result = gds.labelPropagation.stream(G, relationshipWeightProperty="similarity")
print(result)

# Number of distinct communities found in the streamed assignment.
lpa_community_ids = set(result['communityId'])
print(len(lpa_community_ids))

      nodeId  communityId
0       7483        11603
1       7771        11603
2       7829        11603
3       7894        11603
4       7957        11603
...      ...          ...
3034  231975        80325
3035  232039        80325
3036  232103        80325
3037  232167        85138
3038  232484        80325

[3039 rows x 2 columns]
22


In [11]:
# Run Label Propagation in write mode: the community id of every node is
# stored in the node property named by `writeProperty`. The summary returned
# by the call is the cell output.
lpa_write_config = {
    "relationshipWeightProperty": "similarity",
    "writeProperty": "lpaId_similarity_pos_prop",
}
gds.labelPropagation.write(G, **lpa_write_config)

writeMillis                                                             23
nodePropertiesWritten                                                 3039
ranIterations                                                            8
didConverge                                                           True
communityCount                                                          22
communityDistribution    {'p99': 1824, 'min': 1, 'max': 1824, 'mean': 1...
postProcessingMillis                                                     4
preProcessingMillis                                                      0
computeMillis                                                           14
configuration            {'maxIterations': 10, 'writeConcurrency': 4, '...
Name: 0, dtype: object

In [12]:
# Count students per written Label Propagation community id, largest first.
lpa_sizes_query = """
MATCH (s:Student) RETURN s.lpaId_similarity_pos_prop, count(s) as numberOfStudents
ORDER BY numberOfStudents DESC
"""
result = gds.run_cypher(lpa_sizes_query)
print(result)
# Optional diagnostic (disabled): share of students outside the largest community.
# print("Outside majority:", (sum(result['numberOfStudents']) - result['numberOfStudents'][0]) /sum(result['numberOfStudents']))

    s.lpaId_similarity_pos_prop  numberOfStudents
0                         80325              1824
1                         85138               671
2                         60159               159
3                        111535                80
4                         11603                72
5                        158734                67
6                         30593                46
7                        155998                39
8                        162056                30
9                        191927                19
10                        68372                17
11                        66667                 4
12                        65590                 2
13                        12611                 1
14                        12674                 1
15                        34091                 1
16                        56495                 1
17                        78912                 1
18                       102001                 1


## Weakly Connected Components

In [14]:
# Stream Weakly Connected Components membership for the projected graph.
# NOTE(review): WCC appears to use relationship weights only in combination
# with a `threshold` setting; without one this weight property may have no
# effect -- confirm against the GDS WCC documentation.
result = gds.wcc.stream(G, relationshipWeightProperty="similarity")
print(result)

# Number of distinct components found in the streamed assignment.
wcc_component_ids = set(result['componentId'])
print(len(wcc_component_ids))

      nodeId  componentId
0       7483            0
1       7771            0
2       7829            0
3       7894            0
4       7957            0
...      ...          ...
3034  231975           74
3035  232039           74
3036  232103           74
3037  232167           74
3038  232484           74

[3039 rows x 2 columns]
11


In [15]:
# Run WCC in write mode: the component id of every node is stored in the node
# property named by `writeProperty`. The summary returned by the call is the
# cell output.
# NOTE(review): without a `threshold` the weight property may be ignored by
# WCC -- confirm against the GDS WCC documentation.
wcc_write_config = {
    "relationshipWeightProperty": "similarity",
    "writeProperty": "wccId_similarity_pos_prop",
}
gds.wcc.write(G, **wcc_write_config)

writeMillis                                                             35
nodePropertiesWritten                                                 3039
componentCount                                                          11
componentDistribution    {'p99': 2958, 'min': 1, 'max': 2958, 'mean': 2...
postProcessingMillis                                                     5
preProcessingMillis                                                      0
computeMillis                                                            1
configuration            {'jobId': '1da17a33-a2c0-4c64-b177-809620a3e47...
Name: 0, dtype: object

In [16]:
# Count students per written WCC component id, largest component first.
wcc_sizes_query = """
MATCH (s:Student) RETURN s.wccId_similarity_pos_prop, count(s) as numberOfStudents
ORDER BY numberOfStudents DESC
"""
result = gds.run_cypher(wcc_sizes_query)
print(result)
# Optional diagnostic (disabled): share of students outside the largest component.
# print("Outside majority:", (sum(result['numberOfStudents']) - result['numberOfStudents'][0]) /sum(result['numberOfStudents']))

    s.wccId_similarity_pos_prop  numberOfStudents
0                            74              2958
1                             0                72
2                            70                 1
3                            71                 1
4                           333                 1
5                           657                 1
6                           936                 1
7                          1223                 1
8                          1528                 1
9                          1885                 1
10                         2177                 1


---

# kNN Similarity

In [20]:
graph_name = 'studentSimilarityKNN'

# `gds.graph.exists` returns a pandas Series carrying both `graphName` and
# `exists`. Calling `.any()` on it is always truthy (the non-empty graph name
# counts as True), so the previous check attempted the drop even when no such
# projection existed and the cell failed on a fresh database. Read the boolean
# `exists` field instead so the cell is safely re-runnable.
if gds.graph.exists(graph_name)["exists"]:
    gds.graph.drop(gds.graph.get(graph_name))

# Project all `Student` nodes together with the KNN_SIMILARITY_POS_PROP
# relationships, keeping their `score` relationship property.
# Note: this rebinds `G` and `result`, replacing the earlier projection handle.
G, result = gds.graph.project(
    graph_name,
    "Student",
    { "KNN_SIMILARITY_POS_PROP": { "properties": "score" }}
)

# Quick sanity report on the freshly created projection.
print(f"The projection took {result['projectMillis']} ms")
print(f"Graph '{G.name()}' node count: {G.node_count()}")
print(f"Graph '{G.name()}' node labels: {G.node_labels()}")

The projection took 32 ms
Graph 'studentSimilarityKNN' node count: 3039
Graph 'studentSimilarityKNN' node labels: ['Student']
