In [1]:
# Project-local module; its implementation is not visible from this notebook.
from databaseconnection import DatabaseConnection
# `gds` is the handle every later cell calls into (presumably a graphdatascience
# GraphDataScience client -- TODO confirm in databaseconnection.py).
gds = DatabaseConnection().get_database_connection()
# Last expression of the cell, so the version string reported by `gds` is displayed.
gds.version()

'2.3.2'

In [2]:
# Name under which the in-memory projection is registered in the graph catalog.
graph_name = 'respondentQuestionAlternativeOnlyStudents'

# Drop a stale projection with the same name so this cell can be re-run.
# gds.graph.exists returns a pandas Series that carries both `graphName` and
# `exists`; calling .any() on it is always truthy because the non-empty graph
# name counts as True, which made the guard fire (and gds.graph.get raise) even
# when no such graph was in the catalog. Read the `exists` field explicitly.
if gds.graph.exists(graph_name)["exists"]:
    gds.graph.drop(gds.graph.get(graph_name))

# Project Student and QuestionAlternative nodes connected by CHOSE_ALT.
# QuestionAlternative nodes carry `position`, falling back to 0 when the
# property is missing on a node.
G, result = gds.graph.project(
    graph_name,
    {
        "Student": {},
        "QuestionAlternative": { "properties": { "position": { "defaultValue": 0 }}}
    },
    "CHOSE_ALT"
)

# Quick sanity report on what was projected.
print(f"The projection took {result['projectMillis']} ms")
print(f"Graph '{G.name()}' node count: {G.node_count()}")
print(f"Graph '{G.name()}' node labels: {G.node_labels()}")

The projection took 36 ms
Graph 'respondentQuestionAlternativeOnlyStudents' node count: 4574
Graph 'respondentQuestionAlternativeOnlyStudents' node labels: ['Student', 'QuestionAlternative']


## Community detection without embeddings or relationship weights

In [3]:
# Stream Weakly Connected Components over the projection: one row per node
# with the component it was assigned to.
result = gds.wcc.stream(G)
print(result)
# Number of distinct components found.
print(result['componentId'].nunique())

      nodeId  componentId
0      54713            0
1      54777            0
2      54841            0
3      54905            0
4      54968            0
...      ...          ...
4569  231975            0
4570  232039            0
4571  232103            0
4572  232167            0
4573  232484            0

[4574 rows x 2 columns]
859


In [7]:
# Run WCC again in write mode, storing each node's component id in the
# `wccId_pos_prop` node property; the returned summary is the cell output.
gds.wcc.write(G, writeProperty="wccId_pos_prop")

WCC:   0%|          | 0/100 [00:00<?, ?%/s]

writeMillis                                                             37
nodePropertiesWritten                                                 4574
componentCount                                                         859
componentDistribution    {'p99': 1, 'min': 1, 'max': 3302, 'mean': 5.32...
postProcessingMillis                                                     3
preProcessingMillis                                                      0
computeMillis                                                            3
configuration            {'jobId': '73b40afe-db0a-479d-83a8-0fb668144ab...
Name: 0, dtype: object

In [11]:
# Count Student nodes per written WCC component id, largest component first.
wcc_sizes_query = """
MATCH (s:Student) RETURN s.wccId_pos_prop, count(s) as numberOfStudents
ORDER BY numberOfStudents DESC
"""
result = gds.run_cypher(wcc_sizes_query)
print(result)
# Disabled: share of students that fall outside the largest component.
# print("Outside majority:", (sum(result['numberOfStudents']) - result['numberOfStudents'][0]) /sum(result['numberOfStudents']))

    s.wccId_pos_prop  numberOfStudents
0                  0              2958
1                700                72
2               1471                 1
3               1472                 1
4               2568                 1
5                 28                 1
6                307                 1
7                594                 1
8               3063                 1
9               3420                 1
10              3712                 1


In [8]:
# Stream Louvain community assignments for every node in the projection.
result = gds.louvain.stream(G)
print(result)
# Number of distinct Louvain communities.
print(result['communityId'].nunique())

Louvain:   0%|          | 0/100 [00:00<?, ?%/s]

      nodeId  communityId intermediateCommunityIds
0      54713         1513                     None
1      54777         1513                     None
2      54841         1513                     None
3      54905         1513                     None
4      54968         1491                     None
...      ...          ...                      ...
4569  231975         1513                     None
4570  232039         1513                     None
4571  232103         1513                     None
4572  232167         1513                     None
4573  232484         1513                     None

[4574 rows x 3 columns]
1544


In [9]:
# Run Louvain in write mode, storing each node's community id in the
# `louvainId_pos_prop` node property; the returned summary is the cell output.
gds.louvain.write(G, writeProperty="louvainId_pos_prop")

writeMillis                                                             52
nodePropertiesWritten                                                 4574
modularity                                                       -0.483367
modularities                                        [-0.48336661574468154]
ranLevels                                                                1
communityCount                                                        1544
communityDistribution    {'p99': 4, 'min': 1, 'max': 2098, 'mean': 2.96...
postProcessingMillis                                                     2
preProcessingMillis                                                      0
computeMillis                                                           91
configuration            {'maxIterations': 10, 'writeConcurrency': 4, '...
Name: 0, dtype: object

In [10]:
# Count Student nodes per written Louvain community id, largest community first.
louvain_sizes_query = """
MATCH (s:Student) RETURN s.louvainId_pos_prop, count(s) as numberOfStudents
ORDER BY numberOfStudents DESC
"""
result = gds.run_cypher(louvain_sizes_query)
print(result)
# Disabled: share of students that fall outside the largest community.
# print("Outside majority:", (sum(result['numberOfStudents']) - result['numberOfStudents'][0]) /sum(result['numberOfStudents']))

    s.louvainId_pos_prop  numberOfStudents
0                   1513              2097
1                   1512               276
2                   1491               271
3                   1514               167
4                   1515                54
5                    718                24
6                   1510                24
7                    715                23
8                   1511                22
9                   1507                14
10                  1516                12
11                   720                 6
12                   709                 5
13                   719                 4
14                  1503                 4
15                   710                 3
16                   711                 3
17                  1502                 2
18                  2103                 2
19                   722                 1
20                   702                 1
21                   714                 1
22         

In [5]:
# Stream Label Propagation community assignments for every node in the projection.
result = gds.labelPropagation.stream(G)
print(result)
# Number of distinct Label Propagation communities.
print(result['communityId'].nunique())

      nodeId  communityId
0      54713        13086
1      54777        13086
2      54841        13085
3      54905        13086
4      54968        13085
...      ...          ...
4569  231975        13086
4570  232039        13086
4571  232103        13086
4572  232167        13086
4573  232484        13086

[4574 rows x 2 columns]
1544


In [13]:
# Run Label Propagation in write mode, storing each node's community id in the
# `lpaId_pos_prop` node property; the returned summary is the cell output.
gds.labelPropagation.write(G, writeProperty="lpaId_pos_prop")

writeMillis                                                             30
nodePropertiesWritten                                                 4574
ranIterations                                                            2
didConverge                                                           True
communityCount                                                        1544
communityDistribution    {'p99': 5, 'min': 1, 'max': 2097, 'mean': 2.96...
postProcessingMillis                                                     5
preProcessingMillis                                                      0
computeMillis                                                           17
configuration            {'jobId': '87b4f3d5-5f46-4899-ab2f-7e9aa981ca1...
Name: 0, dtype: object

In [14]:
# Count Student nodes per written Label Propagation community id, largest first.
lpa_sizes_query = """
MATCH (s:Student) RETURN s.lpaId_pos_prop, count(s) as numberOfStudents
ORDER BY numberOfStudents DESC
"""
result = gds.run_cypher(lpa_sizes_query)
print(result)
# Disabled: share of students that fall outside the largest community.
# print("Outside majority:", (sum(result['numberOfStudents']) - result['numberOfStudents'][0]) /sum(result['numberOfStudents']))

    s.lpaId_pos_prop  numberOfStudents
0              13086              2096
1              13085               308
2              13083               154
3              13080               106
4              13081                96
5              13082                48
6              13102                36
7              13107                32
8              13106                27
9              13103                26
10              6965                24
11             13104                24
12              6966                18
13              6973                12
14              6978                 9
15              6985                 4
16              6977                 2
17             13110                 2
18              6967                 1
19              6987                 1
20              6968                 1
21             12611                 1
22             12674                 1
23             34091                 1
24             56495     

## FastRP

In [6]:
# FastRP settings gathered in one place; the fixed seed keeps the embeddings
# reproducible between runs.
fastrp_settings = {
    "mutateProperty": 'embedding',
    "randomSeed": 42,
    "embeddingDimension": 128,
    "iterationWeights": [0.8, 1, 1, 1, 1],
}

# Mutate mode: the embedding is added to the in-memory projection `G` under the
# `embedding` node property.
result = gds.fastRP.mutate(G, **fastrp_settings)
print(f"Number of embedding vectors produced: {result['nodePropertiesWritten']}")

Number of embedding vectors produced: 4574
