In [1]:
from databaseconnection import DatabaseConnection
gds = DatabaseConnection().get_database_connection()
gds.version()

'2.3.2'

In [19]:
graph_name = 'respondentQuestionAlternativeOnlyStudents'

if gds.graph.exists(graph_name).any():
    gds.graph.drop(gds.graph.get(graph_name))

G, result = gds.graph.project(
    graph_name,
    {
        "Student": {},
        "QuestionAlternative": { "properties": { "position": { "defaultValue": 0 }}}
    },
    "CHOSE_ALT"
)

print(f"The projection took {result['projectMillis']} ms")
print(f"Graph '{G.name()}' node count: {G.node_count()}")
print(f"Graph '{G.name()}' node labels: {G.node_labels()}")

The projection took 56 ms
Graph 'respondentQuestionAlternativeOnlyStudents' node count: 4574
Graph 'respondentQuestionAlternativeOnlyStudents' node labels: ['Student', 'QuestionAlternative']


In [20]:
result = gds.fastRP.mutate(
    G,
    mutateProperty='embedding',
    randomSeed=42,
    embeddingDimension=128,
    iterationWeights=[0.8, 1, 1, 1, 1, 1, 1, 1, 1, 1]
)
print(f"Number of embedding vectors produced: {result['nodePropertiesWritten']}")

Number of embedding vectors produced: 4574


In [10]:
gds.wcc.stream(
    G
)

Unnamed: 0,nodeId,componentId
0,54713,0
1,54777,0
2,54841,0
3,54905,0
4,54968,0
...,...,...
4569,231975,0
4570,232039,0
4571,232103,0
4572,232167,0


In [11]:
gds.nodeSimilarity.stream(
    G
)

NodeSimilarity:   0%|          | 0/100 [00:00<?, ?%/s]

Unnamed: 0,node1,node2,similarity
0,54713,54841,1.0
1,54713,54968,1.0
2,54713,55159,1.0
3,54713,55095,1.0
4,54713,56812,1.0
...,...,...,...
37325,232484,58226,1.0
37326,232484,58162,1.0
37327,232484,58033,1.0
37328,232484,57638,1.0


In [21]:
result = gds.knn.write(
    G,
    topK=2,
    nodeProperties=["embedding"],
    randomSeed=42,
    concurrency=1,
    sampleRate=1.0,
    deltaThreshold=0.0,
    writeRelationshipType="SIMILAR_STUDENTS",
    writeProperty="score",
)

print(f"Relationships produced: {result['relationshipsWritten']}")
print(f"Nodes compared: {result['nodesCompared']}")
print(f"Mean similarity: {result['similarityDistribution']['mean']}")

Relationships produced: 9148
Nodes compared: 4574
Mean similarity: 0.66431341100375
