Create a `.env` file in the same directory as this notebook and add the following lines:

```env
NEO4J_USERNAME=your_username
NEO4J_PASSWORD=your_password
NEO4J_ENDPOINT=your_endpoint
```

Install the Python dependencies.

In [None]:
pip install -r requirements.txt

Connect to the database.

In [None]:
from databaseconnection import DatabaseConnection
# Build the project's connection helper, then ask it for the client object
# that every later cell uses as `gds`.
connection_helper = DatabaseConnection()
gds = connection_helper.get_database_connection()

# Last expression of the cell: its value is displayed as the cell output.
gds.version()

The cell above should show a version number if the connection to the database was successful.

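Reference: the FastRP and kNN example notebook from the Neo4j Graph Data Science client repository: https://github.com/neo4j/graph-data-science-client/blob/main/examples/fastrp-and-knn.ipynb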

In [None]:
# Node labels to load into the in-memory graph.
# - Respondent nodes are projected with no extra configuration.
# - QuestionAlternative nodes are projected with their `position` property,
#   using 0 as the default value.
node_projection = dict(
    Respondent=dict(),
    QuestionAlternative=dict(
        properties=dict(
            position=dict(defaultValue=0),
        ),
    ),
)

# Only relationships of type CHOSE are projected.
relationship_projection = "CHOSE"

In [None]:
graph_name = "respondentAnswer"

# Projected graphs live in the GDS graph catalog on the database side, so a
# projection left over from an earlier run of this notebook would make
# gds.graph.project fail with a "graph already exists" error. Drop it first so
# this cell can be re-run (and survives Restart Kernel -> Run All).
if gds.graph.exists(graph_name)["exists"]:
    gds.graph.get(graph_name).drop()

# Project the nodes and relationships selected in the previous cell into an
# in-memory graph; `G` is the graph handle, `result` the projection statistics.
G, result = gds.graph.project(graph_name, node_projection, relationship_projection)

print(f"The projection took {result['projectMillis']} ms")
print(f"Graph '{G.name()}' node count: {G.node_count()}")
print(f"Graph '{G.name()}' node labels: {G.node_labels()}")

In [None]:
# FastRP configuration, collected in one place so it is easy to tune.
# The embeddings are stored on the in-memory graph `G` under the node
# property "embedding"; the random seed is fixed at 42.
fastrp_settings = {
    "mutateProperty": "embedding",
    "randomSeed": 42,
    "embeddingDimension": 128,
    # Ten iteration weights: 0.8 for the first, 1 for the remaining nine.
    "iterationWeights": [0.8] + [1] * 9,
}

result = gds.fastRP.mutate(G, **fastrp_settings)
print(f"Number of embedding vectors produced: {result['nodePropertiesWritten']}")

In [None]:
# kNN configuration: compare nodes by their "embedding" property, keep the
# 2 nearest neighbours per node, and write the result back to the database
# as SIMILAR relationships carrying a "score" property.
# randomSeed is fixed and concurrency is 1, presumably to keep runs
# reproducible (TODO confirm against the GDS kNN documentation).
knn_settings = dict(
    topK=2,
    nodeProperties=["embedding"],
    randomSeed=42,
    concurrency=1,
    sampleRate=1.0,
    deltaThreshold=0.0,
    writeRelationshipType="SIMILAR",
    writeProperty="score",
)
result = gds.knn.write(G, **knn_settings)

# Summary of the kNN run.
print(f"Relationships produced: {result['relationshipsWritten']}")
print(f"Nodes compared: {result['nodesCompared']}")
print(f"Mean similarity: {result['similarityDistribution']['mean']}")

In [None]:
# Respondent pairs linked by a SIMILAR relationship with a positive score,
# ordered by descending similarity and then by the two respondent ids.
similar_respondents_query = """
        MATCH (p1:Respondent)-[r:SIMILAR]->(p2:Respondent)
        WHERE r.score > 0.0
        RETURN p1.id AS person1, p2.id AS person2, r.score AS similarity
        ORDER BY similarity DESCENDING, person1, person2
    """

# Last expression of the cell: the query result is displayed as the output.
gds.run_cypher(similar_respondents_query)

In [None]:
# Respondents whose answered questions are listed below; edit this list to
# inspect a different set of respondents.
respondent_ids = [
    "55134294-48bc-4e6a-8fa5-41d7ae3b7a70",
    "b145c256-954c-4108-b1f5-a8eb19ee3e50",
]

# The ids are passed as a query parameter instead of being spliced into the
# Cypher text, so changing them never requires editing the query itself.
result = gds.run_cypher(
    """
    MATCH (r:Respondent)-[:HAS_ANSWERED]-(q:Question)
    WHERE r.id IN $respondent_ids
    RETURN r.id AS respondent, q.name AS question
    """,
    params={"respondent_ids": respondent_ids},
)

# Print one line per returned row: row number, respondent id, question name.
# The two columns are walked in lockstep by position.
for ix, (respondent, question) in enumerate(zip(result["respondent"], result["question"])):
    print(ix, respondent, question)

In [None]:
# Compare every row of the previous query result with every other row and
# report the pairs whose question is the same. Each matching pair is reported
# twice, once as (i, j) and once as (j, i).
questions = result['question']
for i, q in enumerate(questions):
    for j, p in enumerate(questions):
        # Skip comparing a row with itself; only report equal questions.
        if i != j and q == p:
            print(f"""
            {i} and {j} have the same question:
            Question: {q}
            """)