# Setup

Install the Neo4j Graph Data Science Python client, `graphdatascience` ([Documentation](https://neo4j.com/docs/graph-data-science/current/))

In [None]:
%%capture
!pip install graphdatascience 

Import our usual suspects

In [None]:
from getpass import getpass

import pandas as pd
from graphdatascience import GraphDataScience

Register at https://sandbox.neo4j.com and create an empty sandbox. The cell below asks for its connection URL, user name and password.

In [None]:
# Capture connection string and auth info interactively, so that no credentials
# are hardcoded in the notebook source.
connectionUrl = input("Neo4j Database Url: ")
username = input("User name: ")
# getpass() does not echo what is typed, so the password does not end up
# in the cell output of a saved or shared notebook (input() would store it there).
password = getpass("Password: ")


In [None]:
# Open the client session with the URL and credentials captured above
gds = GraphDataScience(connectionUrl, auth=(username, password))

# Connectivity check: fetch the debug info table and show only the row
# whose key is 'gdsVersion'
sysinfo = gds.debug.sysInfo()
is_version_row = sysinfo['key'] == 'gdsVersion'
sysinfo[is_version_row]


# Graph creation

In [None]:
# Sample data: one (name, merchant, amount) tuple per transaction
transaction_rows = [
    ('Tom', 'Amazon', 100),
    ('Tom', 'Dustin', 50499),
    ('Tom', 'eBay', 220),
    ('Stefan', 'Amazon', 220),
    ('Stefan', 'Dustin', 399),
    ('Stefan', 'eBay', 1499),
    ('Stefan', 'Bikes.de', 22000),
    ('Kristof', 'Amazon', 423),
    ('Kristof', 'Dustin', 530),
    ('Kristof', 'Hello Fresh', 1050),
    ('Kristof', 'Steam', 230),
    ('Kristof', 'Activision', 783),
    ('Håkan', 'Hello Fresh', 2100),
    ('Håkan', 'Steam', 230),
    ('Håkan', 'Activision', 783),
]
transaction_df = pd.DataFrame(transaction_rows, columns=['name', 'merchant', 'amount'])

# All 15 rows fit on screen, so display the complete table
transaction_df.head(15)

In [None]:
# Declare a node key constraint on `name` for both node labels.
# Optional for a sample this small; "if not exists" makes the cell safe to re-run.
person_constraint = "create constraint if not exists for (p:Person) require (p.name) is node key"
merchant_constraint = "create constraint if not exists for (p:Merchant) require (p.name) is node key"

gds.run_cypher(person_constraint)
gds.run_cypher(merchant_constraint)


In [None]:
# Write the dataframe to the database as (:Person)-[:TRANSACTED_WITH]->(:Merchant),
# storing the transaction amount on the relationship.
# Tip: the whole dataframe is sent as one parameter; with more data this would fail,
# so iterate over chunks of the dataframe instead.
load_query = """
    unwind $transactions as transaction
    merge (p:Person{name: transaction['name']})
    merge (m:Merchant{name: transaction['merchant']})
    merge (p)-[tx:TRANSACTED_WITH]->(m)
       set tx.amount = transaction['amount']
    """
transaction_records = transaction_df.to_dict(orient='records')

gds.run_cypher(load_query, params={'transactions': transaction_records})

# Node similarity

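Let's get this party started: project the Person and Merchant nodes with their `TRANSACTED_WITH` relationships into a graph named `shopping`, then run node similarity on that projection.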

In [None]:
# Project both node labels and the TRANSACTED_WITH relationships (keeping their
# `amount` property) into the graph catalogue under the name "shopping".
graph_name = "shopping"
node_projection = ["Person", "Merchant"]
relationship_projection = {"TRANSACTED_WITH": {"properties": "amount"}}

G, res = gds.graph.project(graph_name, node_projection, relationship_projection)


In [None]:
# Sanity check of the projection: node count and node labels, one per line
print(
    f"Graph '{G.name()}' node count: {G.node_count()}",
    f"Graph '{G.name()}' node labels: {G.node_labels()}",
    sep="\n",
)


In [None]:
# Run node similarity on the projection, weighted by `amount`, and write the
# result back to the database as IS_SIMILAR_TO relationships with a `sim_score` property.
node_similarity_config = {
    'relationshipWeightProperty': 'amount',
    'writeRelationshipType': 'IS_SIMILAR_TO',
    'writeProperty': 'sim_score',
}
gds.nodeSimilarity.write(G, **node_similarity_config)

In [None]:
# Drop the projection from the graph catalogue to free up resources.
# The same graph name ("shopping") is projected again further down for the
# embedding + knn section.
# NOTE(review): presumably that later projection would fail if this one still
# existed under the same name — confirm.
G.drop()

In [None]:
# Remove symmetric relationships: where two persons point at each other with
# IS_SIMILAR_TO, delete the relationship that starts at the node with the lower id,
# so that each such pair keeps a single relationship.
dedupe_similarity_query = """
  MATCH (a:Person)-[r:IS_SIMILAR_TO]->(b:Person) 
    WHERE (b)-[:IS_SIMILAR_TO]->(a) 
    AND   id(a)<id(b)
  DELETE r
"""
gds.run_cypher(dedupe_similarity_query)


# Graph embedding + knn 

In [None]:
# Let's make the same projection again
G, res = gds.graph.project(
    "shopping",                 #  Graph name
    ["Person", "Merchant"],   #  Node projection
    {"TRANSACTED_WITH": {"properties": "amount"}}              #  Relationship projection
)

In [None]:
# Optional step, currently disabled: mutate our projection with collapse path to get a mono-partite graph
# gds.beta.collapsePath.mutate(
#     G,
#     pathTemplates=[['TRANSACTED_WITH']],
#     mutateRelationshipType='TRANSACTS_WITH_SAME_MERCHANT'
# )

In [None]:
# Alternative, currently disabled: mutate our projection by computing a FastRP embedding (node2vec is used below instead)
# gds.fastRP.mutate(
#     G,
#     embeddingDimension=3,
#     iterationWeights=[0.0, 1.0, 0.7],
#     #relationshipWeightProperty='amount',
#     #relationshipTypes=['TRANSACTS_WITH_SAME_MERCHANT'],
#     mutateProperty='embedding'
# )

In [None]:
# Compute a 3-dimensional node2vec embedding, weighted by `amount`, and store it
# on the projection (not in the database) as the node property `embedding`.
node2vec_config = {
    'embeddingDimension': 3,
    'relationshipWeightProperty': 'amount',
    'mutateProperty': 'embedding',
}
gds.beta.node2vec.mutate(G, **node2vec_config)

In [None]:
# What do our embeddings look like? Stream the `embedding` property of the
# Person nodes back from the projection so we can inspect it.
df_embeddings = gds.graph.nodeProperty.stream(G, node_properties='embedding', node_labels='Person')

In [None]:
# Do not truncate wide cell contents, so the embedding values are shown in full.
# The fully qualified option key is used because abbreviated keys rely on pattern
# matching and can become ambiguous when pandas adds similarly named options.
pd.set_option('display.max_colwidth', None)
df_embeddings.head(10)

In [None]:
# Run knn over the Person nodes using their `embedding` property (topK=2) and
# write the result back to the database as SIMILAR_EMBEDDING relationships
# with a `sim_score` property.
knn_config = {
    'nodeLabels': ['Person'],
    'nodeProperties': ['embedding'],
    'topK': 2,
    'writeRelationshipType': 'SIMILAR_EMBEDDING',
    'writeProperty': 'sim_score',
}
gds.knn.write(G, **knn_config)

In [None]:
# Drop the projection from the graph catalogue to free up resources.
# The knn results were written with gds.knn.write above; the remaining cells
# query them with Cypher rather than through G.
G.drop()

In [None]:
# Again, remove symmetric relationships: where two persons point at each other with
# SIMILAR_EMBEDDING, delete the relationship that starts at the node with the lower id,
# so that each such pair keeps a single relationship.
dedupe_embedding_query = """
  MATCH (a:Person)-[r:SIMILAR_EMBEDDING]->(b:Person) 
    WHERE (b)-[:SIMILAR_EMBEDDING]->(a) 
    AND   id(a)<id(b)
  DELETE r
"""
gds.run_cypher(dedupe_embedding_query)

In [None]:
# Review: list every similarity relationship of either type, in either direction,
# with its score, ordered by person, counterpart and relationship type.
review_query = """
  MATCH (p:Person)-[r:SIMILAR_EMBEDDING|IS_SIMILAR_TO]-(p2)
  RETURN p.name as person, 
         type(r) as type, 
         r.sim_score as score,
         p2.name as to_person
  ORDER by p.name, p2.name, type(r)
"""
similarity_overview = gds.run_cypher(review_query)
similarity_overview.head(30)