Install the Neo4j Graph Data Science Python client, `graphdatascience` ([Documentation](https://neo4j.com/docs/graph-data-science/current/))

In [2]:
%%capture
!pip install graphdatascience 

Import our usual suspects

In [3]:
from getpass import getpass

import pandas as pd
from graphdatascience import GraphDataScience

Register at https://sandbox.neo4j.com and create an empty sandbox, then enter its connection details below.

In [4]:
# Capture connection string and auth info.
# Surrounding whitespace from copy/paste is stripped from the URL and user name.
connectionUrl = input("Neo4j Database Url: ").strip()
username = input("User name: ").strip()
# getpass does not echo what is typed, so the password is not stored in the
# notebook's saved cell output the way it is with input().
password = getpass("Password: ")


Neo4j Database Url: bolt://35.171.22.224:7687
User name: neo4j
Password: laundry-balls-dips


In [5]:
# Open a client session against the database using the details entered above.
gds = GraphDataScience(connectionUrl, auth=(username, password))

# Fetch the server's debug info table and show only the row that reports the
# installed GDS version.
sysinfo = gds.debug.sysInfo()
sysinfo.loc[sysinfo['key'].eq('gdsVersion')]


Unnamed: 0,key,value
0,gdsVersion,2.3.1


In [6]:
# Sample purchases as (person name, merchant, amount) rows.
transaction_rows = [
    ('Tom', 'Amazon', 100),
    ('Tom', 'Dustin', 50499),
    ('Tom', 'eBay', 220),
    ('Stefan', 'Amazon', 220),
    ('Stefan', 'Dustin', 399),
    ('Stefan', 'eBay', 1499),
    ('Stefan', 'Bikes.de', 22000),
    ('Kristof', 'Amazon', 423),
    ('Kristof', 'Dustin', 530),
    ('Kristof', 'Hello Fresh', 1050),
    ('Kristof', 'Steam', 230),
    ('Kristof', 'Activision', 783),
    ('Håkan', 'Hello Fresh', 2100),
    ('Håkan', 'Steam', 230),
    ('Håkan', 'Activision', 783),
]

# One dataframe row per purchase; the column order matches the tuples above.
transaction_df = pd.DataFrame(transaction_rows, columns=['name', 'merchant', 'amount'])
transaction_df.head(15)

Unnamed: 0,name,merchant,amount
0,Tom,Amazon,100
1,Tom,Dustin,50499
2,Tom,eBay,220
3,Stefan,Amazon,220
4,Stefan,Dustin,399
5,Stefan,eBay,1499
6,Stefan,Bikes.de,22000
7,Kristof,Amazon,423
8,Kristof,Dustin,530
9,Kristof,Hello Fresh,1050


In [7]:
# Declare `name` as the node key for both labels used in this notebook.
# This is not really required for this small sample.
node_key_constraint = "create constraint if not exists for (p:{label}) require (p.name) is node key"
gds.run_cypher(node_key_constraint.format(label="Person"))
gds.run_cypher(node_key_constraint.format(label="Merchant"))


In [8]:
# Create a graph for (:Person)-[:TRANSACTED_WITH]->(:Merchant).
# `merge` makes the load repeatable: existing people, merchants and
# relationships are matched instead of duplicated, and `amount` is overwritten.
merge_transactions_query = """
    unwind $transactions as transaction
    merge (p:Person{name: transaction['name']})
    merge (m:Merchant{name: transaction['merchant']})
    merge (p)-[tx:TRANSACTED_WITH]->(m)
       set tx.amount = transaction['amount']
    """

# Rows are sent in batches so that a much larger dataframe is not pushed
# through a single query parameter; the sample data fits in one batch.
TRANSACTION_BATCH_SIZE = 10_000

transaction_records = transaction_df.to_dict(orient='records')
for batch_start in range(0, len(transaction_records), TRANSACTION_BATCH_SIZE):
    batch_end = batch_start + TRANSACTION_BATCH_SIZE
    gds.run_cypher(
        merge_transactions_query,
        params={'transactions': transaction_records[batch_start:batch_end]},
    )

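Let's get this party started: project the Person and Merchant nodes and their TRANSACTED_WITH relationships into a named graph in the GDS graph catalogue.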

In [9]:
# Project both node labels and the TRANSACTED_WITH relationships (together
# with their `amount` property) into a catalogue graph named "shopping".
node_projection = ["Person", "Merchant"]
relationship_projection = {"TRANSACTED_WITH": {"properties": "amount"}}

# G is the handle used by the following cells; res holds the projection result.
G, res = gds.graph.project("shopping", node_projection, relationship_projection)


Loading:   0%|          | 0/100 [00:00<?, ?%/s]

In [10]:
# Report the projected graph's name, node count and node labels.
graph_name = G.name()
node_count = G.node_count()
print(f"Graph '{graph_name}' node count: {node_count}")
node_labels = G.node_labels()
print(f"Graph '{graph_name}' node labels: {node_labels}")


Graph 'shopping' node count: 11
Graph 'shopping' node labels: ['Merchant', 'Person']


In [11]:
# Run Node Similarity in write mode on the projected graph, using `amount` as
# the relationship weight. The write settings name the relationship type
# (IS_SIMILAR_TO) and the property (sim_score) used for the results.
similarity_config = {
    'relationshipWeightProperty': 'amount',
    'writeRelationshipType': 'IS_SIMILAR_TO',
    'writeProperty': 'sim_score',
}
res = gds.nodeSimilarity.write(G, **similarity_config)

# Show the run statistics returned by the procedure.
display(res)

preProcessingMillis                                                       3
computeMillis                                                            82
writeMillis                                                             497
postProcessingMillis                                                     -1
nodesCompared                                                             4
relationshipsWritten                                                      8
similarityDistribution    {'p1': 0.009687662124633789, 'max': 0.50738137...
configuration             {'topK': 10, 'writeConcurrency': 4, 'similarit...
Name: 0, dtype: object

In [12]:
# Drop the "shopping" projection from the graph catalogue to free up resources.
# The similarity run above used write mode, and the next cell reads the
# IS_SIMILAR_TO relationships with plain Cypher, so G is not needed afterwards.
G.drop()

graphName                                                         shopping
database                                                             neo4j
memoryUsage                                                               
sizeInBytes                                                             -1
nodeCount                                                               11
relationshipCount                                                       15
configuration            {'relationshipProjection': {'TRANSACTED_WITH':...
density                                                           0.136364
creationTime                           2023-03-20T08:19:59.668081528+00:00
modificationTime                       2023-03-20T08:20:00.164288981+00:00
schema                   {'graphProperties': {}, 'relationships': {'TRA...
schemaWithOrientation    {'graphProperties': {}, 'relationships': {'TRA...
Name: 0, dtype: object

In [13]:
# Remove symmetric relationships: when two people are linked by IS_SIMILAR_TO
# in both directions, delete the relationship that starts at the node with the
# smaller internal id, so only one direction of each mutual pair is kept.
dedupe_similarity_query = """
  MATCH (a:Person)-[r:IS_SIMILAR_TO]->(b:Person) 
    WHERE (b)-[:IS_SIMILAR_TO]->(a) 
    AND   id(a)<id(b)
  DELETE r
"""
gds.run_cypher(dedupe_similarity_query)
