# Setup

Install the Neo4j `graphdatascience` Python client ([Documentation](https://neo4j.com/docs/graph-data-science/current/)).

In [1]:
import os
import pandas as pd
from graphdatascience import GraphDataScience 
from graphdatascience.session import AuraAPICredentials, GdsSessions, DbmsConnectionInfo, AlgorithmCategory
from datetime import timedelta

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Creating Aura API credentials:
# https://neo4j.com/docs/aura/classic/platform/api/authentication/#_creating_credentials
# All secrets are read from the environment; a variable that is not set yields None.
tenant_id, client_id, client_secret = (
    os.getenv(env_name)
    for env_name in ('AURA_API_TENANT_ID', 'AURA_API_CLIENT_ID', 'AURA_API_CLIENT_SECRET')
)
db_uri, db_user, db_pass = (
    os.getenv(env_name)
    for env_name in ('AURA_DB_ADDRESS', 'AURA_DB_USER', 'AURA_DB_PW')
)


In [3]:
# Session manager authenticated with the Aura API credentials read above.
sessions = GdsSessions(
    api_credentials=AuraAPICredentials(
        client_id,
        client_secret,
        tenant_id=tenant_id,
    )
)

# Connection details of the AuraDB instance the session is attached to.
db_connection = DbmsConnectionInfo(uri=db_uri, username=db_user, password=db_pass)

In [22]:
# Ask for a session size estimate for a graph of 20 nodes / 50 relationships
# on which centrality and node-embedding algorithms will be run.
algorithm_categories = [AlgorithmCategory.CENTRALITY, AlgorithmCategory.NODE_EMBEDDING]
memory = sessions.estimate(
    node_count=20,
    relationship_count=50,
    algorithm_categories=algorithm_categories,
)
memory

<SessionMemory.m_8GB: SessionMemoryValue(value='8GB')>

In [24]:
# Obtain the "my-shop-demo" session (get_or_create), sized with the estimate
# above and attached to the AuraDB instance, with a ttl of two hours.
session_ttl = timedelta(hours=2)
gds = sessions.get_or_create(
    session_name="my-shop-demo",
    memory=memory,
    db_connection=db_connection,
    ttl=session_ttl,
)

In [25]:
# Show the sessions visible to these credentials (name, memory, status, expiry, ttl, ...).
sessions.list()

[SessionInfo(id='d7e827fa-c0bb1be6', name='my-shop-demo', memory=SessionMemoryValue(value='8GB'), instance_id='d7e827fa', status='Ready', expiry_date=datetime.datetime(2025, 2, 20, 11, 45, 56, tzinfo=datetime.timezone.utc), created_at=datetime.datetime(2025, 1, 21, 11, 45, 56, tzinfo=datetime.timezone.utc), user_id='e1bfecbb-149b-4689-a84a-0ae712f4d49f', cloud_location=CloudLocation(provider='gcp', region='us-central1'), ttl=datetime.timedelta(seconds=7140), errors=None)]

# Graph creation

In [7]:
# Sample purchases, one (name, merchant, amount) row per person/merchant pair.
transaction_rows = [
    ('Tom', 'Amazon', 100),
    ('Tom', 'Dustin', 50499),
    ('Tom', 'eBay', 220),
    ('Stefan', 'Amazon', 220),
    ('Stefan', 'Dustin', 399),
    ('Stefan', 'eBay', 1499),
    ('Stefan', 'Bikes.de', 22000),
    ('Kristof', 'Amazon', 423),
    ('Kristof', 'Dustin', 530),
    ('Kristof', 'Hello Fresh', 1050),
    ('Kristof', 'Steam', 230),
    ('Kristof', 'Activision', 783),
    ('Håkan', 'Hello Fresh', 2100),
    ('Håkan', 'Steam', 230),
    ('Håkan', 'Activision', 783),
]
transaction_df = pd.DataFrame(transaction_rows, columns=['name', 'merchant', 'amount'])
transaction_df.head(15)

Unnamed: 0,name,merchant,amount
0,Tom,Amazon,100
1,Tom,Dustin,50499
2,Tom,eBay,220
3,Stefan,Amazon,220
4,Stefan,Dustin,399
5,Stefan,eBay,1499
6,Stefan,Bikes.de,22000
7,Kristof,Amazon,423
8,Kristof,Dustin,530
9,Kristof,Hello Fresh,1050


In [8]:
# Optional for a sample this small: node key constraints on `name` for
# Person and Merchant nodes ("if not exists" makes the cell safe to re-run).

# Select the target database through the client's setter. Assigning to
# `gds.database` would only rebind the attribute that normally holds the
# `database()` accessor instead of changing where queries are sent.
gds.set_database('neo4j')
gds.run_cypher("create constraint if not exists for (p:Person) require (p.name) is node key")
gds.run_cypher("create constraint if not exists for (p:Merchant) require (p.name) is node key")


In [15]:
# Write (:Person)-[:TRANSACTED_WITH]->(:Merchant) to the database, storing the
# amount on the relationship. MERGE keeps the cell idempotent on re-run.
# Tip: the whole dataframe is sent as a single parameter; with more data,
# iterate over chunks of the dataframe instead.
merge_query = """
    unwind $transactions as transaction
    merge (p:Person{name: transaction['name']})
    merge (m:Merchant{name: transaction['merchant']})
    merge (p)-[tx:TRANSACTED_WITH]->(m)
       set tx.amount = transaction['amount']
    """
transaction_records = transaction_df.to_dict(orient='records')
gds.run_cypher(merge_query, params={'transactions': transaction_records})

# Node similarity

In [26]:
# Project every Person-TRANSACTED_WITH->Merchant pattern into the session as
# "transaction_graph", carrying node labels, the relationship type and the
# `amount` relationship property.
projection_query = """
    match (p:Person)-[r:TRANSACTED_WITH]->(m:Merchant)
    with
      p AS source, r AS rel, m AS target
    return
    gds.graph.project.remote(source, target, {
      sourceNodeLabels: labels(source),
      targetNodeLabels: labels(target),
      relationshipType: type(rel),
      relationshipProperties: rel{.amount}
    })
    """
G, result = gds.graph.project("transaction_graph", projection_query)
str(G)

'Graph(name=transaction_graph, node_count=11, relationship_count=15)'

In [29]:
# Stream PageRank scores (nodeId, score) for the projected graph.
gds.pageRank.stream(G)

Unnamed: 0,nodeId,score
0,0,0.15
1,4,0.249875
2,5,0.249875
3,6,0.224375
4,1,0.15
5,7,0.181875
6,2,0.15
7,8,0.218
8,9,0.218
9,10,0.218


In [27]:
# Stream pairwise node similarity (node1, node2, similarity), weighted by the
# transaction amount.
gds.nodeSimilarity.stream(G, relationshipWeightProperty='amount')

Unnamed: 0,node1,node2,similarity
0,0,2,0.011841
1,0,1,0.009688
2,1,2,0.023345
3,1,0,0.009688
4,2,3,0.507378
5,2,1,0.023345
6,2,0,0.011841
7,3,2,0.507378


In [30]:
# Same weighted similarity as the streamed run, but written out as
# IS_SIMILAR_TO relationships carrying the score in `sim_score`.
similarity_write_config = {
    'relationshipWeightProperty': 'amount',
    'writeRelationshipType': 'IS_SIMILAR_TO',
    'writeProperty': 'sim_score',
}
gds.nodeSimilarity.write(G, **similarity_write_config)

preProcessingMillis                                                       0
computeMillis                                                             5
writeMillis                                                     2652.682066
postProcessingMillis                                                      0
nodesCompared                                                             4
relationshipsWritten                                                      8
similarityDistribution    {'min': 0.0, 'p5': 0.0, 'max': 5.0872912848509...
configuration             {'writeProperty': 'sim_score', 'writeRelations...
Name: 0, dtype: object

In [31]:
# Drop the projection from the graph catalog to free up session resources.
# The call returns the catalog entry of the dropped graph (name, counts, schema, ...).
G.drop()

graphName                                                transaction_graph
database                                                             neo4j
databaseLocation                                                    remote
memoryUsage                                                               
sizeInBytes                                                             -1
nodeCount                                                               11
relationshipCount                                                       15
configuration            {'readConcurrency': 4, 'jobId': '4e0a5500-fb74...
density                                                           0.136364
creationTime                           2025-01-21T11:49:19.632633287+00:00
modificationTime                       2025-01-21T11:49:19.632633287+00:00
schema                   {'graphProperties': {}, 'nodes': {'Merchant': ...
schemaWithOrientation    {'graphProperties': {}, 'nodes': {'Merchant': ...
Name: 0, dtype: object

In [32]:
# Remove symmetric relationships: the similarity write produced both a->b and
# b->a for each pair, so keep only one direction per pair.
# elementId() replaces the deprecated id(); it is used here purely as a
# tie-breaker that picks one of the two directions per pair.
gds.run_cypher("""
  MATCH (a:Person)-[r:IS_SIMILAR_TO]->(b:Person)
    WHERE (b)-[:IS_SIMILAR_TO]->(a)
    AND   elementId(a) < elementId(b)
  DELETE r
""")


## When all is said and done
Delete the session 

In [34]:
# Delete the session created above; the call returned True in this run.
gds.delete()

True

In [35]:
# Confirm the cleanup: the session list is expected to be empty now.
sessions.list()

[]