# Aura Graph Analytics
Documentation: https://neo4j.com/docs/aura/graph-analytics/

## Setup

1. Install neo4j graphdatascience client, see docs https://neo4j.com/docs/graph-data-science/current/
2. Install neo4j driver, see docs https://neo4j.com/docs/api/python-driver/current/
3. Install python graph visualization https://neo4j.com/docs/nvl-python/preview/
4. Install pandas (if it is not already there)

In [36]:
import os
from dotenv import load_dotenv
import pandas as pd
from graphdatascience import GraphDataScience
from graphdatascience.session import AuraAPICredentials, GdsSessions, DbmsConnectionInfo, AlgorithmCategory, SessionMemory, CloudLocation
from datetime import timedelta
from neo4j import Query, GraphDatabase, RoutingControl, Result
from neo4j_viz.neo4j import from_neo4j
from neo4j_viz.gds import from_gds

In [37]:
# Read the Aura API credentials and the database connection settings from a local .env file.
# How to create Aura API credentials: https://neo4j.com/docs/aura/classic/platform/api/authentication/#_creating_credentials
load_dotenv('.env', override=True)

# Aura API credentials (used to manage Graph Analytics sessions)
project_id, client_id, client_secret = (
    os.getenv(key)
    for key in ('AURA_API_PROJECT_ID', 'AURA_API_CLIENT_ID', 'AURA_API_CLIENT_SECRET')
)

# AuraDB connection details (any of these is None when the variable is not set)
db_uri, db_user, db_pass, db_name = (
    os.getenv(key)
    for key in ('AURA_DB_ADDRESS', 'AURA_DB_USER', 'AURA_DB_PW', 'AURA_DB_NAME')
)


## Create Aura Analytics compute session
Since this can take a minute or two, we do it first

In [38]:
# Entry point for managing Graph Analytics sessions, authenticated with the Aura API credentials.
api_credentials = AuraAPICredentials(client_id, client_secret, project_id=project_id)
sessions = GdsSessions(api_credentials=api_credentials)

# Connection details of the AuraDB instance that the first session is attached to.
db_connection = DbmsConnectionInfo(uri=db_uri, username=db_user, password=db_pass)

In [39]:
# Estimate the session size for the expected graph dimensions and algorithm categories.
planned_categories = [AlgorithmCategory.CENTRALITY, AlgorithmCategory.NODE_EMBEDDING]
memory = sessions.estimate(
    node_count=20, relationship_count=50, algorithm_categories=planned_categories
)
memory

<SessionMemory.m_8GB: SessionMemoryValue(value='8GB')>

In [40]:
# Reuse the session called "my-shop-demo" if there is one, otherwise create it,
# attached to the database described by db_connection.
session_settings = dict(
    session_name="my-shop-demo",
    memory=SessionMemory.m_4GB,  # the `memory` estimate computed earlier can be used here instead
    db_connection=db_connection,
    ttl=timedelta(hours=2),
)
gds = sessions.get_or_create(**session_settings)

In [41]:
# Show the sessions that exist for these credentials; the new session should be listed.
sessions.list()

[SessionInfo(id='1a9ceb60-bfe9d1d6', name='my-shop-demo', memory=SessionMemoryValue(value='4GB'), instance_id='1a9ceb60', status='Ready', expiry_date=datetime.datetime(2025, 6, 4, 11, 16, 50, tzinfo=datetime.timezone.utc), created_at=datetime.datetime(2025, 5, 28, 11, 16, 50, tzinfo=datetime.timezone.utc), user_id='7d5345b3-cf8f-45d8-ba50-898ba3b7e18b', cloud_location=CloudLocation(provider='azure', region='uksouth'), ttl=datetime.timedelta(seconds=7140), errors=None)]

In [42]:
# Plain Neo4j driver for the same database; used later to fetch a graph result for visualization.
db_auth = (db_user, db_pass)
driver = GraphDatabase.driver(db_uri, auth=db_auth)
# Check up front that the database can actually be reached.
driver.verify_connectivity()

### Reminder
Now is a good time to show Aura Console / Graph Analytics - Sessions

## Graph creation

In [43]:
# Sample purchases, one (customer name, merchant, amount) record per row.
transaction_rows = [
    ('Tom', 'Amazon', 100),
    ('Tom', 'Dustin', 50499),
    ('Tom', 'eBay', 220),
    ('Stefan', 'Amazon', 220),
    ('Stefan', 'Dustin', 399),
    ('Stefan', 'eBay', 1499),
    ('Stefan', 'Bikes.de', 22000),
    ('Kristof', 'Amazon', 423),
    ('Kristof', 'Dustin', 530),
    ('Kristof', 'Hello Fresh', 1050),
    ('Kristof', 'Steam', 230),
    ('Kristof', 'Activision', 783),
    ('Håkan', 'Hello Fresh', 2100),
    ('Håkan', 'Steam', 230),
    ('Håkan', 'Activision', 783),
]
transaction_df = pd.DataFrame(transaction_rows, columns=['name', 'merchant', 'amount'])
transaction_df.head(15)

Unnamed: 0,name,merchant,amount
0,Tom,Amazon,100
1,Tom,Dustin,50499
2,Tom,eBay,220
3,Stefan,Amazon,220
4,Stefan,Dustin,399
5,Stefan,eBay,1499
6,Stefan,Bikes.de,22000
7,Kristof,Amazon,423
8,Kristof,Dustin,530
9,Kristof,Hello Fresh,1050


In [44]:
# This cell is not really required for this small sample.

# Point the session's Cypher queries at the configured database. Assigning to
# `gds.database` would only rebind the client's `database()` accessor to a string
# and leave the target database unchanged, so the setter is used instead.
# When AURA_DB_NAME is not set, the client's default database is kept.
if db_name:
    gds.set_database(db_name)

# Node key constraints on the properties that the MERGE statements match on.
gds.run_cypher(
    """//cypher 
    create constraint if not exists for (p:Person) require (p.name) is node key
    """)
gds.run_cypher(
    """//cypher
    create constraint if not exists for (p:Merchant) require (p.name) is node key
    """)

In [46]:
# Write the sample into the database as (:Person)-[:TRANSACTED_WITH {amount}]->(:Merchant).
# Tip: with more data a single call like this would fail; iterate over chunks of the dataframe instead.
transaction_records = transaction_df.to_dict(orient='records')
load_transactions_query = """//cypher
    unwind $transactions as transaction
    merge (p:Person{name: transaction['name']})
    merge (m:Merchant{name: transaction['merchant']})
    merge (p)-[tx:TRANSACTED_WITH]->(m)
       set tx.amount = transaction['amount']
    """
gds.run_cypher(load_transactions_query, params={'transactions': transaction_records})

In [47]:
# Let's have a look at the graph: fetch up to 100 transaction triples as a graph
# result and render them.
graph = driver.execute_query(
    """//cypher
    match (p:Person)-[tx:TRANSACTED_WITH]->(m:Merchant)
    return p, tx, m limit 100
    """,
    database_=db_name,
    # Driver options all end with an underscore. Any other keyword (such as the
    # previous `routing_control`) is passed to the server as a query parameter,
    # so the read routing was never applied.
    routing_=RoutingControl.READ,
    result_transformer_=Result.graph,
)
# Color the nodes by their caption and draw the graph.
VG = from_neo4j(graph)
VG.color_nodes(field='caption')
VG.render()

## Graph projection (remote)

In [48]:
# Project every Person -> Merchant transaction from the database into the session,
# keeping node labels, the relationship type and the `amount` relationship property.
projection_query = """//cypher
      match (p:Person)-[r:TRANSACTED_WITH]->(m:Merchant)
      with
        p AS source, r AS rel, m AS target
      return
      gds.graph.project.remote(source, target, {
        sourceNodeLabels: labels(source),
        targetNodeLabels: labels(target),
        relationshipType: type(rel),
        relationshipProperties: rel{.amount}
      })
      """
G, result = gds.graph.project(graph_name="transaction_graph", query=projection_query)
str(G)

[#F3C2]  _: <CONNECTION> error: Failed to write data to connection IPv4Address(('1a9ceb60-bfe9d1d6-gds.production-orch-0477.neo4j.io', 7687)) (ResolvedIPv4Address(('51.140.120.148', 7687))): SSLError(5, '[SYS] unknown error (_ssl.c:2393)')
[#F3C1]  _: <CONNECTION> error: Failed to write data to connection ResolvedIPv4Address(('51.140.120.148', 7687)) (ResolvedIPv4Address(('51.140.120.148', 7687))): SSLError(5, '[SYS] unknown error (_ssl.c:2393)')
Unable to retrieve routing information


 Graph creation from Triplets:   0%|          | 0/100 [00:00<?, ?%/s]

'Graph(name=transaction_graph, node_count=11, relationship_count=15)'

In [49]:
# Stream the pairwise node similarity scores of the projected graph without storing them.
gds.nodeSimilarity.stream(G)

Unnamed: 0,node1,node2,similarity
0,0,1,0.75
1,0,2,0.333333
2,1,0,0.75
3,1,2,0.285714
4,2,3,0.6
5,2,0,0.333333
6,2,1,0.285714
7,3,2,0.6


In [50]:
# Write pairs scoring at least 0.5 back to the database as IS_SIMILAR_TO
# relationships carrying the score in `sim_score`.
similarity_write_config = {
    'writeRelationshipType': 'IS_SIMILAR_TO',
    'similarityCutoff': 0.5,
    'writeProperty': 'sim_score',
}
gds.nodeSimilarity.write(G, **similarity_write_config)

 Node Similarity:   0%|          | 0/100 [00:00<?, ?%/s]

preProcessingMillis                                                       0
computeMillis                                                            67
writeMillis                                                      676.073074
postProcessingMillis                                                      0
nodesCompared                                                             4
relationshipsWritten                                                      4
similarityDistribution    {'min': 0.0, 'p5': 0.0, 'max': 5.0872912848509...
configuration             {'writeProperty': 'sim_score', 'writeRelations...
Name: 0, dtype: object

In [51]:
# Drop the projection to free up resources in the session.
# The returned row describes the graph that was dropped.
G.drop()

graphName                                                transaction_graph
database                                                             neo4j
databaseLocation                                                    remote
memoryUsage                                                               
sizeInBytes                                                             -1
nodeCount                                                               11
relationshipCount                                                       15
configuration            {'readConcurrency': 4, 'jobId': '3d6fe3a7-c7b3...
density                                                           0.136364
creationTime                           2025-05-28T11:22:27.753392699+00:00
modificationTime                       2025-05-28T11:22:27.753392699+00:00
schema                   {'graphProperties': {}, 'nodes': {'Merchant': ...
schemaWithOrientation    {'graphProperties': {}, 'nodes': {'Merchant': ...
Name: 0, dtype: object

In [52]:
# Remove symmetric relationships: similar pairs were written in both directions,
# so delete one relationship of each pair and keep the other.
# elementId() replaces the deprecated id(); it only serves as a stable tie-breaker
# to pick which direction of a pair gets deleted.
gds.run_cypher("""//cypher
  MATCH (a:Person)-[r:IS_SIMILAR_TO]->(b:Person) 
    WHERE (b)-[:IS_SIMILAR_TO]->(a) 
    AND   elementId(a) < elementId(b)
  DELETE r
""")


## Clean up compute resources
Delete the session when we are done with it

In [53]:
# Delete the session attached to the database; the call returned True in the recorded run.
gds.delete()

True

In [54]:
# Confirm the clean-up: the list should be empty once the session is deleted.
sessions.list()

[]

In [None]:
# You can also delete the session by name or by id
# sessions.delete(session_name="my-shop-demo")

## Run algorithms from any source

In [55]:
# Second session without a database connection: no db_connection is passed,
# a cloud location is given instead.
standalone_location = CloudLocation(provider="azure", region="uksouth")
gds_no_db = sessions.get_or_create(
    session_name="my-shop-demo-no-db",
    memory=SessionMemory.m_4GB,
    cloud_location=standalone_location,
    ttl=timedelta(hours=2),
)

In [None]:
# List the sessions again; the standalone session should now appear.
sessions.list()

In [56]:
# Recap of the sample transactions; the graph in this section is built from this DataFrame.
transaction_df

Unnamed: 0,name,merchant,amount
0,Tom,Amazon,100
1,Tom,Dustin,50499
2,Tom,eBay,220
3,Stefan,Amazon,220
4,Stefan,Dustin,399
5,Stefan,eBay,1499
6,Stefan,Bikes.de,22000
7,Kristof,Amazon,423
8,Kristof,Dustin,530
9,Kristof,Hello Fresh,1050


In [57]:
#keys = transaction_df['name'].drop_duplicates().to_list() + transaction_df['merchant'].drop_duplicates().to_list()
# Scratch check: distinct customer names, each tagged with the 'Person' label.
# The column is named `label` (previously misspelled `lablel`) to match node_map.
test = transaction_df['name'].drop_duplicates().rename('name').to_frame().assign(label='Person')
test

Unnamed: 0,name,lablel
0,Tom,Person
3,Stefan,Person
7,Kristof,Person
12,Håkan,Person


In [58]:
# One row per distinct node name: customers (label 'Person') first, then merchants
# (label 'Merchant'), renumbered 0..n-1.
person_names = transaction_df['name'].drop_duplicates().rename('name').to_frame()
merchant_names = transaction_df['merchant'].drop_duplicates().rename('name').to_frame()

labelled_frames = [
    person_names.assign(label='Person'),
    merchant_names.assign(label='Merchant'),
]
node_map = pd.concat(labelled_frames).reset_index(drop=True)
node_map.head(15)


Unnamed: 0,name,label
0,Tom,Person
1,Stefan,Person
2,Kristof,Person
3,Håkan,Person
4,Amazon,Merchant
5,Dustin,Merchant
6,eBay,Merchant
7,Bikes.de,Merchant
8,Hello Fresh,Merchant
9,Steam,Merchant


In [60]:
# Expose the positional index as an explicit `idx` column. The relationship table
# reads `idx` after merging node_map with the transactions, where the original
# index is no longer available.
node_map['idx'] = node_map.index
#node_map[ node_map['label'] =='Person' ].merge( transaction_df, on=['name'], how='right')

In [None]:
#node_map[ node_map['label'] =='Merchant' ].merge( transaction_df[['merchant']].rename(columns={'merchant':'name'}), on=['name'], how='right')

In [61]:
# Node table: one id per row of node_map, together with its label.
nodes = pd.DataFrame({"nodeId": node_map.index, "labels": node_map['label'].tolist()})

# Resolve each transaction's customer and merchant to its node id. Filtering by label
# first keeps a customer and a merchant with the same name apart.
# NOTE(review): both lists rely on the right join returning rows in transaction order
# so that source and target ids line up - confirm if the pandas version changes.
person_nodes = node_map[node_map['label'] == 'Person']
merchant_nodes = node_map[node_map['label'] == 'Merchant']
transaction_merchants = transaction_df[['merchant']].rename(columns={'merchant': 'name'})

source_ids = person_nodes.merge(transaction_df, on=['name'], how='right')['idx'].tolist()
target_ids = merchant_nodes.merge(transaction_merchants, on=['name'], how='right')['idx'].tolist()

# Relationship table: one BOUGHT_FROM relationship per transaction.
relationships = pd.DataFrame({
    "sourceNodeId": source_ids,
    "targetNodeId": target_ids,
    "relationshipType": ["BOUGHT_FROM"] * len(transaction_df),
})

In [62]:
# Upload the node and relationship DataFrames to the standalone session as a named graph.
dataframe_graph_name = "my-graph-from-dataframe"
G = gds_no_db.graph.construct(dataframe_graph_name, nodes, relationships)

Uploading Nodes:   0%|          | 0/11 [00:00<?, ?Records/s]

Uploading Relationships:   0%|          | 0/15 [00:00<?, ?Records/s]

In [63]:
# Stream the pairwise node similarity scores for the graph built from the DataFrames.
gds_no_db.nodeSimilarity.stream(G)

Unnamed: 0,node1,node2,similarity
0,0,1,0.75
1,0,2,0.333333
2,1,0,0.75
3,1,2,0.285714
4,2,3,0.6
5,2,0,0.333333
6,2,1,0.285714
7,3,2,0.6


In [64]:
# No database is attached, so add the results to the in-memory graph instead: pairs scoring
# at least 0.5 become IS_SIMILAR_TO relationships with the score in `sim_score`.
similarity_mutate_config = {
    'similarityCutoff': 0.5,
    'mutateRelationshipType': 'IS_SIMILAR_TO',
    'mutateProperty': 'sim_score',
}
gds_no_db.nodeSimilarity.mutate(G, **similarity_mutate_config)

preProcessingMillis                                                       0
computeMillis                                                             5
mutateMillis                                                            107
postProcessingMillis                                                      0
nodesCompared                                                             4
relationshipsWritten                                                      4
similarityDistribution    {'min': 0.5999984741210938, 'p5': 0.5999984741...
configuration             {'mutateProperty': 'sim_score', 'jobId': '4167...
Name: 0, dtype: object

In [65]:
# Store a PageRank score on every node of the in-memory graph as `pagerank_score`.
gds_no_db.pageRank.mutate(G, mutateProperty='pagerank_score')

ranIterations                                                            10
didConverge                                                            True
centralityDistribution    {'min': 0.17656517028808594, 'max': 0.24733448...
preProcessingMillis                                                       1
computeMillis                                                           113
postProcessingMillis                                                      8
mutateMillis                                                              2
nodePropertiesWritten                                                    11
configuration             {'mutateProperty': 'pagerank_score', 'jobId': ...
Name: 0, dtype: object

In [66]:
# Build a visualization of the in-memory graph, sizing nodes by their PageRank score.
VG = from_gds(gds=gds_no_db, G=G, size_property="pagerank_score")

# Caption every node with its first label and every relationship with its type.
for viz_node in VG.nodes:
    viz_node.caption = viz_node.properties['labels'][0]
for viz_rel in VG.relationships:
    viz_rel.caption = viz_rel.properties['relationshipType']

# Color by caption, i.e. one color per node label, then draw.
VG.color_nodes(field='caption')
VG.render()

In [67]:
# Drop the projection to free up resources in the standalone session.
# The returned row describes the graph that was dropped.
G.drop()

[#F459]  _: <CONNECTION> error: Failed to write data to connection ResolvedIPv4Address(('51.140.120.148', 7687)) (ResolvedIPv4Address(('51.140.120.148', 7687))): SSLError(5, '[SYS] unknown error (_ssl.c:2393)')
Unable to retrieve routing information
Unable to connect to the GDS Session. Trying again...


graphName                                          my-graph-from-dataframe
database                                                             neo4j
databaseLocation                                                      none
memoryUsage                                                               
sizeInBytes                                                             -1
nodeCount                                                               11
relationshipCount                                                       19
configuration            {'readConcurrency': 4, 'jobId': 'cfdd3c86-b419...
density                                                           0.172727
creationTime                           2025-05-28T11:30:05.635453992+00:00
modificationTime                       2025-05-28T11:30:40.042474025+00:00
schema                   {'graphProperties': {}, 'nodes': {'Merchant': ...
schemaWithOrientation    {'graphProperties': {}, 'nodes': {'Merchant': ...
Name: 0, dtype: object

## Clean up compute resources

In [68]:
# Delete the standalone session; the call returned True in the recorded run.
gds_no_db.delete()

True

In [69]:
# Final check: the list should be empty once all sessions are deleted.
sessions.list()

[]