# Aura Graph Analytics
Documentation: https://neo4j.com/docs/aura/graph-analytics/

## Setup

1. Install the Neo4j `graphdatascience` client; see the docs: https://neo4j.com/docs/graph-data-science/current/
2. Install the Neo4j Python driver; see the docs: https://neo4j.com/docs/api/python-driver/current/
3. Install the Python graph visualization library: https://neo4j.com/docs/nvl-python/preview/
4. Install pandas (if it is not already installed)
5. Install python-dotenv (used below to load the credentials from a `.env` file)

In [None]:
import os
from dotenv import load_dotenv
import pandas as pd
from graphdatascience import GraphDataScience
from graphdatascience.session import AuraAPICredentials, GdsSessions, DbmsConnectionInfo, AlgorithmCategory, SessionMemory, CloudLocation
from datetime import timedelta
from neo4j import Query, GraphDatabase, RoutingControl, Result
from neo4j_viz.neo4j import from_neo4j
from neo4j_viz.gds import from_gds

In [None]:
# Settings are loaded from a local `.env` file; values from the file override
# variables that are already set in the environment.
# Creating Aura API credentials is described at:
# https://neo4j.com/docs/aura/classic/platform/api/authentication/#_creating_credentials
load_dotenv('.env', override=True)

# Aura API credentials (a missing variable yields None)
project_id = os.environ.get('AURA_API_PROJECT_ID')
client_id = os.environ.get('AURA_API_CLIENT_ID')
client_secret = os.environ.get('AURA_API_CLIENT_SECRET')

# Database connection settings (a missing variable yields None)
db_uri = os.environ.get('AURA_DB_ADDRESS')
db_user = os.environ.get('AURA_DB_USER')
db_pass = os.environ.get('AURA_DB_PW')
db_name = os.environ.get('AURA_DB_NAME')


## Create Aura Analytics compute session
Since this can take a minute or two, we do it first

In [None]:
# Connection details for the database, built from the settings loaded earlier.
db_connection = DbmsConnectionInfo(uri=db_uri, username=db_user, password=db_pass)

# Session manager authenticated with the Aura API credentials.
api_credentials = AuraAPICredentials(client_id, client_secret, project_id=project_id)
sessions = GdsSessions(api_credentials=api_credentials)

In [None]:
# Request a session size estimate for a graph of 20 nodes / 50 relationships
# on which centrality and node-embedding algorithms will be run.
algorithm_categories = [
    AlgorithmCategory.CENTRALITY,
    AlgorithmCategory.NODE_EMBEDDING,
]
memory = sessions.estimate(
    node_count=20,
    relationship_count=50,
    algorithm_categories=algorithm_categories,
)
memory

In [None]:
# Get (or create) the session named "my-shop-demo", connected to the database.
session_ttl = timedelta(hours=2)
gds = sessions.get_or_create(
    session_name="my-shop-demo",
    db_connection=db_connection,
    # Fixed 4GB size; the `memory` estimate computed earlier could be passed instead.
    memory=SessionMemory.m_4GB,
    ttl=session_ttl,
)

In [None]:
# List the sessions known to the session manager; the cell output shows the result.
sessions.list()

In [None]:
# Open a driver for the database and check right away that it can be reached.
db_auth = (db_user, db_pass)
driver = GraphDatabase.driver(db_uri, auth=db_auth)
driver.verify_connectivity()

## Graph creation

In [None]:
# Sample purchases, one (name, merchant, amount) row per transaction.
transaction_rows = [
    ('Tom', 'Amazon', 100),
    ('Tom', 'Dustin', 50499),
    ('Tom', 'eBay', 220),
    ('Stefan', 'Amazon', 220),
    ('Stefan', 'Dustin', 399),
    ('Stefan', 'eBay', 1499),
    ('Stefan', 'Bikes.de', 22000),
    ('Kristof', 'Amazon', 423),
    ('Kristof', 'Dustin', 530),
    ('Kristof', 'Hello Fresh', 1050),
    ('Kristof', 'Steam', 230),
    ('Kristof', 'Activision', 783),
    ('Håkan', 'Hello Fresh', 2100),
    ('Håkan', 'Steam', 230),
    ('Håkan', 'Activision', 783),
]
transaction_df = pd.DataFrame(transaction_rows, columns=['name', 'merchant', 'amount'])
transaction_df.head(15)

In [None]:
# Run the Cypher statements below against the configured database.
# `set_database` is the client's API for selecting the target database;
# assigning to `gds.database` only rebinds an attribute on the Python object
# and leaves the query target unchanged. Skipped when no name is configured
# so the client's default database is used.
if db_name:
    gds.set_database(db_name)

# Node-key constraints on `name` for Person and Merchant nodes.
# Not strictly required for this small sample.
gds.run_cypher("create constraint if not exists for (p:Person) require (p.name) is node key")
gds.run_cypher("create constraint if not exists for (p:Merchant) require (p.name) is node key")

In [None]:
# Store the transactions as (:Person)-[:TRANSACTED_WITH]->(:Merchant).
# All rows are sent as a single query parameter; with more data this would
# fail, so a larger dataframe should be loaded chunk by chunk.
transaction_records = transaction_df.to_dict(orient='records')
load_transactions_query = """
    unwind $transactions as transaction
    merge (p:Person{name: transaction['name']})
    merge (m:Merchant{name: transaction['merchant']})
    merge (p)-[tx:TRANSACTED_WITH]->(m)
       set tx.amount = transaction['amount']
    """
gds.run_cypher(load_transactions_query, params={'transactions': transaction_records})

In [None]:
# Fetch the Person -> Merchant transaction graph and render it.
graph = driver.execute_query(
    """
    match (p:Person)-[tx:TRANSACTED_WITH]->(m:Merchant)
    return p, tx, m
    """,
    database_=db_name,
    # Driver options of `execute_query` end with an underscore. Any other
    # keyword argument is forwarded to the query as a Cypher parameter, so the
    # routing option has to be spelled `routing_` to request read routing.
    routing_=RoutingControl.READ,
    # Return the result as a graph object (nodes and relationships).
    result_transformer_=Result.graph,
)
VG = from_neo4j(graph)
VG.color_nodes(field='caption')
VG.render()

## Graph projection (remote)

In [None]:
# Project every Person -[TRANSACTED_WITH]-> Merchant relationship into the
# session under the name "transaction_graph", keeping node labels, the
# relationship type and the `amount` relationship property.
projection_query = """
      match (p:Person)-[r:TRANSACTED_WITH]->(m:Merchant)
      with
        p AS source, r AS rel, m AS target
      return
      gds.graph.project.remote(source, target, {
        sourceNodeLabels: labels(source),
        targetNodeLabels: labels(target),
        relationshipType: type(rel),
        relationshipProperties: rel{.amount}
      })
      """
G, result = gds.graph.project(graph_name="transaction_graph", query=projection_query)
str(G)

In [None]:
# Run node similarity in stream mode on the projected graph and show the result.
gds.nodeSimilarity.stream(G)

In [None]:
# Run node similarity in write mode: pairs scoring at least the cutoff are
# written as IS_SIMILAR_TO relationships carrying a `sim_score` property.
similarity_write_config = {
    'similarityCutoff': 0.5,
    'writeRelationshipType': 'IS_SIMILAR_TO',
    'writeProperty': 'sim_score',
}
gds.nodeSimilarity.write(G, **similarity_write_config)

In [None]:
# The projection is no longer needed once the similarity results have been
# written, so drop it to free up resources.
G.drop()

In [None]:
# Remove symmetric relationships: when both (a)->(b) and (b)->(a) exist,
# delete one direction so each similar pair is linked only once.
# `elementId()` is used instead of the deprecated `id()`; the comparison is
# only a tie-breaker that picks exactly one relationship of each pair.
gds.run_cypher("""
  MATCH (a:Person)-[r:IS_SIMILAR_TO]->(b:Person)
    WHERE (b)-[:IS_SIMILAR_TO]->(a)
    AND   elementId(a) < elementId(b)
  DELETE r
""")


## Clean up compute resources
Delete the session when we are done with it

In [None]:
# Delete the session behind `gds` now that the analysis is finished.
gds.delete()

In [None]:
# List the sessions again to check the outcome of the deletion.
sessions.list()

In [None]:
# You can also delete the session by name or by id
# sessions.delete(session_name="my-shop-demo")

## Run algorithms on data from any source

In [None]:
# Get (or create) a second session. No database connection is passed here;
# a cloud location is given instead.
standalone_location = CloudLocation(provider="azure", region="uksouth")
gds_no_db = sessions.get_or_create(
    session_name="my-shop-demo-no-db",
    cloud_location=standalone_location,
    memory=SessionMemory.m_4GB,
    ttl=timedelta(hours=2),
)

In [None]:
# List the sessions known to the session manager; the cell output shows the result.
sessions.list()

In [None]:
# Display the transactions dataframe that the graph is built from below.
transaction_df

In [None]:
# Scratch preview of the Person half of the node table: one row per distinct
# customer name, tagged with a `label` column.
test = transaction_df[['name']].drop_duplicates().assign(label='Person')
test

In [None]:
# Build the node table: one row per distinct person and per distinct merchant,
# each with its display name and a `label` column, renumbered from 0.
person_nodes = transaction_df[['name']].drop_duplicates().assign(label='Person')
merchant_nodes = (
    transaction_df[['merchant']]
    .drop_duplicates()
    .rename(columns={'merchant': 'name'})
    .assign(label='Merchant')
)
node_map = pd.concat([person_nodes, merchant_nodes], ignore_index=True)
node_map.head(15)


In [None]:
# Copy the row index into a regular `idx` column so it is still available
# after `node_map` is merged with other frames.
node_map['idx'] = node_map.index
#node_map[ node_map['label'] =='Person' ].merge( transaction_df, on=['name'], how='right')

In [None]:
#node_map[ node_map['label'] =='Merchant' ].merge( transaction_df[['merchant']].rename(columns={'merchant':'name'}), on=['name'], how='right')

In [None]:
# Node frame: the node id is the row index of `node_map`, plus its label.
nodes = pd.DataFrame({
    "nodeId": node_map.index,
    "labels": node_map['label'].tolist(),
})

# Resolve every transaction's person and merchant to the `idx` of its node.
# With how='right' the merge result follows the rows of the transactions frame.
person_lookup = node_map[node_map['label'] == 'Person']
merchant_lookup = node_map[node_map['label'] == 'Merchant']
merchant_names = transaction_df[['merchant']].rename(columns={'merchant': 'name'})

source_ids = person_lookup.merge(transaction_df, on=['name'], how='right')['idx'].tolist()
target_ids = merchant_lookup.merge(merchant_names, on=['name'], how='right')['idx'].tolist()

# Relationship frame: one BOUGHT_FROM relationship per transaction.
relationships = pd.DataFrame({
    "sourceNodeId": source_ids,
    "targetNodeId": target_ids,
    "relationshipType": ["BOUGHT_FROM"] * len(transaction_df),
})

In [None]:
# Build the session graph "my-graph-from-dataframe" from the two dataframes.
G = gds_no_db.graph.construct("my-graph-from-dataframe", nodes, relationships)

In [None]:
# Run node similarity in stream mode on the constructed graph and show the result.
gds_no_db.nodeSimilarity.stream(G)

In [None]:
# Run node similarity in mutate mode: pairs scoring at least the cutoff are
# added to the session graph as IS_SIMILAR_TO relationships with `sim_score`.
similarity_mutate_config = {
    'similarityCutoff': 0.5,
    'mutateRelationshipType': 'IS_SIMILAR_TO',
    'mutateProperty': 'sim_score',
}
gds_no_db.nodeSimilarity.mutate(G, **similarity_mutate_config)

In [None]:
# Run PageRank in mutate mode, storing the score as `pagerank_score`.
gds_no_db.pageRank.mutate(G, mutateProperty='pagerank_score')

In [None]:
# Build a visualization of the session graph, sizing nodes by `pagerank_score`.
VG = from_gds(gds=gds_no_db, G=G, size_property="pagerank_score")

# Caption each node with its first label.
for viz_node in VG.nodes:
    first_label = viz_node.properties['labels'][0]
    viz_node.caption = first_label

# Caption each relationship with its relationship type.
for viz_rel in VG.relationships:
    rel_type = viz_rel.properties['relationshipType']
    viz_rel.caption = rel_type

# Color nodes by caption, then render.
VG.color_nodes(field='caption')
VG.render()

In [None]:
# The visualization has been rendered, so drop the graph to free up resources.
G.drop()

## Clean up compute resources

In [None]:
# Delete the session behind `gds_no_db` now that it is no longer needed.
gds_no_db.delete()

In [None]:
# List the sessions one last time to check the outcome of the deletion.
sessions.list()