In [3]:
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np
import pandas as pd

from graphdatascience import GraphDataScience # Python GDS client

In [4]:
DB_ULR = "neo4j://localhost:7687"
DB_USER = "neo4j"
DB_PASS = "test1234"
DB_NAME = "paysim"
gds = GraphDataScience(DB_ULR, auth=(DB_USER, DB_PASS))

In [5]:
# Get the data

df = gds.run_cypher(
    """
    MATCH p=(c:Client)-[:PAYS*4]->()
    with p limit 100 // Adjust as needed
    with p, nodes(p) as clients, relationships(p) as rels
    with p, reduce(r=[], x in range(0,size(clients) -2) | r + [ {source: clients[x].id, target: clients[x+1].id, step: x, amount:rels[x].amount}] ) as data
    unwind data as d
    return d.source as source, d.target as target, d.step as step, sum(d.amount) as value
    order by step
    """, database=DB_NAME
)    
df.head()
    

Unnamed: 0,source,target,step,value
0,4541519469711557,4461880623376745,0,857617.1
1,4461880623376745,4873292177744272,1,189697000.0
2,4461880623376745,4915851486462783,1,39649660.0
3,4873292177744272,4073696787910099,2,4816744.0
4,4873292177744272,4702982143481234,2,3103048.0


In [6]:
# Define categories (sorce and target columns)
categories = {k: v for v, k in enumerate(df[['source','target']].stack().unique())}
print(categories)
df_c = df.replace(categories)
df_c.head()

{'4541519469711557': 0, '4461880623376745': 1, '4873292177744272': 2, '4915851486462783': 3, '4073696787910099': 4, '4702982143481234': 5, '4922625784792153': 6, '4517832403725371': 7, '4594973568093266': 8, '4326428361375688': 9, '4933891163763077': 10, '4412609840482634': 11, '4092051289662953': 12, '4951163819889830': 13, '4210103889475554': 14, '4330978531342590': 15, '4040224803675859': 16, '4429954302750487': 17, '4983703234999466': 18, '4336382847165148': 19, '4381208294992920': 20, '4339442189576778': 21, '4976830137227775': 22, '4252568240104185': 23, '4427334749411161': 24, '4863003585552344': 25, '4563332789808068': 26, '4441674945813377': 27, '4871664523419076': 28, '4299070204573740': 29, '4640153621890834': 30, '4238509849176348': 31, '4486330612405783': 32, '4544421732344422': 33, '4237690490773376': 34, '4647361658973554': 35, '4170764957800490': 36, '4017594459612888': 37, '4020065824869359': 38, '4114883698513458': 39, '4265470031385567': 40, '4360536129950076': 41, '

Unnamed: 0,source,target,step,value
0,0,1,0,857617.1
1,1,2,1,189697000.0
2,1,3,1,39649660.0
3,2,4,2,4816744.0
4,2,5,2,3103048.0


In [7]:
# Plot

fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = list(categories.keys()),
      color = "blue"
    ),
    link = dict(
      source = df_c['source'].dropna(axis=0, how='any'),
      target = df_c['target'].dropna(axis=0, how='any'),
      value = df_c['value'].dropna(axis=0, how='any')
  ))])

fig.update_layout(title_text="Payment flow", font_size=10)
fig.show()