In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go

from neo4j import GraphDatabase

In [4]:
# Connection settings are read from the environment so that real credentials do
# not have to be committed with the notebook. The fallbacks reproduce the
# original local development setup when the variables are not set.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "test1234")

# Bolt driver used by the query cell below; encryption stays disabled as before.
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD), encrypted=False)

In [9]:
# Get the data
def _read_payment_links(tx):
    """Run the payment-path query in the given transaction and return its rows.

    The query matches up to 100 paths of four PAYS hops starting at a Client,
    breaks each path into (source id, target id, hop index, amount) records and
    returns the summed amount per (source, target, step), ordered by step.

    NOTE(review): a relationship that belongs to several of the matched paths
    contributes its amount once per path to the sum — confirm this is intended.
    """
    cypher = """
            MATCH p=(c:Client)-[:PAYS*4]->()
            with p limit 100 // Adjust as needed
            with p, nodes(p) as clients, relationships(p) as rels
            with p, reduce(r=[], x in range(0,size(clients) -2) | r + [ {source: clients[x].id, target: clients[x+1].id, step: x, amount:rels[x].amount}] ) as data
            unwind data as d
            return d.source as source, d.target as target, d.step as step, sum(d.amount) as value
            order by step
            """
    return tx.run(cypher).data()


# The rows are fully materialised inside the read transaction, so the session
# can be closed before the DataFrame is built.
with driver.session(database="paysim") as session:
    data = session.read_transaction(_read_payment_links)

df = pd.DataFrame(data)
print(df)
    

               source            target  step         value
0    4541519469711557  4271689535643583     0  6.892297e+07
1    4271689535643583  4663743382179989     1  1.544011e+08
2    4271689535643583  4212365907726817     1  1.742590e+07
3    4663743382179989  4211199137874860     2  4.577548e+06
4    4663743382179989  4993349958818363     2  7.655701e+06
..                ...               ...   ...           ...
114  4072102228437926  4367129986463541     3  8.551574e+05
115  4072102228437926  4607329278293263     3  5.198640e+05
116  4072102228437926  4143744421187058     3  3.582363e+06
117  4072102228437926  4630006152610249     3  6.581524e+05
118  4999952482219018  4056867795377610     3  8.175608e+05

[119 rows x 4 columns]


In [10]:
# Define categories (source and target columns): every distinct node id gets a
# consecutive integer index, numbered in order of first appearance when the
# two columns are read row by row.
categories = {
    node_id: index
    for index, node_id in enumerate(df[['source', 'target']].stack().unique())
}
print(categories)

# Translate only the two id columns. A frame-wide df.replace(categories) would
# also rewrite any `step` or `value` cell that happens to equal an id key, and
# it leaves the resulting dtype of the id columns to replace()'s inference.
df_c = df.assign(
    source=df['source'].map(categories),
    target=df['target'].map(categories),
)
print(df_c)

{'4541519469711557': 0, '4271689535643583': 1, '4663743382179989': 2, '4212365907726817': 3, '4211199137874860': 4, '4993349958818363': 5, '4648200548175997': 6, '4435507669454218': 7, '4820994996117804': 8, '4617446420130504': 9, '4397839163584802': 10, '4593919520227741': 11, '4297675478411344': 12, '4221421350864230': 13, '4127174178678929': 14, '4398278278375013': 15, '4140082149330079': 16, '4663257404725299': 17, '4072102228437926': 18, '4999952482219018': 19, '4694173328895378': 20, '4550084067406447': 21, '4284198015890826': 22, '4386641726564815': 23, '4861389922697831': 24, '4413882758856789': 25, '4134703761268531': 26, '4165463874786117': 27, '4570247595114215': 28, '4767739512867662': 29, '4414622680322958': 30, '4559134566143313': 31, '4534258076506071': 32, '4635803022627964': 33, '4193872235636058': 34, '4195567698480923': 35, '4308515977191889': 36, '4810719392198260': 37, '4621627690779302': 38, '4981389325011601': 39, '4480495354758859': 40, '4497426786802933': 41, '

In [11]:
# Plot

# Drop incomplete rows once, on the frame, so that source, target and value
# stay aligned row by row. Calling dropna() on each column separately would
# shorten only the column with the gap and shift every later link.
links = df_c.dropna(subset=['source', 'target', 'value'], how='any')

fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        # Labels are listed in dict order, so label i belongs to node index i.
        label=list(categories),
        color="blue",
    ),
    link=dict(
        source=links['source'],
        target=links['target'],
        value=links['value'],
    ),
)])

fig.update_layout(title_text="Payment flow", font_size=10)
fig.show()