In [7]:
import pandas as pd

# Read the link table from a CSV in the working directory. Later cells use
# its source / target / step / value columns.
df = pd.read_csv(filepath_or_buffer='./test.csv')
# Preview the first five rows as a quick check of what was parsed.
df.head(n=5)


Unnamed: 0,source,target,step,value
0,source1,f,1,50
1,source2,f,1,50
2,f,l,2,50
3,f,loss,2,50
4,l,w,3,30


In [8]:
# For every label that occurs as a target, record the largest step at which
# it is reached.
max_step = df.groupby('target')[['step']].max()
step_map = max_step['step'].to_dict()

# Give each row the step of its source. Sources that never occur as a target
# (the initial sources) are missing from step_map and fall back to step 0.
df['source_step'] = (
    df['source']
    .map(step_map)
    .fillna(0)
    .astype(int)
)


In [9]:
# Collect every distinct name used as a source or a target into a single
# 'label' column. Sources are stacked first, so they receive the lowest
# positions; the fresh 0..n-1 index is what later cells use as the node index.
labels = (
    pd.concat([df['source'], df['target']])
    .drop_duplicates()
    .rename('label')
    .to_frame()
    .reset_index(drop=True)
)
labels

Unnamed: 0,label
0,source1
1,source2
2,f
3,l
4,loss
5,w
6,a


In [10]:
# Lookup table: source/target name -> its index in the labels frame.
label_map = {
    name: position
    for position, name in labels['label'].items()
}
print(label_map)


{'source1': 0, 'source2': 1, 'f': 2, 'l': 3, 'loss': 4, 'w': 5, 'a': 6}


In [11]:
# Translate both endpoints of every row into label indices, producing the
# 'target_index' and 'source_index' columns (created in that order).
for endpoint in ('target', 'source'):
    df[f'{endpoint}_index'] = df[endpoint].map(label_map)



In [12]:
import plotly.graph_objects as go

# Horizontal position of every node, in the same order as labels['label'].
# Node attributes are read per node, so x has to be derived from the node list
# rather than from df['source_step'], which has one entry per link (row of df)
# and therefore neither the right length nor the right order.
# step_map (built in an earlier cell) holds the largest step at which a label
# appears as a target; labels that are never a target stay at step 0.
node_x = labels['label'].map(step_map).fillna(0) * 0.2  # 0.2 of the width per step
# NOTE(review): node.x is a normalized position, so this spacing presumably
# only fits steps 0..5 -- confirm before using deeper graphs.
# NOTE(review): plotly appears to apply manual node positions only when both
# node.x and node.y are supplied (and non-zero) -- confirm, and add a y array
# if the computed x positions turn out to be ignored.

fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels['label'],
        color="blue",
        x=node_x,
    ),
    link=dict(
        source=df['source_index'],  # label index of the node each link starts at
        target=df['target_index'],  # label index of the node each link ends at
        value=df['value'],          # value column of df, one entry per link
    ))])

fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
fig.show()