# Create a Sankey diagram from a pandas DataFrame.

In [1]:
import pandas as pd
from ipysankeywidget import SankeyWidget
from ipywidgets import Layout
import numpy as np

In [22]:
import os

# Data directory, kept with a trailing path separator because the other cells
# build paths by plain string concatenation (dirData + file name).
# os.path.join("Data", "") yields "Data\" on Windows and "Data/" elsewhere,
# so the notebook no longer depends on the Windows separator.
dirData = os.path.join("Data", "")

# Input files read from the data directory.
inputCSV = "C5.csv"
inputRateCSV = "C5_rate.csv"

# Output files written to the data directory.
outputSVG = "svg_C5.svg"
outputPDF = "pdf_C5.pdf"
outputPNG = "png_C5.png"

## Define the proportions (size and margins) of the Sankey diagram.

In [3]:
# Widget size shared by every diagram created through sankey().
layout = Layout(width="800", height="600")


def sankey(margin_top=10, **value):
    """Return a SankeyWidget built with the shared layout and fixed margins.

    margin_top sets the top margin; the bottom, left and right margins are
    fixed at 0, 60 and 270. Every other keyword argument is forwarded
    unchanged to SankeyWidget.
    """
    margins = {"top": margin_top, "bottom": 0, "left": 60, "right": 270}
    return SankeyWidget(layout=layout, margins=margins, **value)

## Import csv file into the DataFrame to create links on the Sankey diagram.
  1. The header must include "source", "target", "value" and "type".
  1. The headers must be written in lowercase letters in the DataFrame.

In [13]:
# Load the link table from the CSV file in the data directory.
df = pd.read_csv(dirData + inputCSV)
# Missing values count as 0; values are rounded and stored as integers.
df['value'] = df['value'].fillna(0).round().astype(int)
display(df.head(9))

Unnamed: 0,source,target,value,type
0,sR_1,R_0,4432,a
1,sR_1,R_1,139011,a
2,sR_1,R_2,12842,a
3,sR_1,R_3,795,a
4,sR_1,R_4,32381,a
5,sR_1,R_5,40641,a
6,sR_1,R_6,60791,a
7,sR_1,R_88,315,a
8,sR_1,R_99,2627,a


## Create links.
Create the links data by combining the header and each record.

In [14]:
# One dict per DataFrame row, keyed by column name.
# to_dict(orient="records") replaces the manual iterrows() loop.
links = df.to_dict(orient="records")

display(links[0:9])
sankey(links=links)

[{'source': 'sR_1', 'target': 'R_0', 'value': 4432, 'type': 'a'},
 {'source': 'sR_1', 'target': 'R_1', 'value': 139011, 'type': 'a'},
 {'source': 'sR_1', 'target': 'R_2', 'value': 12842, 'type': 'a'},
 {'source': 'sR_1', 'target': 'R_3', 'value': 795, 'type': 'a'},
 {'source': 'sR_1', 'target': 'R_4', 'value': 32381, 'type': 'a'},
 {'source': 'sR_1', 'target': 'R_5', 'value': 40641, 'type': 'a'},
 {'source': 'sR_1', 'target': 'R_6', 'value': 60791, 'type': 'a'},
 {'source': 'sR_1', 'target': 'R_88', 'value': 315, 'type': 'a'},
 {'source': 'sR_1', 'target': 'R_99', 'value': 2627, 'type': 'a'}]

SankeyWidget(layout=Layout(height='600', width='800'), links=[{'source': 'sR_1', 'target': 'R_0', 'value': 443…

## Import csv file into the DataFrame to create nodes on the Sankey diagram.
The node titles show the percentage of items that remain from the upstream to the downstream of each flow on the diagram.

In [7]:
# Load the node table (id and display title) from the CSV file in the data directory.
df2 = pd.read_csv(dirData + inputRateCSV)
display(df2)

Unnamed: 0,id,title
0,R_1,R_1 : sR_1 to R_1 (47.31%)
1,R_2,R_2 : sR_2 to R_2 (81.63%)
2,R_3,R_3 : sR_3 to R_3 (2.55%)
3,R_4,R_4 : sR_4 to R_4 (18.46%)
4,R_5,R_5 : sR_5 to R_5 (33.14%)
5,R_6,R_6 : sR_6 to R_6 (32.24%)
6,R_99,R_99 : sR_99 to R_99 (22.07%)
7,R_88,R_88 : sR_88 to R_88 (99.71%)
8,R_0,R_0 : sR_0 to R_0 (99.96%)


## Create nodes.
Create the nodes data by combining the header and each record.

In [15]:
# One dict per DataFrame row, keyed by column name.
# to_dict(orient="records") replaces the manual iterrows() loop.
nodes = df2.to_dict(orient="records")

display(nodes)
sankey(links=links, nodes=nodes)

[{'id': 'R_1', 'title': 'R_1 : sR_1 to R_1 (47.31%)'},
 {'id': 'R_2', 'title': 'R_2 : sR_2 to R_2 (81.63%)'},
 {'id': 'R_3', 'title': 'R_3 : sR_3 to R_3 (2.55%)'},
 {'id': 'R_4', 'title': 'R_4 : sR_4 to R_4 (18.46%)'},
 {'id': 'R_5', 'title': 'R_5 : sR_5 to R_5 (33.14%)'},
 {'id': 'R_6', 'title': 'R_6 : sR_6 to R_6 (32.24%)'},
 {'id': 'R_99', 'title': 'R_99 : sR_99 to R_99 (22.07%)'},
 {'id': 'R_88', 'title': 'R_88 : sR_88 to R_88 (99.71%)'},
 {'id': 'R_0', 'title': 'R_0 : sR_0 to R_0 (99.96%)'}]

SankeyWidget(layout=Layout(height='600', width='800'), links=[{'source': 'sR_1', 'target': 'R_0', 'value': 443…

## Set groups and show their names.

In [10]:
# Both groups list the same rating codes; only the node-name prefix differs
# ('sR_' for the first group, 'R_' for the second).
groups = [
    {
        'id': group_id,
        'title': group_title,
        'nodes': [prefix + code
                  for code in ('0', '1', '2', '3', '4', '5', '6', '7', '88', '99')],
    }
    for group_id, group_title, prefix in (
        ('G1', '1st rating by AI', 'sR_'),
        ('G2', 'Result', 'R_'),
    )
]
# Larger top margin than the sankey() default of 10.
s = sankey(links=links, nodes=nodes, groups=groups, margin_top=100)
s

SankeyWidget(groups=[{'id': 'G1', 'title': '1st rating by AI', 'nodes': ['sR_0', 'sR_1', 'sR_2', 'sR_3', 'sR_4…

## Output the diagram as an SVG file.

In [11]:
# Write the diagram held in `s` to the SVG output path in the data directory.
s.save_svg(f"{dirData}{outputSVG}")

#### Output the diagram as a PNG file.

In [20]:
# Write the diagram held in `s` to the PNG output path in the data directory.
s.save_png(f"{dirData}{outputPNG}")

#### The quality of the PNG image created by the method above was not good. Convert the SVG to PDF as a trial.

In [26]:
import cairosvg
# Convert the saved SVG file into a PDF file in the same data directory.
cairosvg.svg2pdf(
    url=f"{dirData}{outputSVG}",
    write_to=f"{dirData}{outputPDF}",
)

#### Convert PDF to PNG as a trial.

In [27]:
import os
from pdf2image import convert_from_path

# Convert the first page of the generated PDF into a PNG image.
pdf = dirData + outputPDF

if os.path.exists(pdf):
    pages = convert_from_path(pdf)
    if pages:
        # Same path as the PNG written by s.save_png, so that file is replaced.
        imgPath = dirData + outputPNG
        pages[0].save(imgPath, 'png')
    else:
        print(f"No pages could be rendered from {pdf}; PNG not written.")
else:
    # Report the missing input instead of skipping the conversion silently.
    print(f"PDF not found: {pdf}; PNG not written.")