In [None]:
# multilevel causality network diagram Sankey Plotly algorithm
import json #simple parsing of json data from the datasource to dataframe
import pandas as pd
import plotly.graph_objects as go
from datetime import date

import os

This network diagram illustrates the relations between direct causes of death, root causes of death and indirect causes.

This is a population study. This compilation of data is not intended for patients. For health issues, please consult a medical doctor.
See all disclaimers and the complete dataset descriptions on the source webpage (link below).
Reuse and rework are allowed, but citing the link to this page and the source open data set, as given below, is mandatory:
Source: "Multi-Level causality relations of underlying causes of deaths by age, gender and country, Schicklin, C., https://doi.org/10.34740/kaggle/dsv/2161283"


In [None]:
# Causality table used by every diagram below. render() reads its concept_id,
# level_of_cause and predecessors_array columns plus one yearly-deaths column
# chosen per call.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/multilevel-causality-relations-deaths-age-sex/multilevel_causality_deaths.csv"
)

In [None]:
def _build_sankey_inputs(data, column, red_share, link_divisor):
    """Turn the causality table into the flat lists a Sankey trace needs.

    Returns None when `column` holds a missing value in any row. Otherwise
    returns a dict with the keys ``labels``, ``colors``, ``sources``,
    ``targets``, ``values`` and ``total``.
    """
    deaths_by_row = data[column]
    if deaths_by_row.isna().any():
        return None

    # Scale reference: deaths summed over the rows whose level is "direct".
    total = 0
    for level, deaths in zip(data["level_of_cause"], deaths_by_row):
        if level == "direct":
            total = deaths + total

    # One node per row; a node's index in `labels` is what links refer to.
    labels = list(data["concept_id"])
    position = {concept: index for index, concept in enumerate(labels)}

    red_cutoff = total * red_share
    orange_cutoff = total * red_share / 3
    colors = []
    for deaths in deaths_by_row:
        if deaths > red_cutoff:
            colors.append('red')
        elif deaths > orange_cutoff:
            colors.append('orange')
        else:
            colors.append('yellow')

    # Links: predecessors_array is a JSON list such as
    # [{"predecessor_id":"ischemia","impact":"0.741"},{"predecessor_id":"haemorrhage","impact":"0.185"}]
    sources, targets, values = [], [], []
    for concept, deaths, raw in zip(labels, deaths_by_row, data["predecessors_array"]):
        if pd.isna(raw):
            continue  # this concept has no predecessor, so no incoming link
        for predecessor in json.loads(str(raw)):
            sources.append(position[predecessor['predecessor_id']])
            targets.append(position[concept])
            values.append(float(predecessor['impact']) * deaths / link_divisor)

    return {
        "labels": labels,
        "colors": colors,
        "sources": sources,
        "targets": targets,
        "values": values,
        "total": total,
    }


def render(column="total_yr_deaths_FRANCE", filter_text="country=FRANCE, subgroup=TOTAL",
           data=None, red_share=0.0614, link_divisor=1200):
    """Draw the root-to-direct causes of death Sankey diagram for one column.

    Args:
        column: name of the column holding the yearly deaths to plot,
            e.g. "female_yr_deaths_>=5yo,_<=64yo_FRANCE".
        filter_text: text injected in the figure title to describe the
            column selection.
        data: table to plot. It must provide the columns concept_id,
            level_of_cause, predecessors_array and `column`. Defaults to the
            notebook-level `df`.
        red_share: a node is red when its deaths exceed this share of the
            "direct" total, and orange when they exceed a third of it. The
            default is the value the notebook has always used; its rationale
            is not documented here.
        link_divisor: link values are impact * deaths / link_divisor. The
            default is the value the notebook has always used; its rationale
            is not documented here.

    Returns:
        None. The figure is shown with fig.show(). If `column` contains a
        missing value, an error line is printed and nothing is drawn.

    Raises:
        KeyError: if a required column is absent, or a predecessor_id does
            not match any concept_id.
    """
    if data is None:
        data = df  # frame loaded in an earlier notebook cell

    inputs = _build_sankey_inputs(data, column, red_share, link_divisor)
    if inputs is None:
        print("Error in source data set for this COUNTRY or SUBGROUP. Technical reference column="+column)
        return

    fig = go.Figure(data=[go.Sankey(
        node = dict(
            pad = 9,
            thickness = 20,
            line = dict(color = "black", width = 0.5),
            label = inputs["labels"],
            color = inputs["colors"]
        ),
        link = dict(
            source = inputs["sources"],
            target = inputs["targets"],
            value = inputs["values"]
        ))])

    fig.update_layout(title_text="From root to direct causes of deaths, filter "+filter_text+", total scale=" + str(int(inputs["total"]))+ " deaths/yr", title_x=0.5, font_size=10)
    fig.add_annotation(x = 0.5, y = 1.12, text = "mouse over to read vertices/nodes/edges, interactive menu and move nodes",
                       showarrow=False, font_size=10)
    fig.add_annotation(x = 1, y = 0, text = "<i>Source: Multi-Level causality relations of underlying causes of deaths by age, gender and country, Schicklin, C. <a href=https://doi.org/10.34740/kaggle/dsv/2161283>doi.org/10.34740/kaggle/dsv/2161283</a> " + date.today().strftime("%B %d, %Y")+ "</i>",
                       showarrow=False, font_size=7)
    fig.show()

# Draw the diagram with render()'s default arguments
# (column "total_yr_deaths_FRANCE", title filter "country=FRANCE, subgroup=TOTAL").
render()

In [None]:
# France, females aged 4 and under.
render(
    column="female_yr_deaths_=<_4yo_FRANCE",
    filter_text="country=FRANCE, subgroup=FEMALE <=4yo",
)

In [None]:
# France, females aged 5 to 64.
render(
    column="female_yr_deaths_>=5yo,_<=64yo_FRANCE",
    filter_text="country=FRANCE, subgroup=FEMALE 5 to 64yo",
)

In [None]:
# France, females aged 65 and over.
render(
    column="female_yr_deaths_>=_65_FRANCE",
    filter_text="country=FRANCE, subgroup=FEMALE >=65yo",
)

In [None]:
# France, males aged 4 and under.
render(
    column="male_yr_deaths_=<_4yo_FRANCE",
    filter_text="country=FRANCE, subgroup=MALE <=4yo",
)

In [None]:
# France, males aged 5 to 64.
render(
    column="male_yr_deaths_>=5yo,_<=64yo_FRANCE",
    filter_text="country=FRANCE, subgroup=MALE 5 to 64yo",
)

In [None]:
# France, males aged 65 and over; the title label names the same age band
# as the selected >=65 column.
render(
    column="male_yr_deaths_>=_65_FRANCE",
    filter_text="country=FRANCE, subgroup=MALE >=65yo",
)