In [None]:
# multilevel causality network diagram with Fruchterman-Reingold force-directed algorithm

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import json #simple parsing of json data from the datasource to dataframe
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

This network diagram illustrates the relations between direct causes of death, root causes of death, and indirect causes.

This is a population study. This compilation of data is not intended for patients. For health issues, please consult a medical doctor.
See all disclaimers and the complete dataset description on the source webpage (link below).
Source: "Multi-Level causality relations of underlying of deaths with age, sex and country stratifications, Schicklin, C., https://doi.org/10.34740/kaggle/dsv/2161283"


In [None]:
# Load the causality table from the Kaggle dataset and report its (rows, columns) shape.
csv_path = "/kaggle/input/multilevel-causality-relations-deaths-age-sex/multilevel_causality_deaths.csv"
df = pd.read_csv(csv_path)
print(df.shape)

In [None]:
# Build the directed causality graph: one weighted edge predecessor -> concept
# for every entry of a row's JSON-encoded `predecessors_array`.
G = nx.DiGraph()
for _, record in df.iterrows():
    raw_predecessors = record.predecessors_array
    if pd.isna(raw_predecessors):
        # Rows without a predecessor list contribute no edges.
        continue
    # The cell holds a JSON array of objects carrying 'predecessor_id' and 'impact'.
    predecessors = json.loads(str(raw_predecessors))
    weighted_edges = []
    for predecessor in predecessors:
        # Weight = impact * yearly deaths / 1200; this value is used further down as
        # the drawn edge width (1200 looks like a display scaling factor - TODO confirm).
        weighted_edges.append([
            predecessor['predecessor_id'],
            record.concept_id,
            float(predecessor['impact']) * record.total_yr_deaths_FRANCE / 1200,
        ])
    G.add_weighted_edges_from(weighted_edges)

def _column_positions(frame, level, x, y_start, y_step):
    """Return ``{concept_id: (x, y)}`` for the rows of one cause level.

    Only rows whose ``level_of_cause`` equals ``level`` and whose
    ``total_yr_deaths_FRANCE`` is strictly positive are kept. They are stacked
    in frame order on a vertical line at ``x``: the first one at ``y_start``,
    each following one ``y_step`` higher.
    """
    selected = (frame['level_of_cause'] == level) & (frame['total_yr_deaths_FRANCE'] > 0)
    concept_ids = frame.loc[selected, 'concept_id']
    return {
        concept_id: (x, y_start + rank * y_step)
        for rank, concept_id in enumerate(concept_ids)
    }


#dict for position of root and direct
# Root causes are pinned in a column at x=0 (12 units apart, starting at y=0),
# direct causes in a column at x=500 (55 units apart, starting at y=30).
fixed_positions = {
    **_column_positions(df, "root", x=0, y_start=0, y_step=12),
    **_column_positions(df, "direct", x=500, y_start=30, y_step=55),
}

#Fruchterman-Reingold force-directed algorithm with fixed start and end
# Nodes that are not pinned get a random starting position, so the layout is
# seeded to make the figure identical on every run.
LAYOUT_SEED = 42

# Only nodes that are actually part of the graph can be pinned.
fixed_nodes = [node for node in fixed_positions if node in G]

# An empty mapping/list is replaced by None so the call falls back to an
# unconstrained layout instead of receiving empty containers.
pos = nx.spring_layout(
    G,
    pos=fixed_positions or None,
    fixed=fixed_nodes or None,
    k=0.1,
    iterations=1,
    seed=LAYOUT_SEED,
)

#set node size
# Node size and colour both derive from the yearly death count of the concept.
# The concept_id -> deaths lookup is built once instead of scanning the whole
# DataFrame for every node; drop_duplicates keeps the first row per concept_id,
# i.e. the same row a first-match lookup would return.
first_rows = df.drop_duplicates(subset='concept_id')
deaths_by_concept = dict(zip(first_rows['concept_id'], first_rows['total_yr_deaths_FRANCE']))

node_sizes = []
label_colors = []
for n in G.nodes(): #n is a concept_id value
    # A predecessor id may have no row of its own in df, and a count may be
    # missing; both are treated as 0 deaths instead of raising or producing NaN.
    deaths = deaths_by_concept.get(n, 0)
    if pd.isna(deaths):
        deaths = 0
    node_sizes.append(deaths / 10)
    if deaths > 15000:
        color = 'red'
    elif deaths > 5000:
        color = 'orange'
    else:
        color = 'yellow'
    label_colors.append(color)

# Rendering only: edges first, then nodes, then labels, all on figure 1.
# Each edge is drawn with a line width equal to its 'weight' attribute; the list
# follows the iteration order of G.edges().
edge_widths = [weight for _source, _target, weight in G.edges(data='weight')]

edge_style = dict(width=edge_widths, edge_color='grey', alpha=0.5, arrowsize=17)
node_style = dict(node_size=node_sizes, node_color=label_colors, alpha=0.3)

plt.figure(num=1, figsize=(22, 18), dpi=60, frameon=False, clear=True)
nx.draw_networkx_edges(G, pos, **edge_style)
nx.draw_networkx_nodes(G, pos, **node_style)
nx.draw_networkx_labels(G, pos)
plt.title('Multilevel causality deaths for COUNTRY=FRANCE, no subgroup', size=15)
plt.tight_layout()
plt.show()