Missing:
- Fix array to color for all nodes
- Fix array to size for all nodes
- Fix array to color for all lines
- Fix array to thickness for all lines
- Figure out implementation with looping rendering script
- Figure out interaction to change csv file and dict for rendering script

In [1]:
import pandas as pd
import numpy as np
import igraph as ig
import random
import csv
import warnings
warnings.filterwarnings('ignore')

In [2]:
# HELPER METHODS TO PERFORM CSV TO DICTIONARY READ

# Description:
# Typecasts certain strings to ints after they are read in from csv file.
#
# Params:
# dict_in is the compiled dictionary after being read from csv and processed
#
# Return:
# Returns dictionary in same condition as it would have been when it was written to csv with ints.
def dict_strings_to_ints(dict_in):
    
    # Output dictionary
    dict_out = {
        'nodes': [],
        'links': []
    }
    
    # Perform typecasting for all strings to ints when necessary
    for row in dict_in['nodes']:
        dict_out['nodes'].append({'name': row['name'], 'group': int(row['group'])})
    for row in dict_in['links']:
        dict_out['links'].append({'source': int(row['source']), 
                                  'target': int(row['target']), 'value': int(row['value'])})
    return dict_out

# Description:
# Reads in a csv file and compiles it into a list of dictionaries.
#
# Params:
# filename is the path to the csv file to read
# 
# Return:
# Returns a list of formatted dictionaries with the first row of the csv file being the dictionary headers.
def read_csv(filename):
    
    # Create a csv reader
    with open(filename, mode='r') as infile:
        reader = csv.reader(infile)
        first = -1 # first row flag
        headers = [] # headers for dictionaries
        list_out = [] # output list
        
        # Loop through csv file and use first row as header
        for row in reader:
            
            # Set flag and set header list
            if first == -1:
                headers = row
                first = 1
                continue
                
            # Append rows as dictionaries to output list
            new_dict = {}
            for i in range(len(headers)):
                new_dict[headers[i]] = row[i]
            list_out.append(new_dict)
            
    return list_out

# Description:
# Uses other helper functions to compile and process entire dictionary from csv read.
# 
# Params:
# keyword is the word used in the node and link csv filenames to read in
#
# Return:
# Returns processed dictionary as it would be before being converted into csv.
def csv_to_dict(keyword):
    
    # Get file paths
    csv_node = keyword + '_nodes.csv'
    csv_links = keyword + '_links.csv'
    
    # Compile dictionary
    dictionary = {
        'nodes': read_csv(csv_node),
        'links': read_csv(csv_links)
    }
    
    # Process strings before return
    return dict_strings_to_ints(dictionary)

Initial Setup

In [3]:
# Import initial formatted biometrics

df_biometrics = pd.read_csv("EDITED biometrics&vitals.csv")

In [4]:
# Create list of unique biometrics in dataframe

unique_biometrics = []
unique_biometrics.extend(df_biometrics.columns.tolist())

# Put all biometrics from dataframe to list
for col in df_biometrics.columns:
    elements = df_biometrics[col].dropna().tolist()
    for element in elements:
        if element == np.nan:
            continue
        else:
            unique_biometrics.append(element[3:])

# Remove duplicates from list
unique_biometrics = list(set(unique_biometrics))

In [5]:
# Create new dataframe with all unique biometrics in columns
# Note: this is important because nodes are only created for named nodes

df_biometrics_complete = df_biometrics.reindex(columns = unique_biometrics)
df_biometrics_complete

Unnamed: 0,MAP,Phosphate,Platelets,Lactate,BUN,HCT,CO2,SpO2,DBP,HCO3,...,BMI,Urea,Troponin T,K,Arterial pH,Chloride,Ca,Fibrinogen,Urine output,Na
0,090DBP,090Lactate,090BMI,090Arterial pH,090Lactate,090BMI,090HCO3,090SaO2,090MAP,090CO2,...,090HCT,090BUN,090Glucose,090Creatinine,090Lactate,,090Lactate,050Albumin,090Body temperature,090Urine output
1,090SBP,050BMI,090DBP,090BUN,090Urea,050Cholesterol,,050Cl,090Platelets,090Lactate,...,090Platelets,090Creatinine,090HDL,090Glucose,090RR,,050Albumin,050Bilirubin,090K,050BMI
2,090Urine output,050Ca,090HDL,090Ca,050Bilirubin,050DBP,,050Fibrinogen,090SBP,090RR,...,050Albumin,,090Troponin I,090Mg,050Albumin,,050ALP,050Creatinine,090MAP,050Cl
3,050HCT,050Cl,090SBP,090Cl,,050MAP,,050Glucose,050Albumin,050Albumin,...,050Arterial pH,,,090Urine output,050BMI,,050BMI,050SpO2,090Na,050Lactate
4,050Hemoglobin,050Creatinine,,090HCO3,,050SBP,,,050Ca,050Arterial pH,...,050Ca,,,,050Glucose,,050DBP,,,050Phosphate
5,,050DBP,,090Phosphate,,,,,050Cl,050Cholesterol,...,050Na,,,,050HCO3,,050Phosphate,,,050SBP
6,,050Glucose,,050Albumin,,,,,050Glucose,050Cl,...,050Phosphate,,,,050HR,,050SBP,,,
7,,050HDL,,050Cholesterol,,,,,050HCT,050Glucose,...,050WBC,,,,,,050Triglyceride,,,
8,,050Na,,050Hemoglobin,,,,,050Hemoglobin,050Hemoglobin,...,,,,,,,,,,
9,,050SBP,,050Na,,,,,050Na,,...,,,,,,,,,,


In [6]:
# Create dictionary of relationships from dataframe

dict_bio = {}
for col in df_biometrics_complete.columns:
    
    # Catch empty series case
    try:
        dropped = df_biometrics[col].dropna().tolist()
    except KeyError:
        dropped = []
        
    # Seperate biometrics and values
    dict_bio[col] = []
    for conn in dropped:
        dict_bio[col].append([conn[3:], int(conn[0:3])])
        
dict_bio

{'MAP': [['DBP', 90],
  ['SBP', 90],
  ['Urine output', 90],
  ['HCT', 50],
  ['Hemoglobin', 50]],
 'Phosphate': [['Lactate', 90],
  ['BMI', 50],
  ['Ca', 50],
  ['Cl', 50],
  ['Creatinine', 50],
  ['DBP', 50],
  ['Glucose', 50],
  ['HDL', 50],
  ['Na', 50],
  ['SBP', 50]],
 'Platelets': [['BMI', 90], ['DBP', 90], ['HDL', 90], ['SBP', 90]],
 'Lactate': [['Arterial pH', 90],
  ['BUN', 90],
  ['Ca', 90],
  ['Cl', 90],
  ['HCO3', 90],
  ['Phosphate', 90],
  ['Albumin', 50],
  ['Cholesterol', 50],
  ['Hemoglobin', 50],
  ['Na', 50]],
 'BUN': [['Lactate', 90], ['Urea', 90], ['Bilirubin', 50]],
 'HCT': [['BMI', 90],
  ['Cholesterol', 50],
  ['DBP', 50],
  ['MAP', 50],
  ['SBP', 50]],
 'CO2': [['HCO3', 90]],
 'SpO2': [['SaO2', 90], ['Cl', 50], ['Fibrinogen', 50], ['Glucose', 50]],
 'DBP': [['MAP', 90],
  ['Platelets', 90],
  ['SBP', 90],
  ['Albumin', 50],
  ['Ca', 50],
  ['Cl', 50],
  ['Glucose', 50],
  ['HCT', 50],
  ['Hemoglobin', 50],
  ['Na', 50],
  ['Phosphate', 50]],
 'HCO3': [['CO2', 

In [7]:
# Convert dictionary into nodes and links
# Note: the "group" for nodes currently does nothing and can be used as a value to insert

# counter is currently used for group and currently does nothing
counter = 0

# Nodes and links lists to go into dictionary
nodes = []
links = []

for biom in list(dict_bio.keys()):
    biom_name = {}
    biom_name['name'] = biom
    biom_name['group'] = list(dict_bio.keys()).index(biom)
    nodes.append(biom_name)
    for conn in dict_bio[biom]:
        target = list(dict_bio.keys()).index(conn[0])
        no_flag = False
        for link in links:
            if link["source"] == target:
                if link["target"] == counter:
                    no_flag = True
        if no_flag == True:
            continue
        biom_dict = {}
        biom_dict["source"] = counter
        biom_dict["target"] = target
        biom_dict["value"] = conn[1]
        links.append(biom_dict)
    counter = counter + 1

# Complete lists into biometrics dictionary
base_data = {
    "nodes": nodes,
    "links": links
}

base_data

{'nodes': [{'name': 'MAP', 'group': 0},
  {'name': 'Phosphate', 'group': 1},
  {'name': 'Platelets', 'group': 2},
  {'name': 'Lactate', 'group': 3},
  {'name': 'BUN', 'group': 4},
  {'name': 'HCT', 'group': 5},
  {'name': 'CO2', 'group': 6},
  {'name': 'SpO2', 'group': 7},
  {'name': 'DBP', 'group': 8},
  {'name': 'HCO3', 'group': 9},
  {'name': 'SaO2', 'group': 10},
  {'name': 'Body temperature', 'group': 11},
  {'name': 'Glucose', 'group': 12},
  {'name': 'ALP', 'group': 13},
  {'name': 'HR', 'group': 14},
  {'name': 'Hemoglobin', 'group': 15},
  {'name': 'Cl', 'group': 16},
  {'name': 'Mg', 'group': 17},
  {'name': 'Triglycerides', 'group': 18},
  {'name': 'Albumin', 'group': 19},
  {'name': 'Creatinine', 'group': 20},
  {'name': 'SBP', 'group': 21},
  {'name': 'ALT', 'group': 22},
  {'name': 'HDL', 'group': 23},
  {'name': 'RR', 'group': 24},
  {'name': 'LDL', 'group': 25},
  {'name': 'Troponin I', 'group': 26},
  {'name': 'AST', 'group': 27},
  {'name': 'Bilirubin', 'group': 28},


In [8]:
# Convert to csv

field_names=["name","group"]
with open('bio_nodes.csv', 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=field_names)
    writer.writeheader()
    writer.writerows(base_data["nodes"])
    
field_names=["source","target","value"]
with open('bio_links.csv', 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=field_names)
    writer.writeheader()
    writer.writerows(base_data["links"])

In [9]:
# Simulate diseases
# Note: This is only here for testing and simulates what diseases would look like

max_biometric = len(base_data["nodes"]) - 1 # max biometric value
num_dis = 50 # number of simulated diseases
num_dis_links = 2 # number of biometrics associated with each disease

arr_num_bio = np.arange(max_biometric + 1)
diseases = {'nodes':[], 'links':[]}

# Generate disease nodes and randomly linking to biometrics
for i in range(num_dis):
    dis_name = 'disease_'+ str(i)
    diseases['nodes'].append({'name': dis_name, 'group': i})
    cross_links = random.choices(arr_num_bio, k=num_dis_links)
    for link in cross_links:
        diseases['links'].append({'source': i, 'target': link, 'value': 50})

diseases

{'nodes': [{'name': 'disease_0', 'group': 0},
  {'name': 'disease_1', 'group': 1},
  {'name': 'disease_2', 'group': 2},
  {'name': 'disease_3', 'group': 3},
  {'name': 'disease_4', 'group': 4},
  {'name': 'disease_5', 'group': 5},
  {'name': 'disease_6', 'group': 6},
  {'name': 'disease_7', 'group': 7},
  {'name': 'disease_8', 'group': 8},
  {'name': 'disease_9', 'group': 9},
  {'name': 'disease_10', 'group': 10},
  {'name': 'disease_11', 'group': 11},
  {'name': 'disease_12', 'group': 12},
  {'name': 'disease_13', 'group': 13},
  {'name': 'disease_14', 'group': 14},
  {'name': 'disease_15', 'group': 15},
  {'name': 'disease_16', 'group': 16},
  {'name': 'disease_17', 'group': 17},
  {'name': 'disease_18', 'group': 18},
  {'name': 'disease_19', 'group': 19},
  {'name': 'disease_20', 'group': 20},
  {'name': 'disease_21', 'group': 21},
  {'name': 'disease_22', 'group': 22},
  {'name': 'disease_23', 'group': 23},
  {'name': 'disease_24', 'group': 24},
  {'name': 'disease_25', 'group': 25

In [10]:
# Convert to csv

field_names=["name","group"]
with open('dis_nodes.csv', 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=field_names)
    writer.writeheader()
    writer.writerows(diseases["nodes"])
    
field_names=["source","target","value"]
with open('dis_links.csv', 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=field_names)
    writer.writeheader()
    writer.writerows(diseases["links"])

Calculating Node and Edge locations

In [11]:
# Read in csv files with keywords

data = csv_to_dict('bio')
diseases = csv_to_dict('dis')

In [12]:
# Create shell for biometrics in 3D spherical pattern

N=len(data['nodes']) # number of nodes
L=len(data['links']) # number of links

Edges=[(data['links'][k]['source'], data['links'][k]['target']) for k in range(L)]

G=ig.Graph(Edges, directed=False)
layt=G.layout('sphere', dim=3)

In [13]:
# Create shell for diseases in 3D spherical pattern

N_dis=len(diseases['nodes']) # number of nodes
L_dis=len(diseases['links']) # number of links (links from disease to biometric)

Edges_dis=[(diseases['links'][k]['source'], diseases['links'][k]['target']) for k in range(L_dis)]

G_dis=ig.Graph(Edges_dis, directed=False)
layt_dis = G_dis.layout('sphere', dim=3)

In [14]:
# Assigning labels to nodes on actual graph

labels=[]
group=[]
line_col=[]
node_size=[]
all_nodes = data['nodes'] + diseases['nodes']
all_links = data['links'] + diseases['links']

for node in all_nodes:
    labels.append(node['name'])
    group.append(node['group'])
    node_size.append(6)

for conn in all_links:
    if conn["value"] >= 90:
        line_col.append(1)
        line_col.append(1)
        line_col.append(1)
    elif conn["value"] >= 50:
        line_col.append(0)
        line_col.append(0)
        line_col.append(0)
    else:
        line_col.append(0)
        line_col.append(0)
        line_col.append(0)

In [15]:
# Generate node coordinates

Xn=[layt[k][0] for k in range(N)]# x-coordinates of nodes
Yn=[layt[k][1] for k in range(N)]# y-coordinates
Zn=[layt[k][2] for k in range(N)]# z-coordinates
Xe=[]
Ye=[]
Ze=[]
for e in Edges:
    Xe+=[layt[e[0]][0],layt[e[1]][0], None]# x-coordinates of edge ends
    Ye+=[layt[e[0]][1],layt[e[1]][1], None]
    Ze+=[layt[e[0]][2],layt[e[1]][2], None]

In [16]:
# Generate node coordinates for diseases

Xn_dis=[layt_dis[k][0]*2 for k in range(N_dis)]# x-coordinates of nodes
Yn_dis=[layt_dis[k][1]*2 for k in range(N_dis)]# y-coordinates
Zn_dis=[layt_dis[k][2]*2 for k in range(N_dis)]# z-coordinates
Xe_dis=[]
Ye_dis=[]
Ze_dis=[]
for e in Edges_dis:
    Xe_dis+=[layt_dis[e[0]][0]*2,layt_dis[e[1]][0]*2, None]# x-coordinates of edge ends
    Ye_dis+=[layt_dis[e[0]][1]*2,layt_dis[e[1]][1]*2, None]
    Ze_dis+=[layt_dis[e[0]][2]*2,layt_dis[e[1]][2]*2, None]

In [17]:
# Generate cross links between shells

nodes_list = data["nodes"]
diseases_list = diseases["nodes"]
disease_links = diseases["links"]
cross_link = []
    
X_cross = []
Y_cross = []
Z_cross = []

for link in disease_links:
    X_cross.append(Xn[link['target']])
    X_cross.append(Xn_dis[link['source']])
    X_cross.append(None)
    Y_cross.append(Yn[link['target']])
    Y_cross.append(Yn_dis[link['source']])
    Y_cross.append(None)
    Z_cross.append(Zn[link['target']])
    Z_cross.append(Zn_dis[link['source']])
    Z_cross.append(None)

In [18]:
# Combine lists of links and nodes

Xn = Xn + Xn_dis
Yn = Yn + Yn_dis
Zn = Zn + Zn_dis

Xe = Xe + X_cross
Ye = Ye + Y_cross
Ze = Ze + Z_cross

Rendering

In [57]:
import chart_studio.plotly as py
import plotly.graph_objs as go
import plotly.express as px

trace1=go.Scatter3d(x=Xe,
                    y=Ye,
                    z=Ze,
                    mode='lines',
                    line=dict(color=line_col, colorscale=[[0, '#5cd6f6'], [1, '#ff4b4b']], width=1),
                    hoverinfo='none',
                    opacity=0.7
                   )

trace2=go.Scatter3d(x=Xn,
                    y=Yn,
                    z=Zn,
                    mode='markers',
                    name='biometrics',
                    marker=dict(symbol='circle',
                                color=[x / max(group) for x in group],
                                colorscale=[[0, '#5cd6f6'], [1, '#ff4b4b']],
                                line=dict(color='rgb(50,50,50)', width=0.5)
                               ),
                   text=labels,
                   hoverinfo='text'
                   )

axis=dict(showbackground=False,
          showline=False,
          zeroline=False,
          showgrid=False,
          showticklabels=False,
          title=''
          )

layout = go.Layout(
        #title="Network of Biometrics",
        width=1000,
        height=1000,
        showlegend=False,
        scene=dict(
             xaxis=dict(axis),
             yaxis=dict(axis),
             zaxis=dict(axis),
        ),
        margin=dict(
        t=100
    ),
    hovermode='closest',
    annotations=[
            dict(
            showarrow=False,
            text="",
            xref='paper',
            yref='paper',
            x=0,
            y=0.1,
            xanchor='left',
            yanchor='bottom',
            font=dict(
            size=14
            )
            )
        ],    )

In [60]:
from plotly.offline import init_notebook_mode, iplot, plot

#init_notebook_mode(connected=True)

data=[trace1, trace2]
fig=go.Figure(data=data, layout=layout)

# Remove gridlines
fig.update_layout(scene=dict(xaxis_showspikes=False, yaxis_showspikes=False, zaxis_showspikes=False))
# Background color
fig.update_layout(paper_bgcolor='#0f202e')
# Text color
fig.update_layout(hoverlabel=dict(font_color="white"))

figwid=go.FigureWidget(fig)

shell = figwid.data[1]
figwid.update_traces(marker_size = 6)
figwid.data

(Scatter3d({
     'hoverinfo': 'none',
     'line': {'color': [1, 1, 1, ..., 0, 0, 0],
              'colorscale': [[0, '#5cd6f6'], [1, '#ff4b4b']],
              'width': 1},
     'marker': {'size': 6},
     'mode': 'lines',
     'opacity': 0.7,
     'uid': '11087a85-50dd-4c6b-a3f5-f8b295406f03',
     'x': [1.2246467991473532e-16, -0.24340656213938958, None, ...,
           -0.24653416743314502, 0.0, None],
     'y': [0.0, 0.7542882386692064, None, ..., -0.8759562939871668, 0.0, None],
     'z': [-1.0, -0.6097560975609756, None, ..., -0.4146341463414635, 2.0, None]
 }),
 Scatter3d({
     'hoverinfo': 'text',
     'marker': {'color': [0.0, 0.02040816326530612, 0.04081632653061224,
                          0.061224489795918366, 0.08163265306122448,
                          0.10204081632653061, 0.12244897959183673,
                          0.14285714285714285, 0.16326530612244897,
                          0.1836734693877551, 0.20408163265306123,
                          0.2244897959

In [61]:
iplot(figwid, filename='BiometricsNetwork')