In [17]:
import pandas as pd
import numpy as np
import ast
import math
import networkx as nx
from pyvis.network import Network
import plotly.graph_objects as go
import sys
sys.path.append('../')
from modules import utils, constants

#### Test results (`test_df`): per-episode predictions and trajectories

In [18]:
# Load the saved evaluation results and drop the stored 'index' column.
# Presumably one row per evaluation episode (episode_length, reward, y_pred, trajectory, ...) — TODO confirm.
test_df = pd.read_csv('../test_dfs/negative_reward/step_30_correct_1_incorrect_-1.csv').drop(['index'], axis=1)
test_df.head()

Unnamed: 0,episode_length,reward,y_pred,y_actual,trajectory,terminated,is_success
0,17.0,0.515152,1.0,1.0,"['ana', 'pericardial_effusion', 'leukopenia', ...",0.0,1.0
1,11.0,0.69697,1.0,1.0,"['ana', 'pericardial_effusion', 'leukopenia', ...",0.0,1.0
2,16.0,0.545455,1.0,1.0,"['ana', 'pericardial_effusion', 'leukopenia', ...",0.0,1.0
3,15.0,0.575758,1.0,1.0,"['ana', 'pericardial_effusion', 'leukopenia', ...",0.0,1.0
4,9.0,0.757576,1.0,1.0,"['ana', 'pericardial_effusion', 'leukopenia', ...",0.0,1.0


In [19]:
# First element of whatever utils.success_rate returns for the test results
# (helper lives in ../modules; presumably a percentage — TODO confirm).
utils.success_rate(test_df)[0]

95.37857142857142

In [23]:
# Mean episode length over rows predicted as class 0, leaving out episodes of length 2.
# NOTE(review): length-2 episodes are presumably immediate exits after the first feature — confirm why they are excluded.
test_df[(test_df.y_pred==0) & (test_df.episode_length!=2)].episode_length.mean()

14.65691489361702

#### Testing and training sets (`testing_df`, `training_df`): feature values and labels

In [24]:
# Feature table of the testing set; draw_one_trajectory later looks rows up in it by index.
testing_df = pd.read_csv('../data/missingness/0/testing_set.csv')
testing_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,1,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
1,1,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,1
2,1,0,0,0,0,0,1,0,0,0,...,1,0,0,0,1,1,0,0,0,1
3,1,1,0,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,1,0,1
4,1,0,0,0,0,0,0,0,1,0,...,1,0,0,0,0,1,0,0,0,1


In [25]:
# Feature table of the training set, loaded for inspection; no other visible cell reads it.
training_df = pd.read_csv('../data/missingness/0/training_set.csv')
training_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,0,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,1
3,1,0,0,0,0,0,0,1,1,0,...,0,1,0,0,0,0,0,1,1,1
4,1,0,0,1,1,1,0,0,0,0,...,0,1,0,0,0,1,0,0,1,1


#### Possibly useful functions

In [26]:
def generate_tuple_dict(df):
    """Count how often each consecutive (step, next_step) pair occurs.

    ``df.trajectory`` holds trajectories serialised as Python list literals.
    Identical strings are tallied first so each distinct trajectory is parsed
    only once; every adjacent pair in a parsed trajectory is then credited
    with the number of rows that share that trajectory.

    Returns a plain dict mapping ``(source, target)`` tuples to counts, in
    order of first appearance.
    """
    rows_per_trajectory = {}
    for serialised in df.trajectory:
        rows_per_trajectory[serialised] = rows_per_trajectory.get(serialised, 0) + 1

    transition_counts = {}
    for serialised, occurrences in rows_per_trajectory.items():
        steps = ast.literal_eval(serialised)
        for transition in zip(steps, steps[1:]):
            transition_counts[transition] = transition_counts.get(transition, 0) + occurrences
    return transition_counts

In [27]:
def create_sankey_df(df):
    """Turn the transition counts of ``df`` into a link table.

    The result has one row per distinct (source, target) pair with the columns
    ``source``, ``target``, ``value`` (the count) and ``link_type``, which is
    'terminal' when the target is one of the diagnosis labels and
    'non_terminal' otherwise.
    """
    diagnosis_labels = ['No lupus', 'Lupus', 'Inconclusive diagnosis']
    transition_counts = generate_tuple_dict(df)

    def classify_link(target):
        return 'terminal' if target in diagnosis_labels else 'non_terminal'

    # assign() adds the columns one after another in the order written, so
    # link_type can be derived from the freshly added target column.
    return pd.DataFrame().assign(
        source=[pair[0] for pair in transition_counts],
        target=[pair[1] for pair in transition_counts],
        value=list(transition_counts.values()),
        link_type=lambda frame: frame['target'].apply(classify_link),
    )

In [28]:
def _edge_label(edge_thresh):
    """Return the text shown on an edge, or None when there is nothing to show."""
    if isinstance(edge_thresh, str):
        return edge_thresh
    if math.isnan(edge_thresh):
        return None
    return str(int(edge_thresh))


def draw_pyvis_network(pathways_df, pathway_type='network', threshold=None):
    """Build a pyvis ``Network`` from a table of pathway links.

    Parameters
    ----------
    pathways_df : pandas.DataFrame
        Must provide the columns ``source``, ``target``, ``value``,
        ``link_type`` and ``edge_threshold``. The ``source`` of the first row
        is drawn as the start node. As before, a constant ``type`` column
        ('directed') is written into this frame in place.
    pathway_type : str
        'network': edges whose ``value`` exceeds ``threshold`` are red and
        labelled with their ``edge_threshold`` (if it is not NaN); all other
        edges are blue and unlabelled.
        'trajectory': every edge is blue and labelled with its
        ``edge_threshold``.
        Any other value prints 'Unknown pathway type' and yields a network
        with nodes but no edges.
    threshold : number, optional
        Cut-off used in 'network' mode. When omitted, a notebook-level
        variable called ``threshold`` is used if one exists, which is what the
        function silently relied on before.

    Returns
    -------
    pyvis.network.Network
        Start node in purple, terminal nodes in green, all others in blue.

    Raises
    ------
    ValueError
        In 'network' mode when no threshold is available from either source.
    """
    if pathway_type == 'network':
        if threshold is None:
            # Backward compatibility with cells that define a global `threshold`.
            threshold = globals().get('threshold')
        if threshold is None:
            raise ValueError("pathway_type='network' requires a numeric `threshold`")

    pathways_df['type'] = 'directed'
    start_node = pathways_df.iloc[0]['source']
    # dict.fromkeys de-duplicates while keeping first-seen order, so the node
    # order no longer changes from run to run as it did with set().
    all_nodes = list(dict.fromkeys(pathways_df.source.tolist() + pathways_df.target.tolist()))
    terminal_nodes = list(dict.fromkeys(pathways_df[pathways_df.link_type == 'terminal'].target))
    non_terminal_nodes = [node for node in all_nodes if node not in terminal_nodes and node != start_node]

    net = Network(notebook=True, height='600px', width='100%', directed=True, cdn_resources='in_line')
    net.add_node(start_node, color='purple', size=20)
    net.add_nodes(non_terminal_nodes, size=[15] * len(non_terminal_nodes), color=['blue'] * len(non_terminal_nodes))
    net.add_nodes(terminal_nodes, color=['green'] * len(terminal_nodes), size=[20] * len(terminal_nodes))

    if pathway_type not in ('network', 'trajectory'):
        # Report once instead of once per edge; the node-only network is still returned.
        print('Unknown pathway type')
        return net

    edge_rows = zip(pathways_df.source, pathways_df.target, pathways_df.value, pathways_df.edge_threshold)
    for src, target, value, edge_thresh in edge_rows:
        if pathway_type == 'trajectory':
            colour, labelled = 'blue', True
        elif value > threshold:
            colour, labelled = 'red', True
        else:
            colour, labelled = 'blue', False

        options = {'value': value, 'color': colour}
        label = _edge_label(edge_thresh) if labelled else None
        if label is not None:
            options['label'] = label
        net.add_edge(src, target, **options)
    return net

In [1]:
# NOTE(review): DQN is not referenced by any cell visible in this notebook, and this cell's
# execution count is out of sequence; if the import is needed it belongs in the first cell.
from stable_baselines3 import DQN

#### Start from here: helpers for drawing a single trajectory

In [29]:
def get_threshold_value(df, source):
    """Summarise the values that column ``source`` (e.g. 'ana') takes in ``df``.

    Returns the value as an ``int`` when the column holds exactly one distinct
    value; otherwise returns the string form of the list of distinct values.
    """
    distinct_values = df[source].unique().tolist()
    if len(distinct_values) != 1:
        return str(distinct_values)
    only_value = distinct_values[0]
    return int(only_value)

In [30]:
def draw_one_trajectory(pred_df, trajectory, features_df=None):
    """Draw the network for one trajectory, labelling edges with feature values.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Episode results with a ``trajectory`` column, e.g. ``pred_no_lupus``.
    trajectory : str
        The trajectory to draw, exactly as stored in ``pred_df.trajectory``.
    features_df : pandas.DataFrame, optional
        Feature table in which the matching episodes are looked up by index
        label. Defaults to the notebook-level ``testing_df``, which is what
        the function always used before.
        NOTE(review): this assumes ``pred_df`` and the feature table share the
        same row index — confirm.

    Returns
    -------
    The network produced by ``draw_pyvis_network`` in 'trajectory' mode.
    """
    if features_df is None:
        features_df = testing_df

    matching_episodes = pred_df[pred_df.trajectory == trajectory]
    matching_features = features_df.loc[matching_episodes.index]

    pathways = create_sankey_df(matching_episodes)
    # Each edge is labelled with the value(s) its source feature takes across the matching rows.
    pathways['edge_threshold'] = [
        get_threshold_value(matching_features, source) for source in pathways.source
    ]
    return draw_pyvis_network(pathways, 'trajectory')

#### No lupus diagnosis

In [31]:
# Episodes where the prediction is class 0 (the 'No lupus' outcome).
pred_no_lupus = test_df[test_df['y_pred']==0]
# value_counts() sorts by descending frequency, so index[1] is the SECOND most frequent trajectory.
# NOTE(review): index[0] is presumably the length-2 immediate exit and is skipped on purpose — confirm.
no_lupus_pathway_1 = pred_no_lupus.trajectory.value_counts().index[1]
no_lupus_pathway_1

"['ana', 'pericardial_effusion', 'leukopenia', 'seizure', 'cutaneous_lupus', 'anti_cardioliphin_antibodies', 'non_scarring_alopecia', 'proteinuria', 'joint_involvement', 'anti_dsdna_antibody', 'No lupus']"

In [32]:
# Number of class-0 episodes that followed exactly this trajectory.
len(pred_no_lupus[pred_no_lupus.trajectory == no_lupus_pathway_1])

1328

In [33]:
# Distinct episode lengths among class-0 predictions, in order of first appearance.
pred_no_lupus.episode_length.unique()

array([11., 22.,  2., 15., 12., 18., 21., 14., 20., 19., 24., 16., 17.,
       23., 13.])

In [34]:
# Draw the selected 'No lupus' trajectory.
# Written to its own HTML file: every plot used to go to 'Example.html', so each
# call replaced the file produced by the previous plot.
no_lupus_pathway1_net = draw_one_trajectory(pred_no_lupus, no_lupus_pathway_1)
no_lupus_pathway1_net.show('no_lupus_pathway_1.html')

In [35]:
# Same count as the one taken right after no_lupus_pathway_1 was selected, repeated next to the plot.
len(pred_no_lupus[pred_no_lupus.trajectory == no_lupus_pathway_1])

1328

In [36]:
# Distinct trajectories among the class-0 episodes that reached the maximum episode length.
pred_no_lupus[pred_no_lupus.episode_length==pred_no_lupus.episode_length.max()].trajectory.unique()

array(["['ana', 'pericardial_effusion', 'leukopenia', 'seizure', 'cutaneous_lupus', 'delirium', 'anti_cardioliphin_antibodies', 'low_c3', 'oral_ulcers', 'proteinuria', 'joint_involvement', 'non_scarring_alopecia', 'thrombocytopenia', 'acute_pericarditis', 'lupus_anti_coagulant', 'anti_smith_antibody', 'fever', 'anti_dsdna_antibody', 'pleural_effusion', 'anti_β2gp1_antibodies', 'low_c4', 'auto_immune_hemolysis', 'psychosis', 'No lupus']",
       "['ana', 'pericardial_effusion', 'leukopenia', 'seizure', 'non_scarring_alopecia', 'cutaneous_lupus', 'delirium', 'anti_dsdna_antibody', 'thrombocytopenia', 'low_c3', 'oral_ulcers', 'acute_pericarditis', 'proteinuria', 'joint_involvement', 'fever', 'low_c4', 'anti_smith_antibody', 'anti_cardioliphin_antibodies', 'pleural_effusion', 'anti_β2gp1_antibodies', 'lupus_anti_coagulant', 'auto_immune_hemolysis', 'psychosis', 'No lupus']",
       "['ana', 'pericardial_effusion', 'leukopenia', 'seizure', 'cutaneous_lupus', 'delirium', 'anti_cardioliphin_a

In [57]:
# Pick one of the maximum-length 'No lupus' trajectories; position 6 in the array of
# distinct trajectories is a hand-picked choice.
longest_no_lupus_pathway = pred_no_lupus[pred_no_lupus.episode_length == pred_no_lupus.episode_length.max()].trajectory.unique()[6]
longest_pathway1_net = draw_one_trajectory(pred_no_lupus, longest_no_lupus_pathway)
# Own HTML file so this plot does not overwrite (or get overwritten by) the other plots.
longest_pathway1_net.show('longest_no_lupus_pathway.html')

In [58]:
# Number of class-0 episodes that followed the selected longest trajectory.
len(pred_no_lupus[pred_no_lupus.trajectory == longest_no_lupus_pathway])

3

In [None]:
# value_counts() sorts by descending frequency, so index[1] is the second most frequent
# 'No lupus' trajectory (the same selection as no_lupus_pathway_1).
# NOTE(review): index[0] is presumably the length-2 immediate exit and is skipped on purpose — confirm.
commonest_no_lupus_pathway = pred_no_lupus.trajectory.value_counts().index[1]
commonest_no_lupus_pathway_net = draw_one_trajectory(pred_no_lupus, commonest_no_lupus_pathway)
# Own HTML file so this plot does not overwrite (or get overwritten by) the other plots.
commonest_no_lupus_pathway_net.show('commonest_no_lupus_pathway.html')

In [None]:
# Number of class-0 episodes that followed the selected trajectory.
len(pred_no_lupus[pred_no_lupus.trajectory == commonest_no_lupus_pathway])

#### Lupus diagnosis

In [61]:
# Episodes where the prediction is class 1 (the 'Lupus' outcome), and their mean episode length.
pred_lupus = test_df[test_df['y_pred']==1]
pred_lupus.episode_length.mean()

14.336621062678969

In [63]:
# Most frequent 'Lupus' trajectory (value_counts() sorts by descending frequency).
commonest_lupus_pathway = pred_lupus.trajectory.value_counts().index[0]
commonest_lupus_pathway_net = draw_one_trajectory(pred_lupus, commonest_lupus_pathway)
# Own HTML file so this plot does not overwrite (or get overwritten by) the other plots.
commonest_lupus_pathway_net.show('commonest_lupus_pathway.html')

In [65]:
# Number of class-1 episodes that followed the most frequent trajectory.
len(pred_lupus[pred_lupus.trajectory == commonest_lupus_pathway])

262

In [66]:
# Distinct trajectories among the class-1 episodes with the minimum episode length.
pred_lupus[pred_lupus.episode_length==pred_lupus.episode_length.min()].trajectory.unique()

array(["['ana', 'pericardial_effusion', 'leukopenia', 'seizure', 'Lupus']"],
      dtype=object)

In [67]:
# First distinct trajectory among the minimum-length 'Lupus' episodes.
shortest_lupus_pathway = pred_lupus[pred_lupus.episode_length==pred_lupus.episode_length.min()].trajectory.unique()[0]
shortest_lupus_pathway_net = draw_one_trajectory(pred_lupus, shortest_lupus_pathway)
# Own HTML file so this plot does not overwrite (or get overwritten by) the other plots.
shortest_lupus_pathway_net.show('shortest_lupus_pathway.html')

In [68]:
# Number of class-1 episodes that followed the shortest trajectory.
len(pred_lupus[pred_lupus.trajectory == shortest_lupus_pathway])

90

In [76]:
# Frequency of each trajectory among the class-1 episodes that reached the maximum episode length.
pred_lupus[pred_lupus.episode_length==pred_lupus.episode_length.max()].trajectory.value_counts()

['ana', 'pericardial_effusion', 'leukopenia', 'seizure', 'cutaneous_lupus', 'anti_cardioliphin_antibodies', 'non_scarring_alopecia', 'proteinuria', 'joint_involvement', 'anti_dsdna_antibody', 'thrombocytopenia', 'oral_ulcers', 'pleural_effusion', 'delirium', 'acute_pericarditis', 'low_c3', 'fever', 'anti_β2gp1_antibodies', 'lupus_anti_coagulant', 'anti_smith_antibody', 'low_c4', 'psychosis', 'auto_immune_hemolysis', 'Lupus']    9
['ana', 'pericardial_effusion', 'leukopenia', 'seizure', 'non_scarring_alopecia', 'anti_cardioliphin_antibodies', 'acute_pericarditis', 'low_c3', 'cutaneous_lupus', 'delirium', 'proteinuria', 'thrombocytopenia', 'oral_ulcers', 'fever', 'low_c4', 'joint_involvement', 'anti_dsdna_antibody', 'pleural_effusion', 'anti_β2gp1_antibodies', 'anti_smith_antibody', 'lupus_anti_coagulant', 'auto_immune_hemolysis', 'psychosis', 'Lupus']    2
['ana', 'pericardial_effusion', 'leukopenia', 'non_scarring_alopecia', 'seizure', 'cutaneous_lupus', 'low_c3', 'anti_cardioliphin_an

In [80]:
# Among the maximum-length 'Lupus' episodes, take the trajectory at position 2 of the
# frequency ranking (third most frequent); the position is a hand-picked choice.
longest_lupus_pathway = pred_lupus[pred_lupus.episode_length==pred_lupus.episode_length.max()].trajectory.value_counts().index[2]
longest_lupus_pathway_net = draw_one_trajectory(pred_lupus, longest_lupus_pathway)
# Own HTML file so this plot does not overwrite (or get overwritten by) the other plots.
longest_lupus_pathway_net.show('longest_lupus_pathway.html')

In [81]:
# Number of class-1 episodes that followed the selected longest trajectory.
len(pred_lupus[pred_lupus.trajectory == longest_lupus_pathway])

2

In [None]:
# Frequency of every trajectory among class-1 predictions, most frequent first.
pred_lupus.trajectory.value_counts()

In [None]:
# Re-draws the longest 'Lupus' trajectory selected earlier (longest_lupus_pathway must already exist).
longest_lupus_pathway_net = draw_one_trajectory(pred_lupus, longest_lupus_pathway)
# Own HTML file instead of the shared 'Example.html', which every plot used to overwrite.
longest_lupus_pathway_net.show('longest_lupus_pathway.html')