In [1]:
import pandas as pd
import numpy as np
import ast
import math
import random
import collections
import networkx as nx
from pyvis.network import Network
import plotly.graph_objects as go
import sys
sys.path.append('../')
from modules import utils, constants

In [2]:
def draw_pyvis_network(pathways_df, pathway_type='network', threshold=None):
    """Build a directed pyvis ``Network`` from a table of edges.

    Parameters
    ----------
    pathways_df : pandas.DataFrame
        One row per edge, with the columns ``source``, ``target``, ``value``,
        ``link_type`` and ``edge_threshold``. The ``source`` of the first row
        is drawn as the start node. Note: a constant ``type`` column is
        written to this frame in place.
    pathway_type : str
        ``'network'``: edges whose ``value`` exceeds ``threshold`` are red
        (and labelled with ``edge_threshold`` when it is not missing); all
        other edges are blue and unlabelled.
        ``'trajectory'``: every edge is blue and labelled with
        ``edge_threshold`` when it is not missing.
        Any other value prints a message and returns a network with nodes
        but no edges.
    threshold : number, optional
        Cut-off used in ``'network'`` mode. When omitted, a notebook-level
        variable named ``threshold`` is used if one exists.

    Returns
    -------
    pyvis.network.Network

    Raises
    ------
    ValueError
        In ``'network'`` mode when no threshold is supplied and no
        notebook-level ``threshold`` is defined.
    """
    if pathway_type == 'network' and threshold is None:
        # Backward compatibility: earlier versions read a global `threshold`.
        threshold = globals().get('threshold')
        if threshold is None:
            raise ValueError(
                "draw_pyvis_network: pass `threshold` (or define a notebook-level "
                "`threshold`) when pathway_type='network'"
            )

    pathways_df['type'] = 'directed'
    start_node = pathways_df.iloc[0]['source']

    # dict.fromkeys de-duplicates while keeping first-appearance order, so the
    # node order is reproducible between runs (a plain set is not).
    terminal_mask = pathways_df.link_type == 'terminal'
    terminal_nodes = list(dict.fromkeys(pathways_df.loc[terminal_mask, 'target'].tolist()))
    terminal_lookup = set(terminal_nodes)
    all_nodes = list(dict.fromkeys(pathways_df.source.tolist() + pathways_df.target.tolist()))
    non_terminal_nodes = [
        node for node in all_nodes
        if node not in terminal_lookup and node != start_node
    ]

    net = Network(notebook=True, height='750px', width='100%', directed=True, cdn_resources='in_line')
    net.add_node(start_node, color='purple', size=20)
    net.add_nodes(non_terminal_nodes, size=[15]*len(non_terminal_nodes), color=['blue']*len(non_terminal_nodes))
    net.add_nodes(terminal_nodes, color=['green']*len(terminal_nodes), size=[20]*len(terminal_nodes))

    if pathway_type not in ('network', 'trajectory'):
        # Report once instead of once per edge; no edges are drawn.
        print('Unknown pathway type')
        return net

    for src, target, value, edge_thresh in zip(pathways_df.source, pathways_df.target, pathways_df.value, pathways_df.edge_threshold):
        if pathway_type == 'network':
            is_frequent = value > threshold
            color = 'red' if is_frequent else 'blue'
            show_label = is_frequent
        else:  # 'trajectory': a single path, every edge may carry a label
            color = 'blue'
            show_label = True

        edge_kwargs = {'value': value, 'color': color}
        # pd.isna also copes with None, which math.isnan would reject.
        if show_label and not pd.isna(edge_thresh):
            edge_kwargs['label'] = str(int(edge_thresh))
        net.add_edge(src, target, **edge_kwargs)

    # TODO: edge styling options that were tried and left disabled:
#     net.set_options("""
#                     "edges": {
#                               "arrowStrikethrough": false,
#                               "color": {
#                                         "inherit":true
#                               },
#                               "font": {
#                                       "size": 10,
#                                       "align": "top"
#                               },
#                               "smooth": false
#                             }
#                         """)
    return net

In [3]:
def generate_tuple_dict(df):
    """Count how often each consecutive pair of steps occurs across trajectories.

    ``df.trajectory`` must yield string representations of sequences (for
    example ``"['ana', 'fever', 'Lupus']"``). Each string is parsed with
    ``ast.literal_eval``.

    Returns a dict that maps ``(step, next_step)`` tuples to the number of
    trajectories (counted with multiplicity) in which that transition occurs.
    """
    # Tally identical trajectory strings first so each distinct one is parsed once.
    trajectory_counts = collections.Counter(traj for traj in df.trajectory)

    transition_counts = {}
    for raw_trajectory, occurrences in trajectory_counts.items():
        steps = ast.literal_eval(raw_trajectory)
        for position in range(len(steps) - 1):
            edge = (steps[position], steps[position + 1])
            transition_counts[edge] = transition_counts.get(edge, 0) + occurrences
    return transition_counts

In [4]:
def streamline_frequency_dict(frequency_dict):
    """Merge paths that visit the same set of nodes and give them a canonical order.

    Parameters
    ----------
    frequency_dict : dict
        Maps the string representation of a path (e.g.
        ``"['ana', 'fever', 'Lupus']"``) to its frequency.

    Returns
    -------
    dict
        Maps ``str(ordered_nodes)`` to the summed frequency of every input
        path made of exactly those nodes. Within each key the nodes are
        ordered by how many distinct node sets contain them (most common
        first). Ties are broken by first appearance in the input, so the
        result is reproducible between runs. The three diagnosis labels
        always come last.
    """
    terminal_labels = ['Lupus', 'No lupus', 'Inconclusive diagnosis']

    merged = {}      # frozenset of nodes -> summed frequency (insertion-ordered)
    first_seen = {}  # node -> position of its first appearance in the input
    for path_str, value in frequency_dict.items():
        path = ast.literal_eval(path_str)
        for node in path:
            first_seen.setdefault(node, len(first_seen))
        node_set = frozenset(path)
        if node_set in merged:
            merged[node_set] += value
        else:
            merged[node_set] = value

    # Each distinct node set contributes one count per node it contains.
    occurrences = collections.Counter(node for node_set in merged for node in node_set)
    by_commonness = sorted(occurrences, key=lambda node: (-occurrences[node], first_seen[node]))
    ordering = [node for node in by_commonness if node not in terminal_labels] + terminal_labels
    rank = {node: position for position, node in enumerate(ordering)}

    return {
        str(sorted(node_set, key=rank.__getitem__)): total
        for node_set, total in merged.items()
    }

In [5]:
def get_threshold_value(row, trajectories_df=None, features_df=None):
    """Return the single value of ``row.source`` shared by all episodes using an edge.

    Episodes are selected when their ``trajectory`` string contains the
    consecutive pair ``'<source>', '<target>'``. Their index labels are then
    looked up in the feature table, and the distinct values of the
    ``row.source`` column are inspected.

    Parameters
    ----------
    row : object with ``source`` and ``target`` attributes
        Typically one row of the edge table passed by ``DataFrame.apply``.
    trajectories_df : pandas.DataFrame, optional
        Frame with a ``trajectory`` column. Defaults to the notebook-level
        ``test_df``.
    features_df : pandas.DataFrame, optional
        Frame with one column per node name, addressed by the same index
        labels as ``trajectories_df``. Defaults to the notebook-level
        ``testing_df``.

    Returns
    -------
    int or float
        The shared value as ``int`` when exactly one distinct, non-missing
        value exists; otherwise ``numpy.nan``.
    """
    if trajectories_df is None:
        trajectories_df = test_df
    if features_df is None:
        features_df = testing_df

    substring = f"'{row.source}', '{row.target}'"
    # Vectorised literal substring match. This replaces the row-by-row
    # DataFrame.append loop (append no longer exists in pandas >= 2.0).
    uses_edge = trajectories_df['trajectory'].str.contains(substring, regex=False, na=False)
    matching_labels = trajectories_df.index[uses_edge.to_numpy(dtype=bool)]

    threshold_values = features_df.loc[matching_labels, row.source].unique().tolist()
    if len(threshold_values) == 1 and not pd.isna(threshold_values[0]):
        return int(threshold_values[0])
    # Zero, several, or only a missing value: no unambiguous threshold.
    return np.nan

In [6]:
def create_sankey_df(df):
    """Turn the transition counts of ``df`` into an edge table.

    The result has one row per ``(source, target)`` transition returned by
    ``generate_tuple_dict(df)``, with the columns ``source``, ``target``,
    ``value`` (the count) and ``link_type`` (``'terminal'`` when the target
    is one of the diagnosis labels, otherwise ``'non_terminal'``).
    """
    terminal_labels = ['No lupus', 'Lupus', 'Inconclusive diagnosis']

    def classify(target_name):
        if target_name in terminal_labels:
            return 'terminal'
        return 'non_terminal'

    edge_counts = generate_tuple_dict(df)

    edges = pd.DataFrame()
    edges['source'] = [origin for origin, _ in edge_counts]
    edges['target'] = [destination for _, destination in edge_counts]
    edges['value'] = list(edge_counts.values())
    edges['link_type'] = edges['target'].apply(classify)
    return edges

#### The data

In [7]:
# Per-episode results (episode_length, reward, y_pred, y_actual, trajectory, ...).
# The saved 'index' column is dropped; get_threshold_value reads this frame as a global.
test_df = pd.read_csv('../test_dfs/negative_reward/step_reward_twelfth.csv').drop(['index'], axis=1)
test_df.head()

Unnamed: 0,episode_length,reward,y_pred,y_actual,trajectory,terminated,is_success
0,4.0,0.75,1.0,1.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0
1,3.0,0.833333,1.0,1.0,"['ana', 'anti_dsdna_antibody', 'Lupus']",0.0,1.0
2,4.0,0.75,1.0,1.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0
3,3.0,0.833333,1.0,1.0,"['ana', 'anti_dsdna_antibody', 'Lupus']",0.0,1.0
4,4.0,0.75,1.0,1.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0


In [8]:
# NOTE(review): shows element 0 of utils.success_rate's return value, presumably
# the success percentage; confirm against modules/utils.
utils.success_rate(test_df)[0]

91.36428571428571

In [9]:
# Feature table (one column per feature plus 'label'). get_threshold_value looks
# rows up here using test_df's index labels.
# NOTE(review): assumes both frames share row order/index; confirm.
testing_df = pd.read_csv('../data/missingness/0/testing_set.csv')
testing_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,1,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
1,1,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,1
2,1,0,0,0,0,0,1,0,0,0,...,1,0,0,0,1,1,0,0,0,1
3,1,1,0,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,1,0,1
4,1,0,0,0,0,0,0,0,1,0,...,1,0,0,0,0,1,0,0,0,1


#### For entire network

In [10]:
# threshold = 0.1*len(test_df)
# pathways_df = create_sankey_df(test_df)
# pathways_df['edge_threshold'] = pathways_df.apply(lambda row: get_threshold_value(row), axis=1)

In [11]:
# got_net = draw_pyvis_network(pathways_df)    
# got_net.show('Example.html')

#### For just one trajectory

In [12]:
def draw_one_trajectory(df, trajectory):
    """Build the pyvis network for a single trajectory.

    Rows of ``df`` whose ``trajectory`` column equals ``trajectory`` are
    converted to an edge table with ``create_sankey_df``. Each edge gets an
    ``edge_threshold`` from ``get_threshold_value``, and the table is drawn
    with ``draw_pyvis_network`` in ``'trajectory'`` mode.

    Returns the network object produced by ``draw_pyvis_network``.
    """
    matching_rows = df[df.trajectory == trajectory]
    edges = create_sankey_df(matching_rows)
    # Row-wise apply: get_threshold_value receives each edge row in turn.
    edges['edge_threshold'] = edges.apply(get_threshold_value, axis=1)
    return draw_pyvis_network(edges, 'trajectory')

In [13]:
# Keep the episodes whose prediction is class 0, then take the trajectory at
# position 1 of value_counts(), i.e. the second most frequent one.
pred_no_lupus = test_df[test_df['y_pred']==0]
no_lupus_pathway_1 = pred_no_lupus.trajectory.value_counts().index[1]
no_lupus_pathway_1

"['ana', 'anti_dsdna_antibody', 'joint_involvement', 'pericardial_effusion', 'proteinuria', 'anti_smith_antibody', 'lupus_anti_coagulant', 'delirium', 'leukopenia', 'low_c3', 'low_c4', 'No lupus']"

#### delete from here

In [15]:
# Scratch step (section marked for deletion): the same row filter that
# draw_one_trajectory applies internally.
trajectory_df = pred_no_lupus[pred_no_lupus.trajectory == no_lupus_pathway_1]
trajectory_df.head()

Unnamed: 0,episode_length,reward,y_pred,y_actual,trajectory,terminated,is_success
41,12.0,0.083333,0.0,0.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0
50,12.0,0.083333,0.0,0.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0
56,12.0,0.083333,0.0,0.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0
79,12.0,0.083333,0.0,0.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0
89,12.0,0.083333,0.0,0.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0


In [19]:
# Scratch step (section marked for deletion): edge table for the selected trajectory.
trajectory_pathways_df = create_sankey_df(trajectory_df)
trajectory_pathways_df.head()

Unnamed: 0,source,target,value,link_type
0,ana,anti_dsdna_antibody,683,non_terminal
1,anti_dsdna_antibody,joint_involvement,683,non_terminal
2,joint_involvement,pericardial_effusion,683,non_terminal
3,pericardial_effusion,proteinuria,683,non_terminal
4,proteinuria,anti_smith_antibody,683,non_terminal


In [20]:
# Scratch step (section marked for deletion): attach the per-edge threshold.
# Edges without a single unambiguous value get NaN.
trajectory_pathways_df['edge_threshold'] = trajectory_pathways_df.apply(lambda row: get_threshold_value(row), axis=1)
trajectory_pathways_df.head()

Unnamed: 0,source,target,value,link_type,edge_threshold
0,ana,anti_dsdna_antibody,683,non_terminal,1.0
1,anti_dsdna_antibody,joint_involvement,683,non_terminal,0.0
2,joint_involvement,pericardial_effusion,683,non_terminal,0.0
3,pericardial_effusion,proteinuria,683,non_terminal,
4,proteinuria,anti_smith_antibody,683,non_terminal,0.0


#### end here

In [14]:
# Draw the selected class-0 trajectory and render it to Example.html.
no_lupus_pathway1_net = draw_one_trajectory(pred_no_lupus, no_lupus_pathway_1)
no_lupus_pathway1_net.show('Example.html')

In [None]:
# Visualise the trajectory of the first episode that has the maximum episode length.
# This reuses draw_one_trajectory rather than repeating its steps inline.
# NOTE: this writes to the same 'Example.html' as the previous rendering cell.
sample_trajectory = test_df[test_df.episode_length == test_df.episode_length.max()].iloc[0]['trajectory']
trajectory_got_net = draw_one_trajectory(test_df, sample_trajectory)
trajectory_got_net.show('Example.html')