In [68]:
import pandas as pd
import numpy as np
import collections 
import itertools
import pickle
import networkx as nx
from networkx.algorithms import similarity, isomorphism as iso
from pyvis.network import Network
import ast
import sys
sys.path.append('../')
from modules import utils
import matplotlib.pyplot as plt
%matplotlib inline

In [28]:
def generate_tuple_dict(df):
    frequency_dict = {}
    for traj in df.trajectory:
        if traj in frequency_dict.keys():
            frequency_dict[traj] += 1
        else:
            frequency_dict[traj] = 1
    overall_tup_dict = {}
    for key, value in frequency_dict.items():
        new_key = ast.literal_eval(key)
        for tup in zip(new_key, new_key[1:]):
            if tup in overall_tup_dict.keys():
                overall_tup_dict[tup] += value
            else:
                overall_tup_dict[tup] = value
    return overall_tup_dict

In [29]:
def create_sankey_df(df):
    overall_tup_dict = generate_tuple_dict(df)
    sankey_df = pd.DataFrame()
    sankey_df['source'] = [i[0] for i in overall_tup_dict.keys()]
    sankey_df['target'] = [i[1] for i in overall_tup_dict.keys()]
    sankey_df['value'] = list(overall_tup_dict.values())
    sankey_df['link_type'] = sankey_df['target'].apply(lambda i: 'terminal' if i in ['No lupus', 'Lupus', 'Inconclusive diagnosis'] else 'non_terminal')
    return sankey_df

In [30]:
def get_threshold_value(df, source): #source e.g. 'ana'
    threshold_values = df[source].unique().tolist()
    if len(threshold_values) == 1:
        return int(threshold_values[0])
    else:
        return str(threshold_values)

In [31]:
def draw_pyvis_network(pathways_df, pathway_type='network'): #second type is trajectory 
    pathways_df['type'] = 'directed'
    start_node = pathways_df.iloc[0]['source']
    all_nodes = list(set(pathways_df.source.unique().tolist() + pathways_df.target.unique().tolist()))
    terminal_nodes = list(set(pathways_df[pathways_df.link_type=='terminal'].target))
    non_terminal_nodes = [i for i in all_nodes if i not in terminal_nodes and i!=start_node]
    
    #change get_net to something else since it satnds for Game of Thrones
    got_net = Network(notebook=True, height='600px', width='100%', directed=True, cdn_resources='in_line')
    got_net.add_node(start_node, color='purple', size=20)
    got_net.add_nodes(non_terminal_nodes, size=[15]*len(non_terminal_nodes), color=['blue']*len(non_terminal_nodes))
    got_net.add_nodes(terminal_nodes, color=['green']*len(terminal_nodes), size=[20]*len(terminal_nodes))
    for src, target, value, edge_thresh in zip(pathways_df.source, pathways_df.target, pathways_df.value, pathways_df.edge_threshold):
        if pathway_type =='network':
            if value > threshold:
                if math.isnan(edge_thresh):
                    got_net.add_edge(src, target, value=value, color='red')
                else:
                    got_net.add_edge(src, target, value=value, color='red', label=str(int(edge_thresh)))
            else:
                got_net.add_edge(src, target, value=value, color='blue')
        elif pathway_type =='trajectory': #It's just one trajectory
            if isinstance(edge_thresh, str):
                got_net.add_edge(src, target, value=value, color='blue', label=edge_thresh)
            else:
                got_net.add_edge(src, target, value=value, color='blue', label=str(int(edge_thresh)))
        else:
            print('Unknown pathway type')
    return got_net

In [32]:
def draw_one_trajectory(pred_df, trajectory): #pred_df = pred_no_lupus par exemple
    trajectory_df = pred_df[pred_df.trajectory == trajectory]
    trajectory_testing_df = testing_df.loc[trajectory_df.index]
    trajectory_pathways_df = create_sankey_df(trajectory_df)
    trajectory_pathways_df['edge_threshold'] = [get_threshold_value(trajectory_testing_df, source) for source in trajectory_pathways_df.source]
    trajectory_net = draw_pyvis_network(trajectory_pathways_df, 'trajectory')   
    return trajectory_net

#### Preliminaries

In [33]:
model_name = 'dueling_dqn_per'
seed = 105
steps = int(10e7)
# threshold=1

In [34]:
testing_df = pd.read_csv('../data/test_set_constant.csv')
testing_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,joint_involvement,proteinuria,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,1,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1
1,1,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,1
2,1,0,0,0,0,0,1,0,0,0,...,1,0,0,0,1,1,0,0,0,1
3,1,1,0,0,0,0,0,0,1,0,...,0,0,0,0,1,0,0,1,0,1
4,1,0,0,0,0,0,0,0,1,0,...,1,0,0,0,0,1,0,0,0,1


In [35]:
test_df = pd.read_csv(f'../test_dfs/{model_name}_{seed}_{steps}.csv')
test_df.head()

Unnamed: 0,index,episode_length,reward,y_pred,y_actual,trajectory,terminated,is_success
0,0.0,16.0,0.5,1.0,1.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0
1,1.0,20.0,0.366667,1.0,1.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0
2,2.0,9.0,0.733333,1.0,1.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0
3,3.0,15.0,0.533333,1.0,1.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0
4,4.0,5.0,0.866667,1.0,1.0,"['ana', 'anti_dsdna_antibody', 'joint_involvem...",0.0,1.0


In [36]:
utils.test(test_df.y_actual, test_df.y_pred)[0]

98.77142857142857

In [37]:
pred_lupus = test_df[test_df['y_pred']==1]

#### Graph 1

In [38]:
traj1 = test_df.iloc[0]['trajectory']
traj1_df = pred_lupus[pred_lupus.trajectory == traj1]
traj1_testing_df = testing_df.loc[traj1_df.index]
traj1_pathways_df = create_sankey_df(traj1_df)
traj1

"['ana', 'anti_dsdna_antibody', 'joint_involvement', 'pleural_effusion', 'proteinuria', 'low_c3', 'pericardial_effusion', 'delirium', 'lupus_anti_coagulant', 'anti_cardioliphin_antibodies', 'seizure', 'psychosis', 'oral_ulcers', 'anti_β2gp1_antibodies', 'low_c4', 'Lupus']"

In [39]:
traj1_pathways_df['edge_threshold'] = [get_threshold_value(traj1_testing_df, source) for source in traj1_pathways_df.source]
traj1_pathways_df.head()

Unnamed: 0,source,target,value,link_type,edge_threshold
0,ana,anti_dsdna_antibody,131,non_terminal,1
1,anti_dsdna_antibody,joint_involvement,131,non_terminal,0
2,joint_involvement,pleural_effusion,131,non_terminal,1
3,pleural_effusion,proteinuria,131,non_terminal,0
4,proteinuria,low_c3,131,non_terminal,0


In [40]:
traj1_nx_graph = nx.from_pandas_edgelist(traj1_pathways_df, 'source', 'target', edge_attr=['edge_threshold'],
                                        create_using=nx.DiGraph(),)
undirected_traj1_nx_graph = traj1_nx_graph.to_undirected()

In [43]:
# nx.draw(undirected_traj1_nx_graph, with_labels=True)

In [44]:
# draw_one_trajectory(pred_lupus, traj1).show('Example.html')

#### Graph 2

In [45]:
traj2 = test_df.iloc[1]['trajectory']
traj2_df = pred_lupus[pred_lupus.trajectory == traj2]
traj2_testing_df = testing_df.loc[traj2_df.index]
traj2_pathways_df = create_sankey_df(traj2_df)
traj2

"['ana', 'anti_dsdna_antibody', 'joint_involvement', 'low_c3', 'pericardial_effusion', 'pleural_effusion', 'proteinuria', 'lupus_anti_coagulant', 'delirium', 'thrombocytopenia', 'psychosis', 'oral_ulcers', 'anti_smith_antibody', 'seizure', 'leukopenia', 'anti_β2gp1_antibodies', 'low_c4', 'cutaneous_lupus', 'fever', 'Lupus']"

In [46]:
traj2_pathways_df['edge_threshold'] = [get_threshold_value(traj2_testing_df, source) for source in traj2_pathways_df.source]
traj2_pathways_df.head()

Unnamed: 0,source,target,value,link_type,edge_threshold
0,ana,anti_dsdna_antibody,9,non_terminal,1
1,anti_dsdna_antibody,joint_involvement,9,non_terminal,1
2,joint_involvement,low_c3,9,non_terminal,0
3,low_c3,pericardial_effusion,9,non_terminal,0
4,pericardial_effusion,pleural_effusion,9,non_terminal,0


In [48]:
traj2_nx_graph = nx.from_pandas_edgelist(traj2_pathways_df, 'source', 'target', edge_attr=['edge_threshold'],
                                        create_using=nx.DiGraph(),)
undirected_traj2_nx_graph = traj2_nx_graph.to_directed()

#### Comparing the graphs

In [49]:
nx.is_isomorphic(traj1_nx_graph, traj2_nx_graph)

False

In [53]:
similarity.graph_edit_distance(traj1_nx_graph, traj2_nx_graph)

8.0

#### Is graph A a sbgraph of graph B

In [86]:
def get_all_subgraphs(nx_graph): #maybe i remove the lupus node 
    all_subgraphs = []
    all_connected_subgraphs = []
    for nb_nodes in (2, nx_graph.number_of_nodes()):
        for subgraph in (nx_graph.subgraph(selected_nodes) for selected_nodes in itertools.combinations(nx_graph, nb_nodes)):
            all_subgraphs.append(subgraph)
#             if nx.is_connected(subgraph):
#                 all_connected_subgraphs.append(subgraph)
    return all_subgraphs#, all_connected_subgraphs  

In [87]:
traj1_subgraphs, traj1_connected_subgraphs = get_all_connected_subgraphs(traj1_nx_graph)

In [88]:
len(traj1_subgraphs), len(traj1_connected_subgraphs)

(121, 0)

In [91]:
def is_subgraph(graph1, graph2): #is graph1 a subset of graph2
    graph2_subgraphs = get_all_subgraphs(graph2)
    for graph2_subgraph in graph2_subgraphs:
#         print(type(graph1), type(graph2_subgraph))
        if nx.is_isomorphic(graph1, graph2_subgraph):
            return True
    return False

In [92]:
is_subgraph(traj1_nx_graph, traj2_nx_graph)

False