In [None]:
import torch
from torch import optim,nn
from torch.autograd import Variable
import numpy as np
import time
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
import os
os.chdir('/home/mjc/github/EHRVis/')
from models.data_loader import DataLoader
from models.retain_bidirectional import RETAIN
from functions import get_classified_sickness, list_to_inputs_targets

In [None]:
# set CUDA device
# Ask for GPUs to be enumerated in PCI bus order and expose only device 0
# to this kernel through the process environment.
# NOTE(review): torch is imported in the cell above; presumably this still takes
# effect because CUDA is only initialised on first use — confirm.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0

In [None]:
# load model
# Build the bidirectional RETAIN network, move it to the GPU and restore the
# weights stored in retain_bi_14500.pth.
# NOTE(review): the positional constructor arguments (128, 128, 268, True) are not
# explained here; 268 matches the first dimension of the score matrices allocated
# further down, so it is presumably the number of target classes — confirm against
# models/retain_bidirectional.py.
model = RETAIN(128,128,268,True)
model.cuda()
model.load_state_dict(torch.load('data/saved_weights/retain_bi_14500.pth'))
model.eval()
# NOTE(review): `release` is a project-specific attribute whose effect is not
# visible in this notebook — presumably it enables the path used by
# model.interpret(); confirm.
model.release=True

### Aggregate of different patients related to a single disease

In [None]:
# Read the preprocessed 2014 records into `out_dict`; later cells index it by
# patient id (jid).
# NOTE: pickle executes arbitrary code on load — only open trusted files.
import pickle
with open('data/preprocessed/2014_out_dict.pckl', mode='rb') as handle:
    out_dict = pickle.load(handle)

In [None]:
# Patient ids (jids) of the cohort stored in jid_alzheimer.pckl.
import pickle
with open('data/preprocessed/jid_alzheimer.pckl', mode='rb') as handle:
    jids = pickle.load(handle)

# Index of the target disease as returned by get_classified_sickness for 'G00'.
# NOTE(review): the cohort file is named "alzheimer" while the code passed here is
# 'G00' — confirm both refer to the same disease class.
code = get_classified_sickness('G00')
print(code)

In [None]:
# create random list for coordinates
# Random (x, y) position in the unit square for each of the 1400 node indices.
# A dedicated, seeded generator makes the layout identical on every
# Restart & Run All without touching NumPy's global random state.
LAYOUT_SEED = 0
rng = np.random.RandomState(LAYOUT_SEED)
x_val = list(rng.uniform(0, 1, 1400))
y_val = list(rng.uniform(0, 1, 1400))
# pos maps node index -> [x, y], the format nx.draw_networkx expects.
pos = {i: [x_val[i], y_val[i]] for i in range(1400)}

In [None]:
# Accumulate, over every patient in `jids`, the contribution of each input item
# toward the target disease `code`. Only row `code` of the two matrices is filled.
scores = np.zeros((268, 1400))
counts = np.zeros((268, 1400), dtype=int)
for patient_no, jid in enumerate(jids, start=1):
    print(patient_no, jid)
    try:
        out_list = out_dict[jid]
        if len(out_list) <= 1:
            # patients with fewer than two records are skipped
            continue

        # Convert the records to model inputs and run the forward pass.
        # NOTE(review): `outputs` is unused; presumably interpret() relies on state
        # cached by this call — confirm before removing it.
        input_list, targets = list_to_inputs_targets(out_list)
        inputs = model.list_to_tensor([input_list])
        outputs = model(inputs)

        for visit_idx, visit in enumerate(input_list):
            for item in visit:
                contribution = model.interpret(0, visit_idx, item, code).data[0]
                # only non-zero contributions count toward the later average
                if contribution != 0.0:
                    counts[code, item] += 1
                scores[code, item] += contribution

    except KeyError:
        # jid not present in out_dict; any KeyError raised inside the block is
        # swallowed as well
        continue

In [None]:
# normalize scores
# Average contribution per item for the target disease: the summed score divided
# by the number of non-zero contributions recorded for that item.
# `scores[code]` is a view into `scores`, so work on a copy; dividing the view in
# place would overwrite `scores` and make every re-run of this cell divide again.
item_counts = counts[code]
normalized_scores = scores[code].copy()
counted = item_counts > 0  # items never counted keep their raw sum
normalized_scores[counted] /= item_counts[counted]
print(normalized_scores.max())
print(normalized_scores.min())

In [None]:
# Build the edge list: one directed edge from an item to the target-disease node
# (disease index shifted by 500) for every item whose averaged score exceeds
# 0.001 in magnitude.
target_node = code + 500
edge_list = [
    (item, target_node, {'Weight': normalized_scores[item]})
    for item in range(1400)
    if np.absolute(normalized_scores[item]) > 0.001
]
G = nx.DiGraph()
G.add_edges_from(edge_list)

In [None]:
# add features
# Edge width follows the magnitude of the contribution; colour encodes its sign
# (red = positive, blue = zero or negative).
edge_width = [1*np.absolute(G[u][v]['Weight']) for u,v in G.edges()]
edge_color = ['r' if G[u][v]['Weight'] > 0 else 'b' for u, v in G.edges()]

# Colour nodes by index range: <500 green, 500-767 red, everything else yellow.
node_color = []
for n in G.nodes():
    if n<500:
        c = 'g'
    elif n<768:
        c = 'r'
    else:
        # was `c - 'y'`: a subtraction expression, not an assignment, which
        # raised instead of colouring the node yellow
        c = 'y'
    node_color.append(c)

# draw_networkx requires node_size to have one entry per node, in G.nodes()
# order. The previous list covered indices 0..1399 regardless of which nodes are
# in the graph, so sizes were misaligned (or rejected outright by matplotlib).
node_size = [counts[:, n].sum()*0.05 for n in G.nodes()]

In [None]:
# Render the graph at the random `pos` coordinates using the styling lists
# computed in the previous cell.
plt.figure(figsize=(10, 7))
plt.axis('off')
plt.tight_layout()
edge_style = dict(width=edge_width, edge_color=edge_color)
node_style = dict(node_color=node_color, node_size=node_size)
nx.draw_networkx(G, pos, **edge_style, **node_style)

### Aggregate of different patients with different diseases

In [None]:
# Read the preprocessed 2014 records into `out_dict` (same file as the load in
# the first section, repeated so this section can be run on its own).
# NOTE: pickle executes arbitrary code on load — only open trusted files.
import pickle
with open('data/preprocessed/2014_out_dict.pckl', mode='rb') as handle:
    out_dict = pickle.load(handle)

In [None]:
# Accumulate item contributions per target disease across many patients; each
# patient's target is the last entry of its `targets` list.
scores = np.zeros((268, 1400))
counts = np.zeros((268, 1400), dtype=int)
# ids 0..9999, i.e. 10,000 candidate patients
# NOTE(review): the earlier comment here said "100,000 patients" — confirm the
# intended cohort size.
jids = np.arange(10000)
for i, jid in enumerate(jids):
    if not i % 100:
        print(i + 1)  # progress marker every 100 ids
    try:
        out_list = out_dict[jid]
        if len(out_list) <= 1:
            # patients with fewer than two records are skipped
            continue

        # Convert the records to model inputs and run the forward pass.
        # NOTE(review): `outputs` is unused; presumably interpret() relies on state
        # cached by this call — confirm before removing it.
        input_list, targets = list_to_inputs_targets(out_list)
        inputs = model.list_to_tensor([input_list])
        outputs = model(inputs)
        code = targets[-1]

        for visit_idx, visit in enumerate(input_list):
            for item in visit:
                contribution = model.interpret(0, visit_idx, item, code).data[0]
                # NOTE(review): every value is added to `scores`, but only those
                # with |value| >= 0.01 are counted, so the later average divides
                # by the smaller count — confirm this is intended.
                if np.absolute(contribution) >= 0.01:
                    counts[code, item] += 1
                scores[code, item] += contribution

    except KeyError:
        # jid not present in out_dict; any KeyError raised inside the block is
        # swallowed as well
        continue

In [None]:
# Average score per (disease, item) pair: divide each summed score by its count
# wherever at least one contribution was counted; other entries keep the raw sum.
# np.array() copies, so `scores` itself is left untouched.
normalized_scores = np.array(scores)
has_count = counts > 0
normalized_scores[has_count] = scores[has_count] / counts[has_count]
print(normalized_scores.max())
print(normalized_scores.min())

In [None]:
# Persist the accumulated score sums and the matching counts as pickles.
# NOTE(review): this writes the raw `scores`, not `normalized_scores` — confirm
# that is what downstream consumers expect.
with open('data/postprocessed/scores.pckl','wb') as f:
    pickle.dump(scores,f)
with open('data/postprocessed/counts.pckl','wb') as f:
    pickle.dump(counts,f)

In [None]:
# create edgelist
# One directed edge item -> disease node (disease index shifted by 500) for every
# (disease, item) pair whose averaged score exceeds 1 in magnitude.
edge_list = []
for code in range(268):
    for i in range(1400):
        val = normalized_scores[code,i]
        if np.absolute(val)>1:
            edge_list.append((i,code+500,{'Weight':val}))

# Build the graph once, after all edges are collected. This used to sit inside
# the outer loop, rebuilding the whole graph 268 times for the same final result.
G = nx.DiGraph()
G.add_edges_from(edge_list)

In [None]:
# add features
# Edge width follows the magnitude of the contribution; colour encodes its sign
# (red = positive, blue = zero or negative).
edge_width = [1*np.absolute(G[u][v]['Weight']) for u,v in G.edges()]
edge_color = ['r' if G[u][v]['Weight'] > 0 else 'b' for u, v in G.edges()]

# Colour nodes by index range: <500 green, 500-767 red, everything else yellow.
node_color = []
for n in G.nodes():
    if n<500:
        c = 'g'
    elif n<768:
        c = 'r'
    else:
        # was `c - 'y'`: a subtraction expression, not an assignment, which
        # raised instead of colouring the node yellow
        c = 'y'
    node_color.append(c)

# draw_networkx requires node_size to have one entry per node, in G.nodes()
# order. The previous list covered indices 0..1399 regardless of which nodes are
# in the graph, so sizes were misaligned (or rejected outright by matplotlib).
node_size = [counts[:, n].sum()*0.02 for n in G.nodes()]

In [None]:
# Render the multi-disease graph on a larger canvas, using the styling lists
# computed in the previous cell and the random `pos` coordinates.
plt.figure(figsize=(20, 15))
plt.axis('off')
plt.tight_layout()
edge_style = dict(width=edge_width, edge_color=edge_color)
node_style = dict(node_color=node_color, node_size=node_size)
nx.draw_networkx(G, pos, **edge_style, **node_style)

In [None]:
# Single-patient view: gather the inputs (field 2) and targets (field 3) of every
# record of patient 12, each wrapped in an outer list to form a batch of one.
jid = 12
patient_records = out_dict[jid]
input_list = [[record[2] for record in patient_records]]
targets = [[record[3] for record in patient_records]]

In [None]:
# Lookup table used below to turn node ids into readable text.
# NOTE(review): the file is named sick_idx2desc, so this presumably maps
# index -> description despite the variable being called `s2i` — confirm.
# NOTE: pickle executes arbitrary code on load — only open trusted files.
with open('data/dictionaries/sick_idx2desc.pckl','rb') as f:
    s2i = pickle.load(f)

In [None]:
# Run the model on the single patient and compare its five highest-scoring
# predictions with the target of the last record.
inputs = model.list_to_tensor(input_list)
outputs = model(inputs)
# keep only the last target of each sequence
targets = torch.LongTensor(targets).view(len(inputs), -1)[:, -1]
top5 = outputs.topk(5)[1].data
# disease indices are shifted by 500 before the description lookup
top5_descriptions = [s2i[x+500] for x in top5.cpu().tolist()[0]]
print("Top 5 predictions: ", top5, top5_descriptions)
answer = targets[0]
print("Answer: ", answer, s2i[answer+500])

In [None]:
# Display the nested visit list that was fed to the model for this patient.
input_list

In [None]:
# create random list
# Random (x, y) position in the unit square for each of the 1400 node indices.
# A dedicated, seeded generator makes the layout identical on every
# Restart & Run All without touching NumPy's global random state.
LAYOUT_SEED = 0
rng = np.random.RandomState(LAYOUT_SEED)
x_val = list(rng.uniform(0, 1, 1400))
y_val = list(rng.uniform(0, 1, 1400))
# pos maps node index -> [x, y], the format nx.draw_networkx expects.
pos = {i: [x_val[i], y_val[i]] for i in range(1400)}

In [None]:
# Contribution of every input item toward each of the five predicted diseases,
# summed over the patient's visits (one row per prediction, in top-5 order).
import numpy as np
scores = np.zeros((5, 1400))
for rank, pred in enumerate(top5.tolist()[0]):
    for visit_idx, visit in enumerate(input_list[0]):
        for item in visit:
            scores[rank, item] += model.interpret(0, visit_idx, item, pred).data[0]

In [None]:
# create edge list
# Only the third-ranked prediction (slice 2:3) is drawn. The slice is applied
# AFTER enumerating so that `t` remains the prediction's row in `scores`.
# Enumerating the already-sliced list restarted `t` at 0, which paired the third
# prediction's node with the scores computed for the top-1 prediction.
edge_list = []
for t,pred in list(enumerate(top5.tolist()[0]))[2:3]:
    pred += 500 # to fit to index of 1400 items
    for i in range(1400):
        val = scores[t,i]
        if np.absolute(val)>0.01:
            edge_list.append((i,pred,{'Weight':val}))
G = nx.Graph()
G.add_edges_from(edge_list)

In [None]:
# import networkx
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# add features
# Edge width follows the magnitude of the contribution; colour encodes its sign
# (red = positive, blue = zero or negative).
edge_width = [1*np.absolute(G[u][v]['Weight']) for u,v in G.edges()]
edge_color = ['r' if G[u][v]['Weight'] > 0 else 'b' for u, v in G.edges()]

# Colour nodes by index range: <500 green, 500-767 red, everything else yellow.
node_color = []
for n in G.nodes():
    if n<500:
        c = 'g'
    elif n<768:
        c = 'r'
    else:
        # was `c - 'y'`: a subtraction expression, not an assignment, which
        # raised instead of colouring the node yellow
        c = 'y'
    node_color.append(c)

In [None]:
# Render the single-patient graph at the random `pos` coordinates.
plt.figure(figsize=(10, 7))
plt.axis('off')
plt.tight_layout()
style = dict(width=edge_width, edge_color=edge_color, node_color=node_color)
nx.draw_networkx(G, pos, **style)

In [None]:
# interpret
# Print the `s2i` entries for two hard-coded node ids (601 and 593).
# NOTE(review): presumably these ids were read off the figure above — confirm
# they still match after the graph is regenerated.
print(s2i[601])
print(s2i[593])

In [None]:
# obtaining overall graph for Alzheimer's disease
# For every patient in the cohort file, run the model on all of the patient's
# records and accumulate each input item's contribution toward the target of the
# patient's LAST record.
import time
import pickle

start = time.time()
# NOTE: pickle executes arbitrary code on load — only open trusted files.
with open('data/preprocessed/jid_alzheimer.pckl','rb') as f:
    jids = pickle.load(f)

scores = np.zeros((268,1400))
counts = np.zeros((268,1400),dtype=int)
for jid in jids:
    print(jid)
    try:
        patient_records = list(out_dict[jid])
        if len(patient_records) == 0:
            continue
        # batch of one: inputs are field 2 of every record
        input_list = [[tup[2] for tup in patient_records]]
        # target is field 3 of the last record, taken explicitly instead of
        # relying on the loop variable surviving the loop
        target = patient_records[-1][3]
        inputs = model.list_to_tensor(input_list)
        # NOTE(review): `outputs` is unused; presumably interpret() relies on state
        # cached by this forward pass — confirm before removing it.
        outputs = model(inputs)

        # add to scores
        for v,visit in enumerate(input_list[0]):
            for i in visit:
                counts[target,i]+=1
                val = model.interpret(0,v,i,target).data[0]
                scores[target,i] += val
    except KeyError:
        # jid not present in out_dict; any KeyError raised inside the block is
        # swallowed as well
        continue

# `start` was recorded but never reported before; print the elapsed time.
print('elapsed: %.1f s' % (time.time() - start))

In [None]:
# Sanity check: the matrix was allocated as (268, 1400) in the previous cell.
scores.shape

In [None]:
# Edges from every item to the node of target index 50 (shifted by 500) whose
# accumulated, un-normalised score exceeds 0.01 in magnitude.
# NOTE(review): t = 50 is hard-coded — confirm it is the intended target class.
t = 50
target_node = t + 500
edge_list = [
    (item, target_node, {'Weight': scores[t, item]})
    for item in range(1400)
    if np.absolute(scores[t, item]) > 0.01
]
G = nx.Graph()
G.add_edges_from(edge_list)

In [None]:
# add features
# Edge width follows the magnitude of the contribution; colour encodes its sign
# (red = positive, blue = zero or negative).
edge_width = [1*np.absolute(G[u][v]['Weight']) for u,v in G.edges()]
edge_color = ['r' if G[u][v]['Weight'] > 0 else 'b' for u, v in G.edges()]

# Colour nodes by index range: <500 green, 500-767 red, everything else yellow.
node_color = []
for n in G.nodes():
    if n<500:
        c = 'g'
    elif n<768:
        c = 'r'
    else:
        # was `c - 'y'`: a subtraction expression, not an assignment, which
        # raised instead of colouring the node yellow
        c = 'y'
    node_color.append(c)

In [None]:
# Render the cohort graph at the random `pos` coordinates.
plt.figure(figsize=(10, 7))
plt.axis('off')
plt.tight_layout()
style = dict(width=edge_width, edge_color=edge_color, node_color=node_color)
nx.draw_networkx(G, pos, **style)

In [None]:
# Divide each row of `weights` by the matching entry of `sqrt` and stack the
# results into a 2-D array.
# NOTE(review): neither `weights` nor `sqrt` is defined in any cell visible in
# this notebook, so this cell raises NameError on a fresh kernel. `sqrt` is
# indexed like a sequence — presumably it holds per-row norms — confirm and
# define both before this cell.
normalized_weights = []
for i,row in enumerate(weights):
    normalized_weights.append(row/sqrt[i])
normalized_weights = np.array(normalized_weights)

In [None]:
# Pairwise similarity: matrix product of the normalised rows with their own
# transpose (entry [a, b] is the dot product of rows a and b).
sim = normalized_weights @ normalized_weights.T

In [None]:
# Show the similarity matrix as a heat map; 'nearest' interpolation keeps each
# matrix cell as a solid block of colour.
plt.figure(figsize=(20,15))
plt.imshow(sim,cmap='hot',interpolation='nearest')
plt.show()