In [None]:
# import pandas as pd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import precision_recall_curve, auc
from matplotlib.pyplot import figure, text
import networkx as nx

import json

import sys
sys.path.append('../')  
import utils

## 1. Analyze the resulting graph for a fixed sigma and a fixed threshold

In [None]:
# Data splits available on disk, the split inspected in this section and the
# normalisation tag that is part of every file name.
folders = ["s1", "s2", "s3"]
norm = "normPower2"
c = 1

# NOTE(review): the file loaded here is the "tr_noAMR_" matrix although the
# variable (and the later print-outs) are labelled AMR -- confirm which class
# is meant to be inspected in this section.
path = f"./dtw_matrices/{folders[c]}/tr_noAMR_{norm}.csv"
dtw_AMR = pd.read_csv(path)

# th_adj: entries of the kernel matrix whose magnitude is below this value are
# zeroed further down. th_kernel: second argument handed to utils.exp_kernel.
th_adj = 0.99
th_kernel = 0.25

In [None]:
A_AMR = utils.exp_kernel(dtw_AMR, th_kernel)

%matplotlib inline
fig, ax = plt.subplots(1,1, figsize=(12,12))
img = ax.imshow(A_AMR)
fig.colorbar(img)

In [None]:
amr = A_AMR.copy()
amr[np.abs(amr) < th_adj] = 0
print("Number of non-zero values after applying the threshold (AMR)", np.count_nonzero(amr))
print("%" + str((np.count_nonzero(amr) / (amr.shape[0]*amr.shape[0]))*100))

I = np.eye(80)

%matplotlib inline
fig, ax = plt.subplots(1,1, figsize=(12,12))
img = ax.imshow(amr)
fig.colorbar(img)

In [None]:
keys = dtw_AMR.keys()
GDead = utils.build_and_export_graph(np.abs(amr), keys)

pos = nx.spring_layout(GDead, scale=0.1)

edges, weights = zip(*nx.get_edge_attributes(GDead,'weight').items())

options = {
    "edgelist": edges,
    "edge_color": weights,
    "width": np.array(weights) * 1.5,
    "alpha": 1,
}


%matplotlib notebook
fig2, ax2 = plt.subplots(figsize=(10,10))

selfloop_edges = list(nx.selfloop_edges(GDead))

for edge in selfloop_edges:
    GDead.remove_edge(edge[0], edge[1])
    
nx.draw(GDead, pos, **options, ax=ax2)

diffx = 0
diffy = 0
for node, (x, y) in pos.items():
    text((x + diffx), (y + diffy), node, fontsize=15, ha='center', va='center')
plt.tight_layout()

## 2. Analyze how the final adjacency-matrix values depend on sigma and the threshold

In [None]:
# Inspect the distribution of the kernel-matrix values of one split for a
# single sigma, separately for the two training matrices (AMR / noAMR files).
c = 0

sigma = np.arange(1.25,7.5,0.5)
thresholds = np.arange(0.6, 1, 0.01)
idx_sigma = 6
print("Sigma value:", sigma[idx_sigma])

norm = "normPower2"
path = "./dtw_matrices/"+folders[c]+"/tr_AMR_"+norm+".csv"
dtw_AMR  = pd.read_csv(path)
path = "./dtw_matrices/"+folders[c]+"/tr_noAMR_"+norm+".csv"
dtw_noAMR  = pd.read_csv(path)

# Label the rows with the column names so the matrices can be indexed by
# variable name on both axes. (The former `X.columns = X.columns` lines were
# no-ops and have been dropped.)
A_AMR = utils.exp_kernel(dtw_AMR, sigma[idx_sigma])
A_AMR.index = A_AMR.columns

A_noAMR = utils.exp_kernel(dtw_noAMR, sigma[idx_sigma])
A_noAMR.index = A_noAMR.columns

# One histogram per class; the plotting code is shared instead of duplicated.
for kernel_matrix, hist_title in (
    (A_AMR, 'Histogram of DTW for train (MDR)'),
    (A_noAMR, 'Histogram of DTW for train (non-MDR)'),
):
    all_values = kernel_matrix.values.flatten()
    plt.figure()
    plt.hist(all_values, bins=14, edgecolor='black')
    plt.title(hist_title)
    plt.show()

## 3. CREATE THE GRAPHS FOR THE DIFFERENT PROPOSED EXPERIMENTS

## Variables and their classification by nature (binary vs. continuous)

In [None]:
# Names of all 80 variables handled in this notebook.
# NOTE(review): presumably in the same order as the feature axis of the
# X_*_tensor arrays and the rows/columns of the DTW matrices -- confirm.
keys = ['AMG', 'ATF', 'ATI', 'ATP', 'CAR', 'CF1', 'CF2', 'CF3', 'CF4', 'Falta',
       'GCC', 'GLI', 'LIN', 'LIP', 'MAC', 'MON', 'NTI', 'OTR', 'OXA', 'PAP',
       'PEN', 'POL', 'QUI', 'SUL', 'TTC', 'hoursVM', 'numberOfPatients_atb',
       'numberOfPatientsMR', 'neighbor_CAR', 'neighbor_PAP', 'neighbor_Falta',
       'neighbor_QUI', 'neighbor_ATF', 'neighbor_OXA', 'neighbor_PEN',
       'neighbor_CF3', 'neighbor_GLI', 'neighbor_CF4', 'neighbor_SUL',
       'neighbor_NTI', 'neighbor_LIN', 'neighbor_AMG', 'neighbor_MAC',
       'neighbor_CF1', 'neighbor_GCC', 'neighbor_POL', 'neighbor_ATI',
       'neighbor_MON', 'neighbor_LIP', 'neighbor_TTC', 'neighbor_OTR',
       'neighbor_CF2', 'neighbor_ATP', 'numberOfPatients_ttl',
       'cambio_postural', 'insulina', 'nutr_art', 'sedacion', 'relajacion',
       'fallo_hepatico', 'fallo_renal', 'fallo_coagulacion',
       'fallo_hemodinamico', 'fallo_respiratorio', 'fallo_multiorganico',
       'n_transf', 'farm_vasoactivos', 'dosis_nems', 'hoursTraqueo',
       'hoursConUlcera', 'hoursConHemo', 'C01 VC acceso periférico 1',
       'C01 VC acceso periférico 2', 'C02 Vía central - YD',
       'C02 Vía central - SD', 'C02 Vía central - SI', 'C02 Vía central - FD',
       'C02 Vía central - YI', 'C02 Vía central - FI', 'hasCatheters']

# Subset of `keys` treated as binary variables (46 entries).
binary = ['AMG', 'ATF', 'ATI', 'ATP', 'CAR', 'CF1',
       'CF2', 'CF3', 'CF4', 'Falta', 'GCC', 'GLI', 'LIN', 'LIP', 'MAC',
       'MON', 'NTI', 'OTR', 'OXA', 'PAP', 'PEN', 'POL', 'QUI', 'SUL', 'TTC',
       'cambio_postural', 'insulina', 'nutr_art',
       'sedacion', 'relajacion', 'fallo_hepatico', 'fallo_renal',
       'fallo_coagulacion', 'fallo_hemodinamico', 'fallo_respiratorio',
       'fallo_multiorganico', 'hasCatheters', 'farm_vasoactivos', 'C01 VC acceso periférico 1', 'C01 VC acceso periférico 2',
       'C02 Vía central - YD', 'C02 Vía central - SD', 'C02 Vía central - SI',
       'C02 Vía central - FD', 'C02 Vía central - YI', 'C02 Vía central - FI']

# Remaining 34 entries of `keys` (presumably the continuous-valued variables,
# going by the list name). Together with `binary` this covers every entry of
# `keys` exactly once.
continues = ['hoursVM','numberOfPatients_atb', 'numberOfPatientsMR',
       'neighbor_CAR', 'neighbor_PAP', 'neighbor_Falta', 'neighbor_QUI',
       'neighbor_ATF', 'neighbor_OXA', 'neighbor_PEN', 'neighbor_CF3',
       'neighbor_GLI', 'neighbor_CF4', 'neighbor_SUL', 'neighbor_NTI',
       'neighbor_LIN', 'neighbor_AMG', 'neighbor_MAC', 'neighbor_CF1',
       'neighbor_GCC', 'neighbor_POL', 'neighbor_ATI', 'neighbor_MON',
       'neighbor_LIP', 'neighbor_TTC', 'neighbor_OTR', 'neighbor_CF2', 'neighbor_ATP',
       'numberOfPatients_ttl',
       'n_transf', 'dosis_nems',
       'hoursTraqueo', 'hoursConUlcera', 'hoursConHemo']

# Displayed as the cell output: total number of variables.
len(keys)

## Exp. 1: normPower2; Without separating by variables' nature


### Optimizing threshold by class

In [None]:
# Search grid for the hyperparameters tuned on the validation split.

# Number of time steps per patient (used later to keep one label per patient).
numberOfTimeStep = 14

# Candidate values for the second argument of utils.exp_kernel.
sigma = np.arange(3, 7.5, 0.5)

# Candidate cut-offs applied to each class-specific adjacency matrix.
thresholds_noamr = np.arange(0.6, 1, 0.1)
thresholds_amr = np.arange(0.6, 1, 0.1)

# Candidate multipliers of the identity matrix ("lambda").
mult_I = [0.01, 0.1, 2]

# One empty list per (rounded) threshold value.
dicc_metric = dict((thr, []) for thr in np.round(np.arange(0.6, 1, 0.1), 3))

In [None]:
# Grid search on the validation split of every fold.
#
# For each combination of sigma, class-specific thresholds and identity
# multiplier, two matrices are built (one per class file): kernel matrix ->
# thresholding -> minus identity -> division by its Frobenius norm. Every
# patient is then scored with the mean over its valid time steps of
#     v^T (mult * I + A)^-1 v
# for both matrices, and utils.get_metrics turns the scores into the
# selection metric. The best combination per fold is saved as JSON.

# Metric used for model selection. This name was referenced below but never
# defined in the notebook (NameError on a fresh kernel). "ROC-AUC" matches the
# key stored in bestCombination and the first metric used on the test set.
# NOTE(review): confirm this is the metric name intended for utils.get_metrics.
metric_train_val = ["ROC-AUC"]

arr_bestCombination_exp1_2 = []

for c in range(len(folders)):

    val_metric = 0
    bestCombination = {'sigma': 0, 'threshold_amr': 0,'threshold_noamr': 0, 'ROC-AUC':0, 'I':0}

    path = "./dtw_matrices/"+folders[c]+"/tr_AMR_"+norm+".csv"
    dtw_AMR  = pd.read_csv(path)
    path = "./dtw_matrices/"+folders[c]+"/tr_noAMR_"+norm+".csv"
    dtw_noAMR  = pd.read_csv(path)

    X_val = np.load("../DATA/"+folders[c]+"/X_val_tensor_"+norm+".npy")
    y_val = pd.read_csv("../DATA/"+folders[c]+"/y_val_tensor_"+norm+".csv")[['individualMRGerm_stac']]
    print("X_val:", X_val.shape)
    print("y_val:", y_val.shape)

    # Identity sized by the feature axis; constant for the whole fold.
    I = np.eye(X_val.shape[2])

    for sigma_value in sigma:
        print("======================", sigma_value, "========================================")
        A_AMR = utils.exp_kernel(dtw_AMR, sigma_value)
        A_AMR.index = A_AMR.columns

        A_noAMR = utils.exp_kernel(dtw_noAMR, sigma_value)
        A_noAMR.index = A_noAMR.columns

        for thr_amr in thresholds_amr:
            for thr_noamr in thresholds_noamr:
                amr = A_AMR.copy()
                amr[np.abs(amr) < thr_amr] = 0
                print("Number of non-zero values after applying the threshold (MDR)", np.count_nonzero(amr))
                print("%" + str((np.count_nonzero(amr) / (amr.shape[0]*amr.shape[0]))*100))

                noamr = A_noAMR.copy()
                noamr[np.abs(noamr) < thr_noamr] = 0
                print("Number of non-zero values after applying the threshold (NO-MDR)", np.count_nonzero(noamr))
                print("%" + str((np.count_nonzero(noamr) / (noamr.shape[0]*noamr.shape[0]))*100))

                # Subtract the identity, then scale by the Frobenius norm.
                amr = amr - I
                noamr = noamr - I
                amr = amr / np.linalg.norm(amr)
                noamr = noamr / np.linalg.norm(noamr)

                for mult in mult_I:
                    # The inverses depend only on (sigma, thresholds, mult),
                    # so compute them once here instead of once per time step
                    # of every patient.
                    inv_amr = np.linalg.inv(mult*I + amr)
                    inv_noamr = np.linalg.inv(mult*I + noamr)

                    result_by_pat = []

                    for idx_pat in range(X_val.shape[0]):
                        # Keep only the time steps without the padding value
                        # 666. Rows are selected positionally: the previous
                        # label-based `.loc[index]` after filtering only
                        # worked when all padded rows were at the end.
                        steps = X_val[idx_pat]
                        steps = steps[(steps != 666).all(axis=1)]
                        if steps.shape[0] == 0:
                            raise ValueError("Patient %d has no valid (non-padded) time step" % idx_pat)

                        # Sum over time steps of v^T M v, for both matrices.
                        class_AMR = np.sum((steps @ inv_amr) * steps)
                        class_noAMR = np.sum((steps @ inv_noamr) * steps)

                        result_by_pat.append([class_AMR/steps.shape[0], class_noAMR/steps.shape[0]])

                    metric_value, _ = utils.get_metrics(y_val, result_by_pat, metric_train_val[0])

                    if val_metric < metric_value:
                        val_metric = metric_value
                        # Plain Python floats so json.dump never trips over
                        # NumPy scalar types.
                        bestCombination['sigma'] = float(sigma_value)
                        bestCombination['threshold_amr'] = float(thr_amr)
                        bestCombination['threshold_noamr'] = float(thr_noamr)
                        bestCombination['I'] = mult
                        bestCombination['ROC-AUC'] = float(val_metric)

    arr_bestCombination_exp1_2.append(bestCombination)

with open('./best_hyper/results_specific_graphs.json', 'w') as archivo:
    json.dump(arr_bestCombination_exp1_2, archivo)

In [None]:
###########################################################################
# AFTER SELECTING THE BEST HYPERPARAMETERS FOR THE TRAIN/VAL SET
# WE DESIGN THE FINAL MODEL FOR TEST EVALUATION
###########################################################################
# For every fold: rebuild the two class-specific matrices with the saved
# hyperparameters (kernel -> threshold -> minus identity -> Frobenius
# normalisation), score each test patient with the mean over its valid time
# steps of v^T (mult * I + A)^-1 v, and compute all metrics in `metric`.

with open("./best_hyper/results_specific_graphs.json", "r") as f:
    arr_bestCombination_exp1_2 = json.load(f)


results_metrics_exp1_2 =  []
metric = ["ROC-AUC", "Sensitivity", "Specificity", "f1score"]

for c in range(len(folders)):
    best = arr_bestCombination_exp1_2[c]

    path = "./dtw_matrices/"+folders[c]+"/tr_AMR_"+norm+".csv"
    dtw_AMR  = pd.read_csv(path)
    path = "./dtw_matrices/"+folders[c]+"/tr_noAMR_"+norm+".csv"
    dtw_noAMR  = pd.read_csv(path)

    X_test = np.load("../DATA/" + folders[c] + "/X_test_tensor_"+norm+".npy")
    y_test = pd.read_csv("../DATA/" + folders[c] + "/y_test_"+norm+".csv")
    # Keep one label row out of every numberOfTimeStep rows.
    # NOTE(review): presumably the file has one row per time step and this
    # yields one label per patient -- confirm against the data export.
    y_test = y_test.iloc[0:y_test.shape[0]:numberOfTimeStep].reset_index(drop=True)

    I = np.eye(X_test.shape[2])
    A_AMR = utils.exp_kernel(dtw_AMR, best['sigma'])
    A_AMR.index = A_AMR.columns

    A_noAMR = utils.exp_kernel(dtw_noAMR, best['sigma'])
    A_noAMR.index = A_noAMR.columns

    amr = A_AMR.copy()
    amr[np.abs(amr) < best['threshold_amr']] = 0
    noamr = A_noAMR.copy()
    noamr[np.abs(noamr) < best['threshold_noamr']] = 0

    amr = amr - I
    noamr = noamr - I

    amr = amr / np.linalg.norm(amr)
    noamr = noamr / np.linalg.norm(noamr)

    # The inverses are the same for every patient and time step of the fold,
    # so compute them once instead of inside the innermost loop.
    inv_amr = np.linalg.inv(best['I']*I + amr)
    inv_noamr = np.linalg.inv(best['I']*I + noamr)

    result_by_pat = []
    for idx_pat in range(X_test.shape[0]):
        # Drop the time steps containing the padding value 666. Rows are taken
        # positionally: the previous label-based `.loc[i]` after filtering
        # only worked when all padded rows were at the end.
        steps = X_test[idx_pat]
        steps = steps[(steps != 666).all(axis=1)]
        if steps.shape[0] == 0:
            raise ValueError("Patient %d has no valid (non-padded) time step" % idx_pat)

        # Sum over time steps of v^T M v, for both matrices.
        class_AMR = np.sum((steps @ inv_amr) * steps)
        class_noAMR = np.sum((steps @ inv_noamr) * steps)

        result_by_pat.append([class_AMR/steps.shape[0], class_noAMR/steps.shape[0]])

    # First element returned by utils.get_metrics is the metric value; cast to
    # a plain float so json.dump never trips over NumPy scalar types.
    arr_metrics = [
        float(utils.get_metrics(y_test, result_by_pat, metric_name)[0])
        for metric_name in metric
    ]

    results_metrics_exp1_2.append(arr_metrics)

with open("results_metrics_BASELINE1", "w") as f:
    json.dump(results_metrics_exp1_2, f)