In [1]:
import pandas as pd
import numpy as np

from scipy.sparse import kronsum

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure, text
import networkx as nx

## Generic functions

In [2]:
def exp_kernel(train, sigma):
    """
    Computes the exponential kernel matrix for the given data and sigma.
    
    Parameters:
    train (np.array or pd.DataFrame): The input data matrix.
    sigma (float): The kernel bandwidth parameter.
    
    Returns:
    pd.DataFrame: The computed kernel matrix, rounded to 6 decimal places.
    """
    matrix_train = np.exp(-(train**2) / (2 * (sigma**2)))

    x = pd.DataFrame(matrix_train)
    x = np.round(x, 6)

    eigenvalues = np.linalg.eig(x)
    matrix = np.array(x)
    if not (np.sum(np.abs(eigenvalues[0]) > 0) == x.shape[0]) and (np.array_equal(matrix, matrix.T)):
        print("==============================")
        print("DOES NOT satisfy kernel condition")
        print("==============================")

    return x


### Common Parameters

In [3]:
norm = "robustNorm"
numberOfTimeSteps = 14
debug_plot_figures = True


folders = ["s1", "s2", "s3", "s4"]

keys = ['AMG', 'ATF', 'CF2', 'CF4', 'GCC', 'LIN', 'NTI', 'PEN', 'TTC',
        'pc_enterob', 'pc_pseud', 'isVM', 'numberOfPatients', 'neighbor_ATF', 
        'neighbor_CAR', 'neighbor_CF1', 'neighbor_CF3', 'neighbor_CF4', 'neighbor_GLI',
        'neighbor_MON', 'neighbor_PAP', 'neighbor_SUL']

binary = ['pc_enterob', 'pc_pseud', 'isVM']

continues =  [variable for variable in keys if variable not in binary]

### Threshold value

In [4]:
# Based on the threshold value, you can choose between: 0.6, 0.725, 0.85 and 0.975
threshold_val_init = 0.975
th_folder = "th_0975"
save_results = False

# 1.DTW

In [5]:
buildGraph = "dtw"
numberOfFeatures = 22

### 1. Graph Constructed as tr[expK(DTW)]. Same Graph for Each Time Step
#### A Single Graph for all MTS

In [6]:
dicc_thresholds = {}
for c in range(len(folders)):
    print("====> Folder:" + str(folders[c]) + " <====")
    dtw = pd.read_csv("../step1_graphEstimation/estimatedGraphs/"+buildGraph+"/"+folders[c]+"/X_train_allMTS.csv")
    K = exp_kernel(dtw, 1.5)
    K = K - np.eye(K.shape[0])
    
    edges_bef = np.count_nonzero(K)
    print("Number of non-zero values before applying the threshold:", edges_bef)
    s = K.copy()
    min_value = s.min().min()
    max_value = s.max().max()
    s = (s - min_value) / (max_value - min_value)
    s[np.abs(s) < threshold_val_init] = 0
    edges_aft = np.count_nonzero(s)
    print("Number of non-zero values after applying the threshold:", edges_aft)
    print("%:", (edges_aft * 100) / (numberOfFeatures * numberOfFeatures))
    print(s.shape)
    
    pd.DataFrame(s).to_csv("./dtw/"+folders[c]+"/graph_Xtr_th_"+str(threshold_val_init)+".csv", index=False)

====> Folder:s1 <====
Number of non-zero values before applying the threshold: 373
Number of non-zero values after applying the threshold: 117
%: 24.173553719008265
(22, 22)
====> Folder:s2 <====
Number of non-zero values before applying the threshold: 368
Number of non-zero values after applying the threshold: 121
%: 25.0
(22, 22)
====> Folder:s3 <====
Number of non-zero values before applying the threshold: 376
Number of non-zero values after applying the threshold: 128
%: 26.446280991735538
(22, 22)
====> Folder:s4 <====
Number of non-zero values before applying the threshold: 377
Number of non-zero values after applying the threshold: 117
%: 24.173553719008265
(22, 22)


### 2. Graph Constructed as tr[expK(DTW)]. Same Graph for Each Time Step
#### A Single Graph for static data

In [7]:
threshold_val_init = 0.15
dicc_thresholds = {}
for c in range(len(folders)):
    print("====> Folder:" + str(folders[c]) + " <====")
    dtw = pd.read_csv("../step1_graphEstimation/estimatedGraphs/"+buildGraph+"/"+folders[c]+"/X_train_STATIC.csv")
    K = exp_kernel(dtw, 0.75)
    K = K - np.eye(K.shape[0])
    
    edges_bef = np.count_nonzero(K)
    print("Number of non-zero values before applying the threshold:", edges_bef)
    s = K.copy()
    min_value = s.min().min()
    max_value = s.max().max()
    s = (s - min_value) / (max_value - min_value)
    s[np.abs(s) < threshold_val_init] = 0
    edges_aft = np.count_nonzero(s)
    print("Number of non-zero values after applying the threshold:", edges_aft)
    print("%:", (edges_aft * 100) / (numberOfFeatures * numberOfFeatures))
    print(s.shape)
    
    pd.DataFrame(s).to_csv("./dtw/"+folders[c]+"/static_graph_Xtr_th_"+str(threshold_val_init)+".csv", index=False)

====> Folder:s1 <====
Number of non-zero values before applying the threshold: 12
Number of non-zero values after applying the threshold: 12
%: 2.479338842975207
(4, 4)
====> Folder:s2 <====
Number of non-zero values before applying the threshold: 12
Number of non-zero values after applying the threshold: 12
%: 2.479338842975207
(4, 4)
====> Folder:s3 <====
Number of non-zero values before applying the threshold: 12
Number of non-zero values after applying the threshold: 12
%: 2.479338842975207
(4, 4)
====> Folder:s4 <====
Number of non-zero values before applying the threshold: 12
Number of non-zero values after applying the threshold: 12
%: 2.479338842975207
(4, 4)


In [8]:
K

Unnamed: 0,Age,Gender,SAPSIIIScore,YearOfAdmission
0,0.0,0.695972,0.980113,0.979627
1,0.695972,0.0,0.832467,0.784231
2,0.980113,0.832467,0.0,0.964986
3,0.979627,0.784231,0.964986,0.0
