In [None]:
import pandas as pd
import numpy as np

from scipy.sparse import kronsum

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure, text
import networkx as nx

## Generic functions

In [None]:
def exp_kernel(train, sigma):
    """
    Computes the exponential kernel matrix for the given data and sigma.

    The transform exp(-train**2 / (2 * sigma**2)) is applied element-wise and
    the result is rounded to 6 decimal places. A warning banner is printed when
    the rounded matrix is not a valid kernel, i.e. when it is not symmetric or
    when it has a negative eigenvalue. The matrix is returned in either case.

    Parameters:
    train (np.array or pd.DataFrame): The input data matrix.
    sigma (float): The kernel bandwidth parameter.

    Returns:
    pd.DataFrame: The computed kernel matrix, rounded to 6 decimal places.
    """
    matrix_train = np.exp(-(train**2) / (2 * (sigma**2)))

    x = pd.DataFrame(matrix_train)
    x = np.round(x, 6)

    matrix = np.array(x)
    # array_equal is also False for non-square input, so such input is reported
    # as an invalid kernel instead of being handed to the eigen-solver.
    is_symmetric = np.array_equal(matrix, matrix.T)

    is_psd = False
    if is_symmetric:
        if matrix.size == 0:
            is_psd = True
        else:
            # Rounding to 6 decimals perturbs every entry by up to 5e-7, which can
            # push a zero eigenvalue slightly negative; allow for that noise.
            tolerance = 1e-6 * matrix.shape[0]
            # eigvalsh is the solver for symmetric matrices: eigenvalues are real.
            is_psd = bool(np.linalg.eigvalsh(matrix).min() >= -tolerance)

    # A valid kernel needs BOTH properties, so warn when either one fails.
    if not (is_symmetric and is_psd):
        print("==============================")
        print("DOES NOT satisfy kernel condition")
        print("==============================")

    return x


def buildAndExportGraph(shiftOperator, params):
    """
    Turns a shift operator matrix into a weighted undirected networkx graph.

    Every (row, column) pair of the matrix becomes an edge between the
    corresponding labels, carrying the matrix entry as its ``weight``
    attribute. Because the graph is undirected, the entry visited last for a
    given pair of labels is the one that is kept.

    Parameters:
    shiftOperator (np.array or pd.DataFrame): The shift operator matrix representing the graph.
    params (list of str): List of node labels corresponding to the matrix indices.

    Returns:
    networkx.Graph: The constructed weighted undirected graph.
    """
    weight_table = pd.DataFrame(data=shiftOperator, columns=params)
    weight_table.index = params
    node_count = weight_table.shape[0]

    graph = nx.Graph()
    # Row-major traversal, so edges are inserted in the same order as a nested loop.
    graph.add_weighted_edges_from(
        (params[row], params[col], weight_table.iloc[row, col])
        for row in range(node_count)
        for col in range(node_count)
    )

    return graph


### Common Parameters

In [None]:
# Name of the normalisation; it is embedded in the CSV file names read below
# (e.g. "tr_AMR_<norm>.csv").
norm = "normPower2"
# Not referenced by the cells visible in this notebook section.
numberOfTimeSteps = 14
# When True, the graph-building cells also draw and save their figures.
debug_plot_figures = True

# Sub-folder names; every analysis cell loops over these.
folders = ["s1", "s2", "s3"]


# Ordered feature labels (80 entries). The plotting cells look a graph node up
# by position as keys[node], so the order of this list matters.
keys = ['AMG', 'ATF', 'ATI', 'ATP', 'CAR', 'CF1', 'CF2', 'CF3', 'CF4', 'Falta',
        'GCC', 'GLI', 'LIN', 'LIP', 'MAC', 'MON', 'NTI', 'OTR', 'OXA', 'PAP',
        'PEN', 'POL', 'QUI', 'SUL', 'TTC',
        'MV hours', '# pat$_{atb}$', '# pat$_{MDR}$',
        'CAR$_{n}$', 'PAP$_{n}$', 'Falta$_{n}$',
        'QUI$_{n}$', 'ATF$_{n}$', 'OXA$_{n}$', 'PEN$_{n}$',
        'CF3$_{n}$', 'GLI$_{n}$', 'CF4$_{n}$', 'SUL$_{n}$',
        'NTI$_{n}$', 'LIN$_{n}$', 'AMG$_{n}$', 'MAC$_{n}$',
        'CF1$_{n}$', 'GCC$_{n}$', 'POL$_{n}$', 'ATI$_{n}$',
        'MON$_{n}$', 'LIP$_{n}$', 'TTC$_{n}$', 'OTR$_{n}$',
        'CF2$_{n}$', 'ATP$_{n}$', 
        '# pat$_{tot}$',
        'Post change',
        'Insulin', 'Art nutrition', 'Sedation', 'Relax', 'Hepatic$_{fail}$',
        'Renal$_{fail}$', 'Coagulation$_{fail}$', 'Hemodynamic$_{fail}$',
        'Respiratory$_{fail}$', 'Multiorganic$_{fail}$',  '# transfusions',
        'Vasoactive drug', 'Dosis nems', 'Tracheo$_{hours}$', 'Ulcer$_{hours}$',
        'Hemo$_{hours}$', 'C01 PICC 1',
        'C01 PICC 2', 'C02 CVC - RJ',
        'C02 CVC - RS', 'C02 CVC - LS', 'C02 CVC - RF',
        'C02 CVC - LJ', 'C02 CVC - LF', '# catheters']

# Subset of `keys` treated as binary features; the plotting cells colour these
# nodes 'lightblue' and every other node 'lightgreen'.
binary = ['AMG', 'ATF', 'ATI', 'ATP', 'CAR', 'CF1',
            'CF2', 'CF3', 'CF4', 'Falta', 'GCC', 'GLI', 'LIN', 'LIP', 'MAC',
            'MON', 'NTI', 'OTR', 'OXA', 'PAP', 'PEN', 'POL', 'QUI', 'SUL', 'TTC',
            'Post change',
            'Insulin', 'Art nutrition', 'Sedation', 'Relax', 'Hepatic$_{fail}$',
            'Renal$_{fail}$', 'Coagulation$_{fail}$', 'Hemodynamic$_{fail}$',
            'Respiratory$_{fail}$', 'Multiorganic$_{fail}$',  'Vasoactive drug']

# Every key that is not listed in `binary`, in the same order as in `keys`.
continues =  [variable for variable in keys if variable not in binary]

### Threshold value

In [None]:
# Threshold configuration. Supported threshold values: 0.6, 0.725, 0.85 and 0.975.
# NOTE(review): none of these three names is read by the cells visible in this
# section; `th_folder` appears only in commented-out savefig paths. Keep
# `th_folder` in sync with `threshold_val_init` when changing the threshold.
threshold_val_init = 0.975
th_folder = "th_0975"
save_results = False

# 1. DTW-HGD

In [None]:
# Folder that holds the per-split input matrices; it is also embedded in the
# names of the saved figures.
buildGraph = "dtw_matrices"
# Used below for the "% of entries kept" denominator and for the heat-map
# extent and tick range.
numberOfFeatures = 80

In [None]:
# Per-split settings, keyed by the entries of `folders`:
#   "sigma"           -> bandwidth passed to exp_kernel
#   "threshold_amr"   -> cut-off applied to the rescaled AMR matrix
#   "threshold_noamr" -> cut-off applied to the rescaled non-AMR matrix
#   "ROC-AUC", "I"    -> not read by the cells visible in this section
#                        (presumably recorded from model selection — TODO confirm)
params = {"s1": {"sigma": 3.5, "threshold_amr": 0.6, "threshold_noamr": 0.8999999999999999, "ROC-AUC": 57.3574, "I": 0.01}, 
          "s2": {"sigma": 5.5, "threshold_amr": 0.7999999999999999, "threshold_noamr": 0.7, "ROC-AUC": 57.3134, "I": 0.1}, 
          "s3": {"sigma": 4.5, "threshold_amr": 0.7, "threshold_noamr": 0.8999999999999999, "ROC-AUC": 57.5851, "I": 0.1}
}

## AMR population


In [None]:
dicc_thresholds = {}
for c in range(len(folders)):
    print("====> Folder:" + str(folders[c]) + " <====")
    dtw = pd.read_csv("./"+buildGraph+"/"+folders[c]+"/tr_AMR_"+norm+".csv")
    K = exp_kernel(dtw, params[folders[c]]['sigma'])
    K = K - np.eye(K.shape[0])
    
    edges_bef = np.count_nonzero(K)
    print("Number of non-zero values before applying the threshold:", edges_bef)
    s = K.copy()
    min_value = s.min().min()
    max_value = s.max().max()
    s = (s - min_value) / (max_value - min_value)
    s[np.abs(s) < params[folders[c]]['threshold_amr']] = 0
    edges_aft = np.count_nonzero(s)
    print("Number of non-zero values after applying the threshold:", edges_aft)
    print("%:", (edges_aft * 100) / (numberOfFeatures * numberOfFeatures))
    dicc_thresholds[folders[c]] = (edges_aft * 100) / (numberOfFeatures * numberOfFeatures)
    
    if debug_plot_figures:
        %matplotlib inline
        plt.figure()
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))
        # Image configuration
        img = ax.imshow(s, extent=[0, numberOfFeatures, numberOfFeatures, 0])
        ax.set_xticks(range(0, numberOfFeatures+1, 5))  
        ax.set_yticks(range(0, numberOfFeatures+1, 5))  
        ax.set_xticklabels(range(0, numberOfFeatures+1, 5), rotation=90, fontsize=23) 
        ax.set_yticklabels(range(0, numberOfFeatures+1, 5), fontsize=23)
        cbar = fig.colorbar(img, ax=ax, fraction=0.046, pad=0.04)
        cbar.ax.tick_params(labelsize=23)
        plt.tight_layout(pad=0)
        #plt.savefig("./Figures/" +th_folder +"/"+ folders[c] + "/"+buildGraph + "_AdjtoProdGraph.pdf", format='pdf', bbox_inches='tight')
        plt.show()
    
        # Create a NetworkX graph from the adjacency matrix
        G = nx.from_numpy_array(np.array(s), create_using=nx.DiGraph)
        # Determine the position of the nodes
        pos = nx.spring_layout(G, 0.85)
        # Get the nodes with at least one edge
        nodes_with_edges = [node for node, degree in G.degree() if degree > 0]
        # Create a subgraph with only the nodes that have edges
        G_sub = G.subgraph(nodes_with_edges)
        # Get the edge attributes in the subgraph
        edges, weights = zip(*nx.get_edge_attributes(G_sub, 'weight').items())

        print("Number of edges:", len(edges))

        options = {
            "edgelist": edges,
            "edge_color": weights,
            "width": np.array(weights) * 1,
            "alpha": 1,
        }

        node_colors = ['lightblue' if keys[node] in binary else 'lightgreen' for node in G_sub.nodes()]

        labels = {node: str(node) for node in G_sub.nodes()}

        pos = nx.spring_layout(G_sub, k=4.0, iterations=100)
        fig2, ax2 = plt.subplots(figsize=(20, 12))

        nx.draw_networkx(
            G_sub, 
            pos, 
            node_color=node_colors,
            node_size=3000,
            font_size=35,
            with_labels=True,  
            **options, 
            ax=ax2
        )
    
        ax2.set_xticks([])  
        ax2.set_yticks([])  
        ax2.set_frame_on(False) 
        plt.axis("off")  

        plt.tight_layout()
        plt.savefig("./figures/"+folders[c] + "_"+buildGraph + "_AMRGraph.pdf", format='pdf', bbox_inches='tight')
        plt.show()

## Non-AMR population

In [None]:
dicc_thresholds = {}
for c in range(len(folders)):
    print("====> Folder:" + str(folders[c]) + " <====")
    dtw = pd.read_csv("./"+buildGraph+"/"+folders[c]+"/tr_noAMR_"+norm+".csv")
    K = exp_kernel(dtw, params[folders[c]]['sigma'])
    K = K - np.eye(K.shape[0])
    
    edges_bef = np.count_nonzero(K)
    print("Number of non-zero values before applying the threshold:", edges_bef)
    s = K.copy()
    min_value = s.min().min()
    max_value = s.max().max()
    s = (s - min_value) / (max_value - min_value)
    s[np.abs(s) < params[folders[c]]['threshold_noamr']] = 0
    edges_aft = np.count_nonzero(s)
    print("Number of non-zero values after applying the threshold:", edges_aft)
    print("%:", (edges_aft * 100) / (numberOfFeatures * numberOfFeatures))
    dicc_thresholds[folders[c]] = (edges_aft * 100) / (numberOfFeatures * numberOfFeatures)
    
    if debug_plot_figures:
        %matplotlib inline
        plt.figure()
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))
        # Image configuration
        img = ax.imshow(s, extent=[0, numberOfFeatures, numberOfFeatures, 0])
        ax.set_xticks(range(0, numberOfFeatures+1, 5))  
        ax.set_yticks(range(0, numberOfFeatures+1, 5))  
        ax.set_xticklabels(range(0, numberOfFeatures+1, 5), rotation=90, fontsize=23) 
        ax.set_yticklabels(range(0, numberOfFeatures+1, 5), fontsize=23)
        cbar = fig.colorbar(img, ax=ax, fraction=0.046, pad=0.04)
        cbar.ax.tick_params(labelsize=23)
        plt.tight_layout(pad=0)
        #plt.savefig("./Figures/" +th_folder +"/"+ folders[c] + "/"+buildGraph + "_AdjtoProdGraph.pdf", format='pdf', bbox_inches='tight')
        plt.show()
    
        # Create a NetworkX graph from the adjacency matrix
        G = nx.from_numpy_array(np.array(s), create_using=nx.DiGraph)
        # Determine the position of the nodes
        pos = nx.spring_layout(G, 0.85)
        # Get the nodes with at least one edge
        nodes_with_edges = [node for node, degree in G.degree() if degree > 0]
        # Create a subgraph with only the nodes that have edges
        G_sub = G.subgraph(nodes_with_edges)
        # Get the edge attributes in the subgraph
        edges, weights = zip(*nx.get_edge_attributes(G_sub, 'weight').items())

        print("Number of edges:", len(edges))

        options = {
            "edgelist": edges,
            "edge_color": weights,
            "width": np.array(weights) * 1,
            "alpha": 1,
        }

        node_colors = ['lightblue' if keys[node] in binary else 'lightgreen' for node in G_sub.nodes()]

        labels = {node: str(node) for node in G_sub.nodes()}
        
        pos = nx.spring_layout(G_sub, k=4.0, iterations=100)
        fig2, ax2 = plt.subplots(figsize=(20, 12))

        nx.draw_networkx(
            G_sub, 
            pos, 
            node_color=node_colors,
            node_size=3000,
            font_size=35,
            with_labels=True, 
            **options, 
            ax=ax2
        )
    
        ax2.set_xticks([])  
        ax2.set_yticks([])  
        ax2.set_frame_on(False) 
        plt.axis("off")  

        plt.tight_layout()
        plt.savefig("./figures/"+folders[c] + "_"+buildGraph + "_nonAMRGraph.pdf", format='pdf', bbox_inches='tight')
        plt.show()

## Population graph 

In [None]:
dicc_thresholds = {}
for c in range(len(folders)):
    print("====> Folder:" + str(folders[c]) + " <====")
    s = pd.read_csv(f"../E2_Standard-GCN/dtw_matrices/"+folders[c]+"/tr_AMR_"+norm+"_sparse.csv")

    
    if debug_plot_figures:
        %matplotlib inline
        plt.figure()
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))
        # Image configuration
        img = ax.imshow(s, extent=[0, numberOfFeatures, numberOfFeatures, 0])
        ax.set_xticks(range(0, numberOfFeatures+1, 5))  
        ax.set_yticks(range(0, numberOfFeatures+1, 5))  
        ax.set_xticklabels(range(0, numberOfFeatures+1, 5), rotation=90, fontsize=23) 
        ax.set_yticklabels(range(0, numberOfFeatures+1, 5), fontsize=23)
        cbar = fig.colorbar(img, ax=ax, fraction=0.046, pad=0.04)
        cbar.ax.tick_params(labelsize=23)
        plt.tight_layout(pad=0)
        #plt.savefig("./Figures/" +th_folder +"/"+ folders[c] + "/"+buildGraph + "_AdjtoProdGraph.pdf", format='pdf', bbox_inches='tight')
        plt.show()
    
        # Create a NetworkX graph from the adjacency matrix
        G = nx.from_numpy_array(np.array(s), create_using=nx.DiGraph)
        # Determine the position of the nodes
        pos = nx.spring_layout(G, 0.85)
        # Get the nodes with at least one edge
        nodes_with_edges = [node for node, degree in G.degree() if degree > 0]
        # Create a subgraph with only the nodes that have edges
        G_sub = G.subgraph(nodes_with_edges)
        # Get the edge attributes in the subgraph
        edges, weights = zip(*nx.get_edge_attributes(G_sub, 'weight').items())

        print("Number of edges:", len(edges))

        options = {
            "edgelist": edges,
            "edge_color": weights,
            "width": np.array(weights) * 1,
            "alpha": 1,
        }

        node_colors = ['lightblue' if keys[node] in binary else 'lightgreen' for node in G_sub.nodes()]

        labels = {node: str(node) for node in G_sub.nodes()}

        pos = nx.spring_layout(G_sub, k=4.0, iterations=100)
        fig2, ax2 = plt.subplots(figsize=(20, 12))

        nx.draw_networkx(
            G_sub, 
            pos, 
            node_color=node_colors,
            node_size=3000,
            font_size=35,
            with_labels=True,  
            **options, 
            ax=ax2
        )
    
        ax2.set_xticks([])  
        ax2.set_yticks([])  
        ax2.set_frame_on(False) 
        plt.axis("off")  

        # Ajustar y guardar
        plt.tight_layout()
        plt.savefig("./figures/"+folders[c] + "_"+buildGraph + "_popGraph.pdf", format='pdf', bbox_inches='tight')

In [None]:
# Centrality measures of the thresholded AMR graph, one split at a time.
dicc_thresholds = {}  # NOTE(review): reset here but never filled in this cell
for c, folder in enumerate(folders):
    print(f"====> Folder:{folder} <====")
    split_params = params[folder]
    dtw = pd.read_csv(f"./{buildGraph}/{folder}/tr_AMR_{norm}.csv")

    # Exponential kernel of the loaded matrix, minus the identity matrix.
    K = exp_kernel(dtw, split_params['sigma'])
    K = K - np.eye(K.shape[0])

    edges_bef = np.count_nonzero(K)
    print("Number of non-zero values before applying the threshold:", edges_bef)

    # Min-max rescaling, then entries below the AMR threshold are zeroed.
    s = K.copy()
    min_value, max_value = s.min().min(), s.max().max()
    s = (s - min_value) / (max_value - min_value)
    s[s.abs() < split_params['threshold_amr']] = 0

    # Directed graph of the thresholded matrix and its centrality measures.
    G = nx.from_numpy_array(np.asarray(s), create_using=nx.DiGraph)
    degree_centrality = nx.degree_centrality(G)
    betweenness_centrality = nx.betweenness_centrality(G)
    closeness_centrality = nx.closeness_centrality(G)
    eigenvector_centrality = nx.eigenvector_centrality(G)

    # Five nodes with the highest degree centrality, best first.
    top_nodes = sorted(
        degree_centrality.items(),
        key=lambda node_score: node_score[1],
        reverse=True,
    )[:5]
    print("Top important nodes by degree centrality:", top_nodes)


In [None]:
# Centrality measures of the thresholded non-AMR graph, one split at a time.
dicc_thresholds = {}  # NOTE(review): reset here but never filled in this cell
for c, folder in enumerate(folders):
    print(f"====> Folder:{folder} <====")
    split_params = params[folder]
    dtw = pd.read_csv(f"./{buildGraph}/{folder}/tr_noAMR_{norm}.csv")

    # Exponential kernel of the loaded matrix, minus the identity matrix.
    K = exp_kernel(dtw, split_params['sigma'])
    K = K - np.eye(K.shape[0])

    edges_bef = np.count_nonzero(K)
    print("Number of non-zero values before applying the threshold:", edges_bef)

    # Min-max rescaling, then entries below the non-AMR threshold are zeroed.
    s = K.copy()
    min_value, max_value = s.min().min(), s.max().max()
    s = (s - min_value) / (max_value - min_value)
    s[s.abs() < split_params['threshold_noamr']] = 0

    # Directed graph of the thresholded matrix and its centrality measures.
    G = nx.from_numpy_array(np.asarray(s), create_using=nx.DiGraph)
    degree_centrality = nx.degree_centrality(G)
    betweenness_centrality = nx.betweenness_centrality(G)
    closeness_centrality = nx.closeness_centrality(G)
    eigenvector_centrality = nx.eigenvector_centrality(G)

    # Five nodes with the highest degree centrality, best first.
    top_nodes = sorted(
        degree_centrality.items(),
        key=lambda node_score: node_score[1],
        reverse=True,
    )[:5]
    print("Top important nodes by degree centrality:", top_nodes)


In [None]:
# Centrality measures of the pre-sparsified matrix, one split at a time.
dicc_thresholds = {}  # NOTE(review): reset here but never filled in this cell
for c, folder in enumerate(folders):
    print(f"====> Folder:{folder} <====")
    s = pd.read_csv(f"../E2_Standard-GCN/dtw_matrices/{folder}/tr_AMR_{norm}_sparse.csv")

    # Directed graph of the loaded matrix and its centrality measures.
    G = nx.from_numpy_array(np.asarray(s), create_using=nx.DiGraph)
    degree_centrality = nx.degree_centrality(G)
    betweenness_centrality = nx.betweenness_centrality(G)
    closeness_centrality = nx.closeness_centrality(G)
    eigenvector_centrality = nx.eigenvector_centrality(G)

    # Five nodes with the highest degree centrality, best first.
    top_nodes = sorted(
        degree_centrality.items(),
        key=lambda node_score: node_score[1],
        reverse=True,
    )[:5]
    print("Top important nodes by degree centrality:", top_nodes)
