In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from joblib import Parallel, delayed

import warnings
warnings.filterwarnings("ignore")
import random

import hgd_dtw

# Number of time steps per series. It is handed to
# hgd_dtw.reshape_patients_by_features and bounds the per-time-step loop in the
# last analysis cell. The "all irregular MTS" cell re-assigns the same value.
numberOfTimeStep = 14

## Heterogeneous Gower Distance
* Implementation of the Gower distance that takes the nature of each variable (binary or continuous) into account, with the corresponding normalization and rescaling.

## Features

In [None]:
# Sub-directories handled by the processing loops further down.
folders = ["s1", "s2", "s3"]

# Codes that open both `keys` and `binary`, in the same order.
leading_flags = [
    'AMG', 'ATF', 'ATI', 'ATP', 'CAR',
    'CF1', 'CF2', 'CF3', 'CF4', 'Falta',
    'GCC', 'GLI', 'LIN', 'LIP', 'MAC',
    'MON', 'NTI', 'OTR', 'OXA', 'PAP',
    'PEN', 'POL', 'QUI', 'SUL', 'TTC',
]

# The `$_{n}$` counterparts of the codes above. Their order differs from
# `leading_flags`; keep it as listed (the position in `keys` presumably maps
# to the feature axis of the tensors -- TODO confirm).
suffixed_n_features = [
    'CAR$_{n}$', 'PAP$_{n}$', 'Falta$_{n}$', 'QUI$_{n}$', 'ATF$_{n}$',
    'OXA$_{n}$', 'PEN$_{n}$', 'CF3$_{n}$', 'GLI$_{n}$', 'CF4$_{n}$',
    'SUL$_{n}$', 'NTI$_{n}$', 'LIN$_{n}$', 'AMG$_{n}$', 'MAC$_{n}$',
    'CF1$_{n}$', 'GCC$_{n}$', 'POL$_{n}$', 'ATI$_{n}$', 'MON$_{n}$',
    'LIP$_{n}$', 'TTC$_{n}$', 'OTR$_{n}$', 'CF2$_{n}$', 'ATP$_{n}$',
]

# Flags that appear, as one contiguous run, in both `keys` and `binary`.
status_flags = [
    'Insulin', 'Art nutrition', 'Sedation', 'Relax',
    'Hepatic$_{fail}$', 'Renal$_{fail}$', 'Coagulation$_{fail}$',
    'Hemodynamic$_{fail}$', 'Respiratory$_{fail}$', 'Multiorganic$_{fail}$',
]

# Full, ordered list of feature names.
keys = (
    leading_flags
    + ['MV hours', '# pat$_{atb}$', '# pat$_{MDR}$']
    + suffixed_n_features
    + ['# pat$_{tot}$', 'Post change']
    + status_flags
    + [
        '# transfusions', 'Vasoactive drug', 'Dosis nems',
        'Tracheo$_{hours}$', 'Ulcer$_{hours}$', 'Hemo$_{hours}$',
        'C01 PIVC 1', 'C01 PIVC 2',
        'C02 CVC - RJ', 'C02 CVC - RS', 'C02 CVC - LS', 'C02 CVC - RF',
        'C02 CVC - LJ', 'C02 CVC - LF',
        '# catheters',
    ]
)

# Features treated as binary by the distance computations.
binary = leading_flags + ['Post change'] + status_flags + ['Vasoactive drug']

# Every remaining feature is treated as continuous; the order follows `keys`.
binary_lookup = set(binary)
continuous = []
for feature_name in keys:
    if feature_name not in binary_lookup:
        continuous.append(feature_name)

## Considering all irregular MTS
* The entire temporal horizon is treated as a unified entity: CPG

In [None]:
for c, folder in enumerate(folders):

    # ---- Parameters ---------------------------------------------------------
    numberOfTimeStep = 14   # time-step count handed to the reshape helper
    norm = "normPower2"     # tag embedded in the input and output file names
    # -------------------------------------------------------------------------

    # Read the training tensor stored for this folder.
    tensor_path = "../../DATA/" + folder + "/X_train_tensor_" + norm + ".npy"
    X_train = np.load(tensor_path)
    print("X_train shape before reshape and val:", X_train.shape)

    # Rearrange the tensor by features: PxTxF --> FxTxP.
    X_train = hgd_dtw.reshape_patients_by_features(X_train, keys, numberOfTimeStep)
    print("X_train shape after reshape:", X_train.shape)

    # Every ordered (f1, f2) pair along the first axis, f1 varying slowest, so
    # that the flat result list can later be reshaped row by row.
    n_features = X_train.shape[0]
    feature_pairs = [(f1, f2) for f1 in range(n_features) for f2 in range(n_features)]

    # Heterogeneous Gower Distance matrix for each pair, computed in parallel.
    result = Parallel(n_jobs=12)(
        delayed(hgd_dtw.compute_hgd_matrix)(
            X_train,
            X_train[f1, :, :],
            X_train[f2, :, :],
            keys[f1],
            keys[f2],
            binary,
            continuous,
        )
        for f1, f2 in feature_pairs
    )

    # Swap NaNs for zeros before running DTW.
    lista_sin_nan = hgd_dtw.replace_nan_with_zero(result)

    # One DTW value per pair, derived from that pair's HGD matrix.
    dtw_matrix_train = Parallel(n_jobs=96)(
        delayed(hgd_dtw.compute_new_dtw)(lista_sin_nan[i])
        for i in range(n_features * n_features)
    )

    # Fold the flat list back into a square pair-by-pair matrix.
    data = np.array(dtw_matrix_train).reshape((n_features, n_features))

    # Write the matrix as CSV.
    # NOTE(review): index=False means the row labels given through index=keys
    # are not written; only the header row carries the feature names.
    output_path = "./estimatedGraphs/dtw-hgd/" + folder + "/X_train_allMTS_" + norm + ".csv"
    dtw_frame = pd.DataFrame(data, columns=keys, index=keys)
    dtw_frame.to_csv(output_path, index=False)


## Considering by time step
* The relationships are assessed independently at each time step

In [None]:
# Parameter for this cell. `norm` is declared here so the cell no longer relies
# on the variable leaked by the loop body of the "all irregular MTS" cell, which
# raised a NameError whenever that cell had not been run first.
# `numberOfTimeStep` comes from the setup cell at the top of the notebook.
norm = "normPower2"

for c in range(len(folders)):
    # Load the training tensor stored for this folder.
    X_train = np.load("../../DATA/" + folders[c] + "/X_train_tensor_" + norm + ".npy")
    print("X_train shape before reshape and val:", X_train.shape)

    # Reshape data by features --> PxTxF to FxTxP
    X_train = hgd_dtw.reshape_patients_by_features(X_train, keys, numberOfTimeStep)
    print("X_train shape after reshape:", X_train.shape)

    for nt in range(numberOfTimeStep):
        # Slice the reshaped tensor at time step `nt`; the first axis of `x`
        # is the feature axis of the FxTxP layout produced above.
        x = X_train[:, nt, :]

        # Square pairwise matrix with one row/column per entry of x's first axis.
        mat_by_time_step = np.zeros((x.shape[0], x.shape[0]))

        # Fill every ordered pair (i, j) with the HGD value between the two slices.
        for i in range(x.shape[0]):
            for j in range(x.shape[0]):
                mat_by_time_step[i, j] = hgd_dtw.hgd_distance(x[i], x[j], keys[i], keys[j], binary, continuous)

        # Post-process the matrix with the helper (zeroes the diagonal, per its name).
        S = hgd_dtw.diagonal_to_zero(mat_by_time_step)

        # One CSV per folder and time step.
        # NOTE(review): index=False means the row labels given through
        # index=keys are not written; only the header row carries the names.
        pd.DataFrame(S, columns=keys, index=keys).to_csv("./estimatedGraphs/dtw-hgd/" + folders[c] + "/X_train_TS_" + str(nt) + "_" + norm + ".csv", index=False)
