---
# 1. Data-Processing
---

In [8]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global random generator with a fixed value.
np.random.seed(42)

import warnings
# Suppress every warning from here on.
warnings.filterwarnings('ignore')

from tensorflow.keras.models import load_model


import altair as alt

# Custom Altair theme, registered under the name "feedzai"
def feedzai_theme():
    """Return the Altair theme configuration used by this notebook.

    The configuration sets the default size of continuous views to
    400 x 300 and makes "steelblue" the default mark colour.

    :return: a freshly built, nested ``dict`` with a single "config" key.
    """
    view_cfg = dict(continuousWidth=400, continuousHeight=300)
    mark_cfg = dict(color="steelblue")
    return dict(config=dict(view=view_cfg, mark=mark_cfg))

# Register the theme under the name "feedzai" and make it the active one
alt.themes.register("feedzai", feedzai_theme)
alt.themes.enable("feedzai")

import sys
# Add ../src to the module search path (presumably where the local helper modules live)
sys.path.append("../src")

import os
# Hide every CUDA device from this process (presumably to force CPU execution)
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [None]:
pip uninstall timeshap

Found existing installation: timeshap 1.0.4
Uninstalling timeshap-1.0.4:
  Would remove:
    /home/oescudero/.local/lib/python3.8/site-packages/timeshap-1.0.4.dist-info/*
    /home/oescudero/.local/lib/python3.8/site-packages/timeshap/*
Proceed (Y/n)? 

In [9]:
timeSteps = 14          # number of time steps per sequence
norm = "robustNorm"     # normalisation tag that is part of the data file names


# Column names of the per-event data frame; the first entry is the time-step index.
features =  ['timestamp', 'AMG', 'CAR', 'CF1', 'CF3', 'CF4',
            'Others', 'GLI', 'LIN', 'LIP', 'MAC', 'NTI', 'OXA', 'PAP', 'PEN', 'POL',
            'QUI', 'SUL', 'MV hours', 'Acinet$_{pc}$', 'Enterobac$_{pc}$', 'Enteroc$_{pc}$',
            'Pseud$_{pc}$', 'Staph$_{pc}$', 'Others$_{pc}$', 'ICU$_{hours}$',
            '# pat$_{atb}$', '# pat$_{MDR}$',
            'CAR$_{n}$', 'PAP$_{n}$', 
            'Others$_{n}$', 'QUI$_{n}$',
            'OXA$_{n}$', 'PEN$_{n}$', 'CF3$_{n}$', 'GLI$_{n}$',
            'CF4$_{n}$', 'SUL$_{n}$', 'NTI$_{n}$', 'LIN$_{n}$',
            'AMG$_{n}$', 'MAC$_{n}$', 'CF1$_{n}$', 'POL$_{n}$',
            'LIP$_{n}$', '# pat$_{tot}$' ,'Post change',
            'Insulin', 'Art nutrition', 'Sedation', 'Relax', 'Hepatic$_{fail}$',
            'Renal$_{fail}$', 'Coagulation$_{fail}$', 'Hemodynamic$_{fail}$',
            'Respiratory$_{fail}$', 'Multiorganic$_{fail}$',  '# transfusions',
            'Vasoactive drug', 'Dosis nems', 'Tracheo$_{hours}$', 'Ulcer$_{hours}$',
            'Hemo$_{hours}$', 'C01 PIVC 1',
            'C01 PIVC 2', 'C02 CVC - RJ',
            'C02 CVC - RS', 'C02 CVC - LS', 'C02 CVC - RF',
            'C02 CVC - LJ', 'C02 CVC - LF', '# catheters']

# Type tag of each entry of `features`, position by position:
#   'none' = neither group (the timestamp), 'discreta' = discrete, 'continua' = continuous.
# NOTE(review): the name `tf` hides any notebook-level `import tensorflow as tf`;
# code that needs TensorFlow must import it locally.
tf =  ['none', 'discreta', 'discreta', 'discreta', 'discreta', 'discreta', 
        'discreta', 'discreta', 'discreta', 'discreta', 'discreta',
        'discreta', 'discreta', 'discreta', 'discreta', 'discreta',
        'discreta', 'discreta', 'continua', 'discreta',  
        'discreta', 'discreta', 'discreta', 'discreta', 'discreta', 'continua',
        'continua', 'continua', 'continua',
        'continua', 'continua', 'continua', 'continua',
        'continua', 'continua', 'continua', 'continua',
        'continua', 'continua', 'continua',
        'continua', 'continua', 'continua', 'continua',
        'continua', 'continua', 'discreta', 'discreta',
        'discreta', 'discreta', 'discreta', 'discreta', 'discreta',
        'discreta', 'discreta', 'discreta',
        'discreta', 'continua', 'discreta', 'continua',
        'continua', 'continua', 'continua',
        'continua', 'continua', 'continua', 'continua', 'continua',
        'continua', 'continua', 'continua','continua']

# `features` and `tf` are parallel lists: fail loudly if they drift apart or a tag
# is misspelled, instead of silently mis-classifying or dropping a feature.
if len(tf) != len(features):
    raise ValueError(f"{len(features)} feature names but {len(tf)} type tags")
if not set(tf) <= {'none', 'discreta', 'continua'}:
    raise ValueError(f"unknown type tags: {sorted(set(tf) - {'none', 'discreta', 'continua'})}")

# Positions and names of the continuous / discrete features
idxs_cont = [pos for pos, tag in enumerate(tf) if tag == 'continua']
continuos_features = [features[pos] for pos in idxs_cont]
idxs_dis = [pos for pos, tag in enumerate(tf) if tag == 'discreta']
discrete_features = [features[pos] for pos in idxs_dis]
len(features)

72

---
# 2. Load RNN model
---

In [10]:
def weighted_binary_crossentropy(hyperparameters):
    """Build a weighted binary cross-entropy loss for Keras.

    Binary form of weighted binary cross entropy:
      WBCE(p_t) = -w * (1 - p_t) * log(p_t)
    where p = sigmoid(x) and p_t = p or 1 - p depending on whether the label
    is 1 or 0, respectively. The positive-class term is scaled by w1 and the
    negative-class term by w2.

    Usage:
      model.compile(loss=[weighted_binary_crossentropy(hyperparameters)], metrics=["accuracy"], optimizer=adam)

    :param hyperparameters: mapping holding the class weights under the keys
        "w1" (label 1) and "w2" (label 0).
    :return: a ``loss(y_true, y_pred)`` callable.
    :raises KeyError: if "w1" or "w2" is missing from ``hyperparameters``.
    """
    w1 = hyperparameters["w1"]
    w2 = hyperparameters["w2"]

    def loss(y_true, y_pred):
        """
        :param y_true: A tensor of the same shape as `y_pred`
        :param y_pred:  A tensor resulting from a sigmoid
        :return: Output tensor.
        """
        # Imported locally on purpose: this notebook binds the global name `tf`
        # to a list of feature-type tags and never imports the Keras backend,
        # so the module-level names cannot be relied on here. Repeated imports
        # are served from the module cache.
        import tensorflow as tf
        from tensorflow.keras import backend as K

        # Keep the prediction only where the label matches; the other entries
        # get the neutral value (1 resp. 0) so their term is ~0 after clipping.
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

        epsilon = K.epsilon()
        # clip to prevent NaN's and Inf's
        pt_1 = K.clip(pt_1, epsilon, 1. - epsilon)
        pt_0 = K.clip(pt_0, epsilon, 1. - epsilon)

        return -K.sum(w1 * (1. - pt_1) * K.log(pt_1)) \
               -K.sum(w2 * (pt_0) * K.log(1. - pt_0))

    return loss

In [11]:
# Test-set predictions keyed by split number (as a string)
y_pred_by_split = {}
for i in range(1,4):

    # Step 1. Load data
    # NOTE(review): the train file name carries an extra "0" before the norm tag
    # that the test file name does not - confirm this matches the files on disk.
    X_train = np.load(f"../../DATA/MDR/s{i}/X_train_tensor_0{norm}.npy")
    X_test = np.load(f"../../DATA/MDR/s{i}/X_test_tensor_{norm}.npy")

    y_test = pd.read_csv(f"../../DATA/MDR/s{i}/y_test_tensor_{norm}.csv")

    # Step 2. Load the model trained for this split.
    # The model is only used for inference, so it is loaded without its training
    # configuration (compile=False). That removes the need to register the custom
    # loss; the previous custom_objects entry mapped 'loss' to the loss *factory*
    # rather than to a loss(y_true, y_pred) closure.
    model = load_model(f'./model_split_{i}.h5', compile=False)

    model.summary()

    # Step 3. Predict on the test set of this split
    y_pred = model.predict(x=X_test)
    y_pred_by_split[str(i)] = y_pred

    # Only the first split is processed; the variables of split 1 stay in scope.
    break


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 14, 71)]          0         
                                                                 
 masking (Masking)           (None, 14, 71)            0         
                                                                 
 gru (GRU)                   (None, 3)                 666       
                                                                 
 dense (Dense)               (None, 1)                 3         
                                                                 
Total params: 669 (2.61 KB)
Trainable params: 669 (2.61 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [12]:
# timeSHAP does not handle irregular multivariate time series, so zero padding is
# applied: every 666 (presumably the padding/mask value of the tensors) becomes 0.
# The guard makes the cell safe to re-run: once the time-step column has been
# prepended, the last axis already has len(features) columns and must not grow again.
if X_train.shape[2] == len(features) - 1:
    X_train[X_train == 666] = 0
    # Column with the time-step index (0 .. timeSteps-1) of every event
    new_column = np.tile(np.arange(timeSteps), (X_train.shape[0], 1))
    # Add a trailing axis -> (n_sequences, timeSteps, 1) so it lines up with X_train
    new_column = new_column[..., np.newaxis]
    # Prepend the time-step column along the feature axis
    X_train = np.concatenate((new_column, X_train), axis=2)

# Flatten to one row per event, sequence after sequence
X_train_re = X_train.reshape(-1, X_train.shape[2])
d_train_normalized = pd.DataFrame(X_train_re, columns=features)
# One entity id per sequence. A single constant id for all rows would make any
# group-by on 'id' treat the whole training set as one very long sequence.
d_train_normalized['id'] = np.repeat(np.arange(X_train.shape[0]), X_train.shape[1])

---
# 3. TimeSHAP
---

In [13]:
import TFWrapper
from timeshap.wrappers import TorchModelWrapper

# Wrap the loaded Keras model with the local TFWrapper helper
model_wrapped = TFWrapper.KerasModelWrapper(model)


def f_hs(x, y=None):
    """Model entry point handed to TimeSHAP; forwards to ``model_wrapped.predict_last_hs``."""
    return model_wrapped.predict_last_hs(x, y)

### Baseline event

In [14]:
from timeshap.utils import calc_avg_event

# Baseline event computed from the training frame; passed later as `baseline`
average_event = calc_avg_event(
    d_train_normalized,
    numerical_feats=continuos_features,
    categorical_feats=discrete_features,
)

### Baseline Sequence

In [15]:
from timeshap.utils import calc_avg_sequence

# Baseline sequence computed from the training frame, with rows grouped by 'id'
average_sequence = calc_avg_sequence(
    d_train_normalized,
    numerical_feats=continuos_features,
    categorical_feats=discrete_features,
    model_features=features,
    entity_col=['id'],
)

## 3.1 Local Explanations

In [16]:
# TODO: the explained instance should come from the test set; for now the first
# training sequence is used.
# Take the first `timeSteps` rows (one full sequence) and select the model inputs
# by name: every entry of `features` except the leading 'timestamp'. Selecting by
# name also leaves out the 'id' column, and keeps the result a numeric array
# rather than the object array a frame with a string column converts to.
pos_x_data = d_train_normalized.iloc[0:timeSteps][features[1:]]
# convert the instance to numpy and add the batch axis so TimeSHAP receives it
pos_x_data = np.expand_dims(pos_x_data.to_numpy().copy(), axis=0)

from timeshap.explainer import local_report

### Local Report API

In [17]:
def create_dicc(lista):
    """Return a dict that maps every element of ``lista`` to itself.

    :param lista: iterable of hashable elements.
    :return: ``{element: element}`` for each element, in first-seen order.
    """
    return {elemento: elemento for elemento in lista}

# Model features: every entry of `features` except the leading 'timestamp'
features_new = features[1:]
# Identity mapping from feature name to the name used in the plots
diccionario = create_dicc(features_new)

# local report parameters
pruning_dict = dict(tol=0.00)  # tolerance 0: pruning is not meant to be activated
event_dict = dict(rs=42, nsamples=3200)
feature_dict = dict(
    rs=42,
    nsamples=3200,
    feature_names=features_new,
    plot_features=diccionario,
)
cell_dict = dict(rs=42, nsamples=3200, top_x_feats=8, top_x_events=80)

In [18]:
# Local explanation report for the selected sequence, with the average event as baseline
local_report(
    f_hs,
    pos_x_data,
    pruning_dict,
    event_dict,
    feature_dict,
    cell_dict=cell_dict,
    entity_uuid='adb',
    entity_col='id',
    baseline=average_event,
)

Assuming all features are model features
