In [None]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, GRU, Dropout, Dense
from tensorflow.keras import backend as K
import pickle
import sklearn 
from sklearn import metrics
from sklearn.metrics import confusion_matrix, f1_score

import random, os, json
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from joblib import Parallel, delayed
import multiprocessing
import gc 
import sys
sys.path.append("../../../libraries/")
import utils

In [1]:
import tensorflow as tf
# Print the installed TensorFlow version so the environment used for this run is recorded in the output.
print(tf.__version__)

2024-11-26 11:48:19.730295: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-26 11:48:19.770219: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-26 11:48:19.771030: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.13.1


# Mask with optimization and training

In [None]:
class Mask:
    """Fit and hold a dynamic perturbation mask for a Keras black-box model.

    The mask is a trainable tensor of shape (1, T, N_features) with values
    clipped to [0, 1]. Each input is perturbed by blending it with a causal
    moving average of itself, weighted by the mask, and the mask is optimized
    with SGD on the loss built in `compute_loss`.

    Attributes:
        verbose (bool): Stored from the constructor; not read by the methods below.
        random_seed (int): Stored from the constructor; not read by the methods below.
        deletion_mode (bool): When True the error term is weighted by -1
            (the error is maximized instead of minimized).
        eps (float): Stored from the constructor; not read by the methods below.
        mask_model (tf.keras.Model): Mask model built by `fit` (None before `fit`).
        T (int): Number of time steps, read from the training dataset in `fit`.
        N_features (int): Number of features, read from the training dataset in `fit`.
        model: Black-box model passed to `fit`.
        n_epochs (int): Maximum number of epochs passed to `fit`.
        n_epoch (int): Legacy alias of `n_epochs`, kept in sync by `fit`.
        loss_function: Initialized to None; not assigned by the methods below.
    """

    def __init__(
        self,
        verbose: bool = False,
        random_seed: int = 42,
        deletion_mode: bool = False,
        eps: float = 1.0e-7,
    ):

        self.verbose = verbose
        self.random_seed = random_seed
        self.deletion_mode = deletion_mode
        self.eps = eps
        self.mask_model = None
        self.T = None
        self.N_features = None
        self.model = None
        self.n_epoch = None
        self.n_epochs = None
        self.loss_function = None

    class FadeMovingAverageWindow(tf.keras.layers.Layer):
        """Keras layer that blends its input with a causal moving average of itself.

        Output: ``x_avg + w_mask * (x - x_avg)``, where ``x_avg`` averages each
        time step with up to `window_size` preceding steps. Time steps flagged
        as missing by an incoming Keras mask are zeroed before averaging and
        set back to `mask_value` in the output.

        Args:
            w_mask: Must be non-None when `allow_training` is False. Its value is
                otherwise unused: `build` always replaces it with a new weight.
            window_size (int): Number of past steps included in the average
                (the window holds `window_size + 1` steps including the current one).
            mask_value: Value written at masked positions of the output.
            initial_mask_coeff: Stored and serialized only.
                NOTE(review): `build` initializes the mask with
                glorot_uniform(seed=42), not with this coefficient -- confirm
                whether a constant initializer was intended.
            allow_training (bool): Whether the mask weight is trainable.
        """

        def __init__(self,
                     w_mask=None,
                     window_size=2,
                     mask_value=666,
                     initial_mask_coeff=1,
                     allow_training=False,
                     **kwargs):
            # get_config() emits 'supports_masking', which is not a constructor
            # argument of the base Layer; drop it so from_config() round-trips.
            kwargs.pop('supports_masking', None)

            assert allow_training or (w_mask is not None), \
                "ERROR: non-trainable w_mask must be initialized"

            # Initialize the base Layer first so that attributes set below are
            # tracked normally and cannot be reset by the base initializer.
            super().__init__(**kwargs)

            self.w_mask = w_mask
            self.window_size = window_size
            self.mask_value = mask_value
            self.initial_mask_coeff = initial_mask_coeff
            self.allow_training = allow_training
            self.supports_masking = True

        def get_config(self):
            """Return the constructor arguments as a plain-Python config dict."""
            w_mask = self.w_mask
            if isinstance(w_mask, tf.Variable):
                # After build() w_mask is a variable; export its values instead.
                w_mask = w_mask.numpy().tolist()

            config = super().get_config().copy()
            config.update({
                'w_mask': w_mask,
                'window_size': self.window_size,
                'mask_value': self.mask_value,
                'initial_mask_coeff': self.initial_mask_coeff,
                'allow_training': self.allow_training,
                'supports_masking': self.supports_masking,
            })
            return config

        def build(self, input_shape):
            """Create the (1, time_steps, features) mask weight, clipped to [0, 1]."""
            _, num_time_steps, num_feats = input_shape
            initializer = tf.keras.initializers.glorot_uniform(seed=42)
            self.w_mask = self.add_weight(shape=(1, num_time_steps, num_feats),
                                          name='w_mask',
                                          initializer=initializer,
                                          trainable=self.allow_training,
                                          constraint=lambda x: tf.clip_by_value(x, 0, 1))
            super().build(input_shape)

        def call(self, x, mask=None):
            """Apply the mask-weighted moving-average perturbation to `x`.

            Args:
                x: Input tensor indexed as (batch, time, feature).
                mask: Optional Keras mask indexed as (batch, time).

            Returns:
                Perturbed tensor with the same shape as `x`.
            """
            # Masking the missing values with 0s
            if mask is not None:
                # Broadcast the (batch, time) mask over the feature axis.
                mask_matrix = tf.transpose(
                    tf.ones([x.shape[2], 1, 1]) * tf.cast(mask, "float"),
                    perm=[1, 2, 0],
                )
                x_masked = tf.where(mask_matrix == 1.0, x, 0.0)
            else:
                # tf.Tensor has no .copy(); tf.identity gives an equivalent tensor.
                x_masked = tf.identity(x)

            # Creating the filter_coefs of the moving average window:
            # entry (t, s) is non-zero when 0 <= t - s <= window_size.
            T = x_masked.shape[1]
            T_axis = tf.range(1.0, T + 1, delta=1)
            lag = tf.expand_dims(T_axis, axis=1) - tf.expand_dims(T_axis, axis=0)
            in_window = (lag >= 0) & (lag <= self.window_size)
            filter_coefs = tf.cast(in_window, tf.float32)
            # Normalize every row so that it is an average.
            filter_coefs = filter_coefs / tf.math.reduce_sum(filter_coefs, axis=1, keepdims=True)

            # Applying the filter of moving average window
            x_avg = tf.linalg.matmul(filter_coefs, x_masked)

            # The perturbation is an affine combination of the input and the
            # moving average, weighted by the mask
            x_pert = x_avg + self.w_mask * (x_masked - x_avg)

            if mask is not None:
                return tf.where(mask_matrix == 1.0, x_pert, self.mask_value * tf.ones_like(x_pert))
            return x_pert

    @staticmethod
    def compute_loss(
        mask_model,
        T,
        N_features,
        y_pred_pert,
        y_pred,
        loss_fn,
        error_factor,
        reg_factor,
        time_reg_factor,
        reg_ref
    ):
        """Combine the prediction error with the size and time regularizers.

        The mask is read as the live variable `mask_model.weights[0]` (not via
        `get_weights()`, which returns NumPy copies), and no term is converted
        to NumPy, so a surrounding `tf.GradientTape` differentiates all three
        terms with respect to the mask.

        Args:
            mask_model: Mask model whose first weight is the (1, T, N_features) mask.
            T (int): Number of time steps.
            N_features (int): Number of features.
            y_pred_pert: Black-box predictions on the perturbed input.
            y_pred: Black-box predictions on the original input.
            loss_fn: Callable applied as ``loss_fn(y_pred_pert, y_pred)``.
            error_factor: Weight of the error term.
            reg_factor: Weight of the size regularizer.
            time_reg_factor: Weight of the time-variation regularizer.
            reg_ref: Sorted 0/1 reference vector of length ``T * N_features``.

        Returns:
            Scalar tensor with the total loss.
        """
        mask_values = mask_model.weights[0]
        mask_sorted = tf.sort(tf.reshape(mask_values, shape=(T * N_features,)))
        # Size regularizer: distance between the sorted mask and the reference.
        size_reg = tf.math.reduce_mean((reg_ref - mask_sorted) ** 2)
        # Time regularizer: mean absolute change between consecutive time steps.
        time_reg = tf.math.reduce_mean(
            tf.abs(mask_values[0, 1:T, :] - mask_values[0, :(T - 1), :])
        )
        # NOTE(review): Keras losses are called as loss(y_true, y_pred); here the
        # perturbed prediction is passed first. Confirm this order is intended.
        error = loss_fn(y_pred_pert, y_pred)
        # Tensors are kept on the left and factors cast to Python floats so the
        # arithmetic is always dispatched to TensorFlow (factors may be NumPy scalars).
        loss = (
            error * float(error_factor)
            + size_reg * float(reg_factor)
            + time_reg * float(time_reg_factor)
        )

        return loss

    @staticmethod
    def build_model(
        initial_mask_coeff,
        n_time_steps,
        n_features,
        mask_value,
        window_size,
        sigma_max=2.0,
        eps=1.0e-7
    ):
        """Build the mask model: Input -> Masking -> FadeMovingAverageWindow.

        Args:
            initial_mask_coeff: Forwarded to the perturbation layer.
            n_time_steps (int): Number of time steps of the input.
            n_features (int): Number of features of the input.
            mask_value: Padding value recognised by the Masking layer.
            window_size (int): Moving-average window of the perturbation layer.
            sigma_max: Unused.
            eps: Unused.

        Returns:
            tf.keras.Model with outputs ``[inputs, perturbed_output]``.
        """
        inputs = tf.keras.Input(shape=(n_time_steps, n_features), name="original_input")
        masked = tf.keras.layers.Masking(mask_value=mask_value)(inputs)
        perturbed_output = Mask.FadeMovingAverageWindow(w_mask=None,
                                                        window_size=window_size,
                                                        mask_value=mask_value,
                                                        initial_mask_coeff=initial_mask_coeff,
                                                        allow_training=True)(masked)
        model = tf.keras.Model(inputs=inputs, outputs=[inputs, perturbed_output])

        return model

    # Mask Optimization
    def fit(
        self,
        x_train,
        x_val,
        model,
        n_epochs: int = 500,
        keep_ratio: float = 0.5,
        initial_mask_coeff: float = 0.5,
        size_reg_factor_init: float = 0.5,
        size_reg_factor_dilation: float = 100,
        time_reg_factor: float = 0,

        sigma_max: float = 2.0,
        eps: float = 1.0e-7,
        window_size=2,

        mask_value = 666,
        peek_duration = 5,
        min_delta = 0.00001,
        learning_rate: float = 1.0e-3,
        momentum: float = 0.9,
    ):
        """Optimize a mask for the black-box `model` on batched datasets.

        Args:
            x_train: Batched tf.data.Dataset yielding (inputs, labels); inputs
                are indexed as (batch, time, feature). Labels are not used.
            x_val: Batched validation dataset with the same structure.
            model: Black-box Keras model, called with ``training=False``.
            n_epochs: Maximum number of passes over `x_train`.
            keep_ratio: Fraction of mask entries whose reference value is 1.
            initial_mask_coeff: Forwarded to `build_model`.
            size_reg_factor_init: Initial weight of the size regularizer.
            size_reg_factor_dilation: Ratio between the size-regularizer weight
                after `n_epochs` epochs and its initial value.
            time_reg_factor: Weight of the time-variation regularizer.
            sigma_max: Unused.
            eps: Unused.
            window_size: Moving-average window of the perturbation layer.
            mask_value: Padding value marking missing time steps.
            peek_duration: Forwarded to `check_early_stopping`.
            min_delta: Forwarded to `check_early_stopping`.
            learning_rate: Learning rate of the SGD optimizer.
            momentum: Momentum of the SGD optimizer.

        Returns:
            Tuple of four NumPy arrays: total training loss per epoch, total
            validation loss per epoch, training BCE per epoch, and the
            validation BCE history maintained by `check_early_stopping`.
        """
        self.model = model
        self.n_epochs = n_epochs
        self.n_epoch = n_epochs
        # Public equivalent of the private `_flat_shapes[0]`: shape of the input component.
        _, self.T, self.N_features = x_train.element_spec[0].shape

        reg_factor = size_reg_factor_init
        error_factor = 1 - 2 * self.deletion_mode  # In deletion mode, the error has to be maximized
        # Per-epoch multiplier so that the factor grows by `size_reg_factor_dilation` over n_epochs.
        reg_multiplicator = np.exp(np.log(size_reg_factor_dilation) / n_epochs)

        # Initializing the reference vector used in the size regulator:
        # a block of zeros followed by ones.
        n_entries = self.T * self.N_features
        n_zeros = int((1 - keep_ratio) * n_entries)
        reg_ref = tf.concat((tf.zeros([n_zeros]), tf.ones([n_entries - n_zeros])), axis=0)

        # The initial mask model is defined with the initial mask coefficient
        mask_model = Mask.build_model(initial_mask_coeff,
                                      self.T, self.N_features,
                                      window_size=window_size,
                                      mask_value=mask_value)
        self.mask_model = mask_model
        # Instantiate an optimizer.
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum)
        # Instantiate a loss function.
        loss_fn = tf.keras.losses.BinaryCrossentropy()

        # Run the optimization
        hist_train = []
        hist_bce_train = []
        hist_val = []
        hist_bce_val = []
        hist_bce_val_peek = []

        stop_training = False
        flag_peek = False
        past_loss_bce = np.inf
        min_value = np.inf
        counter = 0
        for epoch in range(n_epochs):
            tf.keras.backend.clear_session()

            # ---- training pass ----
            v_loss_train = []
            v_loss_bce_train = []
            for step, (x_batch_train, y_batch_train) in enumerate(x_train):
                # Reference prediction on the unperturbed batch (independent of the mask).
                y_pred = self.model(x_batch_train, training=False)

                # Record the forward pass so the loss can be differentiated w.r.t. the mask.
                with tf.GradientTape() as tape:
                    # Apply the mask_model to build the perturbed batch.
                    x_batch, x_batch_pert = mask_model(x_batch_train, training=True)
                    # Get the predictions using the black-box model
                    y_pred_pert = self.model(x_batch_pert, training=False)
                    # Compute the loss value for this batch.
                    loss = Mask.compute_loss(mask_model,
                                             self.T, self.N_features,
                                             y_pred_pert, y_pred,
                                             loss_fn, error_factor, reg_factor, time_reg_factor, reg_ref)

                # One SGD step on the mask weights.
                grads = tape.gradient(loss, mask_model.trainable_weights)
                optimizer.apply_gradients(zip(grads, mask_model.trainable_weights))

                v_loss_train.append(loss)
                v_loss_bce_train.append(loss_fn(y_pred_pert, y_pred))

                # Memory cleanup after processing the batch
                del x_batch_train, y_batch_train
                gc.collect()  # Force garbage collection

            # ---- validation pass ----
            v_loss_val = []
            v_loss_bce_val = []
            for step, (x_batch_val, y_batch_val) in enumerate(x_val):
                x_batch, x_batch_pert = mask_model(x_batch_val, training=False)
                y_pred_pert = self.model(x_batch_pert, training=False)
                y_pred = self.model(x_batch_val, training=False)

                loss = Mask.compute_loss(mask_model,
                                         self.T, self.N_features,
                                         y_pred_pert, y_pred,
                                         loss_fn, error_factor, reg_factor, time_reg_factor, reg_ref)
                v_loss_val.append(loss)
                v_loss_bce_val.append(loss_fn(y_pred_pert, y_pred))

                # Memory cleanup after validation
                del x_batch_val, y_batch_val
                gc.collect()  # Force garbage collection

            # Memory cleanup and garbage collection after training and validation
            gc.collect()  # Force garbage collection
            tf.keras.backend.clear_session()  # Clear session to free up memory

            # Early stopping is driven by the mean validation BCE of this epoch.
            (stop_training, flag_peek, past_loss_bce, min_value,
             hist_bce_val, hist_bce_val_peek, counter) = check_early_stopping(
                np.array(v_loss_bce_val).mean(), past_loss_bce, min_value, hist_bce_val, hist_bce_val_peek,
                flag_peek, peek_duration, counter, min_delta,
                mask_model
            )

            if stop_training:
                break

            # Increase the weight of the size regularizer for the next epoch.
            reg_factor *= reg_multiplicator

            hist_train.append(np.array(v_loss_train).mean())
            hist_val.append(np.array(v_loss_val).mean())
            hist_bce_train.append(np.array(v_loss_bce_train).mean())

        return np.array(hist_train), np.array(hist_val), np.array(hist_bce_train), np.array(hist_bce_val)

In [None]:
def check_early_stopping(
    loss_bce, past_loss_bce, min_value, v_loss_bce, v_loss_bce_peek,
    flag_peek, peek_duration, counter, min_delta,
    mask_model
):
    """Update the early-stopping state with the loss of one epoch.

    While not peeking, the loss is appended to `v_loss_bce`; if it is worse
    than `past_loss_bce` by more than `min_delta`, a peek phase starts.
    During a peek phase the next `peek_duration` losses are collected in
    `v_loss_bce_peek`. On the call after the window is full, the best peek
    loss is compared with the loss recorded just before the degradation
    (`v_loss_bce[-2]`): if it is better, the peek losses up to that best value
    are merged into the history and training continues; otherwise training
    stops. The loss passed on that deciding call is not recorded.

    Args:
        loss_bce: Loss of the current epoch.
        past_loss_bce: Last accepted loss from the non-peek phase.
        min_value: Smallest loss seen so far.
        v_loss_bce (list): Accepted loss history (mutated in place).
        v_loss_bce_peek (list): Losses collected during the peek phase (mutated in place).
        flag_peek (bool): True while a peek phase is active.
        peek_duration (int): Number of losses collected before deciding.
        counter (int): Number of losses collected in the current peek phase.
        min_delta (float): Minimum worsening that triggers a peek phase.
        mask_model: Unused.

    Returns:
        Tuple ``(stop_training, flag_peek, past_loss_bce, min_value,
        v_loss_bce, v_loss_bce_peek, counter)`` with the updated state.
    """
    stop_training = False

    if not flag_peek:
        v_loss_bce.append(loss_bce)
        diff = past_loss_bce - loss_bce
        if (diff < 0) and (abs(diff) > min_delta):
            # The loss got worse: start collecting the following epochs.
            flag_peek = True
        else:
            past_loss_bce = loss_bce
            if loss_bce < min_value:
                min_value = loss_bce
    elif counter < peek_duration:
        v_loss_bce_peek.append(loss_bce)
        counter += 1
    else:
        # The peek window is over: decide whether the loss recovered.
        flag_peek = False
        if not v_loss_bce_peek:
            # Nothing was collected (peek_duration <= 0), so there is no
            # evidence of recovery; stop instead of calling min() on an empty list.
            stop_training = True
        else:
            best_value = v_loss_bce[-2]  # loss recorded just before the degradation
            minimum_peek_value = min(v_loss_bce_peek)
            if minimum_peek_value < best_value:
                index = v_loss_bce_peek.index(minimum_peek_value)
                v_loss_bce.extend(v_loss_bce_peek[:index + 1])
                if minimum_peek_value < min_value:
                    min_value = minimum_peek_value
                counter = 0
                v_loss_bce_peek.clear()
                gc.collect()
            else:
                stop_training = True

    return stop_training, flag_peek, past_loss_bce, min_value, v_loss_bce, v_loss_bce_peek, counter

# Hyperparameters

In [None]:
# Seeds handed to the grid search (indexed by split number further down).
seeds = [20, 30, 45, 70]

# Budget of the mask optimization and growth of the size regularizer over it.
n_epochs = 500
size_reg_factor_dilation = n_epochs / 4

# Candidate values explored by the grid search.
learning_rate = [1e-4, 1e-3, 1e-2, 1e-1]
keep_ratio = [0.2, 0.4, 0.6]
size_reg_factor_init = [0.2, 0.4, 0.6]

# Class weights forwarded to the weighted binary cross-entropy of the GRU model.
w1, w2 = 0.82, 0.18

# Data dimensions and training settings.
inputShape, n_time_steps = 56, 14
batch_size = 32
epochs = 500
patience = 10

# Settings bundle shared by the model loader and the grid search.
hyperparameters = dict(
    epochs=epochs,
    n_dynamic_features=inputShape,
    n_timesteps=n_time_steps,
    batch_size=batch_size,
    patience=patience,
    w1=w1,
    w2=w2,
    mask_value=666,
    monitor='val_loss',
    mindelta=0,
    verbose=0,
    level=3,
)

## Code

In [None]:
import gc
import logging
import numpy as np
import pandas as pd
import tensorflow as tf

# Send INFO-level progress messages of the grid search to a log file,
# appending to it across runs.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='dynamask.txt',
    filemode='a',
)

def load_data(split, fold):
    """Load the training and validation arrays/labels of one fold of one split.

    Args:
        split: Split identifier used in the directory name.
        fold: Fold identifier used in the file names.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val)``: the X entries come from
        ``np.load`` and the y entries from ``pd.read_csv(..., index_col=0)``.
    """
    split_dir = f"../../../ORIGINAL_DATA/MDR/splits_14_days/notbalanced/split_{split}"

    def _features(subset):
        return np.load(f"{split_dir}/X_{subset}_tensor_{fold}.npy")

    def _labels(subset):
        return pd.read_csv(f"{split_dir}/y_{subset}_{fold}.csv", index_col=0)

    return _features("train"), _labels("train"), _features("val"), _labels("val")

def myCVGrid(hyperparameters, learning_rate, keep_ratio, size_reg_factor_init, size_reg_factor_dilation, model_GRU, split, seed):
    """Grid-search the mask hyperparameters with 5-fold cross-validation.

    For every combination of `learning_rate`, `keep_ratio` and
    `size_reg_factor_init`, a mask is fitted on each fold of `split` and the
    minimum validation BCE of each fit is averaged over the folds. The
    combination with the lowest average is returned.

    Args:
        hyperparameters (dict): Base settings; reads 'epochs' and 'mindelta',
            and optionally 'batch_size' (default 32) and 'mask_value' (default 666).
        learning_rate (list): Candidate learning rates.
        keep_ratio (list): Candidate keep ratios.
        size_reg_factor_init (list): Candidate initial size-regularizer weights.
        size_reg_factor_dilation: Forwarded to `Mask.fit`.
        model_GRU: Black-box model to explain.
        split: Split identifier forwarded to `load_data`.
        seed: Seed of the dataset shuffling, so the search is repeatable.
            ``None`` leaves the shuffling unseeded.

    Returns:
        dict with keys 'learning_rate', 'keep_ratio' and 'size_reg_factor_init';
        empty if no combination produced a mean loss below infinity.
    """
    import itertools

    n_folds = 5
    log_interval = 5
    shuffle_buffer = 1024
    batch_size = hyperparameters.get('batch_size', 32)
    mask_value = hyperparameters.get('mask_value', 666)

    best_hyperparameters = {}
    lowest_mean_val_loss = np.inf

    logging.info("Starting the search...")

    grid = itertools.product(learning_rate, keep_ratio, size_reg_factor_init)
    for iteration_count, (lr, ratio, reg_init) in enumerate(grid, start=1):
        if iteration_count % log_interval == 0:
            logging.info(f"Still running... Evaluated {iteration_count} combinations.")

        total_val_loss = 0.0

        for fold in range(n_folds):
            X_train, y_train, X_val, y_val = load_data(split, fold)

            train_dataset = (
                tf.data.Dataset.from_tensor_slices((X_train, y_train))
                .shuffle(shuffle_buffer, seed=seed)
                .batch(batch_size)
            )
            val_dataset = (
                tf.data.Dataset.from_tensor_slices((X_val, y_val))
                .shuffle(shuffle_buffer, seed=seed)
                .batch(batch_size)
            )

            mask = Mask()
            _, _, _, bce_val = mask.fit(
                train_dataset, val_dataset,
                model_GRU,
                n_epochs=hyperparameters['epochs'],
                learning_rate=lr,
                peek_duration=4,
                min_delta=hyperparameters['mindelta'],
                keep_ratio=ratio,
                size_reg_factor_init=reg_init,
                size_reg_factor_dilation=size_reg_factor_dilation,
                time_reg_factor=0,
                window_size=5,
                mask_value=mask_value,
            )

            # Score of the fold: best validation BCE reached during the fit.
            total_val_loss += bce_val.min()

            del X_train, y_train, X_val, y_val, train_dataset, val_dataset
            gc.collect()

        mean_val_loss = total_val_loss / n_folds

        if mean_val_loss < lowest_mean_val_loss:
            lowest_mean_val_loss = mean_val_loss
            best_hyperparameters = {
                'learning_rate': lr,
                'keep_ratio': ratio,
                'size_reg_factor_init': reg_init,
            }

        # Garbage collection
        gc.collect()

    return best_hyperparameters


In [None]:
from tensorflow.keras.models import load_model
def load_keras_model(filepath):
    """Load a saved Keras model, resolving its custom loss.

    The object registered under the name 'loss' is built by
    `utils.weighted_binary_crossentropy` from the notebook-level
    `hyperparameters` dict.

    Args:
        filepath: Path of the saved model file.

    Returns:
        The model returned by `load_model`.
    """
    return load_model(
        filepath,
        custom_objects={'loss': utils.weighted_binary_crossentropy(hyperparameters)},
    )

# Set to True to re-run the grid search and the final mask fit for every split;
# with False the cell only defines the two flags below.
run_model = False
# Fold whose train/validation files are used for the final fit of each split.
n = 4
if run_model:
    v_early = []
    loss_dev = []
    v_models = []
    bestHyperparameters_bySplit = {}
    y_pred_by_split = {}
    weights = []

    for i in (1, 2, 3):
        data_dir = f"../../../ORIGINAL_DATA/MDR/splits_14_days/notbalanced/split_{i}"

        X_test = np.load(f"{data_dir}/X_test_tensor.npy")
        y_test = pd.read_csv(f"{data_dir}/y_test.csv", index_col=0)

        X_train = np.load(f"{data_dir}/X_train_tensor_{n}.npy")
        y_train = pd.read_csv(f"{data_dir}/y_train_{n}.csv", index_col=0)

        X_val = np.load(f"{data_dir}/X_val_tensor_{n}.npy")
        y_val = pd.read_csv(f"{data_dir}/y_val_{n}.csv", index_col=0)

        # We take for each split, the best model that we obtained in 0_WITHOUT_FS
        model_GRU = load_keras_model(
            os.path.join(
                f'../../../experiments/MDR/considering_all_features/Results_GRU/split_{i}/',
                f"model_split_{i}.h5",
            )
        )

        # Cross-validated search of the mask hyperparameters for this split.
        best_hyperparameters = myCVGrid(
            hyperparameters,
            learning_rate,
            keep_ratio,
            size_reg_factor_init,
            size_reg_factor_dilation,
            model_GRU,
            i,
            seeds[i],
        )
        bestHyperparameters_bySplit[str(i)] = best_hyperparameters

        # Save best hyperparameters for current split
        split_directory = f'./Results_Dynamask/split_{i}'
        os.makedirs(split_directory, exist_ok=True)

        with open(os.path.join(split_directory, f"bestHyperparameters_split_{i}.pkl"), 'wb') as f:
            pickle.dump(best_hyperparameters, f)

        hyperparameters.update(
            learning_rate=best_hyperparameters["learning_rate"],
            keep_ratio=best_hyperparameters["keep_ratio"],
            size_reg_factor_init=best_hyperparameters["size_reg_factor_init"],
        )

        # Final fit of the mask with the selected hyperparameters.
        train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(1024).batch(32)
        val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).shuffle(1024).batch(32)

        mask = Mask()
        hist_train, hist_val, bce_train, bce_val = mask.fit(
            train_dataset, val_dataset,
            model_GRU,
            n_epochs=hyperparameters['epochs'],
            learning_rate=hyperparameters['learning_rate'],
            peek_duration=4,
            min_delta=hyperparameters['mindelta'],
            keep_ratio=hyperparameters['keep_ratio'],
            size_reg_factor_init=hyperparameters['size_reg_factor_init'],
            size_reg_factor_dilation=size_reg_factor_dilation,
            time_reg_factor=0,
            window_size=5
        )

        # Drop the leading singleton axis of the mask weight before saving it.
        mask_weights = mask.mask_model.get_weights()[0][0, :, :]
        with open(os.path.join(split_directory, f"weights_split_{i}.pkl"), 'wb') as f:
            pickle.dump(mask_weights, f)

        # Predictions of the black-box model on the test set of this split.
        y_pred = model_GRU.predict(x=[X_test])
        y_pred_by_split[str(i)] = y_pred

        with open(os.path.join(split_directory, f"y_pred_split_{i}.pkl"), 'wb') as f:
            pickle.dump(y_pred, f)

        # Calculate metrics
        metrics_dict = utils.calculate_and_save_metrics(
            y_test["individualMRGerm"].values,
            y_pred,
            split_directory,
            split_index=i,
        )

In [None]:
from matplotlib import rcParams
# Global matplotlib font size applied to every figure created after this cell.
rcParams['font.size'] = 38

# Feature labels used as x tick labels of the mask heatmaps.
# The same 23 codes appear twice: once plain and once with an "_{n}" subscript.
_family_codes = [
    'AMG', 'ATF', 'CAR', 'CF1', 'CF2', 'CF3', 'CF4', 'Others', 'GCC', 'GLI', 'LIN', 'LIP',
    'MAC', 'MON', 'NTI', 'OTR', 'OXA', 'PAP', 'PEN', 'POL', 'QUI', 'SUL', 'TTC',
]

_pc_labels = [
    r'$acinet._{pc}$', r'$enterob._{pc}$', r'$enteroc._{pc}$',
    r'$pseudo._{pc}$', r'$staph._{pc}$', r'$stenot._{pc}$', r'$others_{pc}$',
]

_other_labels = ['Mech. Vent.', '# pat.', '# MDR pat.']

keys = (
    list(_family_codes)
    + _pc_labels
    + _other_labels
    + [f'${code}_{{n}}$' for code in _family_codes]
)

In [None]:
import pickle
def load_from_pickle(filename):
    """Read `filename` in binary mode and return the object unpickled from it."""
    # pickle can execute arbitrary code while loading; only use on trusted files.
    with open(filename, 'rb') as handle:
        payload = pickle.load(handle)
    return payload

# Draw and save one heatmap of the fitted mask per split.
# savefig does not create missing folders, so make sure the output folder exists.
os.makedirs('./Figures', exist_ok=True)

for i in [1, 2, 3]:
    directory = f'./Results_Dynamask/split_{i}'
    mask_weights = load_from_pickle(os.path.join(directory, f"weights_split_{i}.pkl"))

    # One label per mask column is required for the tick labels to line up.
    assert mask_weights.shape[1] == len(keys), "number of feature labels does not match the mask width"

    fig, ax = plt.subplots(figsize=(50, 20))
    # Passing the labels to seaborn keeps every tick; relabelling afterwards can
    # fail when seaborn has thinned the automatic ticks.
    heatmap = sns.heatmap(mask_weights, cmap='viridis', xticklabels=keys, ax=ax)
    ax.tick_params(axis='x', labelrotation=90)
    plt.tight_layout()
    plt.savefig('./Figures/heatmap_Dynamask_s'+str(i)+'.pdf', bbox_inches='tight', pad_inches=0)
    plt.show()
    # Release the figure so memory does not accumulate across splits.
    plt.close(fig)