# Primera experimentación: estudios de ablación

1. Sin normalizar:
    1. top_db = 40
    2. top_db = 60
    3. top_db = 80 


2. Normalización por canal:
    1. top_db = 40
    2. top_db = 60
    3. top_db = 80 


3. Normalización global:
    1. top_db = 40
    2. top_db = 60
    3. top_db = 80 


Thalamus

- PIT-RNS0427 -- Train
- PIT-RNS1713 -- Train
- PIT-RNS8326 -- Test
- PIT-RNS3016 -- Test
- PIT-RNS7168 -- Val
- PIT-RNS6762 -- Val

1. grupo de entrenamiento de: 18 pacientes


2. grupo de testeo de: 5 pacientes


3. grupo de validación: 7 pacientes

## 1 estudio de ablación

In [None]:
# Standard library and third-party dependencies.
import sys
import os
import torch

import torchaudio.transforms    as T
import torch.optim              as optim
import pandas                   as pd
import numpy                    as np

from torchvision       import transforms

# Project-local modules are located through paths relative to the current
# working directory, so the imports below only resolve when the notebook is
# run from its own folder. Each sys.path.append must precede its imports.
sys.path.append(os.path.abspath(os.path.join('..','..','iESPnet_SRC_main','utilities')))
from Generator         import SeizureDatasetLabelTimev2, permute_spec, smoothing_label
from Model             import iESPnet
from TrainEval         import train_model_v2, test_model_v2, get_performance_indices


# Helper used to build the sample weights for the balanced sampler.
sys.path.append(os.path.abspath(os.path.join('..','05-Train-Test')))
from utilit_train_test import make_weights_for_balanced_classes

# Dynamic Spatial Filtering module.
sys.path.append(os.path.abspath(os.path.join('../../..','03 Dynamic-Spatial-Filtering')))
from models            import DynamicSpatialFilter

# set the seed for reproducibility
# NOTE(review): only PyTorch's global RNG is seeded here; NumPy and Python's
# `random` are not — confirm whether any downstream code draws from them.
torch.manual_seed(0)

In [2]:
# Root directory that holds the pre-computed spectrograms.
SPE_DIR        = '/media/martin/Disco2/Rns_Data/PITT_PI_EEG_PROCESS/'

# The metadata CSV lives under SPE_DIR; derive its path from the root instead
# of repeating the absolute prefix, so the two locations cannot drift apart
# when the data is moved. The resulting string is identical to the original.
meta_data_file = os.path.join(SPE_DIR, 'METADATA', 'allfiles_metadata.csv')

# One row per recording file; the 'rns_id' column identifies the patient.
df_meta        = pd.read_csv(meta_data_file)

In [3]:
# iESPnet training configuration
FREQ_MASK_PARAM    = 10
TIME_MASK_PARAM    = 20
# The original constant was misspelled ("PARAN"). The old name is kept as an
# alias so that cells still referring to it keep working unchanged.
TIME_MASK_PARAN    = TIME_MASK_PARAM
N_CLASSES          = 1
learning_rate      = 1e-3
batch_size         = 64    # 128 was the alternative value noted by the author
epochs             = 20
num_workers        = 4

# Root folder under which every experiment writes its outputs.
save_path          = 'models_DSF_iESPnet/'
# Unique patient identifiers present in the metadata table.
patients           = df_meta['rns_id'].unique().tolist()

In [4]:
# Dynamic Spatial Filtering (DSF) settings.
# Alternatives listed by the author are kept next to each option.
n_channels = 4
mlp_input = 'log_diag_cov'
dsf_n_out_channels = None
dsf_soft_thresh = False
dsf_type = 'dsfd'            # other option: 'dsfm_st'
denoising = 'autoreject'     # other option: 'data_augm'
model = 'stager_net'

In [5]:
# iESPnet hyper-parameters, gathered in one dict. The architecture entries are
# read when the network is built; the whole dict is also handed to the
# train / test helpers and stored with the results.
hparams = dict(
    n_cnn_layers=3,
    n_rnn_layers=3,
    rnn_dim=[150, 100, 50],
    n_class=N_CLASSES,
    out_ch=[8, 8, 16],
    dropout=0.3,
    learning_rate=learning_rate,
    batch_size=batch_size,
    num_workers=num_workers,
    epochs=epochs,
)

In [6]:
# Patient-level split of df_meta into train / validation / test sets.
# Patients listed below go to test or validation; everyone else is training.

test_id  = ['PIT-RNS1090', 'PIT-RNS8973', 'PIT-RNS1438', 'PIT-RNS8326', 'PIT-RNS3016']
vali_id  = ['PIT-RNS1603', 'PIT-RNS1556', 'PIT-RNS1534', 'PIT-RNS6989', 'PIT-RNS2543', 'PIT-RNS7168', 'PIT-RNS6762']

# A patient must never appear in both held-out sets.
_overlap = set(test_id) & set(vali_id)
assert not _overlap, f"patients assigned to both test and validation: {sorted(_overlap)}"

# Build each held-out frame with a single concat (rows grouped in the order of
# the id list, index renumbered from 0) instead of growing an initially empty
# DataFrame inside a loop.
test_df  = pd.concat(
    [df_meta[df_meta['rns_id'] == pid] for pid in test_id]
).reset_index(drop=True)
vali_df  = pd.concat(
    [df_meta[df_meta['rns_id'] == pid] for pid in vali_id]
).reset_index(drop=True)

# Training rows keep their original df_meta index, as before; the copy keeps
# train_df independent of df_meta.
train_df = df_meta[~df_meta['rns_id'].isin(test_id + vali_id)].copy()

In [8]:
# Number of distinct patients in the training split.
train_df['rns_id'].nunique(dropna=False)

18

In [9]:
# Number of distinct patients in the test split.
test_df['rns_id'].nunique(dropna=False)

5

In [10]:
# Number of distinct patients in the validation split.
vali_df['rns_id'].nunique(dropna=False)

7

In [7]:
# Experiment grid: a run is named by joining one entry of experiments_1 with
# one entry of experiments_2 (e.g. 'exp1' + '.1' -> 'exp1.1').
# NOTE(review): presumably these map to the normalisation mode / top_db grid
# described in the notebook header — confirm.
experiments_1 = [f'exp{k}' for k in (1, 2, 3)]
experiments_2 = [f'.{k}' for k in (1, 2, 3)]

In [9]:
# Preview of the naming scheme for the first experiment of the grid.
a = f"{experiments_1[0]}{experiments_2[0]}"
a

'exp1.1'

In [64]:
# Enumerate every (s, j) index pair of the experiment grid.
for s, _ in enumerate(experiments_1):
    for j, _ in enumerate(experiments_2):
        print(s, j)


0 0
0 1
0 2
1 0
1 1
1 2
2 0
2 1
2 2


In [70]:
# Manually pick one experiment: s indexes experiments_1, j indexes experiments_2.
s, j = 0, 2

In [47]:
# Dynamic Spatial Filtering front-end, configured from the DSF settings above.
model1 = DynamicSpatialFilter(
    n_channels,
    mlp_input=mlp_input,
    n_out_channels=dsf_n_out_channels,
    apply_soft_thresh=dsf_soft_thresh,
)

In [48]:
# iESPnet network; the constructor takes its architecture settings
# positionally, in exactly this order.
_arch_keys = ('n_cnn_layers', 'n_rnn_layers', 'rnn_dim', 'n_class', 'out_ch', 'dropout')
model2 = iESPnet(*(hparams[key] for key in _arch_keys))

In [71]:
# Every artefact of one run is written under <save_path><expN>/<expN.M>/.
# The shared prefix is built once so the four sub-folders cannot diverge.
_run_root        = save_path + experiments_1[s] + '/' + str(experiments_1[s]) + str(experiments_2[j])

# Trailing slashes are kept on purpose: later cells append file names directly.
save_runs        = _run_root + '/runs/'
save_models      = _run_root + '/models/'
save_predictions = _run_root + '/results/'
save_figs        = _run_root + '/figs/'

# exist_ok=True makes the cell safe to re-run and removes the check-then-create
# window of the previous `if not os.path.exists(...)` guards.
for _out_dir in (save_path, save_runs, save_models, save_predictions, save_figs):
    os.makedirs(_out_dir, exist_ok=True)

In [72]:
# Announce which experiment of the grid is about to be trained.
print(f'Running training for: {experiments_1[s]}{experiments_2[j]}')

Running training for: exp1.3


Data loader train

In [52]:
# Training dataset. No input transform is attached here; the augmentation
# pipeline is handed to the training routine separately.
train_data = SeizureDatasetLabelTimev2(
    file=train_df,
    root_dir=SPE_DIR,
    transform=None,
    target_transform=smoothing_label(),
)

In [53]:
# Test dataset, used "as it is": no input transform is attached and no
# balancing sampler is created for it in this notebook.
test_data = SeizureDatasetLabelTimev2(
    file=test_df,
    root_dir=SPE_DIR,
    transform=None,
    target_transform=smoothing_label(),
)

In [54]:
# Validation dataset, used "as it is": no input transform is attached and no
# balancing sampler is created for it in this notebook.
vali_data = SeizureDatasetLabelTimev2(
    file=vali_df,
    root_dir=SPE_DIR,
    transform=None,
    target_transform=smoothing_label(),
)

In [55]:
# Training-time data augmentation: frequency masking, time masking, then the
# project's permute_spec transform, applied in that order.
_augmentations = [
    T.FrequencyMasking(FREQ_MASK_PARAM),
    T.TimeMasking(TIME_MASK_PARAN),
    permute_spec(),
]
transform_train = transforms.Compose(_augmentations)

In [56]:
# Per-sample weights for classes 0 and 1, then a weighted sampler that draws
# as many samples per epoch as there are weights.
weights = make_weights_for_balanced_classes(train_df, [0, 1], n_concat=1)
sampler = torch.utils.data.WeightedRandomSampler(weights, num_samples=len(weights))

In [73]:
# Checkpoint path stem (no extension) for the selected experiment.
outputfile = f"{save_models}model_{experiments_1[s]}{experiments_2[j]}"
outputfile

'models_DSF_iESPnet/exp1/exp1.3/models/model_exp1.3'

In [None]:
# Train the DSF + iESPnet pair on the selected experiment. The helper returns
# per-epoch training / validation losses and scores, in this order.
_train_history = train_model_v2(
    model1, model2,
    hparams, epochs,
    train_data, vali_data,
    transform_train, sampler,
    outputfile,
    experiments_1[s], experiments_2[j],
)
avg_train_losses, train_accs, avg_valid_losses, valid_accs = _train_history

In [69]:
# Decision threshold applied to the predicted probabilities, and the
# checkpoint file to evaluate.
best_thr = 0.2
best_path = f"{outputfile}.pth"
best_path

'models_DSF_iESPnet/exp1/exp1.1/models/model_exp1.1.pth'

In [None]:
# Evaluate the saved checkpoint on the validation split.
# Fix: the last argument used experiments_1[j] (e.g. 'exp3') where the training
# call passes experiments_2[j] (e.g. '.3'), so evaluation ran under a different
# experiment configuration than training.
outputs_vali = test_model_v2(model1, model2, hparams, best_path, vali_data, experiments_1[s], experiments_2[j])
prediction_va = get_performance_indices(outputs_vali['y_true'], outputs_vali['y_prob'], best_thr)

In [None]:
# Evaluate the saved checkpoint on the test split.
# Fix: the last argument used experiments_1[j] (e.g. 'exp3') where the training
# call passes experiments_2[j] (e.g. '.3'), so evaluation ran under a different
# experiment configuration than training.
outputs_test  = test_model_v2(model1, model2, hparams, best_path, test_data, experiments_1[s], experiments_2[j])
prediction_te = get_performance_indices(outputs_test['y_true'], outputs_test['y_prob'], best_thr)

In [None]:
# Evaluate the saved checkpoint on the training split.
# Fix: the last argument used experiments_1[j] (e.g. 'exp3') where the training
# call passes experiments_2[j] (e.g. '.3'), so evaluation ran under a different
# experiment configuration than training.
outputs_train = test_model_v2(model1, model2, hparams, best_path, train_data, experiments_1[s], experiments_2[j])
prediction_tr = get_performance_indices(outputs_train['y_true'], outputs_train['y_prob'], best_thr)

In [None]:
# Bundle the training curves, the per-split performance indices and the run
# configuration, then store them next to the experiment's other outputs.
predict_ = {
            "train_losses" : avg_train_losses,
            "train_acupr"  : train_accs,
            "valid_losses" : avg_valid_losses,
            "valid_acupr"  : valid_accs,
            "prediction_va": prediction_va,
            "prediction_te": prediction_te,
            "prediction_tr": prediction_tr,
            "hparams"      : hparams,
            # Record the threshold that was actually used for the performance
            # indices rather than a second hard-coded copy of the value.
            "threshold"    : best_thr,
            # Training-set length relative to the number of metadata rows.
            # TODO(author): verify the training-data size.
            "train_size"   : len(train_data)/len(df_meta)
            }

# np.save stores the dict as a pickled object array; reading it back requires
# np.load(..., allow_pickle=True).
np.save(save_predictions + 'results.npy', predict_)

In [68]:
# Show where the results file was written.
a = f"{save_predictions}results.npy"
a

'models_DSF_iESPnet/exp1/exp1.1/results/results.npy'