# Reproducibilidad de experimentos

In [1]:
import sys
import os
import torch
import random

import torchaudio.transforms    as T
import torch.optim              as optim
import pandas                   as pd
import numpy                    as np

from torchvision       import transforms
from torch.utils.data  import DataLoader
from torch             import nn

sys.path.append(os.path.abspath(os.path.join('..','05-Train-Test')))
from utilit_train_test import make_weights_for_balanced_classes

sys.path.append(os.path.abspath(os.path.join('..','..','iESPnet_SRC_main','utilities')))
from Generator         import SeizureDatasetLabelTimev2, permute_spec, smoothing_label
from Model             import iESPnet
from TrainEval         import train_model_v2, test_model_v2, get_performance_indices

sys.path.append(os.path.abspath(os.path.join('../../..','03 Dynamic-Spatial-Filtering')))
from models            import DynamicSpatialFilter

# Reproducibility: seed PyTorch, the standard-library `random` module and
# NumPy's legacy global generator with one shared value.
SEED = 0
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

In [2]:
# Folder where the spectrograms are stored.
SPE_DIR = '/media/martin/Disco2/Rns_Data/PITT_PI_EEG/'

# CSV with the metadata of all files; loaded once into df_meta, which the
# following cells use to list patients and to build the data splits.
meta_data_file = '/media/martin/Disco2/Rns_Data/PITT_PI_EEG/METADATA/allfiles_metadata.csv'
df_meta = pd.read_csv(meta_data_file)

In [3]:
# --- iESPnet settings -------------------------------------------------------
# Mask sizes handed to torchaudio's FrequencyMasking / TimeMasking in the
# data-augmentation cell. (TIME_MASK_PARAN keeps its original spelling
# because other cells reference it by that name.)
FREQ_MASK_PARAM, TIME_MASK_PARAN = 10, 20

N_CLASSES = 1

# Training settings, copied into `hparams` further down.
learning_rate = 1e-3
batch_size = 64  # 128
epochs = 20
num_workers = 4

# Root folder for everything the experiments write to disk.
save_path = 'models_DSF_iESPnet_prueba/'

# Distinct patient identifiers present in the metadata.
patients = df_meta['rns_id'].unique().tolist()

In [4]:
# --- Dynamic Spatial Filter (DSF) settings ----------------------------------
denoising = 'autoreject'  # options noted by the author: 'autoreject', 'data_augm'
model = 'stager_net'
dsf_type = 'dsfd'  # options noted by the author: 'dsfd', 'dsfm_st'

# The four values below are the ones passed to DynamicSpatialFilter(...)
# when model1 is built.
mlp_input = 'log_diag_cov'
dsf_soft_thresh, dsf_n_out_channels = False, None
n_channels = 4

In [5]:
# iESPnet hyper-parameters, assembled from two groups and merged in the
# original key order.
network_hparams = {
    "n_cnn_layers": 3,
    "n_rnn_layers": 3,
    "rnn_dim": [150, 100, 50],
    "n_class": N_CLASSES,
    "out_ch": [8, 8, 16],
    "dropout": 0.3,
}
training_hparams = {
    "learning_rate": learning_rate,
    "batch_size": batch_size,
    "num_workers": num_workers,
    "epochs": epochs,
}
hparams = {**network_hparams, **training_hparams}

In [6]:
# Patients held out of training: `test_id` for testing, `vali_id` for
# validation. Every other patient in df_meta ends up in the training set.
test_id = [
    'PIT-RNS1090',
    'PIT-RNS8973',
    'PIT-RNS1438',
    'PIT-RNS8326',
    'PIT-RNS3016',
]
vali_id = [
    'PIT-RNS1603',
    'PIT-RNS1556',
    'PIT-RNS1534',
    'PIT-RNS6989',
    'PIT-RNS2543',
    'PIT-RNS7168',
    'PIT-RNS6762',
]

In [7]:
# Starting point for the split: the training frame begins as a full copy of
# the metadata, while the test and validation frames begin empty.
# Open question from the author: does train_df's index need to be reset?
train_df = df_meta.copy()
test_df, vali_df = pd.DataFrame(), pd.DataFrame()

In [8]:
# Split df_meta by patient into test / validation / training subsets.
#
# All three frames are rebuilt from df_meta every time this cell runs, so
# re-executing it cannot append duplicate rows to test_df / vali_df (the
# previous concat-in-a-loop version did) and it does not depend on a train_df
# that an earlier run already mutated in place.
def _rows_for_patients(meta, patient_ids):
    """Return the rows of ``meta`` whose 'rns_id' is one of ``patient_ids``.

    Rows are grouped patient by patient, following the order of
    ``patient_ids``, and the result gets a fresh 0..n-1 index. An empty
    ``patient_ids`` yields an empty DataFrame.
    """
    per_patient = [meta[meta['rns_id'] == pid] for pid in patient_ids]
    if not per_patient:
        return pd.DataFrame()
    return pd.concat(per_patient).reset_index(drop=True)


test_df = _rows_for_patients(df_meta, test_id)
vali_df = _rows_for_patients(df_meta, vali_id)

# Training set = every row whose patient is in neither hold-out list.
# The original df_meta index labels are kept (no reset), exactly as before.
# NOTE(review): whether the dataset class needs a 0..n-1 index here is not
# visible from this notebook — confirm before resetting it.
held_out = df_meta['rns_id'].isin(test_id + vali_id)
train_df = df_meta[~held_out].copy()

In [9]:
# Experiments to run: an experiment name (experiments_1) combined with a
# sub-experiment suffix (experiments_2), e.g. 'exp1' + '.1'.
experiments_1 = [
    'exp1',
    'exp2',
    'exp3',
]
experiments_2 = [
    '.1',
    '.2',
    '.3',
]

In [10]:
# Indices of the experiment being run: s selects from experiments_1 and
# j selects from experiments_2.
s = j = 0

In [11]:
# Build the Dynamic Spatial Filter from the DSF settings defined earlier.
dsf_kwargs = {
    'mlp_input': mlp_input,
    'n_out_channels': dsf_n_out_channels,
    'apply_soft_thresh': dsf_soft_thresh,
}
model1 = DynamicSpatialFilter(n_channels, **dsf_kwargs)

In [12]:
# Build iESPnet; its positional arguments are read from hparams in this order.
iespnet_arg_names = (
    'n_cnn_layers',
    'n_rnn_layers',
    'rnn_dim',
    'n_class',
    'out_ch',
    'dropout',
)
model2 = iESPnet(*[hparams[arg_name] for arg_name in iespnet_arg_names])

In [13]:
# Output folders for the experiment selected by (s, j):
#   <save_path><exp>/<exp><sub-exp>/{runs,models,results,figs}/
# They stay plain strings ending in '/' because a later cell builds file
# names by string concatenation (save_models + 'model_' + ...).
experiment_dir   = save_path + experiments_1[s] + '/' + str(experiments_1[s]) + str(experiments_2[j]) + '/'
save_runs        = experiment_dir + 'runs/'
save_models      = experiment_dir + 'models/'
save_predictions = experiment_dir + 'results/'
save_figs        = experiment_dir + 'figs/'

# exist_ok=True makes the call a no-op for folders that already exist, so the
# separate exists() checks (and the gap between check and creation) are gone.
for out_dir in (save_path, save_runs, save_models, save_predictions, save_figs):
    os.makedirs(out_dir, exist_ok=True)

In [14]:
# Announce which experiment / sub-experiment is about to be trained.
print('Running training for: ', experiments_1[s], experiments_2[j], sep='')

Running training for: exp1.1


In [15]:
# Training dataset. No transform is attached here (transform=None); labels go
# through smoothing_label().
train_set_kwargs = {
    'file': train_df,
    'root_dir': SPE_DIR,
    'transform': None,
    'target_transform': smoothing_label(),
}
train_data = SeizureDatasetLabelTimev2(**train_set_kwargs)

In [16]:
# Test dataset, used "as it is": built straight from test_df with no
# transform (transform=None); labels go through smoothing_label().
test_set_kwargs = {
    'file': test_df,
    'root_dir': SPE_DIR,
    'transform': None,
    'target_transform': smoothing_label(),
}
test_data = SeizureDatasetLabelTimev2(**test_set_kwargs)

In [17]:
# Validation dataset, used "as it is": built straight from vali_df with no
# transform (transform=None); labels go through smoothing_label().
vali_set_kwargs = {
    'file': vali_df,
    'root_dir': SPE_DIR,
    'transform': None,
    'target_transform': smoothing_label(),
}
vali_data = SeizureDatasetLabelTimev2(**vali_set_kwargs)

In [18]:
# Data augmentation pipeline: frequency masking, then time masking, then
# permute_spec(), composed in that order.
augmentation_steps = [
    T.FrequencyMasking(FREQ_MASK_PARAM),
    T.TimeMasking(TIME_MASK_PARAN),
    permute_spec(),
]
transform_train = transforms.Compose(augmentation_steps)

In [19]:
# Per-sample weights for classes [0, 1] computed from train_df, then a
# weighted sampler that draws as many samples as there are weights.
weights = make_weights_for_balanced_classes(train_df, [0, 1], n_concat=1)
sampler = torch.utils.data.sampler.WeightedRandomSampler(
    weights=weights,
    num_samples=len(weights),
)

In [20]:
# Path prefix of the saved model: <save_models>model_<exp><sub-exp>
outputfile = ''.join([save_models, 'model_', experiments_1[s], experiments_2[j]])

In [21]:
# Disabled call to train_model_v2, kept inside a bare string literal instead
# of `#` comments. Because the string is the last expression of the cell,
# Jupyter echoes it back as the cell's output whenever the cell is run.
'''

avg_train_losses, train_accs, avg_valid_losses, valid_accs = train_model_v2(
                                                                            model1, 
                                                                            model2, 
                                                                            hparams, 
                                                                            epochs, 
                                                                            train_data, 
                                                                            vali_data, 
                                                                            transform_train, 
                                                                            sampler, 
                                                                            outputfile,
                                                                            experiments_1[s],
                                                                            experiments_2[j]
                                                                           )

'''                                                                          

'\n\navg_train_losses, train_accs, avg_valid_losses, valid_accs = train_model_v2(\n                                                                            model1, \n                                                                            model2, \n                                                                            hparams, \n                                                                            epochs, \n                                                                            train_data, \n                                                                            vali_data, \n                                                                            transform_train, \n                                                                            sampler, \n                                                                            outputfile,\n                                                                            experiments_1[s],\n                           

In [22]:
# Empty containers for the training loss and accuracy histories.
avg_train_losses, train_accs = [], []

In [23]:
# Empty containers for the validation loss and accuracy histories.
avg_valid_losses, valid_accs = [], []

In [24]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Using {} device'.format(device))

Using cuda device


In [25]:
# cuDNN configuration chosen for run-to-run reproducibility, which is the
# purpose of this notebook.
# With benchmark=True cuDNN times several convolution algorithms and keeps the
# fastest one; that choice can differ between runs and make results diverge
# even with fixed seeds. Disabling it (and requesting deterministic cuDNN
# kernels) trades some speed for repeatable results.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [26]:
# Extra DataLoader options are only supplied when running on CUDA.
if use_cuda:
    kwargs = {'num_workers': hparams["num_workers"], 'pin_memory': True}
else:
    kwargs = {}

# The training loader draws through the weighted sampler; the validation
# loader iterates in order without shuffling.
loader_batch_size = hparams["batch_size"]
train_loader = DataLoader(train_data, batch_size=loader_batch_size, sampler=sampler, **kwargs)
valid_loader = DataLoader(vali_data, batch_size=loader_batch_size, shuffle=False, **kwargs)

In [27]:
# Transfer the DSF module to the selected device. The call is left as the
# cell's last expression so the module summary is displayed.
model1.to(device=device)

DynamicSpatialFilter(
  (feat_extractor): SpatialFeatureExtractor()
  (mlp): Sequential(
    (0): Linear(in_features=4, out_features=4, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4, out_features=20, bias=True)
  )
)

In [28]:
# Transfer iESPnet to the selected device. The call is left as the cell's
# last expression so the module summary is displayed.
model2.to(device=device)

iESPnet(
  (freqcnn): Sequential(
    (0): Conv2d(4, 8, kernel_size=(120, 1), stride=(1, 1), padding=(119, 0), dilation=(2, 1), bias=False)
    (1): ReLU()
    (2): InstanceNorm2d(8, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)
  )
  (timecnn): Sequential(
    (0): Conv2d(4, 8, kernel_size=(1, 181), stride=(1, 1), padding=(0, 180), dilation=(1, 2), bias=False)
    (1): ReLU()
    (2): InstanceNorm2d(8, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)
  )
  (cnn_ori): Conv2d(4, 16, kernel_size=(3, 3), stride=(2, 1), padding=(1, 1), bias=False)
  (cnn): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 1), padding=(1, 1), bias=False)
  (rescnn_layers): Sequential(
    (0): ResidualCNNbatch(
      (cnn1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (cnn2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (dropout1): Dropout(p=0.3, inplace=False)
      (dropout2): Dropout(p=0.3, inpla

In [29]:
# Report the total number of parameter elements of each network
# (model1 first, then model2).
for network in (model1, model2):
    n_params = sum(weight.nelement() for weight in network.parameters())
    print('Num Model Parameters', n_params)

Num Model Parameters 120
Num Model Parameters 1654837


In [30]:
# One AdamW optimizer per network, both with the same learning rate and
# weight decay.
adamw_kwargs = {'lr': hparams['learning_rate'], 'weight_decay': 1e-4}
optimizer1 = optim.AdamW(model1.parameters(), **adamw_kwargs)
optimizer2 = optim.AdamW(model2.parameters(), **adamw_kwargs)

# One-cycle learning-rate schedules with linear annealing; the two schedulers
# differ only in how many steps they expect per epoch.
one_cycle_kwargs = {
    'max_lr': hparams['learning_rate'],
    'epochs': hparams['epochs'],
    'anneal_strategy': 'linear',
}
batches_per_epoch = len(train_loader)

scheduler1 = optim.lr_scheduler.OneCycleLR(
    optimizer1,
    steps_per_epoch=batches_per_epoch,
    **one_cycle_kwargs
)

# NOTE(review): scheduler2 is sized for twice as many steps per epoch as
# scheduler1 — confirm that the training loop really steps it twice per batch.
scheduler2 = optim.lr_scheduler.OneCycleLR(
    optimizer2,
    steps_per_epoch=2 * batches_per_epoch,
    **one_cycle_kwargs
)

# Binary cross-entropy on raw logits, placed on the same device as the models.
criterion = nn.BCEWithLogitsLoss().to(device)

In [31]:
# Epoch number used by the step-by-step cells that follow.
epoch = 1

In [None]:
# Disabled call to training_DSF_iESPnet, kept inside a bare string literal
# instead of `#` comments.
# NOTE(review): training_DSF_iESPnet is not imported in the cells shown here,
# and experiment_1 / experiment_2 are only assigned in a later cell, so this
# call would fail if it were re-enabled at this position.
'''

train_losses, train_aucpr = training_DSF_iESPnet(
                                                 model1, 
                                                 model2, 
                                                 device, 
                                                 train_loader, 
                                                 transform_train, 
                                                 criterion, 
                                                 optimizer1, 
                                                 optimizer2, 
                                                 scheduler1, 
                                                 scheduler2, 
                                                 epoch,
                                                 experiment_1,
                                                 experiment_2
                                                )

'''                                                

In [36]:
# Names of the experiment / sub-experiment selected by the indices s and j.
experiment_1, experiment_2 = experiments_1[s], experiments_2[j]

In [37]:
# Spectrogram parameters.
# NOTE(review): units are not stated in the code; the /1000 factors below
# suggest TT and overlap are in milliseconds and ECOG_SAMPLE_RATE in Hz —
# confirm.
ECOG_SAMPLE_RATE = 250
ECOG_CHANNELS    = 4
TT               = 1000 # window length
SPEC_WIN_LEN     = int(ECOG_SAMPLE_RATE * TT / 1000 ) # window size in samples; evaluates to 250
overlap          = 500 
SPEC_HOP_LEN     = int(ECOG_SAMPLE_RATE * (TT - overlap) / 1000) # hop between consecutive windows in samples; evaluates to 125
SPEC_NFFT        = 500  # 250 / 500 = 0.5, i.e. the "0.5 resolution" the author wants to see changes at
# top_db depends on the sub-experiment suffix: '.1' -> 40, '.2' -> 60, '.3' -> 80.
# NOTE(review): no fallback branch is visible here, so for any other suffix
# top_db would be left undefined (or keep a stale value from an earlier run).
if   experiment_2 == '.1':  
    top_db       = 40.0
elif experiment_2 == '.2':
    top_db       = 60.0
elif experiment_2 == '.3':
    top_db       = 80.0