# Train DSF e iESPnet

In [1]:
import sys
import os
import torch

import torchaudio.transforms    as T
import pandas                   as pd

from torchvision       import transforms
from torch.utils.data  import DataLoader

sys.path.append(os.path.abspath(os.path.join('..','..','iESPnet_SRC_main','utilities')))
from Generator         import SeizureDatasetLabelTimev2, scale_spec, permute_spec, smoothing_label
from Model             import iESPnet
from TrainEval         import train_model_opt, test_model, train_model, get_thr_output, get_performance_indices

sys.path.append(os.path.abspath(os.path.join('..','05-Train-Test')))
from utilit_train_test import make_weights_for_balanced_classes

In [2]:
# Seed PyTorch's default random number generator with a fixed value.
torch.manual_seed(0)

<torch._C.Generator at 0x7a38ace490f0>

In [3]:
# Root directory where the spectrograms are stored.
SPE_DIR = '/media/martin/Disco2/Rns_Data/PITT_PI_EEG/'

# The metadata CSV lives under SPE_DIR; the joined path resolves to
# '/media/martin/Disco2/Rns_Data/PITT_PI_EEG/METADATA/allfiles_metadata.csv'.
meta_data_file = os.path.join(SPE_DIR, 'METADATA', 'allfiles_metadata.csv')

df_meta = pd.read_csv(meta_data_file)

In [4]:
# Mask-size parameters handed to torchaudio's FrequencyMasking / TimeMasking
# when the augmentation transform is built.
FREQ_MASK_PARAM = 10
TIME_MASK_PARAM = 20
# Backward-compatible alias: this constant was originally spelled with a typo
# ("PARAN") and other cells still reference that spelling.
TIME_MASK_PARAN = TIME_MASK_PARAM

# Number of output classes of the model.
N_CLASSES       = 1

# Optimisation settings (also collected into `hparams`).
learning_rate   = 1e-3
batch_size      = 128
epochs          = 20
num_workers     = 4


# Root folder under which all per-patient outputs are written.
save_path = 'models_DSF_iESPnet/'

# Distinct patient identifiers, in order of first appearance in the metadata.
patients = df_meta['rns_id'].drop_duplicates().tolist()

In [5]:
# Hyperparameters: the first six entries configure the iESPnet constructor,
# the remaining ones mirror the training settings defined earlier.
hparams = dict(
    n_cnn_layers=3,
    n_rnn_layers=3,
    rnn_dim=[150, 100, 50],
    n_class=N_CLASSES,
    out_ch=[8, 8, 16],
    dropout=0.3,
    learning_rate=learning_rate,
    batch_size=batch_size,
    num_workers=num_workers,
    epochs=epochs,
)

In [6]:
# Example for a single patient: s is the index into `patients`
# (s = 0 --- patient = PIT-RNS1603).

s = 0

In [7]:
# Build the network; the constructor arguments are passed positionally,
# in exactly this order, straight from the hyperparameter dictionary.
_arch_keys = ('n_cnn_layers', 'n_rnn_layers', 'rnn_dim', 'n_class', 'out_ch', 'dropout')
model = iESPnet(*(hparams[key] for key in _arch_keys))

In [8]:
# Per-patient output locations. Each path keeps its trailing '/' because
# later cells build file names by plain string concatenation.
patient_dir      = save_path + patients[s]
save_runs        = patient_dir + '/runs/'
save_models      = patient_dir + '/models/'
save_predictions = patient_dir + '/results/'
save_figs        = patient_dir + '/figs/'

# exist_ok=True makes the creation idempotent and avoids the
# check-then-create race of testing os.path.exists() first.
for out_dir in (save_path, save_runs, save_models, save_predictions, save_figs):
    os.makedirs(out_dir, exist_ok=True)

print('Running training for subject ' + patients[s] + ' [s]: ' + str(s))

Running training for subject PIT-RNS1603 [s]: 0


In [9]:
# define train y test de df_meta

# Leave-one-patient-out split: rows of the selected patient form the test
# set (re-indexed from 0); every other row goes to the training set, which
# keeps its original index labels.
is_held_out = df_meta['rns_id'] == patients[s]

test_df  = df_meta[is_held_out].reset_index(drop=True)
train_df = df_meta[~is_held_out].copy()

In [10]:
# Training dataset without augmentation (transform=None); the target
# transform is whatever smoothing_label() returns.
label_transform = smoothing_label()

train_data_ori = SeizureDatasetLabelTimev2(
    file=train_df,
    root_dir=SPE_DIR,
    transform=None,
    target_transform=label_transform,
)

Acá empieza el cambio (aumento de datos para el entrenamiento):

In [11]:
# Augmentation pipeline: frequency masking, then time masking, then
# permute_spec(), applied in that order.
augmentation_steps = [
    T.FrequencyMasking(FREQ_MASK_PARAM),
    T.TimeMasking(TIME_MASK_PARAN),
    permute_spec(),
]
transform_train = transforms.Compose(augmentation_steps)

In [12]:
# Data augmentation is applied to the training data only.
#
# This cell used to be disabled by wrapping it in a triple-quoted string,
# which left `train_data` undefined for the DataLoader and training cells;
# it also referred to `transform_train1` / `train_data_trf1`, names that are
# never defined. It is re-enabled here using the names that actually exist.

# Augmented view of the same training rows.
train_data_trf = SeizureDatasetLabelTimev2(
    file=train_df,
    root_dir=SPE_DIR,
    transform=transform_train,
    target_transform=smoothing_label(),
)

# Final training set: un-augmented samples followed by augmented ones
# (two concatenated datasets, consistent with n_concat=2 used for the weights).
train_data = torch.utils.data.ConcatDataset([train_data_ori, train_data_trf])

Hasta acá llega el cambio.

In [13]:
# Test data is used "as it is": no augmentation transform is applied
# (NOTE(review): the original comment said it "should be balanced";
# presumably it meant it should NOT be re-balanced - confirm).
test_label_transform = smoothing_label()

test_data = SeizureDatasetLabelTimev2(
    file=test_df,
    root_dir=SPE_DIR,
    transform=None,
    target_transform=test_label_transform,
)

In [14]:
# train_df must be balanced: compute per-sample weights for classes [0, 1].
# n_concat=2 presumably matches the two concatenated training datasets -
# TODO confirm against utilit_train_test.
weights = make_weights_for_balanced_classes(train_df, [0, 1], n_concat=2)

# Draw len(weights) samples per epoch according to those weights.
sampler = torch.utils.data.WeightedRandomSampler(
    weights=weights,
    num_samples=len(weights),
)

In [15]:
# Path prefix passed to the training routine as its output file argument.
outputfile = f'{save_models}model'

In [16]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Using {} device'.format(device))

Using cuda device


In [17]:
# Following the PyTorch suggestion to speed up training.
torch.backends.cudnn.benchmark = True

# Worker processes and pinned memory are only requested when running on CUDA.
if use_cuda:
    kwargs = {'num_workers': hparams["num_workers"], 'pin_memory': True}
else:
    kwargs = {}

train_loader = DataLoader(
    train_data,
    batch_size=hparams["batch_size"],
    sampler=sampler,
    **kwargs
)

In [None]:
# Note (translation of the string below): the DataLoader is an iterator and
# must be treated as such; to access its batches use enumerate or next.
# Strictly, a DataLoader is an iterable, so use next(iter(loader)).
'''El dataloader es un iterador y se debe interpretar como tal, para acceder se debe utilizar o enumerate o next'''

In [None]:
# Launch training. The call receives the dataset and the sampler directly
# (not `train_loader`). The two returned values are, judging by their names,
# the average training losses and F1 scores - TODO confirm in TrainEval.
avg_train_losses, avg_train_f1 = train_model_opt(
    model,
    hparams,
    epochs,
    train_data,
    sampler,
    outputfile,
)