# iESPnet 

In [1]:
import sys
import os
import torch
import random
import gc

import torchaudio.transforms    as T
import torch.optim              as optim
import pandas                   as pd
import numpy                    as np

from torchvision       import transforms

sys.path.append(os.path.abspath(os.path.join('..','05-Train-Test')))
from utilit_train_test import make_weights_for_balanced_classes

sys.path.append(os.path.abspath(os.path.join('..','..','iESPnet_SRC_main','utilities')))
from Generator         import SeizureDatasetLabelTime, permute_spec, smoothing_label
from Model             import iESPnet
from TrainEval         import train_model_iespnet, test_model_iespnet, get_performance_indices

In [2]:
# Directory where the spectrograms are stored (absolute path on martin's machine).
SPE_DIR        = '/media/martin/Disco2/Rns_Data/PITT_PI_SPEC/'
# Metadata CSV with one row per file; it sits in the METADATA folder under SPE_DIR.
meta_data_file = SPE_DIR + 'METADATA/allfiles_metadata.csv'

# Load the metadata table that drives the train / validation / test split.
df_meta = pd.read_csv(meta_data_file)

In [3]:
# iESPnet settings
FREQ_MASK_PARAM = 10                 # argument given to T.FrequencyMasking
TIME_MASK_PARAM = 20                 # argument given to T.TimeMasking
TIME_MASK_PARAN = TIME_MASK_PARAM    # misspelled legacy name, kept because other cells still reference it
N_CLASSES       = 1
learning_rate   = 1e-3
batch_size      = 128
epochs          = 20
num_workers     = 4
save_path       = 'iespnet_global/'  # root folder for everything this notebook writes
patients        = df_meta['rns_id'].unique().tolist()  # every patient id present in the metadata

# iESPnet hyperparameters: the first six entries are forwarded to the model
# constructor, the remaining ones configure the optimizer and the data loaders.
hparams = {
        "n_cnn_layers" : 3,
        "n_rnn_layers" : 3,
        "rnn_dim"      : [150, 100, 50],
        "n_class"      : N_CLASSES,
        "out_ch"       : [8,8,16],
        "dropout"      : 0.3,
        "learning_rate": learning_rate,
        "batch_size"   : batch_size,
        "num_workers"  : num_workers,
        "epochs"       : epochs
        }

# Patient-level split of df_meta: the patients listed below are held out for
# testing and validation, every remaining patient is used for training.
test_id  = ['PIT-RNS1090', 'PIT-RNS8973', 'PIT-RNS1438', 'PIT-RNS8326', 'PIT-RNS3016']
vali_id  = ['PIT-RNS1603', 'PIT-RNS1556', 'PIT-RNS1534', 'PIT-RNS6989', 'PIT-RNS2543', 'PIT-RNS7168', 'PIT-RNS6762']

# A patient appearing in both lists would leak validation data into the test set.
assert set(test_id).isdisjoint(vali_id), 'test_id and vali_id must not share patients'

# Build each held-out frame with a single concat (rows grouped in the order of
# the id list, index renumbered from 0) instead of growing a frame inside a loop.
test_df = pd.concat(
    [df_meta[df_meta['rns_id'] == patient_id] for patient_id in test_id],
    ignore_index=True,
)
vali_df = pd.concat(
    [df_meta[df_meta['rns_id'] == patient_id] for patient_id in vali_id],
    ignore_index=True,
)

# Training rows keep their original df_meta index labels; .copy() makes
# train_df independent from df_meta.
train_df = df_meta[~df_meta['rns_id'].isin(test_id + vali_id)].copy()

# Every metadata row must land in exactly one of the three splits.
assert len(train_df) + len(test_df) + len(vali_df) == len(df_meta)

# Name of the experiment to run
experiments = 'exp3.2'

In [4]:
# Build the network. The constructor receives these hparams entries
# positionally, in exactly this order.
_model_arg_names = (
    'n_cnn_layers',
    'n_rnn_layers',
    'rnn_dim',
    'n_class',
    'out_ch',
    'dropout',
)
model = iESPnet(*(hparams[arg_name] for arg_name in _model_arg_names))

# Output folders of this experiment, all located under save_path/<experiment>/.
save_runs        = save_path + experiments +  '/runs/'
save_models      = save_path + experiments +  '/models/'
save_predictions = save_path + experiments +  '/results/'
save_figs        = save_path + experiments +  '/figs/'

# exist_ok=True makes the creation idempotent and avoids the race between an
# os.path.exists() check and the os.makedirs() call that follows it.
for out_dir in (save_path, save_runs, save_models, save_predictions, save_figs):
    os.makedirs(out_dir, exist_ok=True)

print('Running training for: ' + experiments)

# Datasets

# Training set without any input transform.
train_data_orig = SeizureDatasetLabelTime(
    file=train_df,
    root_dir=SPE_DIR,
    transform=None,
    target_transform=smoothing_label(),
)

# Augmentation pipeline: frequency masking, time masking, then permute_spec.
augmentation_steps = [
    T.FrequencyMasking(FREQ_MASK_PARAM),
    T.TimeMasking(TIME_MASK_PARAN),
    permute_spec(),
]
transform_train = transforms.Compose(augmentation_steps)

# Second view of the same training rows, this time with the augmentation
# applied; augmentation is used on the training data only.
train_data_tran = SeizureDatasetLabelTime(
    file=train_df,
    root_dir=SPE_DIR,
    transform=transform_train,
    target_transform=smoothing_label(),
)

# Final training set = plain samples followed by their augmented counterparts.
train_data = torch.utils.data.ConcatDataset([train_data_orig, train_data_tran])

# Test set: no input transform is applied.
# NOTE(review): the previous comment read "should be balanced, just be as it is";
# presumably it meant the test data is NOT rebalanced - confirm.
test_data = SeizureDatasetLabelTime(
    file=test_df,
    root_dir=SPE_DIR,
    transform=None,
    target_transform=smoothing_label(),
)

# Validation set: built the same way as the test set (same review note applies).
vali_data = SeizureDatasetLabelTime(
    file=vali_df,
    root_dir=SPE_DIR,
    transform=None,
    target_transform=smoothing_label(),
)

Running training for: exp3.2


In [5]:
from torch.utils.data import DataLoader

# train_df must be balanced: compute one sampling weight per training sample
# for the classes [0, 1].
# NOTE(review): n_concat=2 presumably matches the two datasets concatenated
# into train_data - confirm in make_weights_for_balanced_classes.
weights = make_weights_for_balanced_classes(train_df, [0,1], n_concat=2)

# Draw as many samples per epoch as there are weights, following those weights.
sampler = torch.utils.data.WeightedRandomSampler(
    weights=weights,
    num_samples=len(weights),
)

In [6]:
# The sampler needs exactly one weight per training sample. The exception must
# be raised explicitly: merely constructing it does nothing.
if len(weights) != len(train_data):
    raise AssertionError('sampler should be equal to train data shape')

In [7]:


# Per-epoch history, to be filled while the model trains for the configured
# number of epochs.
avg_train_losses, train_accs = [], []   # training: average loss / accuracy
avg_valid_losses, valid_accs = [], []   # validation: average loss / accuracy

# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
print('Using {} device'.format(device))

# Reproducibility settings: cuDNN benchmark mode is turned off and
# deterministic algorithms are requested.
torch.backends.cudnn.benchmark     = False
torch.backends.cudnn.deterministic = True

# Worker processes and pinned memory are only configured when running on CUDA.
if use_cuda:
    kwargs = {'num_workers': hparams["num_workers"], 'pin_memory': True}
else:
    kwargs = {}

# Training batches are drawn through the weighted sampler; validation batches
# are read in order.
train_loader = DataLoader(
    train_data,
    batch_size=hparams["batch_size"],
    sampler=sampler,
    **kwargs
)
valid_loader = DataLoader(
    vali_data,
    batch_size=hparams["batch_size"],
    shuffle=False,
    **kwargs
)

Using cuda device


In [8]:
from torch import nn

# Move the model to the selected device.
model.to(device)

# Report the total number of elements over all model parameters.
n_params = sum(param.nelement() for param in model.parameters())
print('Num Model Parameters', n_params)

# AdamW optimizer with weight decay.
optimizer = optim.AdamW(
    model.parameters(),
    lr=hparams['learning_rate'],
    weight_decay=1e-4,
)

# One-cycle learning-rate schedule spanning every batch of every epoch,
# with linear annealing and the configured learning rate as its peak.
steps_per_epoch = len(train_loader)
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=hparams['learning_rate'],
    steps_per_epoch=steps_per_epoch,
    epochs=hparams['epochs'],
    anneal_strategy='linear',
)

# Binary cross-entropy on raw logits.
criterion = nn.BCEWithLogitsLoss().to(device)

Num Model Parameters 1654837


In [9]:
# Accumulators for the training loss.
train_losses = []
train_loss   = 0.0

# precision = Precision(average=False, device=device)
# recall    = Recall(average=False, device=device)

# Switch the model to training mode and reset the batch counter.
model.train()
cont = 0

# Pull a single batch from the training loader, move it to the device and
# stop right away.
for batch_idx, _data in enumerate(train_loader):
    cont += 1

    spectrograms, labels = _data
    spectrograms = spectrograms.to(device)
    labels = labels.to(device)
    break

torch.Size([128, 4, 120, 181])
