This notebook trains the models, with options for K-fold cross-validation and a hyperparameter search.

The modules used in this notebook are imported below. 

- The module utils_model contains auxiliary files for the model.
- The module network contains different neural network architectures.

In [4]:
from utils_model.data_helper import *
from utils_model.model_utils import *
from utils_model.visualization import *
from utils_model.process_bpm import *
from utils_model.eval_methods import *
from utils_model.data_prep_model import *
from network import LSTM as MyModel

from sklearn.model_selection import ParameterGrid
from sktime.distances import dtw_distance
import copy
import os

## Train model

### Choose params and load data

In [19]:
# Hyperparameter search space. Every value is a list of candidates;
# ParameterGrid expands the lists into all possible combinations.
# NOTE(review): 'overlap' is forwarded to split_dataset_windows and its unit is
# not visible here. If it is expressed in samples, 150 equals the whole window
# for win_secs=5 at 30 fps -- confirm that this combination is valid.
params = {
    'epochs': [1500],
    'batch_size': [24],
    'lr_scheduler': ['plateau'],
    'units': [
        [90, 60, 30, 1],
        [60, 30, 1],
        [120, 90, 60, 30, 1],
    ],
    'loss': ['custom'],
    'metrics': [['r', 'rmse', 'mse']],
    'optimizer': ['adam'],
    'datasets': [['PURE']],
    'datasets_only_test': [[]],
    'methods_list': [['gt', 'cpu_LGI', 'cpu_CHROM', 'cpu_POS', 'cpu_ICA']],
    'landmarks': ['combined'],
    'fps': [30],
    'win_secs': [10, 5],
    'norm': ['min_max'],
    'act_filter': ['all'],  # allowed values: resting, resting+, all
    'dropout': [0.2],
    'K_fold': [5],
    'early_stopping': [False],
    'overlap': [150, 0],
}

param_grid = ParameterGrid(params)

# Configuration used by the single-experiment cells: the first grid point,
# extended with the sizes derived from it.
param = param_grid[0]
param.update(
    # window length in samples
    samples=int(param['fps'] * param['win_secs']),
    # one entry of methods_list is not counted (presumably the 'gt' reference -- TODO confirm)
    n_methods=len(param['methods_list']) - 1,
)
landmarks = select_landmarks(param['landmarks'])
param['n_landmarks'] = len(landmarks)

In [17]:
# Load the datasets listed in param['datasets'] and run the preprocessing chain.
# The dataset root is built with os.path.join so the path separator matches the
# host OS (on Windows the result is still '.\\pyVHR\\datasets').
DATASETS_DIR = os.path.join('.', 'pyVHR', 'datasets')
data, fps = load_dataset(DATASETS_DIR, param['datasets'])

# Keep an untouched copy of the loaded data before it is cleaned and windowed.
data_no_window = copy.deepcopy(data)

# Each helper receives the dataset and returns the transformed dataset;
# the order of the steps is preserved from the original pipeline.
data = clean_landmarks(data)
data = resample_gt(data)
data = split_dataset_windows(data, param['win_secs'], fps, overlap=param['overlap'])
data = resample_windows(data, param['win_secs'], param['fps'])
data = norm_windows(data, mode=param['norm'], dim=3)
data = clean_windowed_dataset(data)
data = hist_equalize(data)

loaded dataset  ['PURE']


In [None]:
# Build the train/test splits together with the subject names of each split.
# `process` gets a deep copy of `data`, so the preprocessed dataset itself is
# never handed over; the (possibly updated) `param` dict it returns replaces ours.
data_splits, names_splits, param = process(
    param,
    copy.deepcopy(data),
    ground_truth='gt',
)

### Single training

In [None]:
# Train one model on the first split only.
x_train, x_test, y_train, y_test = (
    data_splits[0][key] for key in ('x_train', 'x_test', 'y_train', 'y_test')
)

model = MyModel(
    param['samples'],
    param['n_methods'],
    param['n_landmarks'],
    drop=param['dropout'],
    units=param['units'],
)
# Build explicitly so that the layer summary can be printed before training.
model.build((None, param['samples'], param['n_methods'] * param['n_landmarks']))
model.layers[0].summary()

model, history = run_model(
    model, param, x_train, x_test, y_train, y_test,
    write_dir=True, save_plot=True,
)

In [None]:
# Repeated training on the first split.
# NOTE(review): every pass reads data_splits[0]; the loop only repeats the run
# len(data_splits) times and never moves on to another fold. If per-fold
# training was intended here, the split index should be the loop variable -- confirm.
models0, histories0 = [], []
for i in range(len(data_splits)):
    print('Number',i+1,'/',len(data_splits))

    # Same split on every pass (index 0).
    x_train, x_test, y_train, y_test = (
        data_splits[0][key] for key in ('x_train', 'x_test', 'y_train', 'y_test')
    )

    # A fresh model per pass, so the runs do not share weights.
    model = MyModel(
        param['samples'],
        param['n_methods'],
        param['n_landmarks'],
        drop=param['dropout'],
        units=param['units'],
    )
    model, history = run_model(
        model, param, x_train, x_test, y_train, y_test,
        write_dir=True, save_plot=True,
    )

    models0.append(model)
    histories0.append(history)

### CV training

In [None]:
# Repeated K-fold cross-validation.
# The whole K-fold procedure is run N_REPEATS times. The results of every
# repetition are kept in all_models / all_histories (one inner list per
# repetition); `models` / `histories` still hold the last repetition afterwards.
N_REPEATS = 6

all_models = []
all_histories = []
for rep in range(N_REPEATS):  # distinct name: the fold loop below uses `i`
    print('Repetition',rep+1,'/',N_REPEATS)
    models = []
    histories = []
    for i in range(len(data_splits)):
        print('Fold',i+1,'/',len(data_splits))
        # Train/test data of the current fold.
        x_train = data_splits[i]['x_train']
        x_test = data_splits[i]['x_test']
        y_train = data_splits[i]['y_train']
        y_test = data_splits[i]['y_test']

        # A fresh model per fold, so the folds do not share weights.
        model = MyModel(param['samples'],param['n_methods'],param['n_landmarks'],drop=param['dropout'],units=param['units'])
        model,history = run_model(model,param,x_train,x_test,y_train,y_test,write_dir=True,save_plot=True)
        models.append(model)
        histories.append(history)

    all_models.append(models)
    all_histories.append(histories)

### Hyperparameter tuning

In [None]:
# HYPERPARAMETER TUNING
# For every combination in param_grid: rebuild the dataset with that
# combination's windowing/normalisation settings, train one model on the first
# split and record its validation loss. The combination with the lowest
# recorded loss is reported at the end.
# NOTE: this loop rebinds the notebook-level names `param`, `data`,
# `data_splits`, `model`, ... -- re-run the setup cells before going back to
# the single-experiment cells.

all_acc = []  # one validation loss per grid point (lower is better), despite the name
for i,param in enumerate(param_grid):
    # Sizes derived from the current combination.
    param['samples'] = int(param['fps']*param['win_secs'])
    param['n_methods'] = len(param['methods_list'])-1
    landmarks = select_landmarks(param['landmarks'])
    param['n_landmarks'] = len(landmarks)

    # os.path.join keeps the dataset path valid on every OS
    # (on Windows it is still '.\\pyVHR\\datasets').
    data, fps = load_dataset(os.path.join('.', 'pyVHR', 'datasets'),param['datasets'])
    data_no_window = copy.deepcopy(data)
    data = clean_landmarks(data)
    data = resample_gt(data)
    data = split_dataset_windows(data, param['win_secs'], fps, overlap=param['overlap'])
    data = resample_windows(data,param['win_secs'],param['fps'])
    data = norm_windows(data, mode=param['norm'],dim=3)
    data = clean_windowed_dataset(data)
    data = hist_equalize(data)

    # Get the train and test splits, and the names of the splits
    data_splits,names_splits,param = process(param,copy.deepcopy(data),ground_truth='gt')

    # Only the first split is used to score a combination.
    x_train = data_splits[0]['x_train']
    x_test = data_splits[0]['x_test']
    y_train = data_splits[0]['y_train']
    y_test = data_splits[0]['y_test']

    start_time = time.time()
    print('Fit number ',i+1,'/',len(param_grid))  # 1-based, like the other training cells
    print('---------------------------------------')
    for key in param:
        print(key,':',param[key])
    print('---------------------------------------')

    model = MyModel(param['samples'],param['n_methods'],param['n_landmarks'],drop=param['dropout'],units=param['units'])
    model,history = run_model(model,param,x_train,x_test,y_train,y_test,write_dir=True,save_plot=True)

    print("--- %s seconds ---" % (np.round(time.time() - start_time,2)))
    # Validation loss of the LAST epoch (not the minimum over epochs); the
    # variable name is historical.
    best_val_acc = history.history['val_loss'][-1]
    all_acc.append(best_val_acc)

# nanargmin skips runs whose loss diverged to NaN (plain argmin would select
# them as "best"); it raises ValueError if every run is NaN.
best = np.nanargmin(np.array(all_acc))
print('The best combination is the number',best,':',param_grid[best], 'with a validation loss of',all_acc[best])

In [None]:
# Report the grid point with the lowest recorded validation loss.
# all_acc holds validation losses (lower is better), so the minimum is taken.
# nanargmin skips runs whose loss diverged to NaN (plain argmin would select
# them as "best"); it raises ValueError if every entry is NaN.
best = np.nanargmin(np.array(all_acc))
print('The best combination is the number',best,':',param_grid[best], 'with a validation loss of',all_acc[best])