# Training of classifiers on various datasets

In [1]:
import os, random
import sys
# Put the parent directory on the import path so that the `modules` package
# imported further down resolves when the notebook runs from its own folder.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import numpy as np
import pandas as pd
import torch.nn as nn
import torch
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint
import optuna
from torch.utils.data import DataLoader, Dataset
import pickle
from scipy.io import arff
from sklearn.utils import shuffle

import modules.dataset_configs as dc
from modules.classifier_modules import UnivarTSClfDataModule
from modules.classifier_modules import LSTMClf, LSTMClf_SaliencyGuidedTraining, AttentionLSTM
from modules.classifier_modules import CNNClf, CNNClf_SaliencyGuidedTraining, BasicTCN
from modules.helpers import seed

# Fix the random seed once, before any data module or model is created.
# NOTE(review): `seed` comes from modules.helpers; the "Global seed set to 1"
# output below suggests it wraps Lightning's seed_everything -- confirm.
seed(1)

Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1
Global seed set to 1


### Preparation

In [2]:
# Root folder for training artefacts: handed to TensorBoardLogger as its save
# directory and used as the prefix of every ModelCheckpoint `dirpath` below.
save_location = "../models"

In [6]:
# Configuration factories for experiments 1 to 10, in ascending order.
# Each entry is called without arguments in the training loops to obtain the
# settings dict of that experiment.
experiment_list = [
    getattr(dc, f"config_Experiment{number}") for number in range(1, 11)
]

In [7]:
# Baseline settings shared by all training cells of this notebook.
train_config = {
    # network architecture
    "criterion": nn.BCELoss(),
    "hidden_size": 5,
    "num_layers": 1,
    "dropout": 0,
    "d_a": 50,
    # training
    "batch_size": 10,
    "learning_rate": 0.001,
    "max_epochs": 1,
    "num_workers": 8,
    # optimization: passed to study.optimize as the trial budget
    "number_of_trials": 200,
}

### Preparation of real-world dataset

In [8]:
class RealWorldDataset(Dataset):
    """Torch dataset built from a dataframe that contains a "label" column.

    All columns other than "label" form the input sequence of a row; the
    sequence tensor gets a trailing axis of size one, so it has shape
    (rows, non-label columns, 1). Labels are stored with shape
    (rows, number of "label" columns). Both tensors are float32.
    """

    def __init__(self, df):
        is_label = df.columns == "label"
        self.length = len(df)

        target_values = df.iloc[:, is_label].values
        feature_values = df.iloc[:, ~is_label].values

        # append the single-feature axis after converting to float32
        self.sequence = torch.tensor(feature_values).float().unsqueeze(-1)
        self.labels = torch.tensor(target_values).float()

    def __len__(self):
        """Number of rows in the underlying dataframe."""
        return self.length

    def __getitem__(self, idx):
        """Return the (sequence, label) pair stored at position `idx`."""
        return self.sequence[idx], self.labels[idx]

In [9]:
class CWRUDataModule(pl.LightningDataModule):
    """Lightning data module serving a 60/20/20 split of the pickled CWRU dataframe.

    Args:
        batch_size: batch size used by every dataloader.
        data_path: location of the pickled dataframe.
        split_seed: seed of the row shuffle that defines the split.
    """

    def __init__(self, batch_size, data_path="../data/CWRU/CWRU_full_dataframe", split_seed=1):
        super().__init__()
        self.batch_size = batch_size
        self.data_path = data_path
        self.split_seed = split_seed

    def setup(self, stage: str = None):
        """Load the dataframe and cut it into train, validation and test parts."""
        # NOTE: pickle.load can execute arbitrary code; only point data_path
        # at files from a trusted source.
        with open(self.data_path, "rb") as data_file:
            full_data = pickle.load(data_file)

        # Lightning calls setup() once per stage (fit, test, ...). The shuffle
        # is seeded so that every call yields the same partition; an unseeded
        # shuffle would let rows trained on during "fit" reappear in the test
        # split of a later "test" stage.
        full_data = shuffle(full_data, random_state=self.split_seed)

        n_rows = full_data.shape[0]
        train_end = int(0.6 * n_rows)
        val_end = int(0.8 * n_rows)
        self.train_data = RealWorldDataset(full_data.iloc[:train_end, :])
        self.val_data = RealWorldDataset(full_data.iloc[train_end:val_end, :])
        self.test_data = RealWorldDataset(full_data.iloc[val_end:, :])

    def train_dataloader(self):
        """Shuffled loader over the training split."""
        return DataLoader(self.train_data, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Loader over the validation split, in fixed order."""
        # Evaluation data is not shuffled, matching the test/predict loaders.
        return DataLoader(self.val_data, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self):
        """Loader over the test split, in fixed order."""
        return DataLoader(self.test_data, batch_size=self.batch_size, shuffle=False)

    def predict_dataloader(self):
        """Loader used for prediction; serves the test split in fixed order."""
        return DataLoader(self.test_data, batch_size=self.batch_size, shuffle=False)

In [10]:
# Data module for the CWRU data with the baseline batch size from train_config.
# CWRUDataModule.__init__ only accepts batch_size, so num_workers is not passed.
dm_CWRU = CWRUDataModule(batch_size = train_config['batch_size'])

In [11]:
# Load the complete CWRU dataframe and shuffle its rows.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
# The handle is deliberately not called `input`, which would rebind the
# builtin of that name for the rest of the notebook.
with open("../data/CWRU/CWRU_full_dataframe", "rb") as data_file:
    full_data = pickle.load(data_file)

# Seeded so that re-running this cell always produces the same row order.
full_data = shuffle(full_data, random_state=1)

### Training and optimization of classifiers

### a) LSTMClf

In [12]:
# For every experiment: search hyperparameters of an LSTMClf with Optuna, then
# train and checkpoint one final model with the best values found.
for experiment in experiment_list:

    # Settings of the current experiment (simulator, sequence length, split sizes, ...)
    config = experiment()

    def make_datamodule(batch_size: int) -> UnivarTSClfDataModule:
        """Return a data module for the current experiment with the given batch size."""
        return UnivarTSClfDataModule(
            seq_len=config['seq_len'],
            batch_size=batch_size,
            simulator=config['simulator'],
            train_size=config['train_size'],
            val_size=config['val_size'],
            test_size=config['test_size'],
            num_workers=train_config['num_workers'],
        )

    def make_model(hidden_size: int, batch_size: int, learning_rate: float) -> LSTMClf:
        """Return an untrained LSTMClf for the current experiment."""
        return LSTMClf(
            n_features=config['n_features'],
            hidden_size=hidden_size,
            seq_len=config['seq_len'],
            batch_size=batch_size,
            criterion=train_config['criterion'],
            num_layers=train_config['num_layers'],
            dropout=train_config['dropout'],
            learning_rate=learning_rate,
        )

    # Hyperparameter optimization

    def objective(trial: optuna.trial.Trial) -> float:
        """Train a model with the values sampled for `trial` and return its validation loss."""
        learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
        hidden_dim = trial.suggest_int("hidden_dim", low=4, high=64, log=True)
        batch_size = trial.suggest_int("batch_size", low=4, high=64)
        trainer = pl.Trainer(
            max_epochs=train_config['max_epochs'],
            gpus=1,
            logger=True,
            deterministic=True,
        )
        hyperparameters = dict(learning_rate=learning_rate, hidden_size=hidden_dim, batch_size=batch_size)
        trainer.logger.log_hyperparams(hyperparameters)
        # Build a fresh model and data module from the sampled values. Fitting
        # one shared model in every trial would ignore the samples and carry
        # trained weights over from one trial to the next.
        # NOTE(review): if the data module draws new simulated data on each
        # setup, trials are scored on different samples -- confirm.
        trainer.fit(
            make_model(hidden_dim, batch_size, learning_rate),
            datamodule=make_datamodule(batch_size),
        )
        return trainer.callback_metrics["val_loss"].item()

    # NOTE: the objective reports no intermediate values, so the pruner has
    # nothing to act on; it is kept for when per-epoch reporting is added.
    pruner = optuna.pruners.MedianPruner()
    sampler = optuna.samplers.TPESampler(seed=1)

    study = optuna.create_study(direction="minimize", pruner=pruner, sampler=sampler)
    # Stops after the trial budget or after 600 seconds, whichever comes first.
    study.optimize(objective, n_trials=train_config["number_of_trials"], timeout=600)

    # experiment.__name__ starts with "config_"; [7:] drops that prefix.
    model_name = LSTMClf.__name__ + "_" + experiment.__name__[7:]
    print(model_name)
    print("Best trial:")
    trial = study.best_trial
    print("  Validation loss: {}".format(trial.value))
    print("  Parameters: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    # Train model with optimized hyperparameters

    final_model = make_model(
        hidden_size=study.best_params["hidden_dim"],
        batch_size=study.best_params["batch_size"],
        learning_rate=study.best_params["learning_rate"],
    )
    dm_final = make_datamodule(study.best_params["batch_size"])

    # To train on the CWRU data instead, use:
    # dm_final = CWRUDataModule(batch_size = study.best_params["batch_size"])

    logger = TensorBoardLogger(save_location, name=model_name)

    # Keep only the checkpoint with the lowest validation loss.
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        dirpath=save_location + "/" + model_name,
        filename=model_name + "_checkpoints",
        save_top_k=1,
        mode="min",
    )

    final_trainer = pl.Trainer(
        max_epochs=train_config['max_epochs'],
        gpus=1,
        logger=logger,
        callbacks=[checkpoint_callback],
        deterministic=True,
    )

    final_trainer.fit(final_model, dm_final)

### b) LSTMClf_SaliencyGuidedTraining

In [13]:
# For every experiment: search hyperparameters of an
# LSTMClf_SaliencyGuidedTraining with Optuna, then train and checkpoint one
# final model with the best values found.
for experiment in experiment_list:

    # Settings of the current experiment (simulator, sequence length, split sizes, ...)
    config = experiment()

    def make_datamodule(batch_size: int) -> UnivarTSClfDataModule:
        """Return a data module for the current experiment with the given batch size."""
        # num_workers is passed in the search as well, so that search and
        # final training load their data the same way.
        return UnivarTSClfDataModule(
            seq_len=config['seq_len'],
            batch_size=batch_size,
            simulator=config['simulator'],
            train_size=config['train_size'],
            val_size=config['val_size'],
            test_size=config['test_size'],
            num_workers=train_config['num_workers'],
        )

    def make_model(hidden_size: int, batch_size: int, learning_rate: float) -> LSTMClf_SaliencyGuidedTraining:
        """Return an untrained saliency-guided LSTM classifier for the current experiment."""
        return LSTMClf_SaliencyGuidedTraining(
            n_features=config['n_features'],
            hidden_size=hidden_size,
            seq_len=config['seq_len'],
            batch_size=batch_size,
            criterion=train_config['criterion'],
            num_layers=train_config['num_layers'],
            dropout=train_config["dropout"],
            learning_rate=learning_rate,
            mask_factor=config['mask_factor'],
            kl_weight=config['kl_weight'],
        )

    # Hyperparameter optimization

    def objective(trial: optuna.trial.Trial) -> float:
        """Train a model with the values sampled for `trial` and return its validation loss."""
        learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
        hidden_dim = trial.suggest_int("hidden_dim", low=4, high=64, log=True)
        batch_size = trial.suggest_int("batch_size", low=4, high=64)
        trainer = pl.Trainer(
            max_epochs=train_config['max_epochs'],
            gpus=1,
            logger=True,
            deterministic=True,
        )
        hyperparameters = dict(learning_rate=learning_rate, hidden_size=hidden_dim, batch_size=batch_size)
        trainer.logger.log_hyperparams(hyperparameters)
        # Build a fresh model and data module from the sampled values. Fitting
        # one shared model in every trial would ignore the samples and carry
        # trained weights over from one trial to the next.
        # NOTE(review): if the data module draws new simulated data on each
        # setup, trials are scored on different samples -- confirm.
        trainer.fit(
            make_model(hidden_dim, batch_size, learning_rate),
            datamodule=make_datamodule(batch_size),
        )
        return trainer.callback_metrics["val_loss"].item()

    # NOTE: the objective reports no intermediate values, so the pruner has
    # nothing to act on; it is kept for when per-epoch reporting is added.
    pruner = optuna.pruners.MedianPruner()
    sampler = optuna.samplers.TPESampler(seed=1)

    study = optuna.create_study(direction="minimize", pruner=pruner, sampler=sampler)
    # Stops after the trial budget or after 600 seconds, whichever comes first.
    study.optimize(objective, n_trials=train_config["number_of_trials"], timeout=600)

    # experiment.__name__ starts with "config_"; [7:] drops that prefix.
    model_name = LSTMClf_SaliencyGuidedTraining.__name__ + "_" + experiment.__name__[7:]
    print(model_name)
    print("Best trial:")
    trial = study.best_trial
    print("  Value: {}".format(trial.value))
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    # Train model with optimized hyperparameters

    final_model = make_model(
        hidden_size=study.best_params["hidden_dim"],
        batch_size=study.best_params["batch_size"],
        learning_rate=study.best_params["learning_rate"],
    )
    dm_final = make_datamodule(study.best_params["batch_size"])

    logger = TensorBoardLogger(save_location, name=model_name)

    # Keep only the checkpoint with the lowest validation loss.
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        dirpath=save_location + "/" + model_name,
        filename=model_name + "_checkpoints",
        save_top_k=1,
        mode="min",
    )

    final_trainer = pl.Trainer(
        max_epochs=train_config['max_epochs'],
        gpus=1,
        logger=logger,
        callbacks=[checkpoint_callback],
        deterministic=True,
    )

    final_trainer.fit(final_model, dm_final)

### c) LSTM with input cell attention


In [14]:
# For every experiment: search hyperparameters of an AttentionLSTM with
# Optuna, then train and checkpoint one final model with the best values found.
for experiment in experiment_list:

    # Settings of the current experiment (simulator, sequence length, split sizes, ...)
    config = experiment()

    def make_datamodule(batch_size: int) -> UnivarTSClfDataModule:
        """Return a data module for the current experiment with the given batch size."""
        # num_workers is passed in the search as well, so that search and
        # final training load their data the same way.
        return UnivarTSClfDataModule(
            seq_len=config['seq_len'],
            batch_size=batch_size,
            simulator=config['simulator'],
            train_size=config['train_size'],
            val_size=config['val_size'],
            test_size=config['test_size'],
            num_workers=train_config['num_workers'],
        )

    def make_model(hidden_size: int, batch_size: int, learning_rate: float, d_a: int) -> AttentionLSTM:
        """Return an untrained AttentionLSTM for the current experiment."""
        return AttentionLSTM(
            n_features=config['n_features'],
            hidden_size=hidden_size,
            seq_len=config['seq_len'],
            batch_size=batch_size,
            criterion=train_config['criterion'],
            num_layers=train_config['num_layers'],
            dropout=train_config["dropout"],
            learning_rate=learning_rate,
            d_a=d_a,
            r=config['attention_hops'],
        )

    # Hyperparameter optimization

    def objective(trial: optuna.trial.Trial) -> float:
        """Train a model with the values sampled for `trial` and return its validation loss."""
        learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
        hidden_dim = trial.suggest_int("hidden_dim", low=4, high=64, log=True)
        batch_size = trial.suggest_int("batch_size", low=4, high=64)
        d_a = trial.suggest_int("d_a", low=30, high=100)
        trainer = pl.Trainer(
            max_epochs=train_config['max_epochs'],
            gpus=1,
            logger=True,
            deterministic=True,
        )
        hyperparameters = dict(learning_rate=learning_rate, hidden_size=hidden_dim, batch_size=batch_size, d_a=d_a)
        trainer.logger.log_hyperparams(hyperparameters)
        # Build a fresh model and data module from the sampled values. Fitting
        # one shared model in every trial would ignore the samples and carry
        # trained weights over from one trial to the next.
        # NOTE(review): if the data module draws new simulated data on each
        # setup, trials are scored on different samples -- confirm.
        trainer.fit(
            make_model(hidden_dim, batch_size, learning_rate, d_a),
            datamodule=make_datamodule(batch_size),
        )
        return trainer.callback_metrics["val_loss"].item()

    # NOTE: the objective reports no intermediate values, so the pruner has
    # nothing to act on; it is kept for when per-epoch reporting is added.
    pruner = optuna.pruners.MedianPruner()
    sampler = optuna.samplers.TPESampler(seed=1)

    study = optuna.create_study(direction="minimize", pruner=pruner, sampler=sampler)
    # Twice the usual trial budget (this search has the extra d_a dimension);
    # still capped at 600 seconds.
    study.optimize(objective, n_trials=train_config["number_of_trials"]*2, timeout=600)

    # experiment.__name__ starts with "config_"; [7:] drops that prefix.
    model_name = AttentionLSTM.__name__ + "_" + experiment.__name__[7:]
    print(model_name)
    print("Best trial:")
    trial = study.best_trial
    print("  Value: {}".format(trial.value))
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    # Train model with optimized hyperparameters

    final_model = make_model(
        hidden_size=study.best_params["hidden_dim"],
        batch_size=study.best_params["batch_size"],
        learning_rate=study.best_params["learning_rate"],
        d_a=study.best_params['d_a'],
    )
    dm_final = make_datamodule(study.best_params["batch_size"])

    # Needs to be un-commented when using the CWRU dataset:
    # dm_final = CWRUDataModule(batch_size = study.best_params["batch_size"])

    logger = TensorBoardLogger(save_location, name=model_name)

    # Keep only the checkpoint with the lowest validation loss.
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        dirpath=save_location + "/" + model_name,
        filename=model_name + "_checkpoints",
        save_top_k=1,
        mode="min",
    )

    final_trainer = pl.Trainer(
        max_epochs=train_config['max_epochs'],
        gpus=1,
        logger=logger,
        callbacks=[checkpoint_callback],
        deterministic=True,
    )

    final_trainer.fit(final_model, dm_final)

### d) CNNClf

In [15]:
# For every experiment: search hyperparameters of a CNNClf with Optuna, then
# train and checkpoint one final model with the best values found.
for experiment in experiment_list:

    # Settings of the current experiment (simulator, sequence length, split sizes, ...)
    config = experiment()

    def make_datamodule(batch_size: int) -> UnivarTSClfDataModule:
        """Return a data module for the current experiment with the given batch size."""
        return UnivarTSClfDataModule(
            seq_len=config['seq_len'],
            batch_size=batch_size,
            simulator=config['simulator'],
            train_size=config['train_size'],
            val_size=config['val_size'],
            test_size=config['test_size'],
            num_workers=train_config['num_workers'],
        )

    def make_model(batch_size: int, learning_rate: float) -> CNNClf:
        """Return an untrained CNNClf for the current experiment."""
        return CNNClf(
            n_features=config['n_features'],
            seq_len=config['seq_len'],
            batch_size=batch_size,
            criterion=train_config['criterion'],
            num_layers=train_config['num_layers'],
            learning_rate=learning_rate,
        )

    # Hyperparameter optimization

    def objective(trial: optuna.trial.Trial) -> float:
        """Train a model with the values sampled for `trial` and return its validation loss."""
        learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
        batch_size = trial.suggest_int("batch_size", low=4, high=64)
        trainer = pl.Trainer(
            max_epochs=train_config['max_epochs'],
            gpus=1,
            logger=True,
            deterministic=True,
        )
        hyperparameters = dict(learning_rate=learning_rate, batch_size=batch_size)
        trainer.logger.log_hyperparams(hyperparameters)
        # Build a fresh model and data module from the sampled values. Fitting
        # one shared model in every trial would ignore the samples and carry
        # trained weights over from one trial to the next.
        # NOTE(review): if the data module draws new simulated data on each
        # setup, trials are scored on different samples -- confirm.
        trainer.fit(
            make_model(batch_size, learning_rate),
            datamodule=make_datamodule(batch_size),
        )
        return trainer.callback_metrics["val_loss"].item()

    # NOTE: the objective reports no intermediate values, so the pruner has
    # nothing to act on; it is kept for when per-epoch reporting is added.
    pruner = optuna.pruners.MedianPruner()
    sampler = optuna.samplers.TPESampler(seed=1)

    study = optuna.create_study(direction="minimize", pruner=pruner, sampler=sampler)
    # Stops after the trial budget or after 600 seconds, whichever comes first.
    study.optimize(objective, n_trials=train_config["number_of_trials"], timeout=600)

    # experiment.__name__ starts with "config_"; [7:] drops that prefix.
    model_name = CNNClf.__name__ + "_" + experiment.__name__[7:]
    print(model_name)
    print("Best trial:")
    trial = study.best_trial
    print("  Validation loss: {}".format(trial.value))
    print("  Parameters: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    # Train model with optimized hyperparameters

    final_model = make_model(
        batch_size=study.best_params["batch_size"],
        learning_rate=study.best_params["learning_rate"],
    )
    dm_final = make_datamodule(study.best_params["batch_size"])

    logger = TensorBoardLogger(save_location, name=model_name)

    # Keep only the checkpoint with the lowest validation loss.
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        dirpath=save_location + "/" + model_name,
        filename=model_name + "_checkpoints",
        save_top_k=1,
        mode="min",
    )

    final_trainer = pl.Trainer(
        max_epochs=train_config['max_epochs'],
        gpus=1,
        logger=logger,
        callbacks=[checkpoint_callback],
        deterministic=True,
    )

    final_trainer.fit(final_model, dm_final)

### e) CNN with Saliency guided training

In [16]:
# For every experiment: search hyperparameters of a
# CNNClf_SaliencyGuidedTraining with Optuna, then train and checkpoint one
# final model with the best values found.
for experiment in experiment_list:

    # Settings of the current experiment (simulator, sequence length, split sizes, ...)
    config = experiment()

    def make_datamodule(batch_size: int) -> UnivarTSClfDataModule:
        """Return a data module for the current experiment with the given batch size."""
        # num_workers is passed in the search as well, so that search and
        # final training load their data the same way.
        return UnivarTSClfDataModule(
            seq_len=config['seq_len'],
            batch_size=batch_size,
            simulator=config['simulator'],
            train_size=config['train_size'],
            val_size=config['val_size'],
            test_size=config['test_size'],
            num_workers=train_config['num_workers'],
        )

    def make_model(batch_size: int, learning_rate: float) -> CNNClf_SaliencyGuidedTraining:
        """Return an untrained saliency-guided CNN classifier for the current experiment."""
        return CNNClf_SaliencyGuidedTraining(
            n_features=config['n_features'],
            seq_len=config['seq_len'],
            batch_size=batch_size,
            criterion=train_config['criterion'],
            num_layers=train_config['num_layers'],
            learning_rate=learning_rate,
            mask_factor=config['mask_factor'],
            kl_weight=config['kl_weight'],
        )

    # Hyperparameter optimization

    def objective(trial: optuna.trial.Trial) -> float:
        """Train a model with the values sampled for `trial` and return its validation loss."""
        learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
        batch_size = trial.suggest_int("batch_size", low=4, high=64)
        trainer = pl.Trainer(
            max_epochs=train_config['max_epochs'],
            gpus=1,
            logger=True,
            deterministic=True,
        )
        hyperparameters = dict(learning_rate=learning_rate, batch_size=batch_size)
        trainer.logger.log_hyperparams(hyperparameters)
        # Build a fresh model and data module from the sampled values. Fitting
        # one shared model in every trial would ignore the samples and carry
        # trained weights over from one trial to the next.
        # NOTE(review): if the data module draws new simulated data on each
        # setup, trials are scored on different samples -- confirm.
        trainer.fit(
            make_model(batch_size, learning_rate),
            datamodule=make_datamodule(batch_size),
        )
        return trainer.callback_metrics["val_loss"].item()

    # NOTE: the objective reports no intermediate values, so the pruner has
    # nothing to act on; it is kept for when per-epoch reporting is added.
    pruner = optuna.pruners.MedianPruner()
    sampler = optuna.samplers.TPESampler(seed=1)

    study = optuna.create_study(direction="minimize", pruner=pruner, sampler=sampler)
    # Stops after the trial budget or after 600 seconds, whichever comes first.
    study.optimize(objective, n_trials=train_config["number_of_trials"], timeout=600)

    # experiment.__name__ starts with "config_"; [7:] drops that prefix.
    model_name = CNNClf_SaliencyGuidedTraining.__name__ + "_" + experiment.__name__[7:]
    print(model_name)
    print("Best trial:")
    trial = study.best_trial
    print("  Value: {}".format(trial.value))
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    # Train model with optimized hyperparameters

    final_model = make_model(
        batch_size=study.best_params["batch_size"],
        learning_rate=study.best_params['learning_rate'],
    )
    dm_final = make_datamodule(study.best_params["batch_size"])

    # To train on the CWRU data instead, use:
    # dm_final = CWRUDataModule(batch_size = study.best_params["batch_size"])

    logger = TensorBoardLogger(save_location, name=model_name)

    # Keep only the checkpoint with the lowest validation loss.
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        dirpath=save_location + "/" + model_name,
        filename=model_name + "_checkpoints",
        save_top_k=1,
        mode="min",
    )

    final_trainer = pl.Trainer(
        max_epochs=train_config['max_epochs'],
        gpus=1,
        logger=logger,
        callbacks=[checkpoint_callback],
        deterministic=True,
    )

    final_trainer.fit(final_model, dm_final)

### f) TCN

In [17]:
# For every experiment: search hyperparameters of a BasicTCN with Optuna, then
# train and checkpoint one final model with the best values found.
for experiment in experiment_list:

    # Settings of the current experiment (simulator, sequence length, split sizes, ...)
    config = experiment()

    def make_datamodule(batch_size: int) -> UnivarTSClfDataModule:
        """Return a data module for the current experiment with the given batch size."""
        return UnivarTSClfDataModule(
            seq_len=config['seq_len'],
            batch_size=batch_size,
            simulator=config['simulator'],
            train_size=config['train_size'],
            val_size=config['val_size'],
            test_size=config['test_size'],
            num_workers=train_config['num_workers'],
        )

    def make_model(batch_size: int, learning_rate: float) -> BasicTCN:
        """Return an untrained BasicTCN for the current experiment."""
        return BasicTCN(
            n_features=config['n_features'],
            seq_len=config['seq_len'],
            batch_size=batch_size,
            criterion=train_config['criterion'],
            learning_rate=learning_rate,
        )

    # Hyperparameter optimization

    def objective(trial: optuna.trial.Trial) -> float:
        """Train a model with the values sampled for `trial` and return its validation loss."""
        learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
        batch_size = trial.suggest_int("batch_size", low=4, high=64)
        trainer = pl.Trainer(
            max_epochs=train_config['max_epochs'],
            gpus=1,
            logger=True,
            deterministic=True,
        )
        hyperparameters = dict(learning_rate=learning_rate, batch_size=batch_size)
        trainer.logger.log_hyperparams(hyperparameters)
        # Build a fresh model and data module from the sampled values. Fitting
        # one shared model in every trial would ignore the samples and carry
        # trained weights over from one trial to the next.
        # NOTE(review): if the data module draws new simulated data on each
        # setup, trials are scored on different samples -- confirm.
        trainer.fit(
            make_model(batch_size, learning_rate),
            datamodule=make_datamodule(batch_size),
        )
        return trainer.callback_metrics["val_loss"].item()

    # NOTE: the objective reports no intermediate values, so the pruner has
    # nothing to act on; it is kept for when per-epoch reporting is added.
    pruner = optuna.pruners.MedianPruner()
    sampler = optuna.samplers.TPESampler(seed=1)

    study = optuna.create_study(direction="minimize", pruner=pruner, sampler=sampler)
    # Stops after the trial budget or after 600 seconds, whichever comes first.
    study.optimize(objective, n_trials=train_config["number_of_trials"], timeout=600)

    # experiment.__name__ starts with "config_"; [7:] drops that prefix.
    model_name = BasicTCN.__name__ + "_" + experiment.__name__[7:]
    print(model_name)
    print("Best trial:")
    trial = study.best_trial
    print("  Validation loss: {}".format(trial.value))
    print("  Parameters: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    # Train model with optimized hyperparameters

    final_model = make_model(
        batch_size=study.best_params["batch_size"],
        learning_rate=study.best_params["learning_rate"],
    )
    dm_final = make_datamodule(study.best_params["batch_size"])

    # To train on the CWRU data instead, use:
    # dm_final = CWRUDataModule(batch_size = study.best_params["batch_size"])

    logger = TensorBoardLogger(save_location, name=model_name)

    # Keep only the checkpoint with the lowest validation loss. The comma
    # after `filename` was missing (a SyntaxError); save_top_k is spelled out
    # to match the other training cells.
    checkpoint_callback = ModelCheckpoint(
        monitor="val_loss",
        dirpath=save_location + "/" + model_name,
        filename=model_name + "_checkpoints",
        save_top_k=1,
        mode="min",
    )

    final_trainer = pl.Trainer(
        max_epochs=train_config['max_epochs'],
        gpus=1,
        logger=logger,
        callbacks=[checkpoint_callback],
        deterministic=True,
    )

    final_trainer.fit(final_model, dm_final)