## Hyperparameter Tuning with Optuna

In [None]:
import os

# Switch the working directory to the project's src/ folder.
# NOTE(review): presumably needed so the local imports (eda, model, dataset)
# and the relative '../data/...' paths resolve -- confirm; the path is
# machine-specific.
SRC_DIR = '/home/roobz/Jupyter/afib-detector/src/'
os.chdir(SRC_DIR)

In [None]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

from eda import load_label_map
import random
import pandas as pd
import numpy as np
import optuna

from model import Afib_CNN, epoch_train, epoch_test, save_model
from dataset import load_train_test_datasets

%load_ext autoreload
%autoreload 2

In [None]:
# Experiment configuration and data loading shared by every trial.
data_folder = '../data/physionet/afdb/'
window_size = 2500

# Keyword arguments forwarded to epoch_train / epoch_test respectively.
model_train_kwargs = dict(train_size=100000, batch_size=64)
model_test_kwargs = dict(test_size=25000, test_batch_size=1000)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fixed seed so every run of the notebook works with the same split.
train_dataset, test_dataset = load_train_test_datasets(
    window_size,
    model_train_kwargs['train_size'],
    model_test_kwargs['test_size'],
    random_seed=42,
)

In [None]:
def objective(trial):
    """Train a fresh model with trial-sampled hyperparameters.

    Samples ``lr`` (log-uniform in [1e-5, 1]) and ``weight_decay`` (uniform in
    [0, 1]) from the trial, trains for a fixed number of epochs, and reports
    the average test loss after every epoch so the study's pruner can stop
    unpromising trials early.

    Args:
        trial: The Optuna trial used to sample hyperparameters and to report
            intermediate results.

    Returns:
        The average test loss returned by ``epoch_test`` after the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: If the trial should be pruned based on
            the reported intermediate values.
    """
    # Learning-rate decay is currently disabled: StepLR with gamma=1 keeps lr
    # constant. To search it: trial.suggest_loguniform("gamma", 1e-5, 1-1e-10)
    gamma = 1
    lr = trial.suggest_loguniform("lr", 1e-5, 1)
    weight_decay = trial.suggest_uniform("weight_decay", 0, 1)

    model = define_model(trial)
    # weight_decay must actually reach the optimizer; otherwise the study
    # spends its budget searching a parameter that has no effect on training.
    optimizer = optim.Adadelta(
        model.parameters(), lr=lr, weight_decay=weight_decay
    )
    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
    # Alternative: ReduceLROnPlateau(optimizer, mode='min', patience=10)

    epochs = 10
    result = None
    for epoch in range(1, epochs + 1):
        epoch_train(model, device, train_dataset, optimizer, epoch, **model_train_kwargs)
        acc, avg_loss = epoch_test(model, device, test_dataset, **model_test_kwargs)
        scheduler.step()

        # The study minimizes this value, so report the loss (not accuracy).
        result = avg_loss
        trial.report(result, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return result
    

def define_model(trial):
    """Build the Afib_CNN for a trial and move it to the configured device.

    ``trial`` is currently unused: the ``repeat_layers`` argument is fixed
    at 1 instead of being sampled.
    """
    n_repeats = 1  # to search it: trial.suggest_int('repeat_layers', 1, 4)
    return Afib_CNN(2500, 2, n_repeats).to(device)

In [None]:
# Run the hyperparameter search: 100 trials, minimizing the value returned by
# objective().
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

# ``optuna.structs`` was deprecated in Optuna 1.4 and removed in 3.0, so
# resolve TrialState from its current location first and fall back to the old
# one only on installations that predate the move.
try:
    TrialState = optuna.trial.TrialState
except AttributeError:
    TrialState = optuna.structs.TrialState

# Split the finished trials by outcome for the summary printed below.
pruned_trials = [t for t in study.trials if t.state == TrialState.PRUNED]
complete_trials = [t for t in study.trials if t.state == TrialState.COMPLETE]

In [None]:
# Summarize how the search went.
n_finished = len(study.trials)
n_pruned = len(pruned_trials)
n_complete = len(complete_trials)

print("Study statistics: ")
print("  Number of finished trials: ", n_finished)
print("  Number of pruned trials: ", n_pruned)
print("  Number of complete trials: ", n_complete)

# Show the best trial's objective value and the parameters that produced it.
print("Best trial:")
best_trial = study.best_trial

print("  Value: ", best_trial.value)

print("  Params: ")
for param_name, param_value in best_trial.params.items():
    print("    {}: {}".format(param_name, param_value))

# Retrain with the best parameters for a longer schedule.
# NOTE(review): train_model is not among the names imported from `model` in
# the visible import cell -- confirm it is defined before running this cell.
best_params = study.best_params
model, training_record = train_model(best_params, epochs=150)

In [None]:
# Persist one row per trial: its objective value and its sampled parameters.
# NOTE(review): the 'acc' column holds trial.value, i.e. whatever objective()
# returns (the average test loss, since the study minimizes), so descending
# order lists the highest loss first -- confirm this naming/order is intended.
trial_rows = [(t.value, t.params) for t in study.trials]
results_df = pd.DataFrame(data=trial_rows, columns=['acc', 'params'])
results_df = results_df.sort_values('acc', ascending=False)
results_df.to_pickle('../data/cleaned/hyperparameter_optimization.pkl')