In [3]:
import torch
import csv
import os
import random
import numpy as np
from torch.utils.data import TensorDataset, DataLoader, random_split
import pandas as pd
from dataset_reader import Traces_Dataset
from mlp_model import MLP
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import optuna
from optuna.trial import TrialState

In [53]:
DEVICE = torch.device("cpu")
target_features = 7
DIR = os.getcwd()
EPOCHS = 50

N_TRAIN_EXAMPLES = 10 * 300
N_VALID_EXAMPLES = 10 * 100

In [52]:
def define_model(trial):
    # We optimize the number of layers, hidden units and dropout ratio in each layer.
    n_layers = trial.suggest_int("n_layers", 1, 5)
    layers = []

    in_features = 321
    for i in range(n_layers):
        out_features = trial.suggest_int("n_units_l{}".format(i), 16, 321)
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())
        p = trial.suggest_float("dropout_l{}".format(i), 0.2, 0.5)
        layers.append(nn.Dropout(p))
        in_features = out_features

    layers.append(nn.Linear(in_features, target_features))
    # layers.append(nn.LogSoftmax(dim=1))

    return nn.Sequential(*layers)

In [61]:
def get_dataset(trial):
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128, 256, 512, 1024])
    
    dataset = Traces_Dataset('dataset_test.csv')
    dataset.split_dataset(0.95, 0.05, 0)
    dataset.clean_features()
    dataset.find_mean_std()
    dataset.normalize()
    print(dataset.inputs.shape)

    # initialize train, val, test set
    X_train = dataset[dataset.train_set.indices][0]
    Y_train = dataset[dataset.train_set.indices][1]

    X_val = dataset[dataset.val_set.indices][0]
    Y_val = dataset[dataset.val_set.indices][1]

    X_test = dataset[dataset.test_set.indices][0]
    Y_test = dataset[dataset.test_set.indices][1]

    # initialize dataloader 
    train_dataset = TensorDataset(X_train, Y_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    val_dataset = TensorDataset(X_val, Y_val)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

    return train_loader, val_loader

In [64]:
def objective(trial):
    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    
    

    # Get the FashionMNIST dataset.
    train_loader, valid_loader = get_dataset(trial)

    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (train_inputs, train_targets) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * 10 >= N_TRAIN_EXAMPLES:
                break

            # data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
            train_inputs, train_targets = train_inputs.to(DEVICE), train_targets.to(DEVICE)

            optimizer.zero_grad()
            train_outputs = model(train_inputs)
            loss = nn.MSELoss()(train_outputs, train_targets)
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        total_val_loss = 0.0
        with torch.no_grad():
            for batch_idx, (val_inputs, val_targets) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * 10 >= N_VALID_EXAMPLES:
                    break
                # data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                val_inputs, val_targets = val_inputs.to(DEVICE), val_targets.to(DEVICE)

                val_outputs = model(val_inputs)
                # Get the index of the max log-probability.
                val_loss = nn.MSELoss()(val_outputs, val_targets)
                total_val_loss += val_loss.item()

            # Average validation loss for the epoch
        avg_val_loss = total_val_loss / len(valid_loader)

        trial.report(avg_val_loss, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return avg_val_loss

In [65]:
if __name__ == "__main__":
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=20, timeout=600)

    pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

[I 2024-03-27 01:37:49,481] A new study created in memory with name: no-name-527fdee5-c8a9-490d-97aa-79f1fcf1484b


torch.Size([1000, 321])


[I 2024-03-27 01:37:49,780] Trial 0 finished with value: 2855.56884765625 and parameters: {'n_layers': 1, 'n_units_l0': 106, 'dropout_l0': 0.22733226711445575, 'optimizer': 'RMSprop', 'lr': 0.00010700405515314282, 'batch_size': 512}. Best is trial 0 with value: 2855.56884765625.


torch.Size([1000, 321])


[I 2024-03-27 01:37:50,045] Trial 1 finished with value: 148.9717559814453 and parameters: {'n_layers': 2, 'n_units_l0': 43, 'dropout_l0': 0.4990525363955867, 'n_units_l1': 22, 'dropout_l1': 0.2606691756103567, 'optimizer': 'Adam', 'lr': 0.04561459107659196, 'batch_size': 256}. Best is trial 1 with value: 148.9717559814453.


torch.Size([1000, 321])


[I 2024-03-27 01:37:51,154] Trial 2 finished with value: 349.80023193359375 and parameters: {'n_layers': 4, 'n_units_l0': 129, 'dropout_l0': 0.48551120660818026, 'n_units_l1': 306, 'dropout_l1': 0.39435393372484245, 'n_units_l2': 304, 'dropout_l2': 0.2708554250443419, 'n_units_l3': 234, 'dropout_l3': 0.43256801047677307, 'optimizer': 'Adam', 'lr': 3.356047461620424e-05, 'batch_size': 128}. Best is trial 1 with value: 148.9717559814453.


torch.Size([1000, 321])


[I 2024-03-27 01:37:51,426] Trial 3 finished with value: 3223.8056640625 and parameters: {'n_layers': 2, 'n_units_l0': 93, 'dropout_l0': 0.3477335924067406, 'n_units_l1': 60, 'dropout_l1': 0.2509113497490308, 'optimizer': 'RMSprop', 'lr': 1.3103820133459395e-05, 'batch_size': 512}. Best is trial 1 with value: 148.9717559814453.


torch.Size([1000, 321])


[I 2024-03-27 01:37:55,178] Trial 4 finished with value: 274.44824600219727 and parameters: {'n_layers': 5, 'n_units_l0': 304, 'dropout_l0': 0.37397797090505525, 'n_units_l1': 36, 'dropout_l1': 0.4398636523504552, 'n_units_l2': 92, 'dropout_l2': 0.29552974740846266, 'n_units_l3': 86, 'dropout_l3': 0.2984455323725679, 'n_units_l4': 273, 'dropout_l4': 0.2163777070624155, 'optimizer': 'Adam', 'lr': 5.268220843306462e-05, 'batch_size': 16}. Best is trial 1 with value: 148.9717559814453.


torch.Size([1000, 321])


[I 2024-03-27 01:37:55,597] Trial 5 finished with value: 256.9565734863281 and parameters: {'n_layers': 3, 'n_units_l0': 172, 'dropout_l0': 0.31729669608876754, 'n_units_l1': 178, 'dropout_l1': 0.2793585828718232, 'n_units_l2': 135, 'dropout_l2': 0.41313037454744045, 'optimizer': 'RMSprop', 'lr': 0.0003776858268757089, 'batch_size': 1024}. Best is trial 1 with value: 148.9717559814453.
[I 2024-03-27 01:37:55,688] Trial 6 pruned. 
[I 2024-03-27 01:37:55,733] Trial 7 pruned. 
[I 2024-03-27 01:37:55,832] Trial 8 pruned. 


torch.Size([1000, 321])
torch.Size([1000, 321])
torch.Size([1000, 321])
torch.Size([1000, 321])


[I 2024-03-27 01:37:56,536] Trial 9 finished with value: 96.78843688964844 and parameters: {'n_layers': 3, 'n_units_l0': 105, 'dropout_l0': 0.2616313052618471, 'n_units_l1': 271, 'dropout_l1': 0.22890079083443746, 'n_units_l2': 169, 'dropout_l2': 0.448719831664334, 'optimizer': 'Adam', 'lr': 0.0013844500401859064, 'batch_size': 256}. Best is trial 9 with value: 96.78843688964844.
  return np.nanmin(values)
[I 2024-03-27 01:37:56,608] Trial 10 pruned. 
[I 2024-03-27 01:37:56,748] Trial 11 pruned. 


torch.Size([1000, 321])
torch.Size([1000, 321])


[I 2024-03-27 01:37:56,804] Trial 12 pruned. 


torch.Size([1000, 321])
torch.Size([1000, 321])


[I 2024-03-27 01:37:57,290] Trial 13 finished with value: 89.01972198486328 and parameters: {'n_layers': 3, 'n_units_l0': 52, 'dropout_l0': 0.2806062747331935, 'n_units_l1': 120, 'dropout_l1': 0.33868845274871306, 'n_units_l2': 249, 'dropout_l2': 0.20595601499396263, 'optimizer': 'Adam', 'lr': 0.013319859294656915, 'batch_size': 256}. Best is trial 13 with value: 89.01972198486328.


torch.Size([1000, 321])


[I 2024-03-27 01:37:57,990] Trial 14 finished with value: 80.54651641845703 and parameters: {'n_layers': 4, 'n_units_l0': 171, 'dropout_l0': 0.27654455496906005, 'n_units_l1': 122, 'dropout_l1': 0.3329525784217172, 'n_units_l2': 253, 'dropout_l2': 0.49701927558438475, 'n_units_l3': 163, 'dropout_l3': 0.49887652034845, 'optimizer': 'Adam', 'lr': 0.00924472497677181, 'batch_size': 256}. Best is trial 14 with value: 80.54651641845703.
[I 2024-03-27 01:37:58,085] Trial 15 pruned. 


torch.Size([1000, 321])
torch.Size([1000, 321])


[I 2024-03-27 01:37:59,818] Trial 16 pruned. 
  return np.nanmin(values)
[I 2024-03-27 01:37:59,894] Trial 17 pruned. 


torch.Size([1000, 321])
torch.Size([1000, 321])


[I 2024-03-27 01:38:03,378] Trial 18 finished with value: 49.37183094024658 and parameters: {'n_layers': 4, 'n_units_l0': 161, 'dropout_l0': 0.20600770445362854, 'n_units_l1': 150, 'dropout_l1': 0.30167377439845716, 'n_units_l2': 267, 'dropout_l2': 0.49784224537991045, 'n_units_l3': 110, 'dropout_l3': 0.2690332022158084, 'optimizer': 'Adam', 'lr': 0.0006726580394291194, 'batch_size': 16}. Best is trial 18 with value: 49.37183094024658.


torch.Size([1000, 321])


[I 2024-03-27 01:38:07,775] Trial 19 finished with value: 45.33682465553284 and parameters: {'n_layers': 4, 'n_units_l0': 174, 'dropout_l0': 0.20698929462045085, 'n_units_l1': 225, 'dropout_l1': 0.30723814508389546, 'n_units_l2': 321, 'dropout_l2': 0.4999956412579912, 'n_units_l3': 107, 'dropout_l3': 0.2550368762103258, 'optimizer': 'Adam', 'lr': 0.0003417690685853737, 'batch_size': 16}. Best is trial 19 with value: 45.33682465553284.


Study statistics: 
  Number of finished trials:  20
  Number of pruned trials:  9
  Number of complete trials:  11
Best trial:
  Value:  45.33682465553284
  Params: 
    n_layers: 4
    n_units_l0: 174
    dropout_l0: 0.20698929462045085
    n_units_l1: 225
    dropout_l1: 0.30723814508389546
    n_units_l2: 321
    dropout_l2: 0.4999956412579912
    n_units_l3: 107
    dropout_l3: 0.2550368762103258
    optimizer: Adam
    lr: 0.0003417690685853737
    batch_size: 16


In [66]:
optuna.importance.get_param_importances(study)

{'batch_size': 0.5167787332586548,
 'n_layers': 0.16080507562249335,
 'optimizer': 0.15665348579823324,
 'dropout_l0': 0.06870345193655863,
 'lr': 0.06637936426822885,
 'n_units_l0': 0.0306798891158311}