In [2]:
import torch
import csv
import os
import random
import numpy as np
from torch.utils.data import TensorDataset, DataLoader, random_split
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import optuna
from optuna.trial import TrialState



from dataset_reader import Traces_Dataset
from mlp_model import MLP

In [10]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
target_features = 7
DIR = os.getcwd()
EPOCHS = 30
BATCH_SIZE = 1024

N_TRAIN_EXAMPLES = 200 * 30
N_VALID_EXAMPLES = 200 * 10

In [4]:
def define_model(trial):
    # We optimize the number of layers, hidden units and dropout ratio in each layer.
    n_layers = trial.suggest_int("n_layers", 1, 5)
    layers = []

    in_features = 321
    for i in range(n_layers):
        out_features = trial.suggest_int("n_units_l{}".format(i), 256, 1024)
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())
        p = trial.suggest_float("dropout_l{}".format(i), 0.2, 0.5)
        layers.append(nn.Dropout(p))
        in_features = out_features

    layers.append(nn.Linear(in_features, target_features))
    # layers.append(nn.LogSoftmax(dim=1))

    return nn.Sequential(*layers)

In [5]:
dataset = Traces_Dataset('dataset2mil.csv')
dataset.split_dataset(0.95, 0.05, 0)
dataset.clean_features()
dataset.find_mean_std()
dataset.normalize()
print(dataset.inputs.shape)

# initialize train, val, test set
X_train = dataset[dataset.train_set.indices][0]
Y_train = dataset[dataset.train_set.indices][1]

X_val = dataset[dataset.val_set.indices][0]
Y_val = dataset[dataset.val_set.indices][1]

X_test = dataset[dataset.test_set.indices][0]
Y_test = dataset[dataset.test_set.indices][1]


torch.Size([2000000, 321])


In [12]:
def get_dataset(trial):
# def get_dataset():

    # initialize dataloader 
    train_dataset = TensorDataset(X_train, Y_train)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

    val_dataset = TensorDataset(X_val, Y_val)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)
    print('get')
    return train_loader, val_loader

In [13]:
# train_loader, valid_loader = get_dataset()

In [14]:
def objective(trial):
    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizers.
    # optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    # lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    # optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-5, 1e-2)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    
    

    # Get the FashionMNIST dataset.
    train_loader, valid_loader = get_dataset(trial)

    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (train_inputs, train_targets) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * 10 >= N_TRAIN_EXAMPLES:
                break

            # data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
            train_inputs, train_targets = train_inputs.to(DEVICE), train_targets.to(DEVICE)

            optimizer.zero_grad()
            train_outputs = model(train_inputs)
            loss = nn.MSELoss()(train_outputs, train_targets)
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        total_val_loss = 0.0
        with torch.no_grad():
            for batch_idx, (val_inputs, val_targets) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * 10 >= N_VALID_EXAMPLES:
                    break
                
                # data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                val_inputs, val_targets = val_inputs.to(DEVICE), val_targets.to(DEVICE)

                val_outputs = model(val_inputs)
                # Get the index of the max log-probability.
                val_loss = nn.MSELoss()(val_outputs, val_targets)
                total_val_loss += val_loss.item()

            # Average validation loss for the epoch
        avg_val_loss = total_val_loss / len(valid_loader)

        trial.report(avg_val_loss, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return avg_val_loss

In [15]:
if __name__ == "__main__":
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=2, timeout=600)

    pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

[I 2024-04-02 04:13:13,162] A new study created in memory with name: no-name-2d6a5315-cecb-4ee8-8967-c5c957dfdbbd
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-5, 1e-2)


get


[I 2024-04-02 04:17:14,429] Trial 0 finished with value: 11.667877644908671 and parameters: {'n_layers': 3, 'n_units_l0': 487, 'dropout_l0': 0.2624531815829616, 'n_units_l1': 851, 'dropout_l1': 0.3896867427531606, 'n_units_l2': 459, 'dropout_l2': 0.4017251954136952, 'learning_rate': 0.0023726475324028595, 'weight_decay': 0.003035493928860235}. Best is trial 0 with value: 11.667877644908671.


get


[I 2024-04-02 04:21:31,989] Trial 1 finished with value: 57.3564565230389 and parameters: {'n_layers': 5, 'n_units_l0': 832, 'dropout_l0': 0.48316669670064405, 'n_units_l1': 647, 'dropout_l1': 0.33701891840999854, 'n_units_l2': 523, 'dropout_l2': 0.2870321202881455, 'n_units_l3': 860, 'dropout_l3': 0.477194090966461, 'n_units_l4': 751, 'dropout_l4': 0.2861567376058094, 'learning_rate': 0.0069379576438017175, 'weight_decay': 0.0053841564475703586}. Best is trial 0 with value: 11.667877644908671.


Study statistics: 
  Number of finished trials:  2
  Number of pruned trials:  0
  Number of complete trials:  2
Best trial:
  Value:  11.667877644908671
  Params: 
    n_layers: 3
    n_units_l0: 487
    dropout_l0: 0.2624531815829616
    n_units_l1: 851
    dropout_l1: 0.3896867427531606
    n_units_l2: 459
    dropout_l2: 0.4017251954136952
    learning_rate: 0.0023726475324028595
    weight_decay: 0.003035493928860235


In [17]:
optuna.importance.get_param_importances(study)

{'dropout_l2': 0.15503875968992245,
 'n_units_l2': 0.14728682170542634,
 'n_units_l0': 0.14728682170542634,
 'lr': 0.13953488372093023,
 'dropout_l0': 0.13953488372093023,
 'n_units_l1': 0.1317829457364341,
 'dropout_l1': 0.12403100775193797,
 'n_layers': 0.015503875968992246,
 'optimizer': 0.0,
 'batch_size': 0.0}