In [75]:
import os

import optuna
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets
from torchvision import transforms
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np


# Device that every model and batch is moved to; this notebook is pinned to CPU.
DEVICE = torch.device("cpu")
# Mini-batch size shared by every DataLoader built in this notebook.
BATCHSIZE = 64
# Number of target classes; sets the width of the network's output layer.
CLASSES = 2
# Working directory at the time this cell runs (not referenced by the cells visible here).
DIR = os.getcwd()
# Number of training epochs per Optuna trial during the hyperparameter search.
EPOCHS = 30

# in_features of the first Linear layer. Presumably the number of feature
# columns left in train_fe.csv after dropping "target" and "fold" — TODO confirm.
INPUT_SIZE = 1928


In [None]:
# Create batched data loaders for training and validation sets.
# For each of the 5 folds, fold `i` is held out for validation and the
# remaining rows are used for training.
train_data = pd.read_csv("train_fe.csv")
train_loaders = []
val_loaders = []
for i in range(5):
    train = train_data[train_data.fold != i]
    val = train_data[train_data.fold == i]

    # Fit the scaler on the training rows only, then reuse those statistics
    # for the held-out rows so no validation information leaks into training.
    ss = StandardScaler()
    x_train = train.drop(["target", "fold"], axis=1).to_numpy(dtype=np.float32)
    x_train = torch.tensor(ss.fit_transform(x_train))
    # np.int64 instead of np.long: np.long is missing from NumPy 1.24-1.26 and
    # is the platform-dependent C long in NumPy 2.x, while nll_loss needs int64.
    y_train = torch.tensor(train.target.to_numpy(dtype=np.int64), dtype=torch.long)

    x_valid = val.drop(["target", "fold"], axis=1).to_numpy(dtype=np.float32)
    x_valid = torch.tensor(ss.transform(x_valid))
    y_valid = torch.tensor(val.target.to_numpy(dtype=np.int64), dtype=torch.long)

    train_ds = TensorDataset(x_train, y_train)
    val_ds = TensorDataset(x_valid, y_valid)

    train_dl = DataLoader(train_ds, BATCHSIZE, shuffle=True)
    # Accuracy does not depend on batch order, so validation is not shuffled.
    val_dl = DataLoader(val_ds, BATCHSIZE, shuffle=False)

    train_loaders.append(train_dl)
    val_loaders.append(val_dl)


In [None]:
def model_def(trial):
    """
    Build a feed-forward classifier whose architecture is sampled from `trial`.

    Architecture hyperparameters drawn from the trial:
    - "n_layers": number of hidden layers (1 to 6)
    - "n_units_l{i}": units in hidden layer i (4 to 128)
    - "activation_l{i}": activation after hidden layer i (ReLU, Tanh or Sigmoid)
    - "dropout_l{i}": dropout rate after hidden layer i (0.1 to 0.5)

    Returns an nn.Sequential that maps INPUT_SIZE features to CLASSES
    log-probabilities (the last module is LogSoftmax over dim 1).
    """
    # Anything other than "ReLU"/"Tanh" falls back to Sigmoid (see .get below).
    activation_factories = {"ReLU": nn.ReLU, "Tanh": nn.Tanh}

    depth = trial.suggest_int("n_layers", 1, 6)
    modules = []
    width_in = INPUT_SIZE

    for idx in range(depth):
        width_out = trial.suggest_int(f"n_units_l{idx}", 4, 128)
        modules.append(nn.Linear(width_in, width_out))

        act_name = trial.suggest_categorical(f"activation_l{idx}", ["ReLU", "Tanh", "Sigmoid"])
        modules.append(activation_factories.get(act_name, nn.Sigmoid)())

        drop_rate = trial.suggest_float(f"dropout_l{idx}", 0.1, 0.5)
        modules.append(nn.Dropout(drop_rate))

        # The next hidden layer consumes this layer's output.
        width_in = width_out

    # Output head: class scores turned into log-probabilities.
    modules.extend([nn.Linear(width_in, CLASSES), nn.LogSoftmax(dim=1)])

    return nn.Sequential(*modules)

def objective(trial):
    """
    Optuna objective: cross-validated accuracy of a network sampled from `trial`.

    One model is built per fold (all share the architecture sampled by
    `model_def`) and each is trained on its fold's training loader and scored
    on its validation loader once per epoch.

    Training hyperparameters drawn from the trial:
    - "optimizer": Adam, RMSprop or SGD
    - "lr": learning rate, log-uniform in [1e-5, 1e-1]
    The number of epochs is fixed by EPOCHS and is not tuned.

    After every epoch the mean validation accuracy across folds is reported to
    the trial so the pruner can stop unpromising trials early.

    Returns:
        The highest per-epoch mean validation accuracy seen during training.
        NOTE: taking the maximum over epochs is an optimistic estimate.

    Raises:
        optuna.exceptions.TrialPruned: if the pruner asks to stop the trial.
    """
    # The fold count follows the loaders instead of a hard-coded 5, so the
    # search keeps working if the number of folds changes.
    n_folds = len(train_loaders)

    # Generate one model per fold
    models = [model_def(trial).to(DEVICE) for _ in range(n_folds)]

    # Generate one optimizer per model, all with the same sampled settings
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer_cls = getattr(optim, optimizer_name)
    optimizers = [optimizer_cls(net.parameters(), lr=lr) for net in models]

    # Training of the model.
    max_acc = 0.0
    for epoch in range(EPOCHS):
        val_acc = []
        for net, opt, train_dl, val_dl in zip(models, optimizers, train_loaders, val_loaders):
            # One pass over this fold's training data
            net.train()
            for data, target in train_dl:
                data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

                opt.zero_grad()
                output = net(data)

                # The network ends in LogSoftmax, hence nll_loss
                loss = F.nll_loss(output, target)
                loss.backward()
                opt.step()

            # Score on the held-out fold with dropout disabled
            net.eval()
            correct = 0
            with torch.no_grad():
                for data, target in val_dl:
                    data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                    output = net(data)
                    pred = output.argmax(dim=1, keepdim=True)
                    correct += pred.eq(target.view_as(pred)).sum().item()

            val_acc.append(correct / len(val_dl.dataset))

        accuracy = float(np.mean(val_acc))
        max_acc = max(accuracy, max_acc)
        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return max_acc

In [None]:
if __name__ == "__main__":
    # The study is persisted in SQLite under a fixed name. Without
    # load_if_exists=True, re-running this cell raises DuplicatedStudyError
    # because the study already exists in the database. With it, a re-run
    # resumes the stored study and appends another 100 trials; delete
    # db.sqlite3 (or change study_name) to start from scratch.
    study = optuna.create_study(
        direction="maximize",
        storage="sqlite:///db.sqlite3",  # Specify the storage URL
        study_name="cs467-study",
        load_if_exists=True,
    )
    study.optimize(objective, n_trials=100)

    # Split the trial history by final state for the summary below.
    pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

[I 2024-12-06 02:34:26,857] A new study created in RDB with name: cs467-study4
[I 2024-12-06 02:35:34,010] Trial 0 finished with value: 0.6780387455038809 and parameters: {'n_layers': 2, 'n_units_l0': 17, 'activation_l0': 'ReLU', 'dropout_l0': 0.3183910809383067, 'n_units_l1': 19, 'activation_l1': 'Tanh', 'dropout_l1': 0.19776741621483912, 'optimizer': 'Adam', 'lr': 1.809161140458303e-05}. Best is trial 0 with value: 0.6780387455038809.
[I 2024-12-06 02:37:08,209] Trial 1 finished with value: 0.6353942071054458 and parameters: {'n_layers': 4, 'n_units_l0': 82, 'activation_l0': 'ReLU', 'dropout_l0': 0.3778078694195257, 'n_units_l1': 42, 'activation_l1': 'Sigmoid', 'dropout_l1': 0.11553339335009066, 'n_units_l2': 36, 'activation_l2': 'ReLU', 'dropout_l2': 0.46132845988020016, 'n_units_l3': 98, 'activation_l3': 'ReLU', 'dropout_l3': 0.43463902383546105, 'optimizer': 'SGD', 'lr': 0.0027320061895683098}. Best is trial 0 with value: 0.6780387455038809.
[I 2024-12-06 02:38:07,242] Trial 2 fin

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  60
  Number of complete trials:  40
Best trial:
  Value:  0.6825623356681602
  Params: 
    n_layers: 3
    n_units_l0: 45
    activation_l0: Tanh
    dropout_l0: 0.33814858557939
    n_units_l1: 45
    activation_l1: Sigmoid
    dropout_l1: 0.41601441860582045
    n_units_l2: 88
    activation_l2: Tanh
    dropout_l2: 0.32315395376219097
    optimizer: Adam
    lr: 0.00028092897413284015


In [107]:
class Model(nn.Module):
    """
    Fixed three-hidden-layer classifier with hard-coded hyperparameters.

    Layout: Linear(input_size, 45) -> Tanh -> Dropout
            -> Linear(45, 45) -> Sigmoid -> Dropout
            -> Linear(45, 88) -> Tanh -> Dropout
            -> Linear(88, 2) -> LogSoftmax over dim 1.
    The layer widths, activations and dropout rates match the best trial
    printed by the hyperparameter search in this notebook.
    """

    def __init__(self, input_size):
        super().__init__()
        # Attribute names and registration order determine the state_dict
        # keys, so they are kept stable for saved checkpoints.
        self.l0 = nn.Linear(in_features=input_size, out_features=45)
        self.d0 = nn.Dropout(p=0.33814858557939)
        self.l1 = nn.Linear(in_features=45, out_features=45)
        self.d1 = nn.Dropout(p=0.41601441860582045)
        self.l2 = nn.Linear(in_features=45, out_features=88)
        self.d2 = nn.Dropout(p=0.32315395376219097)
        self.final = nn.Linear(in_features=88, out_features=2)

        # Parameter-free modules shared across layers
        self.tanh = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        self.lsmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return per-class log-probabilities for a 2-D batch of feature rows."""
        # Hidden block 0: Tanh
        hidden = self.l0(x)
        hidden = self.tanh(hidden)
        hidden = self.d0(hidden)

        # Hidden block 1: Sigmoid
        hidden = self.l1(hidden)
        hidden = self.sigmoid(hidden)
        hidden = self.d1(hidden)

        # Hidden block 2: Tanh
        hidden = self.l2(hidden)
        hidden = self.tanh(hidden)
        hidden = self.d2(hidden)

        logits = self.final(hidden)
        return self.lsmax(logits)

In [79]:
# Train on full training data, test on test set
test_data = pd.read_csv("test_fe.csv")

# Scaler fitted on the whole training set. Both the training and the test
# features must be transformed with THIS scaler; the original cell fitted
# ss_full but then applied `ss`, the scaler left over from the last
# cross-validation fold, which was fitted on only part of the training data.
ss_full = StandardScaler()
x_train_full = train_data.drop(["target", "fold"], axis=1).to_numpy(dtype=np.float32)
ss_full.fit(x_train_full)
x_train_full = torch.tensor(ss_full.transform(x_train_full))
y_train_full = torch.tensor(train_data.target.to_numpy(dtype=np.int64), dtype=torch.long)
train_dl_full = DataLoader(TensorDataset(x_train_full, y_train_full), BATCHSIZE, shuffle=True)

# The test file is only stripped of "target" here (no "fold" column is dropped).
x_test = torch.tensor(ss_full.transform(test_data.drop(["target"], axis=1).to_numpy(dtype=np.float32)))
y_test = torch.tensor(test_data.target.to_numpy(dtype=np.int64), dtype=torch.long)
# Accuracy does not depend on batch order, so the test loader is not shuffled.
test_dl = DataLoader(TensorDataset(x_test, y_test), BATCHSIZE, shuffle=False)

In [145]:
# Final network and its Adam optimizer, using the learning rate of the best
# trial printed by the hyperparameter search.
model = Model(INPUT_SIZE)
model = model.to(DEVICE)
optimizer = optim.Adam(params=model.parameters(), lr=0.00028092897413284015)

In [146]:
# Training
# Fit the final model on the full training set for a fixed 4 epochs and print
# the running training accuracy after each one.
for epoch in range(4):
    model.train()

    correct = 0
    for data, target in train_dl_full:
        data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

        optimizer.zero_grad()
        output = model(data)

        # Count hits with tensor ops (same form as the evaluation loops) rather
        # than .numpy(), which only works for tensors that live on the CPU.
        # NOTE: these predictions are made in train mode (dropout active), so
        # the figure is not directly comparable to eval-mode accuracy.
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

    acc = correct / len(train_dl_full.dataset)
    print(f"Epoch {epoch} training accuracy: {acc:.4f}")


Epoch 0 training accuracy: 0.6440
Epoch 1 training accuracy: 0.6808
Epoch 2 training accuracy: 0.6947
Epoch 3 training accuracy: 0.6961


In [147]:
# Test set
# Score the trained model on the held-out test loader with dropout disabled
# and gradient tracking switched off.
model.eval()
test_correct = 0
with torch.no_grad():
    for features, labels in test_dl:
        features = features.view(features.size(0), -1).to(DEVICE)
        labels = labels.to(DEVICE)

        log_probs = model(features)
        predicted = log_probs.argmax(dim=1, keepdim=True)
        hits = predicted.eq(labels.view_as(predicted))
        test_correct += hits.sum().item()

test_acc = test_correct / len(test_dl.dataset)
print(f"Test set accuracy: {test_acc:.4f}")

Test set accuracy: 0.6757


In [148]:
# Save model
# Only the learned parameters (state_dict) are written, so loading requires
# constructing a Model first and then calling load_state_dict on it.
torch.save(obj=model.state_dict(), f='model.pth')