In [8]:
import os
import sys

# Notebooks do not define `__file__`; the quoted literal below is only a
# placeholder file name, so this resolves to the kernel's current working
# directory (the chdir therefore just pins that directory explicitly).
notebook_dir = os.path.dirname(os.path.abspath('__file__'))
os.chdir(notebook_dir)

# Make the project's src/ directory importable. The membership check keeps the
# cell idempotent: re-running it no longer stacks duplicate sys.path entries.
src_dir = os.path.abspath('../src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [14]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# import from src/
from data import DataPreparation
from models import TransformerModel

In [15]:
# Read the processed market-feature CSV and split it into train/validation
# tensors via the project's DataPreparation helper.
file_path = '../data/processed/market_features.csv'
data_prep = DataPreparation(file_path)
(
    X_train_tensor,
    y_train_tensor,
    X_val_tensor,
    y_val_tensor,
) = data_prep.prepare_data()

In [16]:
# Model dimensions are derived from the training tensors.
input_dim = X_train_tensor.size(1)
# NOTE(review): this counts the distinct labels seen in the training split; it
# only matches the required logit width if labels are contiguous 0..K-1 and
# every class appears in training — confirm against DataPreparation.
num_classes = y_train_tensor.unique().numel()

# Prefer CUDA when it is available, otherwise fall back to the CPU.
# Apple-silicon alternative (currently disabled):
# device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Device Used", device)

Device Used cpu


In [17]:
def _train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over `loader`; return the mean per-sample loss."""
    model.train()
    total_loss = 0.0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        # The criterion returns a batch mean; weight it by the batch size so
        # the (usually smaller) final batch does not skew the epoch average.
        total_loss += loss.item() * labels.size(0)
    return total_loss / len(loader.dataset)


def _evaluate(model, loader, criterion, device):
    """Return the mean per-sample loss of `model` over `loader` without gradients."""
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            loss = criterion(model(inputs), labels)
            total_loss += loss.item() * labels.size(0)
    return total_loss / len(loader.dataset)


def objective(trial):
    """Optuna objective: train a TransformerModel and report its best validation loss.

    Hyperparameters (d_model, nhead, num_layers, dim_feedforward, lr,
    batch_size) are sampled from `trial`. The model is trained with Adam and
    cross-entropy for up to 100 epochs, with plateau-based learning-rate
    reduction and early stopping after 5 epochs without improvement.

    Reads the module-level names `input_dim`, `num_classes`, `device`,
    `X_train_tensor`, `y_train_tensor`, `X_val_tensor` and `y_val_tensor`.

    Args:
        trial: the Optuna trial used to sample hyperparameters.

    Returns:
        The lowest epoch-level validation loss observed during training.
    """
    # Define the hyperparameters to optimize
    d_model = trial.suggest_categorical('d_model', [32, 64, 96, 128])
    # Keep only head counts that divide d_model. With the d_model options above
    # every candidate (2, 4, 8) qualifies, so the choice list is identical for
    # all trials; keep it that way if the d_model options are ever changed.
    possible_nheads = [n for n in [2, 4, 8] if d_model % n == 0]
    nhead = trial.suggest_categorical('nhead', possible_nheads)

    num_layers = trial.suggest_int('num_layers', 1, 4)
    dim_feedforward = trial.suggest_int('dim_feedforward', 128, 512, step=64)
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_int('batch_size', 16, 64, step=16)

    print(f'Trial {trial.number}:')
    print(f'  d_model: {d_model}')
    print(f'  nhead: {nhead}')
    print(f'  num_layers: {num_layers}')
    print(f'  dim_feedforward: {dim_feedforward}')
    print(f'  lr: {lr}')
    print(f'  batch_size: {batch_size}')

    # Ensure the model class definition accepts the specified parameters
    model = TransformerModel(input_dim, num_classes, d_model=d_model, nhead=nhead, num_layers=num_layers, dim_feedforward=dim_feedforward).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # Build the scheduler ONCE per trial. ReduceLROnPlateau tracks its best
    # metric and bad-epoch count internally; constructing it inside the epoch
    # loop would reset that state every epoch, so the LR would never drop.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2)

    # Prepare DataLoader
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Training loop with early stopping
    epochs = 100
    best_val_loss = float('inf')
    patience = 5
    trigger_times = 0

    for epoch in range(epochs):
        epoch_train_loss = _train_one_epoch(model, train_loader, criterion, optimizer, device)
        epoch_val_loss = _evaluate(model, val_loader, criterion, device)

        print(f'Epoch {epoch+1}/{epochs}, Training Loss: {epoch_train_loss}, Validation Loss: {epoch_val_loss}')

        # Early stopping: count consecutive epochs without a strict improvement.
        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            trigger_times = 0
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print(f'Early stopping at epoch {epoch+1}')
                break

        # Let the scheduler lower the learning rate when validation loss stalls.
        scheduler.step(epoch_val_loss)

    return best_val_loss

# Launch the hyperparameter search: 50 trials, lower validation loss is better.
study = optuna.create_study(direction='minimize')
study.optimize(func=objective, n_trials=50)

print("Best hyperparameters: ", study.best_params)


[I 2024-06-24 01:36:16,166] A new study created in memory with name: no-name-1202f800-079c-47db-857c-efaceed453dd


Trial 0:
  d_model: 128
  nhead: 8
  num_layers: 4
  dim_feedforward: 128
  lr: 0.0003224979421630959
  batch_size: 16


[W 2024-06-24 01:36:21,808] Trial 0 failed with parameters: {'d_model': 128, 'nhead': 8, 'num_layers': 4, 'dim_feedforward': 128, 'lr': 0.0003224979421630959, 'batch_size': 16} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Users/lok/miniconda3/envs/coreml-tools/lib/python3.12/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/rx/5p9gp21s50b5rd91wjwp9c6c0000gn/T/ipykernel_35083/2880576364.py", line 44, in objective
    outputs = model(inputs)
              ^^^^^^^^^^^^^
  File "/Users/lok/miniconda3/envs/coreml-tools/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/lok/miniconda3/envs/coreml-tools/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return fo

KeyboardInterrupt: 