<h1> Import Libraries </h1>
We import all the necessary libraries, including Optuna, PyTorch, and other utilities.

In [73]:
import optuna
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import time
from tqdm import tqdm

# Ensure reproducibility
torch.manual_seed(0)
np.random.seed(0)


<h1> Read Data </h1>
We read the data saved in `data_processing.ipynb`.

In [74]:
# Load data
X  = np.load('../data/training_inputs_cart_numpy_binary.npy') # Stick input as cartesian coordinates.
# X  = np.load('../data/training_inputs_polar_numpy_binary.npy') # Stick inputs as polar coordinates.

# Load labels
y  = np.load('../data/labes_is_sheik_numpy_binary.npy')

# Print shape to make sure we have what we want.
print(X.shape)
print(y.shape)

(48982, 9, 900)
(48982,)


<h1> Data Splitting </h1>

In [75]:
# Split data into training + validation and holdout sets
X_train_val, X_holdout, y_train_val, y_holdout = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Split training + validation set into separate training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.25, stratify=y_train_val, random_state=42)  # 0.25 * 0.8 = 0.2

<h1> Data Loader </h1>

In [76]:
# Convert arrays into tensors and create dataset objects
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32))
holdout_dataset = TensorDataset(torch.tensor(X_holdout, dtype=torch.float32), torch.tensor(y_holdout, dtype=torch.float32))

# Create data loaders
num_batches = 32 # Can be tuned
num_workers = 1 # Can be tuned

batch_size = X.shape[0] // num_batches  # Can be tuned
print(batch_size)
# batch_size = 64  # Can be tuned

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
holdout_loader = DataLoader(holdout_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

1530


In [77]:
def calculate_accuracy(outputs, labels):
    # Apply sigmoid and threshold at 0.5
    preds = torch.sigmoid(outputs) >= 0.5
    correct = (preds.squeeze().long() == labels.long()).float().sum()
    return correct / labels.shape[0]

<h1> Define the Customizable Network </h1>
We define the neural network architecture. We'll use Optuna to suggest hyperparameters for convolutional layers, optional ReLU activation, max pooling layers, and linear layers.

In [78]:
class CustomNet(nn.Module):
    def __init__(self, trial):
        super(CustomNet, self).__init__()

        # Fixed dropout rate (not tuned by Optuna)
        dropout_rate = 0.5

        # Convolutional layers setup
        self.conv_layers = nn.ModuleList()
        self.activations = []
        self.dropouts = nn.ModuleList()

        in_channels = 9  # Fixed input channel size
        for i in range(3):  # Three convolutional layers
            out_channels = trial.suggest_int(f"conv_{i}_out_channels", 16, 64)
            kernel_size = trial.suggest_int(f"conv_{i}_kernel_size", 3, 7, step=2)
            dilation = trial.suggest_int(f"conv_{i}_dilation", 1, 4)
            self.conv_layers.append(nn.Conv1d(in_channels, out_channels, kernel_size, dilation=dilation))
            in_channels = out_channels  # Update in_channels for the next layer

            # Optional ReLU activation
            use_activation = trial.suggest_categorical(f"conv_{i}_activation", [True, False])
            self.activations.append(use_activation)

            # Add dropout after each activation
            self.dropouts.append(nn.Dropout(dropout_rate))

        # Optional max pooling layer
        self.use_pool1 = trial.suggest_categorical("use_pool1", [True, False])
        if self.use_pool1:
            self.pool1 = nn.MaxPool1d(kernel_size=trial.suggest_int("pool1_kernel", 2, 4),
                                      stride=trial.suggest_int("pool1_stride", 2, 4))

        # Optional additional convolutional layer
        self.use_conv4 = trial.suggest_categorical("use_conv4", [True, False])
        if self.use_conv4:
            self.conv4 = nn.Conv1d(in_channels, 
                                   trial.suggest_int("conv4_out_channels", 32, 128), 
                                   trial.suggest_int("conv4_kernel_size", 3, 7, step=2), 
                                   stride=trial.suggest_int("conv4_stride", 1, 2),
                                   dilation=trial.suggest_int("conv4_dilation", 1, 4))
            self.use_conv4_activation = trial.suggest_categorical("conv4_activation", [True, False])
            in_channels = self.conv4.out_channels  # Update in_channels in case this layer is used
            self.conv4_dropout = nn.Dropout(dropout_rate)  # Dropout after optional conv4

        # Optional second max pooling layer
        self.use_pool2 = trial.suggest_categorical("use_pool2", [True, False])
        if self.use_pool2:
            self.pool2 = nn.MaxPool1d(kernel_size=trial.suggest_int("pool2_kernel", 2, 4),
                                      stride=trial.suggest_int("pool2_stride", 2, 4))

        # Fully connected layers setup
        self.fc1 = nn.LazyLinear(trial.suggest_int("fc1_out_features", 64, 256))
        self.fc1_dropout = nn.Dropout(dropout_rate)  # Dropout after fc1
        self.fc2 = nn.Linear(self.fc1.out_features, trial.suggest_int("fc2_out_features", 32, 128))
        self.fc2_dropout = nn.Dropout(dropout_rate)  # Dropout after fc2
        self.fc3 = nn.Linear(self.fc2.out_features, 1)  # Output layer with 1 unit for binary classification

    def forward(self, x):
        # Apply convolutional layers with optional ReLU and fixed dropout
        for i, (conv_layer, dropout) in enumerate(zip(self.conv_layers, self.dropouts)):
            x = conv_layer(x)
            if self.activations[i]:
                x = F.relu(x)
            x = dropout(x)

        # Optional max pooling after conv layers
        if self.use_pool1:
            x = self.pool1(x)

        # Optional additional conv layer with optional ReLU and fixed dropout
        if self.use_conv4:
            x = self.conv4(x)
            if self.use_conv4_activation:
                x = F.relu(x)
            x = self.conv4_dropout(x)

        # Optional second max pooling layer
        if self.use_pool2:
            x = self.pool2(x)

        # Flatten for fully connected layers
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc1_dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc2_dropout(x)
        x = self.fc3(x)  # Output without activation for BCEWithLogitsLoss
        return x


<h1> Define the Objective Function </h1>
We define the objective function for Optuna, which involves training and validating the model with the suggested hyperparameters to minimize the validation loss.

In [79]:
def objective(trial):
    # Device configuration
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the model with hyperparameters suggested by Optuna
    model = CustomNet(trial).to(device)

    # Load and prepare data (assuming X and y are already loaded)
    # Splitting, converting to TensorDataset, and DataLoader setup would go here

    # Define the optimizer and criterion
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.BCEWithLogitsLoss()

    def train_epoch(model, dataloader, optimizer, criterion):
        model.train()
        running_loss = 0.0
        running_accuracy = 0.0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            running_accuracy += calculate_accuracy(outputs, labels) * inputs.size(0)
            
        epoch_loss = running_loss / len(dataloader.dataset)
        epoch_accuracy = running_accuracy / len(dataloader.dataset)
        return epoch_loss, epoch_accuracy

    def validate_epoch(model, dataloader, criterion):
        model.eval()
        running_loss = 0.0
        running_accuracy = 0.0
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs.squeeze(), labels)
                
                running_loss += loss.item() * inputs.size(0)
                running_accuracy += calculate_accuracy(outputs, labels) * inputs.size(0)
                
        epoch_loss = running_loss / len(dataloader.dataset)
        epoch_accuracy = running_accuracy / len(dataloader.dataset)
        return epoch_loss, epoch_accuracy

    def evaluate_holdout(model, dataloader, criterion):
        model.eval()  # Set model to evaluation mode
        running_loss = 0.0
        running_accuracy = 0.0
        with torch.no_grad():  # No gradients needed
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs.squeeze(), labels)
                
                running_loss += loss.item() * inputs.size(0)
                running_accuracy += calculate_accuracy(outputs, labels) * inputs.size(0)
                
        epoch_loss = running_loss / len(dataloader.dataset)
        epoch_accuracy = running_accuracy / len(dataloader.dataset)
        return epoch_loss, epoch_accuracy
                
    # Early stopping parameters
    patience = 10  # Number of epochs to wait for improvement before stopping
    min_delta = 0.001  # Minimum change to qualify as an improvement
    best_val_loss = float('inf')
    epochs_no_improve = 0

    # Training loop with early stopping
    epochs = 250
    for epoch in range(epochs):
        start_time = time.time()
        
        train_loss, train_accuracy = train_epoch(model, train_loader, optimizer, criterion)
        val_loss, val_accuracy = validate_epoch(model, val_loader, criterion)
        
        end_time = time.time()
        epoch_duration = end_time - start_time

        # Early Stopping check
        if (val_loss + min_delta) < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        # Print progress every 5 epochs
        if (epoch + 1) % 5 == 0:
            tqdm.write(f'Epoch {epoch+1}/{epochs} - Duration: {epoch_duration:.2f}s - Training Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f} - Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}')

        # Check early stopping condition
        if epochs_no_improve >= patience:
            tqdm.write(f'Early stopping triggered at epoch {epoch + 1}')
            # holdout_loss, holdout_accuracy = evaluate_holdout(model, holdout_loader, criterion)
            # print(f'Holdout Loss: {holdout_loss:.4f}, Accuracy: {holdout_accuracy:.4f}')
            break

    # Evaluate model on holdout set after training is complete (if necessary)
    holdout_loss, holdout_accuracy = evaluate_holdout(model, holdout_loader, criterion)
    print(f'Holdout Loss: {holdout_loss:.4f}, Accuracy: {holdout_accuracy:.4f}')
    
    return best_val_loss


<h1> Define Callback Function </h1>
We define a callback function that will be called by the Optuna study after each trial. This function will check if the current trial has a better value than the previous best and, if so, will save its parameters.

In [80]:
def save_params_if_best(study, trial):
    if study.best_trial.number == trial.number:
        # Save the best parameters so far
        print(f"New best trial at trial {trial.number}:")
        print(f"  Value: {trial.value}")
        print("  Params: ")
        for key, value in trial.params.items():
            print(f"    {key}: {value}")
        print("\n")


<h1> Run the Optimization </h1>
We create an Optuna study and start the optimization process.

In [81]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100, show_progress_bar=True, timeout=3600*3, callbacks=[save_params_if_best])

# Print the overall best hyperparameters
print("Best trial overall:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


[I 2024-02-14 13:17:27,326] A new study created in memory with name: no-name-7f21ee19-ca2d-47ea-83bd-94f8a4814ecd


  0%|          | 0/100 [00:00<?, ?it/s]



Epoch 5/250 - Duration: 4.20s - Training Loss: 0.6688, Accuracy: 0.5933 - Validation Loss: 0.6594, Accuracy: 0.5979
Epoch 10/250 - Duration: 4.19s - Training Loss: 0.5555, Accuracy: 0.7240 - Validation Loss: 0.6803, Accuracy: 0.6741
Epoch 15/250 - Duration: 4.20s - Training Loss: 0.5103, Accuracy: 0.7570 - Validation Loss: 0.6020, Accuracy: 0.7156
Epoch 20/250 - Duration: 4.20s - Training Loss: 0.4869, Accuracy: 0.7727 - Validation Loss: 0.7299, Accuracy: 0.6964
Epoch 25/250 - Duration: 4.22s - Training Loss: 0.4762, Accuracy: 0.7780 - Validation Loss: 0.6474, Accuracy: 0.7334
Early stopping triggered at epoch 25
Holdout Loss: 0.6258, Accuracy: 0.7333
[I 2024-02-14 13:19:14,326] Trial 0 finished with value: 0.6020458375627659 and parameters: {'conv_0_out_channels': 49, 'conv_0_kernel_size': 5, 'conv_0_dilation': 1, 'conv_0_activation': True, 'conv_1_out_channels': 42, 'conv_1_kernel_size': 7, 'conv_1_dilation': 2, 'conv_1_activation': True, 'conv_2_out_channels': 47, 'conv_2_kernel_siz

In [82]:
study.optimize(objective, n_trials=50, show_progress_bar=True, timeout=3600*3, callbacks=[save_params_if_best])

# Print the overall best hyperparameters
print("Best trial overall:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


  0%|          | 0/50 [00:00<?, ?it/s]

Epoch 5/250 - Duration: 4.24s - Training Loss: 0.6274, Accuracy: 0.6607 - Validation Loss: 0.5987, Accuracy: 0.6853
Epoch 10/250 - Duration: 4.34s - Training Loss: 0.5253, Accuracy: 0.7445 - Validation Loss: 0.5071, Accuracy: 0.7577
Epoch 15/250 - Duration: 4.37s - Training Loss: 0.4712, Accuracy: 0.7783 - Validation Loss: 0.4673, Accuracy: 0.7803
Epoch 20/250 - Duration: 4.48s - Training Loss: 0.4350, Accuracy: 0.8008 - Validation Loss: 0.4327, Accuracy: 0.8069
Epoch 25/250 - Duration: 4.43s - Training Loss: 0.3797, Accuracy: 0.8339 - Validation Loss: 0.3683, Accuracy: 0.8397
Epoch 30/250 - Duration: 4.42s - Training Loss: 0.3432, Accuracy: 0.8524 - Validation Loss: 0.3243, Accuracy: 0.8608
Epoch 35/250 - Duration: 4.33s - Training Loss: 0.3055, Accuracy: 0.8686 - Validation Loss: 0.3629, Accuracy: 0.8427
Epoch 40/250 - Duration: 4.36s - Training Loss: 0.2955, Accuracy: 0.8741 - Validation Loss: 0.2952, Accuracy: 0.8730
Epoch 45/250 - Duration: 4.39s - Training Loss: 0.2732, Accuracy: