<h1> Import Libraries </h1>
We import all the necessary libraries, including Optuna, PyTorch, and other utilities.

In [None]:
import optuna
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import time
from tqdm import tqdm
from prettytable import PrettyTable

# Ensure reproducibility: seed the global PyTorch and NumPy random number generators.
# Only these two libraries are covered here; any other source of randomness
# needs its own seed.
torch.manual_seed(0)
np.random.seed(0)

<h1> Read Data </h1>
We read the data saved in `data_processing.ipynb`.

In [None]:
# Load the model inputs. The active line reads the Cartesian-coordinate stick
# inputs ("_1024" variant); the commented-out lines are alternative input files
# (polar coordinates / files without the "_1024" suffix) kept for quick switching.
# NOTE(review): "_1024" presumably refers to the sequence length (CustomNet
# assumes an input length of 2 ** 10) — confirm against data_processing.ipynb.
X  = np.load('../data/training_inputs_cart_numpy_binary_1024.npy') # Stick inputs as Cartesian coordinates.
# X  = np.load('../data/training_inputs_polar_numpy_binary.npy') # Stick inputs as polar coordinates.
# X  = np.load('../data/training_inputs_cart_numpy_binary.npy') # Stick inputs as Cartesian coordinates.
# X  = np.load('../data/training_inputs_polar_numpy_binary.npy') # Stick inputs as polar coordinates.
# Load the labels that belong to the inputs loaded above ("is_sheik" target).
y  = np.load('../data/labes_is_sheik_numpy_binary_1024.npy')
# Labels matching the input files without the "_1024" suffix.
# y  = np.load('../data/labes_is_sheik_numpy_binary.npy')
# Print the shapes (and the input array itself) to make sure we loaded what we expect.
print(X.shape)
print(X)
print(y.shape)

<h1> Data Splitting </h1>

In [None]:
# Set aside 20% of the samples as a holdout set. Stratifying on y keeps the class
# ratio the same in both parts, and the fixed random_state makes the split repeatable.
X_train_val, X_holdout, y_train_val, y_holdout = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Split the remaining 80% into training and validation sets (again stratified).
# Taking 25% of the 80% yields a 60% / 20% / 20% train / validation / holdout split.
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.25, stratify=y_train_val, random_state=42)  # 0.25 * 0.8 = 0.2
# Sanity check: number of labels in the holdout, training and validation sets.
print(y_holdout.shape)
print(y_train.shape)
print(y_val.shape)

<h1> Data Loader </h1>

In [None]:
# Wrap each split in a TensorDataset. Inputs and labels are both converted to
# float32 tensors; the labels must be floating point because the training code
# feeds them to BCEWithLogitsLoss.
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32))
holdout_dataset = TensorDataset(torch.tensor(X_holdout, dtype=torch.float32), torch.tensor(y_holdout, dtype=torch.float32))

# Data loader settings
num_batches = 32 # Can be tuned
num_workers = 1 # Can be tuned

# The batch size is derived from the size of the FULL dataset, not of the training
# split, so one pass over the training split (60% of the data) takes roughly
# 0.6 * num_batches batches.
# Clamp to at least 1: for a dataset with fewer than num_batches samples the
# integer division yields 0, which DataLoader rejects.
batch_size = max(1, X.shape[0] // num_batches)  # Can be tuned
print(batch_size)
# batch_size = 64  # Can be tuned

# Only the training loader shuffles; validation and holdout are read in a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
holdout_loader = DataLoader(holdout_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

In [None]:
def calculate_accuracy(outputs, labels):
    """Return the fraction of correct binary predictions in a batch.

    Args:
        outputs: Raw logits, one per sample, e.g. shape ``(N, 1)`` or ``(N,)``.
        labels: Binary targets (0/1) with one entry per sample, e.g. shape
            ``(N,)`` or ``(N, 1)``.

    Returns:
        A 0-dim float tensor: (number of correct predictions) / N.
    """
    # A sample is predicted positive when its sigmoid probability is >= 0.5.
    preds = torch.sigmoid(outputs) >= 0.5
    # Flatten both sides before comparing. Squeezing only the predictions would
    # broadcast (N,) against (N, 1) labels into an (N, N) comparison and
    # over-count the matches.
    correct = (preds.reshape(-1).long() == labels.reshape(-1).long()).float().sum()
    return correct / labels.numel()

# class myLoss(torch.nn.Module):

#     def __init__(self, pos_weight=1):
#       super().__init__()
#       self.pos_weight = pos_weight

#     def forward(self, input, target):
#       epsilon = 10 ** -44
#       input = input.sigmoid().clamp(epsilon, 1 - epsilon)

#       my_bce_loss = -1 * (self.pos_weight * target * torch.log(input)
#                           + (1 - target) * torch.log(1 - input))
#       add_loss = (target - 0.5) ** 2 * 4
#       mean_loss = (my_bce_loss * add_loss).mean()
#       return mean_loss

<h1> Define the Customizable Network </h1>
We define the neural network architecture. We'll use Optuna to suggest hyperparameters for convolutional layers, optional ReLU activation, max pooling layers, and linear layers.

In [None]:
class CustomNet(nn.Module):
    """1-D CNN binary classifier whose architecture is sampled from an Optuna trial.

    Layout: 3 to 5 ``Conv1d`` blocks (each optionally followed by ReLU and always
    followed by dropout), one ``MaxPool1d``, then three fully connected layers
    ending in a single logit.

    Length bookkeeping (every length is a power of two, tracked as an exponent):
      * The input has 9 channels and length 2 ** 10.
      * Conv layer i uses kernel size 2 * k + 1 (k in 1..3), dilation 1..4 and
        stride 2 ** r (r in 0..2). Its padding is chosen so that the output
        length is exactly the input length divided by 2 ** r.
      * The widest kernel spans kernel + (dilation - 1) * (kernel - 1) = 25
        samples (kernel 7, dilation 4). The conv stack may therefore shrink the
        length by at most 2 ** 5 in total, keeping it at 2 ** 5 = 32 or more.
      * The pooling kernel is a power of two chosen so that the pooled length is
        one of 1, 2, 4, 8, 16 or 32, which lets ``fc1`` be sized exactly.
    """

    def __init__(self, trial):
        super().__init__()

        # Dropout probability is fixed, i.e. not part of the search space.
        drop_p = 0.5

        self.conv_layers = nn.ModuleList()
        self.activations = []  # one bool per conv layer: apply ReLU after it?
        self.dropouts = nn.ModuleList()

        n_conv = trial.suggest_int("num_conv_layers", 3, 5)

        channels = 9            # channel count of the network input
        length_exp = 10         # current sequence length is 2 ** length_exp
        reduction_budget = 5    # how many more halvings the conv stack may apply

        for idx in range(n_conv):
            prefix = f"conv_{idx}"

            width = trial.suggest_int(f"{prefix}_out_channels", 16, 128)
            # Kernel size is 2 * half_width + 1, i.e. always odd.
            half_width = trial.suggest_int(f"{prefix}_kernel_size_power", 1, 3)
            dilation = trial.suggest_int(f"{prefix}_dilation", 1, 4)
            # This layer divides the length by 2 ** shrink_exp, limited by what
            # is left of the overall reduction budget.
            shrink_exp = trial.suggest_int(
                f"{prefix}_out_length_reduction_exponent", 0, min(2, reduction_budget)
            )
            reduction_budget -= shrink_exp
            length_exp -= shrink_exp

            # Padding that keeps the output length a power of two. The output
            # length formula contains a floor; the special case below covers
            # stride 4 only, so allowing shrink_exp > 2 would need more cases.
            reach = dilation * half_width
            if shrink_exp == 2 and reach % 2 == 1:
                padding = reach - 1
            else:
                padding = reach

            self.conv_layers.append(
                nn.Conv1d(
                    channels,
                    width,
                    kernel_size=2 * half_width + 1,
                    stride=2 ** shrink_exp,
                    padding=padding,
                    dilation=dilation,
                )
            )
            channels = width  # the next layer consumes this layer's output

            # Whether a ReLU follows this conv layer is itself a hyperparameter.
            self.activations.append(
                trial.suggest_categorical(f"{prefix}_activation", [True, False])
            )
            # Dropout is applied after every conv block.
            self.dropouts.append(nn.Dropout(drop_p))

        # Max pooling with a power-of-two kernel. The lower bound uses up any
        # unspent reduction budget, so the pooled length is at most 2 ** 5.
        pool_exp = trial.suggest_int("maxpool_kernel_exponent", reduction_budget, length_exp)
        length_exp -= pool_exp
        self.pool1 = nn.MaxPool1d(kernel_size=2 ** pool_exp)

        # The pooled length is exactly 2 ** length_exp, so the first linear
        # layer can be sized without a lazy module.
        flat_features = channels * 2 ** length_exp
        fc1_width = trial.suggest_int("fc1_out_features", 64, 256)
        self.fc1 = nn.Linear(flat_features, fc1_width)
        self.fc1_dropout = nn.Dropout(drop_p)

        fc2_width = trial.suggest_int("fc2_out_features", 32, 128)
        self.fc2 = nn.Linear(fc1_width, fc2_width)
        self.fc2_dropout = nn.Dropout(drop_p)

        # Single output unit for binary classification.
        self.fc3 = nn.Linear(fc2_width, 1)

    def forward(self, x):
        """Map a ``(batch, 9, 1024)`` input to ``(batch, 1)`` raw logits."""
        # Conv blocks: convolution, optional ReLU, dropout.
        for conv, use_relu, drop in zip(self.conv_layers, self.activations, self.dropouts):
            x = conv(x)
            if use_relu:
                x = F.relu(x)
            x = drop(x)

        x = self.pool1(x)

        # Flatten everything except the batch dimension for the linear layers.
        x = torch.flatten(x, 1)
        x = self.fc1_dropout(F.relu(self.fc1(x)))
        x = self.fc2_dropout(F.relu(self.fc2(x)))
        # No activation here: the loss (BCEWithLogitsLoss) expects raw logits.
        return self.fc3(x)


In [None]:
# class CustomNet(nn.Module):
#     def __init__(self, trial):
#         super(CustomNet, self).__init__()

#         # Fixed dropout rate (not tuned by Optuna)
#         dropout_rate = 0.5

#         # Convolutional layers setup
#         self.conv_layers = nn.ModuleList()
#         self.activations = []
#         self.dropouts = nn.ModuleList()

#         num_layers = trial.suggest_int(f"num_conv_layers", 1, 5)
#         in_channels = 9  # Fixed input channel size

#         for i in range(num_layers):  # Convolutional layers
#             out_channels = trial.suggest_int(f"conv_{i}_out_channels", 16, 64)
#             kernel_size = trial.suggest_int(f"conv_{i}_kernel_size", 3, 7, step=2)
#             dilation = trial.suggest_int(f"conv_{i}_dilation", 1, 4)
#             stride = trial.suggest_int(f"conv_{i}_stride", 1, 4)
#             self.conv_layers.append(nn.Conv1d(in_channels, out_channels, kernel_size, stride, dilation=dilation))
#             in_channels = out_channels  # Update in_channels for the next layer

#             # Optional ReLU activation
#             use_activation = trial.suggest_categorical(f"conv_{i}_activation", [True, False])
#             self.activations.append(use_activation)

#             # Add dropout after each activation
#             self.dropouts.append(nn.Dropout(dropout_rate))

#         # Max pooling layer
#         #self.use_pool1 = trial.suggest_categorical("use_pool1", [True, False])
#         #if self.use_pool1:
#         self.pool1 = nn.MaxPool1d(kernel_size=trial.suggest_int("pool1_kernel", 2, 4),
#                                     stride=trial.suggest_int("pool1_stride", 2, 4))

#         '''# Optional additional convolutional layer
#         self.use_conv4 = trial.suggest_categorical("use_conv4", [True, False])
#         if self.use_conv4:
#             self.conv4 = nn.Conv1d(in_channels, 
#                                    trial.suggest_int("conv4_out_channels", 32, 128), 
#                                    trial.suggest_int("conv4_kernel_size", 3, 7, step=2), 
#                                    stride=trial.suggest_int("conv4_stride", 1, 2),
#                                    dilation=trial.suggest_int("conv4_dilation", 1, 4))
#             self.use_conv4_activation = trial.suggest_categorical("conv4_activation", [True, False])
#             in_channels = self.conv4.out_channels  # Update in_channels in case this layer is used
#             self.conv4_dropout = nn.Dropout(dropout_rate)  # Dropout after optional conv4

#         # Optional second max pooling layer
#         self.use_pool2 = trial.suggest_categorical("use_pool2", [True, False])
#         if self.use_pool2:
#             self.pool2 = nn.MaxPool1d(kernel_size=trial.suggest_int("pool2_kernel", 2, 4),
#                                       stride=trial.suggest_int("pool2_stride", 2, 4))
#         '''
#         # Fully connected layers setup
#         self.fc1 = nn.LazyLinear(trial.suggest_int("fc1_out_features", 64, 256))
#         self.fc1_dropout = nn.Dropout(dropout_rate)  # Dropout after fc1
#         self.fc2 = nn.Linear(self.fc1.out_features, trial.suggest_int("fc2_out_features", 32, 128))
#         self.fc2_dropout = nn.Dropout(dropout_rate)  # Dropout after fc2
#         self.fc3 = nn.Linear(self.fc2.out_features, 1)  # Output layer with 1 unit for binary classification

#     def forward(self, x):
#         # Apply convolutional layers with optional ReLU and fixed dropout
#         for i, (conv_layer, dropout) in enumerate(zip(self.conv_layers, self.dropouts)):
#             x = conv_layer(x)
#             if self.activations[i]:
#                 x = F.relu(x)
#             x = dropout(x)

#         # Optional max pooling after conv layers
#         # if self.use_pool1:
#         x = self.pool1(x)

#         # Optional additional conv layer with optional ReLU and fixed dropout
#         '''if self.use_conv4:
#             x = self.conv4(x)
#             if self.use_conv4_activation:
#                 x = F.relu(x)
#             x = self.conv4_dropout(x)

#         # Optional second max pooling layer
#         if self.use_pool2:
#             x = self.pool2(x)
#         '''
#         # Flatten for fully connected layers
#         x = torch.flatten(x, 1)
#         x = F.relu(self.fc1(x))
#         x = self.fc1_dropout(x)
#         x = F.relu(self.fc2(x))
#         x = self.fc2_dropout(x)
#         x = self.fc3(x)  # Output without activation for BCEWithLogitsLoss
#         return x


<h1> Define the Objective Function </h1>
We define the objective function for Optuna, which involves training and validating the model with the suggested hyperparameters to minimize the validation loss.

In [None]:
def _run_epoch(model, dataloader, criterion, device, optimizer=None):
    """Run one pass over ``dataloader`` and return ``(mean_loss, mean_accuracy)``.

    When ``optimizer`` is given, the model is put in training mode and updated
    after every batch. Otherwise the model is put in evaluation mode and the
    pass runs with gradient tracking disabled.

    Both returned values are plain Python floats, averaged over the samples of
    ``dataloader.dataset``.
    """
    training = optimizer is not None
    model.train(training)

    total_loss = 0.0
    total_accuracy = 0.0
    with torch.set_grad_enabled(training):
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            # Give the logits the labels' shape. A bare squeeze() would turn a
            # final batch of size 1 into a 0-d tensor, which the loss rejects.
            # The criterion receives raw logits in every mode:
            # BCEWithLogitsLoss applies the sigmoid itself.
            loss = criterion(outputs.reshape(labels.shape), labels)

            if training:
                loss.backward()
                optimizer.step()

            # Weight by batch size so that a smaller final batch does not skew
            # the per-sample averages.
            n_samples = inputs.size(0)
            total_loss += loss.item() * n_samples
            total_accuracy += float(calculate_accuracy(outputs, labels)) * n_samples

    n_total = len(dataloader.dataset)
    return total_loss / n_total, total_accuracy / n_total


def objective(trial):
    """Optuna objective: train a ``CustomNet`` sampled from ``trial``.

    The model is trained with Adam and ``BCEWithLogitsLoss`` on the notebook's
    ``train_loader`` and validated on ``val_loader`` after every epoch.
    Training stops early when either
      * the validation loss has not improved by more than ``min_delta`` for
        ``patience`` consecutive epochs, or
      * the training and validation losses have differed by ``min_overfit`` or
        more for ``patience`` consecutive epochs.
    After training, the loss and accuracy on ``holdout_loader`` are printed for
    information only; they do not influence the returned value.

    Args:
        trial: The Optuna trial used by ``CustomNet`` to sample its architecture.

    Returns:
        The lowest validation loss observed during training (float).
    """
    # Device configuration
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the model with hyperparameters suggested by Optuna
    model = CustomNet(trial).to(device)

    # Define the optimizer and criterion
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.BCEWithLogitsLoss()

    # Early stopping parameters
    patience = 10  # Number of consecutive "bad" epochs tolerated before stopping
    min_delta = 0.001  # Minimum decrease in validation loss that counts as an improvement
    min_overfit = .2  # Train/validation loss gap at which an epoch counts as overfitting
    best_val_loss = float('inf')
    epochs_no_improve = 0
    epochs_overfit = 0

    # Training loop with early stopping
    epochs = 250
    for epoch in range(epochs):
        start_time = time.time()

        train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_accuracy = _run_epoch(model, val_loader, criterion, device)

        epoch_duration = time.time() - start_time

        # Track epochs without a sufficient improvement of the validation loss
        if (val_loss + min_delta) < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        # Track consecutive epochs with a large train/validation loss gap
        if abs(train_loss - val_loss) < min_overfit:
            epochs_overfit = 0
        else:
            epochs_overfit += 1

        # Print progress every 5 epochs
        if (epoch + 1) % 5 == 0:
            tqdm.write(f'Epoch {epoch+1}/{epochs} - Duration: {epoch_duration:.2f}s - Training Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f} - Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}')

        # Check early stopping condition
        if (epochs_no_improve >= patience) or (epochs_overfit >= patience):
            tqdm.write(f'Early stopping triggered at epoch {epoch + 1}')
            break

    # Report performance on the holdout set once training is complete.
    # NOTE(review): this runs for every trial; the holdout numbers must not be
    # used to choose between trials, or the holdout set stops being an unbiased test set.
    holdout_loss, holdout_accuracy = _run_epoch(model, holdout_loader, criterion, device)
    print(f'Holdout Loss: {holdout_loss:.4f}, Accuracy: {holdout_accuracy:.4f}')

    return best_val_loss


In [None]:
# def objective(trial):
#     # Device configuration
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#     # Initialize the model with hyperparameters suggested by Optuna
#     model = CustomNet(trial).to(device)

#     # Load and prepare data (assuming X and y are already loaded)
#     # Splitting, converting to TensorDataset, and DataLoader setup would go here

#     # Define the optimizer and criterion
#     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
#     criterion = nn.BCEWithLogitsLoss()

#     def train_epoch(model, dataloader, optimizer, criterion):
#         model.train()
#         running_loss = 0.0
#         running_accuracy = 0.0
#         for inputs, labels in dataloader:
#             inputs, labels = inputs.to(device), labels.to(device)
#             optimizer.zero_grad()
#             outputs = model(inputs)
#             loss = criterion(outputs.squeeze(), labels)
#             loss.backward()
#             optimizer.step()
            
#             running_loss += loss.item() * inputs.size(0)
#             running_accuracy += calculate_accuracy(outputs, labels) * inputs.size(0)
            
#         epoch_loss = running_loss / len(dataloader.dataset)
#         epoch_accuracy = running_accuracy / len(dataloader.dataset)
#         return epoch_loss, epoch_accuracy

#     def validate_epoch(model, dataloader, criterion):
#         model.eval()
#         running_loss = 0.0
#         running_accuracy = 0.0
#         with torch.no_grad():
#             for inputs, labels in dataloader:
#                 inputs, labels = inputs.to(device), labels.to(device)
#                 outputs = model(inputs)
#                 loss = criterion(outputs.squeeze(), labels)
                
#                 running_loss += loss.item() * inputs.size(0)
#                 running_accuracy += calculate_accuracy(outputs, labels) * inputs.size(0)
                
#         epoch_loss = running_loss / len(dataloader.dataset)
#         epoch_accuracy = running_accuracy / len(dataloader.dataset)
#         return epoch_loss, epoch_accuracy

#     def evaluate_holdout(model, dataloader, criterion):
#         model.eval()  # Set model to evaluation mode
#         running_loss = 0.0
#         running_accuracy = 0.0
#         with torch.no_grad():  # No gradients needed
#             for inputs, labels in dataloader:
#                 inputs, labels = inputs.to(device), labels.to(device)
#                 outputs = model(inputs)
#                 loss = criterion(outputs.squeeze(), labels)
                
#                 running_loss += loss.item() * inputs.size(0)
#                 running_accuracy += calculate_accuracy(outputs, labels) * inputs.size(0)
                
#         epoch_loss = running_loss / len(dataloader.dataset)
#         epoch_accuracy = running_accuracy / len(dataloader.dataset)
#         return epoch_loss, epoch_accuracy
                
#     # Early stopping parameters
#     patience = 10  # Number of epochs to wait for improvement before stopping
#     min_delta = 0.001  # Minimum change to qualify as an improvement
#     best_val_loss = float('inf')
#     epochs_no_improve = 0

#     # Training loop with early stopping
#     epochs = 250
#     for epoch in range(epochs):
#         start_time = time.time()
        
#         train_loss, train_accuracy = train_epoch(model, train_loader, optimizer, criterion)
#         val_loss, val_accuracy = validate_epoch(model, val_loader, criterion)
        
#         end_time = time.time()
#         epoch_duration = end_time - start_time

#         # Early Stopping check
#         if (val_loss + min_delta) < best_val_loss:
#             best_val_loss = val_loss
#             epochs_no_improve = 0
#         else:
#             epochs_no_improve += 1

#         # Print progress every 5 epochs
#         if (epoch + 1) % 5 == 0:
#             tqdm.write(f'Epoch {epoch+1}/{epochs} - Duration: {epoch_duration:.2f}s - Training Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f} - Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}')

#         # Check early stopping condition
#         if epochs_no_improve >= patience:
#             tqdm.write(f'Early stopping triggered at epoch {epoch + 1}')
#             # holdout_loss, holdout_accuracy = evaluate_holdout(model, holdout_loader, criterion)
#             # print(f'Holdout Loss: {holdout_loss:.4f}, Accuracy: {holdout_accuracy:.4f}')
#             break

#     # Evaluate model on holdout set after training is complete (if necessary)
#     holdout_loss, holdout_accuracy = evaluate_holdout(model, holdout_loader, criterion)
#     print(f'Holdout Loss: {holdout_loss:.4f}, Accuracy: {holdout_accuracy:.4f}')
    
#     return best_val_loss


<h1> Define Callback Function </h1>
We define a callback function that Optuna calls after each trial. It checks whether the trial that just finished is the study's current best trial and, if so, prints its value and parameters.

In [None]:
def save_params_if_best(study, trial):
    """Optuna callback: report a finished trial if it is the study's best so far.

    Despite the name, nothing is written to disk; the trial number, value and
    parameters are printed. Trials that are not the current best produce no output.

    Args:
        study: The running study; only ``study.best_trial.number`` is read.
        trial: The trial that just finished; ``number``, ``value`` and
            ``params`` are read.
    """
    is_new_best = trial.number == study.best_trial.number
    if not is_new_best:
        return

    print(f"New best trial at trial {trial.number}:")
    print(f"  Value: {trial.value}")
    print("  Params: ")
    for param_name, param_value in trial.params.items():
        print(f"    {param_name}: {param_value}")
    print("\n")


<h1> Run the Optimization </h1>
We create the Optuna study in one cell and run the optimization in a separate cell, so the optimization cell can be re-run without creating a new study.

In [None]:
# Create the study. The objective returns the best validation loss, so lower is better.
# The sampler is seeded explicitly so that the sequence of suggested
# hyperparameters is repeatable; seeding torch and NumPy does not seed Optuna.
study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=0))

In [13]:

# Run the search: stop after n_trials trials or after `timeout` seconds (3 hours).
# save_params_if_best is passed as a callback and prints the parameters of a
# trial whenever it is the study's best trial.
study.optimize(objective, n_trials=1, show_progress_bar=True, timeout=3600*3, callbacks=[save_params_if_best])

# Print the overall best hyperparameters
print("Best trial overall:")
# NOTE: `trial` stays defined at notebook level and is reused by the
# parameter-count cell at the end of the notebook.
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


In [None]:
def count_parameters(model):
    """Print a table of the model's trainable parameters and return their total count.

    Args:
        model: Any object with a ``named_parameters()`` method, e.g. an ``nn.Module``.

    Returns:
        The total number of elements over all parameters with ``requires_grad=True``.
    """
    table = PrettyTable(['Modules', 'Parameters'])
    total_params = 0
    for name, parameter in model.named_parameters():
        # Frozen parameters are not trainable and are left out of the count.
        if not parameter.requires_grad:
            continue
        params = parameter.numel()
        table.add_row([name, params])
        total_params += params
    print(table)
    print(f'Total Trainable Params: {total_params}')
    return total_params

# Rebuild the network from `trial` (assigned from study.best_trial in the
# optimization cell) and print its per-parameter table and trainable total.
count_parameters(CustomNet(trial))
