<h1> Import Libraries </h1>
We import all the necessary libraries, including Optuna, PyTorch, and other utilities.

In [1]:
# !conda install anaconda::mysql-python  

In [2]:
import optuna
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import time
from tqdm import tqdm
from prettytable import PrettyTable
import gc


import mysql.connector

# Seed the global NumPy and PyTorch random generators with one shared value.
# NOTE(review): the Optuna study further down is created without an explicit
# sampler, so the hyperparameter suggestions themselves are not covered by this seed.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

<h1> Read Data </h1>
We read the data saved in `data_processing.ipynb`.

In [3]:
# Load the stick inputs (cartesian coordinates).
X = np.load('../../data/training_inputs_cart_numpy_binary_1024.npy')
# Alternative inputs that were tried (swap in as needed):
# X = np.load('../data/training_inputs_polar_numpy_binary.npy')  # polar coordinates
# X = np.load('../data/training_inputs_cart_numpy_binary.npy')   # cartesian coordinates

# Load the labels that pair with X (one label per sample).
y = np.load('../../data/labes_is_sheik_numpy_binary_1024.npy')
# y = np.load('../data/labes_is_sheik_numpy_binary.npy')

# Sanity check: a single NaN/inf sample turns the loss of whichever split it
# lands in into NaN. Summing each sample in float64 detects non-finite values
# without materialising a boolean array as large as X.
# NOTE(review): added because recorded runs print "Holdout Loss: nan" while the
# validation loss stays finite -- presumably a non-finite holdout sample; confirm.
non_finite_rows = ~np.isfinite(X.sum(axis=tuple(range(1, X.ndim)), dtype=np.float64))
if non_finite_rows.any():
    print(f"WARNING: {int(non_finite_rows.sum())} sample(s) in X contain NaN/inf, "
          f"first indices: {np.flatnonzero(non_finite_rows)[:10].tolist()}")
if not np.isfinite(np.sum(y, dtype=np.float64)):
    print("WARNING: y contains NaN/inf values")

# Print shapes to make sure we have what we want.
print(X.shape)
print(y.shape)

(42768, 9, 1024)
(42768,)


<h1> Data Splitting </h1>

In [4]:
# Step 1: set aside a stratified 20% holdout set; the rest is used for training + validation.
X_train_val, X_holdout, y_train_val, y_holdout = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Step 2: split the remaining 80% into train and validation.
# 0.25 of 0.8 is 0.2 of the full data, giving a 60/20/20 split overall.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.25,
    stratify=y_train_val,
    random_state=42,
)

# Show the size of each split: holdout, train, validation.
for split_labels in (y_holdout, y_train, y_val):
    print(split_labels.shape)

(8554,)
(25660,)
(8554,)


<h1> Data Loader </h1>

In [5]:
def _as_float_dataset(features, labels):
    """Wrap a (features, labels) array pair as a float32 TensorDataset."""
    return TensorDataset(
        torch.tensor(features, dtype=torch.float32),
        torch.tensor(labels, dtype=torch.float32),
    )


# One dataset object per split.
train_dataset = _as_float_dataset(X_train, y_train)
val_dataset = _as_float_dataset(X_val, y_val)
holdout_dataset = _as_float_dataset(X_holdout, y_holdout)

# Loader settings
num_batches = 32 # Can be tuned
num_workers = 1 # Can be tuned

# NOTE(review): the batch size is derived from the size of the *full* data set X,
# not from the training split, so one pass over train_dataset yields fewer than
# num_batches batches.
batch_size = X.shape[0] // num_batches  # Can be tuned
print(batch_size)
# batch_size = 64  # Can be tuned

# Only the training loader shuffles; validation and holdout keep a fixed order.
shared_loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)
train_loader = DataLoader(train_dataset, shuffle=True, **shared_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **shared_loader_kwargs)
holdout_loader = DataLoader(holdout_dataset, shuffle=False, **shared_loader_kwargs)

1336


In [6]:
def calculate_accuracy(outputs, labels):
    """Return the fraction of correct binary predictions in a batch.

    Args:
        outputs: raw logits holding one value per sample, e.g. shape ``(N, 1)``
            or ``(N,)``.
        labels: 0/1 targets with the same number of elements as ``outputs``,
            e.g. shape ``(N,)`` or ``(N, 1)``.

    Returns:
        A 0-dim float tensor: (number of correct predictions) / (number of labels).
    """
    # Flatten both sides so the comparison is strictly element-wise. Squeezing
    # only the predictions would silently broadcast an (N,) tensor against
    # (N, 1) labels into an (N, N) matrix and report a wrong accuracy.
    flat_labels = labels.reshape(-1)
    # A sample is predicted positive when sigmoid(logit) >= 0.5.
    preds = (torch.sigmoid(outputs) >= 0.5).reshape(-1)
    correct = (preds.long() == flat_labels.long()).float().sum()
    return correct / flat_labels.shape[0]

# class myLoss(torch.nn.Module):

#     def __init__(self, pos_weight=1):
#       super().__init__()
#       self.pos_weight = pos_weight

#     def forward(self, input, target):
#       epsilon = 10 ** -44
#       input = input.sigmoid().clamp(epsilon, 1 - epsilon)

#       my_bce_loss = -1 * (self.pos_weight * target * torch.log(input)
#                           + (1 - target) * torch.log(1 - input))
#       add_loss = (target - 0.5) ** 2 * 4
#       mean_loss = (my_bce_loss * add_loss).mean()
#       return mean_loss

<h1> Define the Customizable Network </h1>
We define the neural network architecture. We'll use Optuna to suggest hyperparameters for convolutional layers, optional ReLU activation, max pooling layers, and linear layers.

In [7]:
class CustomNet(nn.Module):
    """1-D convolutional binary classifier whose architecture is sampled from an Optuna trial.

    The network is a stack of 3-5 ``Conv1d`` layers (each optionally followed by
    batch normalisation and ReLU), one max-pooling layer, and three fully
    connected layers that end in a single logit (no sigmoid is applied here).

    Args:
        trial: object exposing Optuna's ``suggest_int`` / ``suggest_categorical``
            interface; every architectural choice is drawn from it.

    The forward input must have 9 channels and length 2 ** 10 = 1024, i.e. shape
    ``(batch, 9, 1024)``: the first convolution has 9 input channels and the
    size of the first linear layer is computed from that starting length.
    """

    def __init__(self, trial):
        super().__init__()

        # Fixed dropout rate (not tuned by Optuna)
        dropout_rate = 0.5

        # Per-layer building blocks. Index i of every container belongs to conv
        # layer i; a ``None`` entry means "skip this step for that layer".
        self.conv_layers = nn.ModuleList()
        self.bns = nn.ModuleList()
        # Held in a ModuleList (like ``bns``) so pooling modules are registered
        # with the network instead of living in an untracked Python list.
        self.poolings = nn.ModuleList()
        self.activations = []  # plain bools: apply ReLU after conv layer i?
        self.dropouts = nn.ModuleList()  # not used by forward(); attribute kept as-is

        num_layers = trial.suggest_int("num_conv_layers", 3, 5)
        in_channels = 9  # Fixed input channel size

        ######################################################################################################
        # Length bookkeeping
        # - The input length is 2 ** 10.
        # - Each conv layer shrinks the length by a factor 2 ** out_length_reduction_exponent
        #   (exponent 0, 1 or 2); the padding is chosen so the output length stays a power of 2.
        # - The effective length of a kernel is dilation * (kernel_size - 1) + 1. With
        #   kernel_size up to 11 and dilation up to 4 this is at most 41.
        # - The total reduction over all conv layers is capped at 2 ** 5, so a conv layer
        #   never sees an input shorter than 2 ** 5 = 32; together with the symmetric
        #   padding the padded input is always longer than the effective kernel.
        # - reduction_exponent_remaining tracks how much of that budget is left.
        ######################################################################################################
        reduction_exponent_remaining = 5
        in_length_exponent = 10
        for i in range(num_layers):  # Convolutional layers
            if i == 0:
                # groups == in_channels == 9: every input channel gets its own
                # filters, so out_channels must be a multiple of 9.
                out_channels = trial.suggest_int(f"conv_{i}_out_channels", 9, 9 * 48, step=9)
                groups = 9
            elif i == -1:
                # NOTE(review): this branch can never run because range() only yields
                # i >= 0. It looks like a narrower range was once meant for the last
                # layer (i == num_layers - 1); left unchanged so the search space
                # stays identical -- confirm the intent before enabling it.
                out_channels = trial.suggest_int(f"conv_{i}_out_channels", 1, 256)
                groups = 1
            else:
                out_channels = trial.suggest_int(f"conv_{i}_out_channels", 1, 512)
                groups = 1

            # Odd kernel sizes only: kernel_size = 2k + 1 with k in 1..5 (3, 5, ..., 11).
            k = trial.suggest_int(f"conv_{i}_kernel_size_power", 1, 5)  # can safely change 5 to be anything
            kernel_size = 2 * k + 1
            dilation = trial.suggest_int(f"conv_{i}_dilation", 1, 4)
            out_length_reduction_exponent = trial.suggest_int(
                f"conv_{i}_out_length_reduction_exponent",
                0,
                min(2, reduction_exponent_remaining),
            )
            # The whole reduction is currently done by the conv stride. To split it
            # between stride and pooling, sample the stride exponent instead:
            # conv_stride_length_exponent = trial.suggest_int(f"conv_{i}_stride_length_exponent", 0, out_length_reduction_exponent)
            conv_stride_length_exponent = out_length_reduction_exponent

            # Keep track of how much reducing we still can do
            reduction_exponent_remaining -= out_length_reduction_exponent
            in_length_exponent -= out_length_reduction_exponent

            stride = 2 ** conv_stride_length_exponent

            # Conv1d output length is floor((L + 2*padding - dilation*(kernel_size-1) - 1) / stride) + 1.
            # With kernel_size - 1 == 2k, padding == dilation*k gives floor((L - 1) / stride) + 1,
            # which is exactly L / stride for the power-of-2 lengths used here. For stride 4
            # and odd dilation*k one less padding is used; the output length is still L / 4.
            # Supporting reduction exponents above 2 requires working out the floor cases again.
            if (conv_stride_length_exponent == 2) and (((dilation * k) % 2) == 1):
                padding = dilation * k - 1
            else:
                padding = dilation * k

            self.conv_layers.append(
                nn.Conv1d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride=stride,
                    padding=padding,
                    dilation=dilation,
                    groups=groups,
                )
            )
            in_channels = out_channels  # Update in_channels for the next layer

            # Pooling only exists when the stride does not cover the whole reduction,
            # which cannot happen while the stride exponent is tied to the reduction
            # exponent above.
            if conv_stride_length_exponent < out_length_reduction_exponent:
                pooling_type = trial.suggest_int(f"layer_{i}_pooling_type", 0, 1)    # 1: max, 0: avg
                pool_kernel_size_exponent = out_length_reduction_exponent - conv_stride_length_exponent
                if pooling_type == 1:
                    self.poolings.append(nn.MaxPool1d(2 ** pool_kernel_size_exponent))
                else:
                    self.poolings.append(nn.AvgPool1d(2 ** pool_kernel_size_exponent))
            else:
                self.poolings.append(None)    #   No pooling in current layer

            # Optional Batch Normalization over this layer's output channels
            use_bn = trial.suggest_categorical(f"conv_{i}_bn", [True, False])
            if use_bn:
                self.bns.append(nn.BatchNorm1d(out_channels))
            else:
                self.bns.append(None)

            # Optional ReLU activation
            use_activation = trial.suggest_categorical(f"conv_{i}_activation", [True, False])
            self.activations.append(use_activation)

        # Final max pooling layer.
        # Its kernel is a power of two, at least large enough to use up the unused
        # reduction budget and at most the whole remaining length, so the length
        # after pooling is one of 1, 2, 4, 8, 16, 32.
        kernel_exponent = trial.suggest_int("maxpool_kernel_exponent", reduction_exponent_remaining, in_length_exponent)
        pool_kernel_size = 2 ** kernel_exponent
        in_length_exponent -= kernel_exponent

        self.pool1 = nn.MaxPool1d(kernel_size=pool_kernel_size)

        # The length is now exactly 2 ** in_length_exponent, so the input size of the
        # first linear layer can be computed instead of inferred lazily.
        self.fc1 = nn.Linear(out_channels * 2 ** in_length_exponent, trial.suggest_int("fc1_out_features", 32, 256))
        # self.fc1 = nn.LazyLinear(trial.suggest_int("fc1_out_features", 64, 256))
        self.fc1_dropout = nn.Dropout(dropout_rate)  # Dropout after fc1
        self.fc2 = nn.Linear(self.fc1.out_features, trial.suggest_int("fc2_out_features", 32, 128))
        self.fc2_dropout = nn.Dropout(dropout_rate)  # Dropout after fc2
        self.fc3 = nn.Linear(self.fc2.out_features, 1)  # Output layer with 1 unit for binary classification

    def forward(self, x):
        """Map a ``(batch, 9, 1024)`` input to ``(batch, 1)`` raw logits."""
        # Per conv layer: convolution -> optional batch norm -> optional pooling -> optional ReLU
        for i, conv_layer in enumerate(self.conv_layers):
            x = conv_layer(x)
            if self.bns[i] is not None:
                x = self.bns[i](x)
            if self.poolings[i] is not None:
                x = self.poolings[i](x)
            if self.activations[i]:
                x = F.relu(x)

        # Final max pooling (always applied)
        x = self.pool1(x)

        # Flatten for fully connected layers
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc1_dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc2_dropout(x)
        x = self.fc3(x)  # Output without activation for BCEWithLogitsLoss
        return x


<h1> Define the Objective Function </h1>
We define the objective function for Optuna, which involves training and validating the model with the suggested hyperparameters to minimize the validation loss.

In [8]:
def objective(trial):
    """Optuna objective: train one sampled ``CustomNet`` and return its best validation loss.

    The model is trained with Adam and ``BCEWithLogitsLoss`` on ``train_loader``
    and evaluated on ``val_loader`` after every epoch. Training stops early when
    the validation loss has not improved by more than ``min_delta`` for
    ``patience`` consecutive epochs, or when the train/validation loss gap has
    been at least ``min_overfit`` for ``patience`` consecutive epochs.

    Args:
        trial: Optuna trial used to sample the network architecture.

    Returns:
        The lowest validation loss that counted as an improvement during training.

    Side effects:
        Shows a tqdm progress bar and prints the loss/accuracy on ``holdout_loader``.
        NOTE(review): the holdout metrics are printed for every trial; looking at
        them while tuning weakens the holdout set as an unbiased final estimate.
    """
    # Device configuration
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the model with hyperparameters suggested by Optuna
    model = CustomNet(trial).to(device)

    # Define the optimizer and criterion
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # optimizer = torch.optim.SGD(model.parameters(), lr = 0.1)
    criterion = nn.BCEWithLogitsLoss()

    def run_epoch(dataloader, training):
        """Run one pass over ``dataloader``; return (mean loss, mean accuracy).

        With ``training=True`` the model is put in train mode and the weights
        are updated; otherwise the model is in eval mode and no gradients are
        tracked.
        """
        model.train(training)
        running_loss = 0.0
        running_accuracy = 0.0
        with torch.set_grad_enabled(training):
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                if training:
                    optimizer.zero_grad()
                # Drop only the trailing size-1 dimension: (N, 1) -> (N,). A bare
                # squeeze() would also collapse a batch of one to a 0-dim tensor,
                # which no longer matches the (1,) labels expected by the loss.
                logits = model(inputs).squeeze(-1)
                loss = criterion(logits, labels)
                if training:
                    loss.backward()
                    optimizer.step()

                # Weight per-batch means by batch size so a smaller last batch
                # does not skew the epoch averages.
                batch_len = inputs.size(0)
                running_loss += loss.item() * batch_len
                running_accuracy += float(calculate_accuracy(logits, labels)) * batch_len

        num_samples = len(dataloader.dataset)
        return running_loss / num_samples, running_accuracy / num_samples

    # Early-stopping configuration
    patience = 10        # consecutive "bad" epochs tolerated before stopping
    epochs = 250         # upper bound on training epochs
    min_delta = 0.001    # minimum drop in validation loss that counts as an improvement
    min_overfit = .025   # train/val loss gap at or above which an epoch counts as overfit

    best_val_loss = float('inf')
    epochs_no_improve = 0
    epochs_overfit = 0

    # The context manager guarantees the progress bar is closed exactly once,
    # whether training stops early, runs all epochs, or raises.
    with tqdm(total=epochs, desc="Epochs", position=0, leave=True) as pbar:
        for _ in range(epochs):
            train_loss, train_accuracy = run_epoch(train_loader, training=True)
            val_loss, val_accuracy = run_epoch(val_loader, training=False)

            # Count consecutive epochs without a meaningful validation improvement
            if (val_loss + min_delta) < best_val_loss:
                best_val_loss = val_loss
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1

            # Count consecutive epochs in which train and validation loss have drifted apart
            if abs(train_loss - val_loss) < min_overfit:
                epochs_overfit = 0
            else:
                epochs_overfit += 1

            # Update progress bar
            pbar.set_postfix_str(f"Training Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}, Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")
            pbar.update(1)  # Move the progress bar by one epoch

            # Check early stopping condition
            if epochs_no_improve >= patience or epochs_overfit >= patience:
                break

    # Evaluate the final model state on the holdout set (informational only;
    # the value is not returned to Optuna).
    holdout_loss, holdout_accuracy = run_epoch(holdout_loader, training=False)
    print(f'Holdout Loss: {holdout_loss:.4f}, Accuracy: {holdout_accuracy:.4f}')

    # Drop the references to this trial's model and optimizer *before* collecting,
    # otherwise their memory is still in use and nothing can be released yet.
    del model, optimizer
    gc.collect()
    if device.type == "cuda":
        torch.cuda.empty_cache()
    return best_val_loss


<h1> Define Callback Function </h1>
We define a callback function that the Optuna study calls after each trial. It checks whether the trial that just finished is the best one so far and, if so, prints its number and value (saving the parameters to disk is not implemented yet).

In [9]:
def save_params_if_best(study, trial):
    """Optuna callback: announce when the trial that just finished is the study's best.

    Despite its name, this callback currently only prints; nothing is written
    to disk.

    Args:
        study: the running study; its ``best_trial`` is compared with ``trial``.
        trial: the trial that has just finished.

    Returns:
        None.
    """
    try:
        best_trial = study.best_trial
    except ValueError:
        # No trial has completed successfully yet (for example, the first
        # trials failed or were pruned), so there is no best trial to report.
        return

    if best_trial.number == trial.number:
        print(f"New best trial at trial {trial.number}: {trial.value}")


<h1> Run the Optimization </h1>
We create an Optuna study backed by a MySQL database, then run the optimization in a separate cell.

In [10]:
import getpass
import os
from datetime import datetime
from urllib.parse import quote_plus

import pymysql  # noqa: F401 -- fails early if the driver named in the "mysql+pymysql" URL is missing

# Timestamp the study name so every run creates a new, distinct study.
current_datetime = datetime.now()
current_datetime_string = current_datetime.strftime("%Y-%m-%d %H:%M:%S ")

# Database credentials must not live in the notebook. They are read from
# environment variables; if no password is set, the user is prompted for it.
# For unattended runs (e.g. nbconvert), set OPTUNA_DB_PASSWORD beforehand.
db_user = os.environ.get("OPTUNA_DB_USER", "root")
db_password = os.environ.get("OPTUNA_DB_PASSWORD") or getpass.getpass(f"MySQL password for {db_user}: ")
db_host = os.environ.get("OPTUNA_DB_HOST", "localhost:3306")
db_name = os.environ.get("OPTUNA_DB_NAME", "optuna_trials")

# quote_plus keeps special characters in the user name / password from breaking the URL.
storage_url = f"mysql+pymysql://{quote_plus(db_user)}:{quote_plus(db_password)}@{db_host}/{db_name}"
del db_password  # do not keep the plain password around as a notebook variable

study = optuna.create_study(study_name = current_datetime_string + "Classical CNN",
                            direction="minimize",
                            storage = storage_url)

# Alternative driver that was tried:
# storage_url = "mysql+mysqlconnector://<user>:<password>@localhost/optuna_db"
# study = optuna.create_study(study_name="your_study_name", storage=storage_url, load_if_exists=True)

# To browse the results, run in a shell (fill in your own credentials):
#   optuna-dashboard "mysql+pymysql://<user>:<password>@localhost/optuna_trials"


[I 2024-02-22 09:23:41,815] A new study created in RDB with name: 2024-02-22 09:23:41 Classical CNN


In [11]:
# Run the search, limited to 1000 trials and a 6-hour timeout; the callback
# runs after every trial.
study.optimize(
    objective,
    n_trials=1000,
    timeout=3600*6,
    show_progress_bar=True,
    callbacks=[save_params_if_best],
)

# Report the winning trial: its objective value followed by one line per hyperparameter.
print("Best trial overall:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print("  Params: ")
for param_name, param_value in trial.params.items():
    print(f"    {param_name}: {param_value}")


  0%|          | 0/1000 [00:00<?, ?it/s]

Epochs:   4%|▍         | 11/250 [00:48<17:24,  4.37s/it, Training Loss: 0.1725, Accuracy: 0.9347, Validation Loss: 0.4343, Accuracy: 0.8298]


Holdout Loss: nan, Accuracy: 0.8255
[I 2024-02-22 09:24:32,574] Trial 0 finished with value: 0.407128811781446 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 252, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 445, 'conv_1_kernel_size_power': 5, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 39, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 1, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': True, 'conv_3_out_channels': 55, 'conv_3_kernel_size_power': 1, 'conv_3_dilation': 1, 'conv_3_out_length_reduction_exponent': 0, 'conv_3_bn': True, 'conv_3_activation': False, 'maxpool_kernel_exponent': 1, 'fc1_out_features': 56, 'fc2_out_features': 102}. Best is trial 0 with value: 0.407128811781446.
New best trial at trial 0: 0.4071288117814

Epochs:  10%|█         | 25/250 [03:06<27:55,  7.45s/it, Training Loss: 0.3711, Accuracy: 0.8401, Validation Loss: 0.5230, Accuracy: 0.7497]


Holdout Loss: nan, Accuracy: 0.7510
[I 2024-02-22 09:27:41,109] Trial 1 finished with value: 0.5050050187172219 and parameters: {'num_conv_layers': 5, 'conv_0_out_channels': 108, 'conv_0_kernel_size_power': 1, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': False, 'conv_1_out_channels': 334, 'conv_1_kernel_size_power': 4, 'conv_1_dilation': 4, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': False, 'conv_2_out_channels': 292, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 2, 'conv_2_out_length_reduction_exponent': 2, 'conv_2_bn': False, 'conv_2_activation': False, 'conv_3_out_channels': 27, 'conv_3_kernel_size_power': 5, 'conv_3_dilation': 3, 'conv_3_out_length_reduction_exponent': 2, 'conv_3_bn': True, 'conv_3_activation': True, 'conv_4_out_channels': 242, 'conv_4_kernel_size_power': 3, 'conv_4_dilation': 2, 'conv_4_out_length_reduction_exponent': 0, 'conv_4_bn': False, 'conv_4_activation':

Epochs:   8%|▊         | 19/250 [01:23<16:58,  4.41s/it, Training Loss: 0.1005, Accuracy: 0.9629, Validation Loss: 0.8305, Accuracy: 0.7349]


Holdout Loss: nan, Accuracy: 0.7398
[I 2024-02-22 09:29:06,981] Trial 2 finished with value: 0.48589584187477974 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 153, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': False, 'conv_0_activation': False, 'conv_1_out_channels': 168, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 4, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': False, 'conv_1_activation': False, 'conv_2_out_channels': 466, 'conv_2_kernel_size_power': 3, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 2, 'conv_2_bn': False, 'conv_2_activation': False, 'maxpool_kernel_exponent': 4, 'fc1_out_features': 212, 'fc2_out_features': 50}. Best is trial 0 with value: 0.407128811781446.


Epochs:   8%|▊         | 19/250 [01:50<22:27,  5.83s/it, Training Loss: 0.1767, Accuracy: 0.9306, Validation Loss: 0.4337, Accuracy: 0.8375]


Holdout Loss: nan, Accuracy: 0.8325
[I 2024-02-22 09:30:59,960] Trial 3 finished with value: 0.3943303496809186 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 135, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': True, 'conv_0_activation': False, 'conv_1_out_channels': 245, 'conv_1_kernel_size_power': 5, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': True, 'conv_1_activation': True, 'conv_2_out_channels': 297, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 220, 'fc2_out_features': 102}. Best is trial 3 with value: 0.3943303496809186.
New best trial at trial 3: 0.3943303496809186


Epochs:   5%|▌         | 13/250 [00:56<17:11,  4.35s/it, Training Loss: 0.1743, Accuracy: 0.9407, Validation Loss: 0.3990, Accuracy: 0.8500]


Holdout Loss: nan, Accuracy: 0.8479
[I 2024-02-22 09:31:58,751] Trial 4 finished with value: 0.3637929971889271 and parameters: {'num_conv_layers': 5, 'conv_0_out_channels': 72, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 112, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 113, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 2, 'conv_2_out_length_reduction_exponent': 2, 'conv_2_bn': True, 'conv_2_activation': True, 'conv_3_out_channels': 81, 'conv_3_kernel_size_power': 1, 'conv_3_dilation': 2, 'conv_3_out_length_reduction_exponent': 1, 'conv_3_bn': True, 'conv_3_activation': True, 'conv_4_out_channels': 277, 'conv_4_kernel_size_power': 5, 'conv_4_dilation': 1, 'conv_4_out_length_reduction_exponent': 0, 'conv_4_bn': True, 'conv_4_activation': False,

Epochs:   9%|▉         | 23/250 [03:43<36:43,  9.71s/it, Training Loss: 0.3212, Accuracy: 0.8647, Validation Loss: 0.3817, Accuracy: 0.8389]


Holdout Loss: nan, Accuracy: 0.8425
[I 2024-02-22 09:35:44,502] Trial 5 finished with value: 0.356860128252033 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 270, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': False, 'conv_1_out_channels': 300, 'conv_1_kernel_size_power': 5, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': True, 'conv_1_activation': True, 'conv_2_out_channels': 309, 'conv_2_kernel_size_power': 3, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': False, 'conv_2_activation': True, 'maxpool_kernel_exponent': 6, 'fc1_out_features': 165, 'fc2_out_features': 126}. Best is trial 5 with value: 0.356860128252033.
New best trial at trial 5: 0.356860128252033


Epochs:  10%|▉         | 24/250 [01:45<16:36,  4.41s/it, Training Loss: 0.2682, Accuracy: 0.8656, Validation Loss: 0.4359, Accuracy: 0.8296]


Holdout Loss: nan, Accuracy: 0.8325
[I 2024-02-22 09:37:32,438] Trial 6 finished with value: 0.3763441503159432 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 315, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 228, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 423, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 2, 'conv_2_bn': False, 'conv_2_activation': False, 'maxpool_kernel_exponent': 3, 'fc1_out_features': 79, 'fc2_out_features': 52}. Best is trial 5 with value: 0.356860128252033.


Epochs:   5%|▍         | 12/250 [00:53<17:33,  4.43s/it, Training Loss: 0.1218, Accuracy: 0.9547, Validation Loss: 0.7781, Accuracy: 0.7503]


Holdout Loss: nan, Accuracy: 0.7485
[I 2024-02-22 09:38:27,674] Trial 7 finished with value: 0.5220643764264622 and parameters: {'num_conv_layers': 5, 'conv_0_out_channels': 297, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': True, 'conv_0_activation': False, 'conv_1_out_channels': 503, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': False, 'conv_1_activation': False, 'conv_2_out_channels': 126, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 1, 'conv_2_out_length_reduction_exponent': 2, 'conv_2_bn': True, 'conv_2_activation': True, 'conv_3_out_channels': 291, 'conv_3_kernel_size_power': 5, 'conv_3_dilation': 4, 'conv_3_out_length_reduction_exponent': 0, 'conv_3_bn': True, 'conv_3_activation': True, 'conv_4_out_channels': 62, 'conv_4_kernel_size_power': 1, 'conv_4_dilation': 4, 'conv_4_out_length_reduction_exponent': 0, 'conv_4_bn': False, 'conv_4_activation': Fa

Epochs:  10%|█         | 26/250 [01:43<14:54,  3.99s/it, Training Loss: 0.5172, Accuracy: 0.7440, Validation Loss: 0.5616, Accuracy: 0.7097]


Holdout Loss: nan, Accuracy: 0.7119
[I 2024-02-22 09:40:13,557] Trial 8 finished with value: 0.5615853755195926 and parameters: {'num_conv_layers': 5, 'conv_0_out_channels': 108, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 4, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': True, 'conv_0_activation': False, 'conv_1_out_channels': 8, 'conv_1_kernel_size_power': 5, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': True, 'conv_1_activation': True, 'conv_2_out_channels': 134, 'conv_2_kernel_size_power': 4, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'conv_3_out_channels': 2, 'conv_3_kernel_size_power': 1, 'conv_3_dilation': 2, 'conv_3_out_length_reduction_exponent': 1, 'conv_3_bn': True, 'conv_3_activation': True, 'conv_4_out_channels': 412, 'conv_4_kernel_size_power': 3, 'conv_4_dilation': 2, 'conv_4_out_length_reduction_exponent': 0, 'conv_4_bn': True, 'conv_4_activation': True, '

Epochs:   6%|▋         | 16/250 [01:10<17:14,  4.42s/it, Training Loss: 0.1985, Accuracy: 0.9190, Validation Loss: 0.4569, Accuracy: 0.8387]


Holdout Loss: nan, Accuracy: 0.8336
[I 2024-02-22 09:41:26,361] Trial 9 finished with value: 0.3462601960025883 and parameters: {'num_conv_layers': 5, 'conv_0_out_channels': 279, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 206, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 499, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 1, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 55, 'conv_3_kernel_size_power': 1, 'conv_3_dilation': 2, 'conv_3_out_length_reduction_exponent': 0, 'conv_3_bn': False, 'conv_3_activation': True, 'conv_4_out_channels': 88, 'conv_4_kernel_size_power': 3, 'conv_4_dilation': 2, 'conv_4_out_length_reduction_exponent': 0, 'conv_4_bn': False, 'conv_4_activation': Fal

Epochs:  14%|█▍        | 36/250 [02:45<16:25,  4.61s/it, Training Loss: 0.1865, Accuracy: 0.9245, Validation Loss: 0.4846, Accuracy: 0.8237]


Holdout Loss: nan, Accuracy: 0.8222
[I 2024-02-22 09:44:14,224] Trial 10 finished with value: 0.2935780449910841 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 432, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 47, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 499, 'conv_2_kernel_size_power': 2, 'conv_2_dilation': 1, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 503, 'conv_3_kernel_size_power': 3, 'conv_3_dilation': 1, 'conv_3_out_length_reduction_exponent': 0, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 1, 'fc1_out_features': 233, 'fc2_out_features': 34}. Best is trial 10 with value: 0.2935780449910841.
New best trial at trial 10: 0.293578

Epochs:  10%|▉         | 24/250 [01:48<17:02,  4.52s/it, Training Loss: 0.6739, Accuracy: 0.5838, Validation Loss: 0.6952, Accuracy: 0.4797]


Holdout Loss: nan, Accuracy: 0.4820
[I 2024-02-22 09:46:04,981] Trial 11 finished with value: 0.657058093587957 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 432, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 9, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 507, 'conv_2_kernel_size_power': 2, 'conv_2_dilation': 1, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 503, 'conv_3_kernel_size_power': 3, 'conv_3_dilation': 1, 'conv_3_out_length_reduction_exponent': 0, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 1, 'fc1_out_features': 251, 'fc2_out_features': 33}. Best is trial 10 with value: 0.2935780449910841.


Epochs:  12%|█▏        | 30/250 [02:16<16:43,  4.56s/it, Training Loss: 0.1362, Accuracy: 0.9487, Validation Loss: 0.4080, Accuracy: 0.8596]


Holdout Loss: nan, Accuracy: 0.8522
[I 2024-02-22 09:48:23,962] Trial 12 finished with value: 0.3137822960104261 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 405, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 105, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 385, 'conv_2_kernel_size_power': 2, 'conv_2_dilation': 2, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 510, 'conv_3_kernel_size_power': 3, 'conv_3_dilation': 1, 'conv_3_out_length_reduction_exponent': 0, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 2, 'fc1_out_features': 256, 'fc2_out_features': 36}. Best is trial 10 with value: 0.2935780449910841.


Epochs:  11%|█         | 28/250 [02:04<16:29,  4.46s/it, Training Loss: 0.1689, Accuracy: 0.9337, Validation Loss: 0.3888, Accuracy: 0.8673]


Holdout Loss: nan, Accuracy: 0.8609
[I 2024-02-22 09:50:30,928] Trial 13 finished with value: 0.33586381057001646 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 432, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 91, 'conv_1_kernel_size_power': 4, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 391, 'conv_2_kernel_size_power': 2, 'conv_2_dilation': 2, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 510, 'conv_3_kernel_size_power': 3, 'conv_3_dilation': 1, 'conv_3_out_length_reduction_exponent': 0, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 2, 'fc1_out_features': 248, 'fc2_out_features': 33}. Best is trial 10 with value: 0.2935780449910841.


Epochs:   7%|▋         | 18/250 [01:21<17:28,  4.52s/it, Training Loss: 0.6932, Accuracy: 0.5027, Validation Loss: 0.6932, Accuracy: 0.5000]


Holdout Loss: nan, Accuracy: 0.5001
[I 2024-02-22 09:51:54,447] Trial 14 finished with value: 0.6788758474977211 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 369, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 84, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 370, 'conv_2_kernel_size_power': 2, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 376, 'conv_3_kernel_size_power': 4, 'conv_3_dilation': 1, 'conv_3_out_length_reduction_exponent': 1, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 1, 'fc1_out_features': 194, 'fc2_out_features': 41}. Best is trial 10 with value: 0.2935780449910841.


Epochs:  10%|▉         | 24/250 [01:46<16:39,  4.42s/it, Training Loss: 0.0419, Accuracy: 0.9855, Validation Loss: 0.4194, Accuracy: 0.8681]


Holdout Loss: nan, Accuracy: 0.8658
[I 2024-02-22 09:53:42,707] Trial 15 finished with value: 0.2882488577450049 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 378, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 4, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 141, 'conv_1_kernel_size_power': 4, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 215, 'conv_2_kernel_size_power': 2, 'conv_2_dilation': 2, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 407, 'conv_3_kernel_size_power': 3, 'conv_3_dilation': 3, 'conv_3_out_length_reduction_exponent': 0, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 3, 'fc1_out_features': 254, 'fc2_out_features': 78}. Best is trial 15 with value: 0.2882488577450049.
New best trial at trial 15: 0.28824

Epochs:   8%|▊         | 20/250 [01:43<19:46,  5.16s/it, Training Loss: 0.1580, Accuracy: 0.9387, Validation Loss: 0.3550, Accuracy: 0.8684]


Holdout Loss: nan, Accuracy: 0.8652
[I 2024-02-22 09:55:28,037] Trial 16 finished with value: 0.3037827208286305 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 360, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 4, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 149, 'conv_1_kernel_size_power': 4, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 209, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 385, 'conv_3_kernel_size_power': 2, 'conv_3_dilation': 3, 'conv_3_out_length_reduction_exponent': 2, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 3, 'fc1_out_features': 225, 'fc2_out_features': 81}. Best is trial 15 with value: 0.2882488577450049.


Epochs:   4%|▍         | 10/250 [00:41<16:36,  4.15s/it, Training Loss: 0.3105, Accuracy: 0.8660, Validation Loss: 0.3851, Accuracy: 0.8366]


Holdout Loss: nan, Accuracy: 0.8405
[I 2024-02-22 09:56:11,692] Trial 17 finished with value: 0.38510771468194643 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 207, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 66, 'conv_1_kernel_size_power': 4, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 211, 'conv_2_kernel_size_power': 3, 'conv_2_dilation': 1, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 411, 'conv_3_kernel_size_power': 4, 'conv_3_dilation': 4, 'conv_3_out_length_reduction_exponent': 0, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 4, 'fc1_out_features': 186, 'fc2_out_features': 92}. Best is trial 15 with value: 0.2882488577450049.


Epochs:   7%|▋         | 18/250 [01:09<14:58,  3.87s/it, Training Loss: 0.0488, Accuracy: 0.9843, Validation Loss: 1.3070, Accuracy: 0.6888]


Holdout Loss: nan, Accuracy: 0.6936
[I 2024-02-22 09:57:23,429] Trial 18 finished with value: 0.5309153775592427 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 9, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 4, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 42, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 211, 'conv_2_kernel_size_power': 3, 'conv_2_dilation': 2, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 168, 'conv_3_kernel_size_power': 2, 'conv_3_dilation': 3, 'conv_3_out_length_reduction_exponent': 0, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 3, 'fc1_out_features': 133, 'fc2_out_features': 71}. Best is trial 15 with value: 0.2882488577450049.


Epochs:   8%|▊         | 19/250 [01:21<16:36,  4.31s/it, Training Loss: 0.2167, Accuracy: 0.9124, Validation Loss: 0.6042, Accuracy: 0.7399]


Holdout Loss: nan, Accuracy: 0.7440
[I 2024-02-22 09:58:47,473] Trial 19 finished with value: 0.3364370347030926 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 342, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 188, 'conv_1_kernel_size_power': 4, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 38, 'conv_2_kernel_size_power': 4, 'conv_2_dilation': 1, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'maxpool_kernel_exponent': 3, 'fc1_out_features': 231, 'fc2_out_features': 93}. Best is trial 15 with value: 0.2882488577450049.


Epochs:   9%|▉         | 22/250 [01:38<17:03,  4.49s/it, Training Loss: 0.1078, Accuracy: 0.9596, Validation Loss: 0.5136, Accuracy: 0.8341]


Holdout Loss: nan, Accuracy: 0.8301
[I 2024-02-22 10:00:28,328] Trial 20 finished with value: 0.31632866323872205 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 387, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 4, 'conv_0_out_length_reduction_exponent': 2, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 299, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 2, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 246, 'conv_2_kernel_size_power': 2, 'conv_2_dilation': 2, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 304, 'conv_3_kernel_size_power': 4, 'conv_3_dilation': 3, 'conv_3_out_length_reduction_exponent': 0, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 1, 'fc1_out_features': 186, 'fc2_out_features': 59}. Best is trial 15 with value: 0.2882488577450049.


Epochs:   8%|▊         | 20/250 [01:40<19:15,  5.02s/it, Training Loss: 0.1368, Accuracy: 0.9470, Validation Loss: 0.4213, Accuracy: 0.8502]


Holdout Loss: nan, Accuracy: 0.8504
[I 2024-02-22 10:02:10,940] Trial 21 finished with value: 0.3188249370553991 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 342, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 4, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 152, 'conv_1_kernel_size_power': 4, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 170, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 411, 'conv_3_kernel_size_power': 2, 'conv_3_dilation': 3, 'conv_3_out_length_reduction_exponent': 2, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 3, 'fc1_out_features': 233, 'fc2_out_features': 78}. Best is trial 15 with value: 0.2882488577450049.


Epochs:   9%|▉         | 22/250 [01:53<19:40,  5.18s/it, Training Loss: 0.1895, Accuracy: 0.9224, Validation Loss: 0.4747, Accuracy: 0.8292]


Holdout Loss: nan, Accuracy: 0.8263
[I 2024-02-22 10:04:06,956] Trial 22 finished with value: 0.31845579030693816 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 360, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 4, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 139, 'conv_1_kernel_size_power': 4, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 248, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 450, 'conv_3_kernel_size_power': 2, 'conv_3_dilation': 4, 'conv_3_out_length_reduction_exponent': 2, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 4, 'fc1_out_features': 231, 'fc2_out_features': 78}. Best is trial 15 with value: 0.2882488577450049.


Epochs:  13%|█▎        | 33/250 [02:47<18:20,  5.07s/it, Training Loss: 0.1222, Accuracy: 0.9514, Validation Loss: 0.4116, Accuracy: 0.8750]


Holdout Loss: nan, Accuracy: 0.8747
[I 2024-02-22 10:06:56,447] Trial 23 finished with value: 0.26313375396123134 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 396, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 45, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 1, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 333, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 0, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 348, 'conv_3_kernel_size_power': 2, 'conv_3_dilation': 3, 'conv_3_out_length_reduction_exponent': 1, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 3, 'fc1_out_features': 207, 'fc2_out_features': 89}. Best is trial 23 with value: 0.26313375396123134.
New best trial at trial 23: 0.2631

Epochs:   9%|▉         | 22/250 [03:09<32:46,  8.63s/it, Training Loss: 0.1161, Accuracy: 0.9592, Validation Loss: 0.3038, Accuracy: 0.8916]


Holdout Loss: nan, Accuracy: 0.8971
[I 2024-02-22 10:10:08,723] Trial 24 finished with value: 0.2666603561341832 and parameters: {'num_conv_layers': 4, 'conv_0_out_channels': 414, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 45, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 344, 'conv_2_kernel_size_power': 2, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 340, 'conv_3_kernel_size_power': 3, 'conv_3_dilation': 2, 'conv_3_out_length_reduction_exponent': 1, 'conv_3_bn': False, 'conv_3_activation': False, 'maxpool_kernel_exponent': 7, 'fc1_out_features': 208, 'fc2_out_features': 92}. Best is trial 23 with value: 0.26313375396123134.


Epochs:   8%|▊         | 19/250 [02:32<30:59,  8.05s/it, Training Loss: 0.0989, Accuracy: 0.9647, Validation Loss: 0.5138, Accuracy: 0.8443]


Holdout Loss: nan, Accuracy: 0.8529
[I 2024-02-22 10:12:44,192] Trial 25 finished with value: 0.3093324301675962 and parameters: {'num_conv_layers': 5, 'conv_0_out_channels': 396, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': True, 'conv_0_activation': True, 'conv_1_out_channels': 38, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': True, 'conv_1_activation': False, 'conv_2_out_channels': 337, 'conv_2_kernel_size_power': 2, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': False, 'conv_2_activation': True, 'conv_3_out_channels': 337, 'conv_3_kernel_size_power': 2, 'conv_3_dilation': 2, 'conv_3_out_length_reduction_exponent': 1, 'conv_3_bn': False, 'conv_3_activation': False, 'conv_4_out_channels': 491, 'conv_4_kernel_size_power': 5, 'conv_4_dilation': 4, 'conv_4_out_length_reduction_exponent': 2, 'conv_4_bn': True, 'conv_4_activation': T

Epochs:  13%|█▎        | 32/250 [03:01<20:34,  5.66s/it, Training Loss: 0.1230, Accuracy: 0.9648, Validation Loss: 0.2278, Accuracy: 0.9207]


Holdout Loss: nan, Accuracy: 0.9188
[I 2024-02-22 10:15:47,507] Trial 26 finished with value: 0.19172867900903798 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 216, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 117, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 338, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 179, 'fc2_out_features': 104}. Best is trial 26 with value: 0.19172867900903798.
New best trial at trial 26: 0.19172867900903798


Epochs:  14%|█▍        | 36/250 [02:50<16:51,  4.73s/it, Training Loss: 0.3837, Accuracy: 0.8354, Validation Loss: 0.2924, Accuracy: 0.9058]


Holdout Loss: nan, Accuracy: 0.9046
[I 2024-02-22 10:18:39,807] Trial 27 finished with value: 0.2516142749984269 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 216, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 11, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 341, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 179, 'fc2_out_features': 109}. Best is trial 26 with value: 0.19172867900903798.


Epochs:  10%|█         | 26/250 [02:06<18:10,  4.87s/it, Training Loss: 0.2932, Accuracy: 0.8756, Validation Loss: 0.2117, Accuracy: 0.9207]


Holdout Loss: nan, Accuracy: 0.9221
[I 2024-02-22 10:20:48,474] Trial 28 finished with value: 0.21172690082159393 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 207, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 9, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 437, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 173, 'fc2_out_features': 112}. Best is trial 26 with value: 0.19172867900903798.


Epochs:  27%|██▋       | 67/250 [05:19<14:31,  4.76s/it, Training Loss: 0.2557, Accuracy: 0.9177, Validation Loss: 0.2624, Accuracy: 0.9255]


Holdout Loss: nan, Accuracy: 0.9240
[I 2024-02-22 10:26:09,798] Trial 29 finished with value: 0.25068028888915334 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 216, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 1, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 438, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 143, 'fc2_out_features': 110}. Best is trial 26 with value: 0.19172867900903798.


Epochs:  10%|█         | 25/250 [03:09<28:23,  7.57s/it, Training Loss: 0.3404, Accuracy: 0.8619, Validation Loss: 0.3049, Accuracy: 0.9031]


Holdout Loss: nan, Accuracy: 0.9076
[I 2024-02-22 10:29:21,456] Trial 30 finished with value: 0.304863548220277 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 189, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 389, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 4, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 432, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 32, 'fc2_out_features': 128}. Best is trial 26 with value: 0.19172867900903798.


Epochs:  13%|█▎        | 33/250 [02:38<17:22,  4.80s/it, Training Loss: 0.3614, Accuracy: 0.8397, Validation Loss: 0.4592, Accuracy: 0.7269]


Holdout Loss: nan, Accuracy: 0.7255
[I 2024-02-22 10:32:02,056] Trial 31 finished with value: 0.30428863300573666 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 234, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 1, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 425, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 139, 'fc2_out_features': 109}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   5%|▌         | 13/250 [01:10<21:25,  5.42s/it, Training Loss: 0.4004, Accuracy: 0.8420, Validation Loss: 0.3215, Accuracy: 0.8893]


Holdout Loss: nan, Accuracy: 0.8920
[I 2024-02-22 10:33:14,718] Trial 32 finished with value: 0.3215143631935566 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 180, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 81, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 455, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 173, 'fc2_out_features': 118}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   7%|▋         | 17/250 [01:26<19:45,  5.09s/it, Training Loss: 0.5092, Accuracy: 0.7569, Validation Loss: 0.4300, Accuracy: 0.8339]


Holdout Loss: nan, Accuracy: 0.8410
[I 2024-02-22 10:34:43,340] Trial 33 finished with value: 0.4300211688082492 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 243, 'conv_0_kernel_size_power': 1, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 20, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 399, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 122, 'fc2_out_features': 102}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   8%|▊         | 19/250 [01:36<19:33,  5.08s/it, Training Loss: 0.2179, Accuracy: 0.9150, Validation Loss: 0.4191, Accuracy: 0.8229]


Holdout Loss: nan, Accuracy: 0.8234
[I 2024-02-22 10:36:21,972] Trial 34 finished with value: 0.40952160141304716 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 153, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 3, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': False, 'conv_1_out_channels': 119, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 4, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 356, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 153, 'fc2_out_features': 107}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   7%|▋         | 18/250 [01:38<21:11,  5.48s/it, Training Loss: 0.3015, Accuracy: 0.8767, Validation Loss: 0.2413, Accuracy: 0.9095]


Holdout Loss: nan, Accuracy: 0.9072
[I 2024-02-22 10:38:02,767] Trial 35 finished with value: 0.24134641014751998 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 216, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 70, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 469, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 179, 'fc2_out_features': 119}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   5%|▍         | 12/250 [00:55<18:29,  4.66s/it, Training Loss: 0.6932, Accuracy: 0.4995, Validation Loss: 0.6931, Accuracy: 0.5000]


Holdout Loss: nan, Accuracy: 0.5000
[I 2024-02-22 10:39:00,797] Trial 36 finished with value: 0.6915599882728116 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 171, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': False, 'conv_1_out_channels': 62, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 4, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 463, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 2, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 7, 'fc1_out_features': 105, 'fc2_out_features': 120}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   8%|▊         | 21/250 [02:49<30:46,  8.06s/it, Training Loss: 0.1219, Accuracy: 0.9536, Validation Loss: 0.3273, Accuracy: 0.8964]


Holdout Loss: nan, Accuracy: 0.9003
[I 2024-02-22 10:41:52,567] Trial 37 finished with value: 0.2106134652819677 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 261, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 385, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 471, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 150, 'fc2_out_features': 122}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   8%|▊         | 20/250 [02:00<23:06,  6.03s/it, Training Loss: 0.2340, Accuracy: 0.9090, Validation Loss: 0.9629, Accuracy: 0.7017]


Holdout Loss: nan, Accuracy: 0.7090
[I 2024-02-22 10:43:55,389] Trial 38 finished with value: 0.4188006130548624 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 126, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': False, 'conv_1_out_channels': 368, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 479, 'conv_2_kernel_size_power': 1, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 2, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 7, 'fc1_out_features': 157, 'fc2_out_features': 122}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   9%|▉         | 22/250 [03:24<35:17,  9.29s/it, Training Loss: 0.1473, Accuracy: 0.9516, Validation Loss: 0.2291, Accuracy: 0.9115]


Holdout Loss: nan, Accuracy: 0.9145
[I 2024-02-22 10:47:22,285] Trial 39 finished with value: 0.22912995568468997 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 261, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 438, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 273, 'conv_2_kernel_size_power': 4, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 123, 'fc2_out_features': 98}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   7%|▋         | 17/250 [02:39<36:21,  9.36s/it, Training Loss: 0.1616, Accuracy: 0.9419, Validation Loss: 0.4010, Accuracy: 0.8502]


Holdout Loss: nan, Accuracy: 0.8490
[I 2024-02-22 10:50:04,047] Trial 40 finished with value: 0.36329415462160325 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 261, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': False, 'conv_1_out_channels': 446, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 4, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 269, 'conv_2_kernel_size_power': 4, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 87, 'fc2_out_features': 103}. Best is trial 26 with value: 0.19172867900903798.


Epochs:  12%|█▏        | 31/250 [05:07<36:08,  9.90s/it, Training Loss: 0.1448, Accuracy: 0.9604, Validation Loss: 0.2420, Accuracy: 0.9293]


Holdout Loss: nan, Accuracy: 0.9295
[I 2024-02-22 10:55:13,654] Trial 41 finished with value: 0.21536328839188232 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 297, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 445, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 304, 'conv_2_kernel_size_power': 4, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 122, 'fc2_out_features': 115}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   8%|▊         | 20/250 [03:15<37:27,  9.77s/it, Training Loss: 0.0933, Accuracy: 0.9519, Validation Loss: 0.4438, Accuracy: 0.8658]


Holdout Loss: nan, Accuracy: 0.8653
[I 2024-02-22 10:58:31,639] Trial 42 finished with value: 0.21869909485659644 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 288, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 446, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 304, 'conv_2_kernel_size_power': 4, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 126, 'fc2_out_features': 98}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   9%|▉         | 23/250 [02:53<28:35,  7.56s/it, Training Loss: 0.1153, Accuracy: 0.9592, Validation Loss: 0.4358, Accuracy: 0.8373]


Holdout Loss: nan, Accuracy: 0.8472
[I 2024-02-22 11:01:27,877] Trial 43 finished with value: 0.2367402923763537 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 297, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 495, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 3, 'conv_2_kernel_size_power': 4, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 109, 'fc2_out_features': 112}. Best is trial 26 with value: 0.19172867900903798.


Epochs:  12%|█▏        | 30/250 [04:49<35:23,  9.65s/it, Training Loss: 0.3750, Accuracy: 0.7366, Validation Loss: 0.2002, Accuracy: 0.9230]


Holdout Loss: nan, Accuracy: 0.9224
[I 2024-02-22 11:06:19,946] Trial 44 finished with value: 0.1931955382935154 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 288, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 464, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 306, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 7, 'fc1_out_features': 95, 'fc2_out_features': 114}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   6%|▋         | 16/250 [02:37<38:29,  9.87s/it, Training Loss: 0.4753, Accuracy: 0.6949, Validation Loss: 0.4020, Accuracy: 0.8462]


Holdout Loss: nan, Accuracy: 0.8500
[I 2024-02-22 11:09:00,465] Trial 45 finished with value: 0.37632932746162306 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 315, 'conv_0_kernel_size_power': 1, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 476, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 315, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 7, 'fc1_out_features': 89, 'fc2_out_features': 124}. Best is trial 26 with value: 0.19172867900903798.


Epochs:  10%|█         | 25/250 [04:40<42:03, 11.21s/it, Training Loss: 0.1216, Accuracy: 0.9255, Validation Loss: 0.6068, Accuracy: 0.8056]


Holdout Loss: nan, Accuracy: 0.8094
[I 2024-02-22 11:13:43,496] Trial 46 finished with value: 0.2438944963921669 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 315, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 413, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 4, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 409, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 2, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 6, 'fc1_out_features': 158, 'fc2_out_features': 114}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   6%|▌         | 14/250 [01:51<31:13,  7.94s/it, Training Loss: 0.5421, Accuracy: 0.7062, Validation Loss: 0.5731, Accuracy: 0.7195]


Holdout Loss: nan, Accuracy: 0.7228
[I 2024-02-22 11:15:36,988] Trial 47 finished with value: 0.42971982253135743 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 243, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 336, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 376, 'conv_2_kernel_size_power': 3, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 7, 'fc1_out_features': 74, 'fc2_out_features': 115}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   8%|▊         | 20/250 [02:34<29:36,  7.72s/it, Training Loss: 0.1732, Accuracy: 0.9158, Validation Loss: 0.2771, Accuracy: 0.8946]


Holdout Loss: nan, Accuracy: 0.8971
[I 2024-02-22 11:18:13,818] Trial 48 finished with value: 0.25553691737669104 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 333, 'conv_0_kernel_size_power': 1, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 253, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 283, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 67, 'fc2_out_features': 125}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   9%|▉         | 22/250 [02:08<22:11,  5.84s/it, Training Loss: 0.2565, Accuracy: 0.9146, Validation Loss: 0.4281, Accuracy: 0.8166]


Holdout Loss: nan, Accuracy: 0.8189
[I 2024-02-22 11:20:24,425] Trial 49 finished with value: 0.4209154893478445 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 279, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': False, 'conv_0_activation': False, 'conv_1_out_channels': 282, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 318, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 7, 'fc1_out_features': 92, 'fc2_out_features': 104}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   7%|▋         | 17/250 [01:41<23:10,  5.97s/it, Training Loss: 0.2002, Accuracy: 0.9199, Validation Loss: 0.4128, Accuracy: 0.8384]


Holdout Loss: nan, Accuracy: 0.8370
[I 2024-02-22 11:22:08,033] Trial 50 finished with value: 0.30889268888751564 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 189, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 370, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 4, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 364, 'conv_2_kernel_size_power': 4, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 2, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 6, 'fc1_out_features': 166, 'fc2_out_features': 114}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   7%|▋         | 18/250 [02:46<35:43,  9.24s/it, Training Loss: 0.1155, Accuracy: 0.9569, Validation Loss: 0.4943, Accuracy: 0.8398]


Holdout Loss: nan, Accuracy: 0.8463
[I 2024-02-22 11:24:56,910] Trial 51 finished with value: 0.2813913028235187 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 288, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 461, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 303, 'conv_2_kernel_size_power': 3, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 131, 'fc2_out_features': 96}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   5%|▍         | 12/250 [12:04<3:59:36, 60.41s/it, Training Loss: 0.2657, Accuracy: 0.9122, Validation Loss: 0.3021, Accuracy: 0.8880]


Holdout Loss: nan, Accuracy: 0.8880
[I 2024-02-22 11:37:04,640] Trial 52 finished with value: 0.3020884632395338 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 297, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 512, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 295, 'conv_2_kernel_size_power': 4, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 149, 'fc2_out_features': 99}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   4%|▍         | 11/250 [01:48<39:15,  9.86s/it, Training Loss: 0.6932, Accuracy: 0.5000, Validation Loss: 0.6931, Accuracy: 0.5004]


Holdout Loss: nan, Accuracy: 0.5006
[I 2024-02-22 11:38:55,666] Trial 53 finished with value: 0.6936436259136151 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 270, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 421, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 487, 'conv_2_kernel_size_power': 3, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 116, 'fc2_out_features': 106}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   7%|▋         | 18/250 [02:32<32:50,  8.49s/it, Training Loss: 0.1421, Accuracy: 0.9443, Validation Loss: 0.2674, Accuracy: 0.9096]


Holdout Loss: nan, Accuracy: 0.9147
[I 2024-02-22 11:41:30,957] Trial 54 finished with value: 0.2530124212972522 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 234, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 2, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 479, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 3, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 232, 'conv_2_kernel_size_power': 4, 'conv_2_dilation': 4, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 133, 'fc2_out_features': 118}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   7%|▋         | 17/250 [02:19<31:53,  8.21s/it, Training Loss: 0.0621, Accuracy: 0.9772, Validation Loss: 0.2050, Accuracy: 0.9285]


Holdout Loss: nan, Accuracy: 0.9261
[I 2024-02-22 11:43:53,027] Trial 55 finished with value: 0.20498365605925464 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 252, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 410, 'conv_1_kernel_size_power': 1, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 167, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 99, 'fc2_out_features': 123}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   7%|▋         | 18/250 [01:38<21:03,  5.45s/it, Training Loss: 0.0976, Accuracy: 0.9646, Validation Loss: 0.8376, Accuracy: 0.7916]


Holdout Loss: nan, Accuracy: 0.7888
[I 2024-02-22 11:45:33,220] Trial 56 finished with value: 0.28298250926357577 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 198, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 401, 'conv_1_kernel_size_power': 2, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 89, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 7, 'fc1_out_features': 100, 'fc2_out_features': 127}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   6%|▌         | 14/250 [01:31<25:42,  6.54s/it, Training Loss: 0.1029, Accuracy: 0.9618, Validation Loss: 0.2464, Accuracy: 0.9177]


Holdout Loss: nan, Accuracy: 0.9210
[I 2024-02-22 11:47:06,991] Trial 57 finished with value: 0.20063657355567743 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 162, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 345, 'conv_1_kernel_size_power': 5, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 172, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 113, 'fc2_out_features': 122}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   7%|▋         | 18/250 [01:37<20:52,  5.40s/it, Training Loss: 0.1291, Accuracy: 0.9564, Validation Loss: 0.2515, Accuracy: 0.9010]


Holdout Loss: nan, Accuracy: 0.9034
[I 2024-02-22 11:48:46,292] Trial 58 finished with value: 0.25150877297941926 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 90, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 336, 'conv_1_kernel_size_power': 5, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 165, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 7, 'fc1_out_features': 65, 'fc2_out_features': 123}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   7%|▋         | 17/250 [01:42<23:30,  6.05s/it, Training Loss: 0.1863, Accuracy: 0.9136, Validation Loss: 0.3817, Accuracy: 0.8512]


Holdout Loss: nan, Accuracy: 0.8523
[I 2024-02-22 11:50:31,445] Trial 59 finished with value: 0.2892600117037445 and parameters: {'num_conv_layers': 5, 'conv_0_out_channels': 162, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 213, 'conv_1_kernel_size_power': 5, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 183, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 2, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'conv_3_out_channels': 197, 'conv_3_kernel_size_power': 5, 'conv_3_dilation': 4, 'conv_3_out_length_reduction_exponent': 2, 'conv_3_bn': True, 'conv_3_activation': True, 'conv_4_out_channels': 218, 'conv_4_kernel_size_power': 1, 'conv_4_dilation': 3, 'conv_4_out_length_reduction_exponent': 1, 'conv_4_bn': True, 'conv_4_activation': F

Epochs:   5%|▍         | 12/250 [00:53<17:36,  4.44s/it, Training Loss: 0.1542, Accuracy: 0.9411, Validation Loss: 0.3700, Accuracy: 0.8700]


Holdout Loss: nan, Accuracy: 0.8708
[I 2024-02-22 11:51:26,716] Trial 60 finished with value: 0.3104354939032638 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 126, 'conv_0_kernel_size_power': 4, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 360, 'conv_1_kernel_size_power': 5, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 124, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 6, 'fc1_out_features': 194, 'fc2_out_features': 128}. Best is trial 26 with value: 0.19172867900903798.


Epochs:   8%|▊         | 19/250 [02:51<34:50,  9.05s/it, Training Loss: 0.1117, Accuracy: 0.9591, Validation Loss: 0.1753, Accuracy: 0.9314]


Holdout Loss: nan, Accuracy: 0.9311
[I 2024-02-22 11:54:21,171] Trial 61 finished with value: 0.1753348012602661 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 252, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 428, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 233, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 115, 'fc2_out_features': 117}. Best is trial 61 with value: 0.1753348012602661.
New best trial at trial 61: 0.1753348012602661


Epochs:   7%|▋         | 18/250 [02:47<36:01,  9.32s/it, Training Loss: 0.0918, Accuracy: 0.9669, Validation Loss: 0.4866, Accuracy: 0.8688]


Holdout Loss: nan, Accuracy: 0.8725
[I 2024-02-22 11:57:11,427] Trial 62 finished with value: 0.21147117290104944 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 252, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 422, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 193, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 114, 'fc2_out_features': 117}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   8%|▊         | 19/250 [02:49<34:19,  8.91s/it, Training Loss: 0.0737, Accuracy: 0.9744, Validation Loss: 0.2032, Accuracy: 0.9247]


Holdout Loss: nan, Accuracy: 0.9253
[I 2024-02-22 12:00:03,260] Trial 63 finished with value: 0.1843418104746979 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 243, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 382, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 148, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 111, 'fc2_out_features': 117}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   8%|▊         | 19/250 [02:52<35:00,  9.09s/it, Training Loss: 0.0760, Accuracy: 0.9726, Validation Loss: 0.2182, Accuracy: 0.9308]


Holdout Loss: nan, Accuracy: 0.9299
[I 2024-02-22 12:02:58,561] Trial 64 finished with value: 0.18634443611136547 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 243, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 387, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 146, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 99, 'fc2_out_features': 121}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   8%|▊         | 21/250 [02:52<31:23,  8.22s/it, Training Loss: 0.0898, Accuracy: 0.9680, Validation Loss: 0.7293, Accuracy: 0.8101]


Holdout Loss: nan, Accuracy: 0.8057
[I 2024-02-22 12:05:53,643] Trial 65 finished with value: 0.2150431109284418 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 234, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 312, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 147, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 2, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 100, 'fc2_out_features': 85}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   6%|▌         | 15/250 [01:10<18:30,  4.72s/it, Training Loss: 0.0886, Accuracy: 0.9691, Validation Loss: 0.4600, Accuracy: 0.8483]


Holdout Loss: nan, Accuracy: 0.8456
[I 2024-02-22 12:07:06,573] Trial 66 finished with value: 0.3822553489694312 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 27, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 353, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 1, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 87, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 8, 'fc1_out_features': 82, 'fc2_out_features': 111}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   8%|▊         | 19/250 [02:46<33:43,  8.76s/it, Training Loss: 0.0939, Accuracy: 0.9658, Validation Loss: 0.4115, Accuracy: 0.8963]


Holdout Loss: nan, Accuracy: 0.8923
[I 2024-02-22 12:09:55,487] Trial 67 finished with value: 0.19908399110660394 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 234, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 392, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 151, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 95, 'fc2_out_features': 120}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   8%|▊         | 20/250 [02:42<31:09,  8.13s/it, Training Loss: 0.0862, Accuracy: 0.9690, Validation Loss: 0.2871, Accuracy: 0.9067]


Holdout Loss: nan, Accuracy: 0.9004
[I 2024-02-22 12:12:40,433] Trial 68 finished with value: 0.2438080842448418 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 225, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 388, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 105, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 109, 'fc2_out_features': 120}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   7%|▋         | 17/250 [02:54<39:49, 10.26s/it, Training Loss: 0.1388, Accuracy: 0.9479, Validation Loss: 0.4736, Accuracy: 0.8412]


Holdout Loss: nan, Accuracy: 0.8369
[I 2024-02-22 12:15:37,270] Trial 69 finished with value: 0.40708771324308224 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 189, 'conv_0_kernel_size_power': 5, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': False, 'conv_1_out_channels': 317, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 143, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 95, 'fc2_out_features': 108}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   8%|▊         | 21/250 [03:06<33:50,  8.87s/it, Training Loss: 0.0685, Accuracy: 0.9761, Validation Loss: 0.1938, Accuracy: 0.9352]


Holdout Loss: nan, Accuracy: 0.9322
[I 2024-02-22 12:18:45,955] Trial 70 finished with value: 0.19375488931661125 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 207, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 428, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 192, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 84, 'fc2_out_features': 117}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   8%|▊         | 19/250 [03:06<37:43,  9.80s/it, Training Loss: 0.0748, Accuracy: 0.9744, Validation Loss: 0.3018, Accuracy: 0.9164]


Holdout Loss: nan, Accuracy: 0.9159
[I 2024-02-22 12:21:54,780] Trial 71 finished with value: 0.18290667757757753 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 207, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 463, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 193, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 84, 'fc2_out_features': 117}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   8%|▊         | 19/250 [03:04<37:26,  9.73s/it, Training Loss: 0.0781, Accuracy: 0.9730, Validation Loss: 0.2096, Accuracy: 0.9306]


Holdout Loss: nan, Accuracy: 0.9342
[I 2024-02-22 12:25:02,171] Trial 72 finished with value: 0.20562413941066288 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 207, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 429, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 223, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 75, 'fc2_out_features': 117}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   7%|▋         | 18/250 [03:03<39:30, 10.22s/it, Training Loss: 0.0923, Accuracy: 0.9684, Validation Loss: 0.1949, Accuracy: 0.9280]


Holdout Loss: nan, Accuracy: 0.9296
[I 2024-02-22 12:28:08,718] Trial 73 finished with value: 0.19492802559381783 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 225, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 465, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 240, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 84, 'fc2_out_features': 112}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   7%|▋         | 18/250 [02:57<38:04,  9.85s/it, Training Loss: 0.0776, Accuracy: 0.9698, Validation Loss: 0.2411, Accuracy: 0.9202]


Holdout Loss: nan, Accuracy: 0.9226
[I 2024-02-22 12:31:08,633] Trial 74 finished with value: 0.2199818948272904 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 207, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 466, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 198, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 83, 'fc2_out_features': 113}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   5%|▍         | 12/250 [02:02<40:22, 10.18s/it, Training Loss: 0.3368, Accuracy: 0.8107, Validation Loss: 0.2805, Accuracy: 0.8867]


Holdout Loss: nan, Accuracy: 0.8861
[I 2024-02-22 12:33:13,464] Trial 75 finished with value: 0.27442627417754145 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 225, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 461, 'conv_1_kernel_size_power': 4, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 234, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 2, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 64, 'fc2_out_features': 105}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   9%|▉         | 22/250 [03:50<39:53, 10.50s/it, Training Loss: 0.0562, Accuracy: 0.9806, Validation Loss: 0.2366, Accuracy: 0.9265]


Holdout Loss: nan, Accuracy: 0.9289
[I 2024-02-22 12:37:07,092] Trial 76 finished with value: 0.1890543040087604 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 270, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 485, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 252, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 78, 'fc2_out_features': 46}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   9%|▉         | 22/250 [03:46<39:10, 10.31s/it, Training Loss: 0.0911, Accuracy: 0.9673, Validation Loss: 0.2549, Accuracy: 0.9240]


Holdout Loss: nan, Accuracy: 0.9210
[I 2024-02-22 12:40:56,616] Trial 77 finished with value: 0.20063632663608516 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 279, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 483, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 191, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 72, 'fc2_out_features': 116}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   5%|▍         | 12/250 [02:24<47:54, 12.08s/it, Training Loss: 0.2556, Accuracy: 0.9013, Validation Loss: 0.2083, Accuracy: 0.9142]


Holdout Loss: nan, Accuracy: 0.9190
[I 2024-02-22 12:43:24,417] Trial 78 finished with value: 0.2083359082426065 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 270, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 499, 'conv_1_kernel_size_power': 4, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 269, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 43, 'fc2_out_features': 55}. Best is trial 61 with value: 0.1753348012602661.


Epochs:  15%|█▍        | 37/250 [04:27<25:41,  7.24s/it, Training Loss: 0.1674, Accuracy: 0.9190, Validation Loss: 2.1066, Accuracy: 0.5624]


Holdout Loss: nan, Accuracy: 0.5676
[I 2024-02-22 12:47:54,488] Trial 79 finished with value: 0.23596941812979944 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 243, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 1, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 431, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 258, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 2, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 5, 'fc1_out_features': 105, 'fc2_out_features': 70}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   7%|▋         | 18/250 [02:13<28:35,  7.40s/it, Training Loss: 0.2273, Accuracy: 0.9142, Validation Loss: 0.4652, Accuracy: 0.8339]


Holdout Loss: nan, Accuracy: 0.8290
[I 2024-02-22 12:50:09,999] Trial 80 finished with value: 0.37825122819845436 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 180, 'conv_0_kernel_size_power': 2, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': False, 'conv_1_out_channels': 274, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 206, 'conv_2_kernel_size_power': 4, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 61, 'fc2_out_features': 43}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   8%|▊         | 21/250 [03:22<36:46,  9.64s/it, Training Loss: 0.0624, Accuracy: 0.9778, Validation Loss: 0.2249, Accuracy: 0.9311]


Holdout Loss: nan, Accuracy: 0.9313
[I 2024-02-22 12:53:34,958] Trial 81 finished with value: 0.2121428818465227 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 225, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 456, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 248, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 85, 'fc2_out_features': 109}. Best is trial 61 with value: 0.1753348012602661.


Epochs:   5%|▍         | 12/250 [01:56<38:36,  9.73s/it, Training Loss: 0.2097, Accuracy: 0.9270, Validation Loss: 0.3214, Accuracy: 0.8830]


Holdout Loss: nan, Accuracy: 0.8783
[I 2024-02-22 12:55:34,381] Trial 82 finished with value: 0.2747471174584501 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 252, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 488, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 230, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 78, 'fc2_out_features': 101}. Best is trial 61 with value: 0.1753348012602661.


Epochs:  10%|█         | 25/250 [04:28<40:20, 10.76s/it, Training Loss: 0.0907, Accuracy: 0.9577, Validation Loss: 0.2124, Accuracy: 0.9306]


Holdout Loss: nan, Accuracy: 0.9332
[I 2024-02-22 13:00:05,967] Trial 83 finished with value: 0.18486012505863436 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 198, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 471, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 283, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 90, 'fc2_out_features': 112}. Best is trial 61 with value: 0.1753348012602661.


Epochs:  10%|▉         | 24/250 [2:11:36<20:39:22, 329.04s/it, Training Loss: 0.1290, Accuracy: 0.9581, Validation Loss: 0.1867, Accuracy: 0.9332]


Holdout Loss: nan, Accuracy: 0.9364
[I 2024-02-22 15:12:01,221] Trial 84 finished with value: 0.1867289981825039 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 207, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 409, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 327, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 55, 'fc2_out_features': 126}. Best is trial 61 with value: 0.1753348012602661.


Epochs:  11%|█         | 28/250 [30:46<4:03:58, 65.94s/it, Training Loss: 0.1273, Accuracy: 0.9652, Validation Loss: 0.2911, Accuracy: 0.8940]


Holdout Loss: nan, Accuracy: 0.8983
[I 2024-02-22 15:42:50,566] Trial 85 finished with value: 0.2064308844243697 and parameters: {'num_conv_layers': 3, 'conv_0_out_channels': 198, 'conv_0_kernel_size_power': 3, 'conv_0_dilation': 1, 'conv_0_out_length_reduction_exponent': 0, 'conv_0_bn': False, 'conv_0_activation': True, 'conv_1_out_channels': 512, 'conv_1_kernel_size_power': 3, 'conv_1_dilation': 2, 'conv_1_out_length_reduction_exponent': 0, 'conv_1_bn': False, 'conv_1_activation': True, 'conv_2_out_channels': 284, 'conv_2_kernel_size_power': 5, 'conv_2_dilation': 3, 'conv_2_out_length_reduction_exponent': 1, 'conv_2_bn': True, 'conv_2_activation': False, 'maxpool_kernel_exponent': 9, 'fc1_out_features': 57, 'fc2_out_features': 126}. Best is trial 61 with value: 0.1753348012602661.
Best trial overall:
  Value: 0.1753348012602661
  Params: 
    num_conv_layers: 3
    conv_0_out_channels: 252
    conv_0_kernel_size_power: 3
    conv_0_dilation: 1
    conv_0_out_length_reduction_exponent

In [12]:
def count_parameters(model):
    """Print a per-parameter size table for ``model`` and return the total.

    Only parameters whose ``requires_grad`` flag is truthy are listed and
    counted; all others are skipped.

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``requires_grad`` and ``numel()``.

    Returns:
        The sum of ``numel()`` over every listed parameter.
    """
    summary = PrettyTable(['Modules', 'Parameters'])

    # Gather (name, element count) pairs up front so the table rows and the
    # grand total are derived from the same filtered list.
    trainable = [
        (label, tensor.numel())
        for label, tensor in model.named_parameters()
        if tensor.requires_grad
    ]

    for label, size in trainable:
        summary.add_row([label, size])
    grand_total = sum(size for _, size in trainable)

    print(summary)
    print(f'Total Trainable Params: {grand_total}')
    return grand_total

# Instantiate CustomNet (defined elsewhere in this notebook) from the study's
# best trial and print its trainable-parameter table. The call is left as the
# cell's last expression, so the returned total is also shown as the cell result.
count_parameters(CustomNet(study.best_trial))


+----------------------+------------+
|       Modules        | Parameters |
+----------------------+------------+
| conv_layers.0.weight |    1764    |
|  conv_layers.0.bias  |    252     |
| conv_layers.1.weight |   754992   |
|  conv_layers.1.bias  |    428     |
| conv_layers.2.weight |  1096964   |
|  conv_layers.2.bias  |    233     |
|     bns.2.weight     |    233     |
|      bns.2.bias      |    233     |
|      fc1.weight      |   26795    |
|       fc1.bias       |    115     |
|      fc2.weight      |   13455    |
|       fc2.bias       |    117     |
|      fc3.weight      |    117     |
|       fc3.bias       |     1      |
+----------------------+------------+
Total Trainable Params: 1895699


1895699