In [15]:
%run 'Setup.py'

8 different classes: Electronic, Experimental, Folk, Hip-Hop, Instrumental, International, Pop or Rock.
objective 1: construct a classifier which, based on the features of a song, predicts its genre
objective 2: estimate its generalisation error under the 0–1 loss.
Features are real-valued, correspond to summary statistics (mean, sd, skewness, kurtosis, median, min, max) of 
time series of various music features, such as the chromagram or the Mel-frequency cepstrum.
Feature description: 

Feature description: 
chroma_cens: Chroma Energy Normalized (CENS, 12 chroma) - 84 features
chroma_cqt: Constant-Q chromagram (12 chroma) - 84 features
chroma_stft: Chromagram (12 chroma) - 84 features
mfcc: Mel-frequency cepstrum (20 coefficients) - 140 features
rmse: Root-mean-square - 7 features
spectral_bandwidth: Spectral bandwidth - 7 features
spectral_centroid: Spectral centroid - 7 features
spectral_contrast: Spectral contrast (7 frequency bands) - 49 features
spectral_rolloff: Roll-off freque

In [16]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline

In [36]:
def preprocess_data(x, y):
    scaler = StandardScaler()
    x_scaled = scaler.fit_transform(x)
    return train_test_split(x_scaled, y, test_size=0.2, random_state=42)

X_train, X_test, Y_train, Y_test = preprocess_data(x_train_np, y_train_np)

# Function to train and evaluate a model
def train_evaluate(model, X_train, X_test, Y_train, Y_test):
    model.fit(X_train, Y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(Y_test, y_pred)
    print(f'Accuracy: {accuracy}')
    return model

In [24]:
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

(4800, 518) (4800, 1)
(1200, 518) (1200, 1)


In [122]:
mlp_model = MLPClassifier(hidden_layer_sizes=(256, 128, 64), activation='relu', 
                          solver='adam', alpha=1e-4, max_iter=300, random_state=42)

train_evaluate(mlp_model, X_train, X_test, Y_train, Y_test)


Accuracy: 0.5633333333333334


In [20]:
pipeline = make_pipeline(StandardScaler(), MLPClassifier(random_state=42, max_iter=600))

In [None]:
# Define a parameter grid to search over
param_grid = {
    'mlpclassifier__hidden_layer_sizes': [(100,), (256, 128, 64), (512, 256, 128, 64), (512, 256, 128)],
    'mlpclassifier__alpha': [0.0001, 0.001, 0.01, 0.1],
    'mlpclassifier__learning_rate_init': [0.001, 0.01, 0.1],
    'mlpclassifier__activation': ['relu', 'tanh', 'logistic', 'softmax']
}

# Configure GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, n_jobs=-1, cv=3, scoring='accuracy')

# Perform the grid search
grid_search.fit(X_train, y_train)

# Output the results
print(f'Best parameters: {grid_search.best_params_}')
print(f'Best cross-validation score: {grid_search.best_score_}')

# Evaluate on the test set
y_pred = grid_search.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Test set accuracy: {accuracy}')



# Pytorch to use GPU


In [32]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from torch.optim.lr_scheduler import StepLR

In [37]:
# Encode labels
# Initialize the encoder
label_encoder = LabelEncoder()
# Flatten

# Fit the encoder on the training labels
y_train_encoded = label_encoder.fit_transform(Y_train.ravel())

# Transform the test labels using the same encoder
y_test_encoded = label_encoder.transform(Y_test)

In [41]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_encoded, dtype=torch.int64)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_encoded, dtype=torch.int64)

In [42]:
# Create TensorDatasets and DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [66]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(518, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 8)  # Corrected for 8 classes
        )
    
    def forward(self, x):
        return self.layers(x)

# Assuming 'train_loader' is already defined and your data is loaded into it
# Move the model to GPU if available
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model = MLP().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
model.train()  # Set the model to training mode
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

for epoch in range(50):  # Assuming 100 epochs
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    scheduler.step()  # Adjust the learning rate
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')


Epoch 1, Loss: 1.1909230947494507
Epoch 2, Loss: 1.3671176433563232
Epoch 3, Loss: 0.8340537548065186
Epoch 4, Loss: 0.8327663540840149
Epoch 5, Loss: 0.8143966794013977
Epoch 6, Loss: 0.7048672437667847
Epoch 7, Loss: 0.2515137195587158
Epoch 8, Loss: 0.32579243183135986
Epoch 9, Loss: 0.188766747713089
Epoch 10, Loss: 0.24157410860061646
Epoch 11, Loss: 0.1186475157737732
Epoch 12, Loss: 0.10536760836839676
Epoch 13, Loss: 0.07978427410125732
Epoch 14, Loss: 0.04183381050825119
Epoch 15, Loss: 0.03940463438630104
Epoch 16, Loss: 0.017656996846199036
Epoch 17, Loss: 0.08177593350410461
Epoch 18, Loss: 0.13821882009506226
Epoch 19, Loss: 0.11040196567773819
Epoch 20, Loss: 0.04518599063158035
Epoch 21, Loss: 0.02966383472084999
Epoch 22, Loss: 0.014675465412437916
Epoch 23, Loss: 0.08688538521528244
Epoch 24, Loss: 0.003013056004419923
Epoch 25, Loss: 0.13955458998680115
Epoch 26, Loss: 0.006165251601487398
Epoch 27, Loss: 0.064602330327034
Epoch 28, Loss: 0.08257416635751724
Epoch 29,

In [67]:
model.eval()  # Set the model to evaluation mode
correct = 0
total = 0

with torch.no_grad():  # Inference mode, gradients not needed
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')


Test Accuracy: 59.17%


In [68]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from torch.utils.data import DataLoader, TensorDataset, random_split

class ConfigurableMLP(nn.Module):
    def __init__(self, input_size, layer_configurations, activation_fn):
        super(ConfigurableMLP, self).__init__()
        self.layers = nn.ModuleList()

        # Iterate through the provided layer configurations
        last_size = input_size
        for config in layer_configurations:
            if type(config) == int:  # If it's an integer, it's a layer size
                self.layers.append(nn.Linear(last_size, config))
                last_size = config
            elif config == 'dropout':  # If the config specifies dropout
                self.layers.append(nn.Dropout(0.25))  # Example dropout rate: 0.5
            elif config == 'batchnorm':  # If the config specifies batch normalization
                self.layers.append(nn.BatchNorm1d(last_size))  # Apply BatchNorm to the last layer size
                
            # Only add an activation layer if it's not the special 'dropout' or 'batchnorm' keyword
            if config in activation_fn:
                self.layers.append(activation_fn[config]())

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x



def train_evaluate_model(model, train_loader, test_loader, device, epochs=10):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters())
    model.to(device)
    
    # Training loop
    for epoch in range(epochs):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        
    # Evaluation loop
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}%')
    return accuracy


In [71]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Define configurations
layer_configurations = [
    [512, 256, 128, 8],
    [1024, 512, 256, 128, 8],
    [512, 1024, 2048, 'batchnorm', 512, 'dropout', 256, 128, 8],
    # [2048, 1024, 512, 256, 128, 8],
    # [2048, 1024, 512, 'dropout', 1024, 256, 128, 8],
    # [512, 1024, 512, 256, 128, 8],
    # [1024, 512, 512, 256, 8],
    # [1024, 'dropout', 512, 'dropout', 256, 8],
    [512, 'dropout', 256, 'batchnorm', 128, 8]
]

activation_functions = [
    [nn.ReLU, nn.ReLU, nn.ReLU],  # Corresponds to activations between layers, excluding final
    [nn.Tanh, nn.Tanh, nn.Tanh],
    [nn.Softplus, nn.Softplus, nn.Softplus],
    [nn.ReLU, nn.Tanh, nn.Softplus],
    [nn.Softplus, nn.ReLU, nn.Softplus],
    [nn.Softplus, nn.Sigmoid, nn.Softplus]
]
accuracies = []
# Loop through configurations
for layers in layer_configurations:
    for activations in activation_functions:
        model = ConfigurableMLP(input_size=518, layer_configurations=layers, activation_fn=activations)
        print(f"Training with layers: {layers} and activations: {[fn.__name__ for fn in activations if fn]}")
        accuracy = train_evaluate_model(model, train_loader, test_loader, device, epochs=40)
        accuracies.append(accuracy)
print(max(accuracies))

Training with layers: [512, 256, 128, 8] and activations: ['ReLU', 'ReLU', 'ReLU']
Accuracy: 54.67%
Training with layers: [512, 256, 128, 8] and activations: ['Tanh', 'Tanh', 'Tanh']
Accuracy: 54.42%
Training with layers: [512, 256, 128, 8] and activations: ['Softplus', 'Softplus', 'Softplus']
Accuracy: 53.83%
Training with layers: [512, 256, 128, 8] and activations: ['ReLU', 'Tanh', 'Softplus']
Accuracy: 54.58%
Training with layers: [512, 256, 128, 8] and activations: ['Softplus', 'ReLU', 'Softplus']
Accuracy: 52.75%
Training with layers: [512, 256, 128, 8] and activations: ['Softplus', 'Sigmoid', 'Softplus']
Accuracy: 53.50%
Training with layers: [1024, 512, 256, 128, 8] and activations: ['ReLU', 'ReLU', 'ReLU']
Accuracy: 53.00%
Training with layers: [1024, 512, 256, 128, 8] and activations: ['Tanh', 'Tanh', 'Tanh']
Accuracy: 53.25%
Training with layers: [1024, 512, 256, 128, 8] and activations: ['Softplus', 'Softplus', 'Softplus']
Accuracy: 55.17%
Training with layers: [1024, 512, 2

In [82]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(518, 512),
            nn.Softplus(),
            nn.Linear(512, 1024),
            nn.Dropout(p=0.4),
            nn.Linear(1024, 256),
            nn.Softplus(),
            nn.Linear(256, 128),
            nn.Softplus(),
            nn.Linear(128, 8)  # Corrected for 8 classes
        )
    
    def forward(self, x):
        return self.layers(x)

# Assuming 'train_loader' is already defined and your data is loaded into it
# Move the model to GPU if available
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model = MLP().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
# Training loop
model.train()  # Set the model to training mode
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

for epoch in range(50):  # Assuming 100 epochs
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    scheduler.step()  # Adjust the learning rate
    # print(f'Epoch {epoch+1}, Loss: {loss.item()}')
    
model.eval()  # Set the model to evaluation mode
correct = 0
total = 0

with torch.no_grad():  # Inference mode, gradients not needed
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')


Test Accuracy: 57.75%


ValueError: 
All the 12 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
12 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/envs/sml-practical-env/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/var/folders/lq/x2t80c813gsbg58jgjy302hc0000gn/T/ipykernel_34683/4249849981.py", line 13, in fit
    dataset = TensorDataset(torch.tensor(X).float(), torch.tensor(y).long())
TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.


In [89]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Adjust the device setting for M1 Pro chip
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Define the model with 518 input features and 8 output classes
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(518, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            # nn.Dropout(p=0.4),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 8)  # Adjusted for 8 output classes
        )
    
    def forward(self, x):
        return self.layers(x)

def objective(trial):
    # Define hyperparameters
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_int('batch_size', 32, 256)

    # Assuming train_dataset and test_dataset are defined elsewhere
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    model = MLP().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop
    for epoch in range(50):  # Adjust epochs as needed
        model.train()
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
    
    # Evaluation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    accuracy = correct / total
    return accuracy

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # Adjust the number of trials as needed

print("Best trial:")
print(study.best_trial)


[I 2024-03-11 14:47:19,331] A new study created in memory with name: no-name-861229ad-abee-46f7-b3bd-b058bda432c0
[I 2024-03-11 14:47:28,094] Trial 0 finished with value: 0.5816666666666667 and parameters: {'lr': 0.004621111610174225, 'batch_size': 250}. Best is trial 0 with value: 0.5816666666666667.
[I 2024-03-11 14:47:47,888] Trial 1 finished with value: 0.12666666666666668 and parameters: {'lr': 0.06164688005015238, 'batch_size': 94}. Best is trial 0 with value: 0.5816666666666667.
[I 2024-03-11 14:48:15,462] Trial 2 finished with value: 0.5858333333333333 and parameters: {'lr': 0.0001702076601536129, 'batch_size': 65}. Best is trial 2 with value: 0.5858333333333333.
[I 2024-03-11 14:48:27,288] Trial 3 finished with value: 0.5375 and parameters: {'lr': 1.6622809417395127e-05, 'batch_size': 173}. Best is trial 2 with value: 0.5858333333333333.
[I 2024-03-11 14:48:35,939] Trial 4 finished with value: 0.21666666666666667 and parameters: {'lr': 0.05142903614571702, 'batch_size': 245}. 

Best trial:
FrozenTrial(number=22, state=TrialState.COMPLETE, values=[0.5983333333333334], datetime_start=datetime.datetime(2024, 3, 11, 14, 52, 24, 549245), datetime_complete=datetime.datetime(2024, 3, 11, 14, 52, 34, 469063), params={'lr': 6.0482748498522895e-05, 'batch_size': 207}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'lr': FloatDistribution(high=0.1, log=True, low=1e-05, step=None), 'batch_size': IntDistribution(high=256, log=False, low=32, step=1)}, trial_id=22, value=None)


In [91]:
best_trial = study.best_trial.params
best_lr = best_trial['lr']
best_batch_size = best_trial['batch_size']
model = MLP().to(device)
optimizer = optim.Adam(model.parameters(), lr=best_lr)

train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
for epoch in range(50):  # Or however many epochs you deem appropriate
    model.train()
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    # scheduler.step()  # Adjust the learning rate based on the scheduler

model.eval()  # Set the model to evaluation mode
correct = 0
total = 0

with torch.no_grad():  # Inference mode, gradients not needed
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = correct / total
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')


Test Accuracy: 59.75%


In [94]:
import pickle
filename = "study-v1-59.75.pkl"
with open(filename, "wb") as f:
        pickle.dump(study, f)

In [128]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

model_storage = {}  # Dictionary to store state dicts

def objective(trial):
    # Optimize learning rate
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)

    # Define categorical hyperparameters for activation functions
    activation_name = trial.suggest_categorical('activation', ['ReLU', 'Tanh', 'SoftPlus'])
    
    # Map names to actual PyTorch activation functions
    activations = {'ReLU': nn.ReLU(), 'Tanh': nn.Tanh(), 'SoftPlus': nn.Softplus()}
    activation_fn = activations[activation_name]

    # Example of architectural choice: number of layers
    n_layers = trial.suggest_int('n_layers', 1, 10)

    model = nn.Sequential()
    input_size = 518  # Initial input size
    for i in range(n_layers):
        output_size = trial.suggest_int(f'n_units_l{i}', 8, 1024)
        model.add_module(f"linear_{i}", nn.Linear(input_size, output_size))
        model.add_module(f"activation_{i}", activation_fn)

        # Decide whether to add a dropout layer after this block
        use_dropout = trial.suggest_categorical(f"use_dropout_l{i}", [True, False])
        if use_dropout:
            # dropout_rate = trial.suggest_float(f"dropout_rate_l{i}", 0.2, 0.6)
            model.add_module(f"dropout_{i}", nn.Dropout(p=0.3))

        input_size = output_size  # Update input size for the next layer
    # for i in range(n_layers):
    #     output_size = trial.suggest_int(f'n_units_l{i}', 8, 1024)
    #     model.add_module(f"linear_{i}", nn.Linear(input_size, output_size))
    #     model.add_module(f"activation_{i}", activation_fn)
    #     input_size = output_size  # Update input size for the next layer

    model.add_module("final_linear", nn.Linear(input_size, 8))  # Assuming 8 output classes
    
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Training loop
    for epoch in range(40):
        model.train()
        for batch in train_loader:
            inputs, targets = batch[0].to(device), batch[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
    # Save Model
    model_storage[trial.number] = model.state_dict()
    # Evaluation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    accuracy = correct / total
    return accuracy

# Create study
study_architecture = optuna.create_study(direction='maximize', study_name = 'architecture')
study_architecture.optimize(objective, n_trials=50)

print("Best trial:")
trial = study_architecture.best_trial
print(f"Value: {trial.value}")
print("Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


[I 2024-03-11 17:06:58,867] A new study created in memory with name: architecture
[I 2024-03-11 17:07:18,135] Trial 0 finished with value: 0.5883333333333334 and parameters: {'lr': 0.0030953376203045166, 'activation': 'ReLU', 'n_layers': 5, 'n_units_l0': 1004, 'use_dropout_l0': True, 'n_units_l1': 570, 'use_dropout_l1': True, 'n_units_l2': 574, 'use_dropout_l2': False, 'n_units_l3': 270, 'use_dropout_l3': False, 'n_units_l4': 333, 'use_dropout_l4': True}. Best is trial 0 with value: 0.5883333333333334.
[I 2024-03-11 17:07:30,649] Trial 1 finished with value: 0.4775 and parameters: {'lr': 0.014757113955850923, 'activation': 'Tanh', 'n_layers': 3, 'n_units_l0': 954, 'use_dropout_l0': True, 'n_units_l1': 456, 'use_dropout_l1': False, 'n_units_l2': 158, 'use_dropout_l2': False}. Best is trial 0 with value: 0.5883333333333334.
[I 2024-03-11 17:07:41,633] Trial 2 finished with value: 0.5708333333333333 and parameters: {'lr': 2.965313243224161e-05, 'activation': 'SoftPlus', 'n_layers': 3, 'n_

Best trial:
Value: 0.61
Params: 
    lr: 0.0006281146314166269
    activation: ReLU
    n_layers: 2
    n_units_l0: 818
    use_dropout_l0: False
    n_units_l1: 174
    use_dropout_l1: False


In [130]:
best_trial_number = study_architecture.best_trial.number
best_state_dict = model_storage[best_trial_number]
filepath = f"best_model_trial_{best_trial_number}.pth"

# Save the state dictionary to disk
torch.save(best_state_dict, filepath)

print(f"Saved best model state dict to {filepath}")


Saved best model state dict to best_model_trial_26.pth


In [133]:
best_trial = study_architecture.best_trial.params
print(best_trial)

{'lr': 0.0006281146314166269, 'activation': 'ReLU', 'n_layers': 2, 'n_units_l0': 818, 'use_dropout_l0': False, 'n_units_l1': 174, 'use_dropout_l1': False}


In [135]:
optuna.visualization.plot_optimization_history(study_architecture)
# optuna.visualization.plot_contour(study_architecture)