In [1]:
!pip install --quiet optuna

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m390.6/390.6 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import optuna

from sklearn.model_selection import train_test_split

In [22]:
# Define the CNN model architecture
class Net(nn.Module):
    """Configurable CNN classifier built as a single ``nn.Sequential``.

    Architecture:
        ``n_conv_layers`` x [Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2, 2)]
        -> Flatten
        -> ``n_fc_layers`` x [Linear -> ReLU -> Dropout]
        -> Linear(``n_classes``)

    Args:
        n_conv_layers: Number of conv/ReLU/pool stages. Each stage halves the
            spatial size, so it must be small enough to leave at least 1x1.
        n_filters: Output channels of every convolution.
        n_fc_layers: Number of hidden fully connected stages.
        n_neurons: Width of every hidden fully connected layer.
        dropout_rate: Dropout probability applied after each hidden FC layer.
        in_channels: Channels of the input images (default 3).
        image_size: Height/width of the square input images (default 32).
        n_classes: Number of output logits (default 100, as for CIFAR-100).

    Raises:
        ValueError: If ``n_conv_layers`` is negative, or if the pooling stages
            would shrink the feature map below 1x1.
    """

    def __init__(self, n_conv_layers, n_filters, n_fc_layers, n_neurons, dropout_rate,
                 in_channels=3, image_size=32, n_classes=100):
        super().__init__()
        if n_conv_layers < 0:
            raise ValueError(f"n_conv_layers must be >= 0, got {n_conv_layers}")

        layers = []

        # Add convolutional layers; track channels and spatial size as we go
        for _ in range(n_conv_layers):
            layers.append(nn.Conv2d(in_channels, n_filters, kernel_size=3, padding=1))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(2, 2))
            in_channels = n_filters
            image_size //= 2  # padding=1 keeps the size; the 2x2 pool halves it

        if image_size < 1:
            raise ValueError(
                f"{n_conv_layers} pooling stages reduce the feature map below 1x1"
            )

        # Add fully connected layers
        layers.append(nn.Flatten())
        # Use the *current* channel count: with zero conv layers the flattened
        # input still has the original image channels, not n_filters.
        in_features = in_channels * image_size * image_size
        for _ in range(n_fc_layers):
            layers.append(nn.Linear(in_features, n_neurons))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(p=dropout_rate))
            in_features = n_neurons  # Update in_features for the next layer

        layers.append(nn.Linear(in_features, n_classes))  # one logit per class
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        return self.model(x)

# Function to get the CIFAR-100 data loaders
def get_data_loaders(batch_size, test_batch_size=100, root='./data', num_workers=2):
    """Build the CIFAR-100 train and test data loaders.

    Both splits are converted to tensors and normalised per channel with
    mean 0.5 and std 0.5. The dataset is downloaded into ``root`` if needed.

    Args:
        batch_size: Batch size of the (shuffled) training loader.
        test_batch_size: Batch size of the (unshuffled) test loader.
        root: Directory where the dataset is stored / downloaded.
        num_workers: Worker processes used by each loader.

    Returns:
        Tuple ``(trainloader, testloader)``.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    trainset = datasets.CIFAR100(root=root, train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers
    )

    testset = datasets.CIFAR100(root=root, train=False, download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=test_batch_size, shuffle=False, num_workers=num_workers
    )

    return trainloader, testloader

# Function to train and evaluate the model
def train_and_evaluate(model, optimizer, criterion, trainloader, testloader, device, epochs=10):
    """Train ``model`` and return its final test error rate.

    After every epoch the model is evaluated on ``testloader`` and one summary
    line (mean batch loss, train accuracy, test accuracy) is printed.

    Args:
        model: Module to train; it is moved to ``device``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``.
        trainloader: Iterable of batches whose first two items are inputs and labels.
        testloader: Iterable of batches in the same format, used for evaluation.
        device: Device on which batches and the model are placed.
        epochs: Number of training epochs. With ``epochs <= 0`` no training is
            done and the error rate of the model as given is returned.

    Returns:
        ``1 - test_accuracy`` measured after the last epoch.

    Raises:
        ValueError: If ``testloader`` yields no samples.
    """
    model.to(device)
    test_accuracy = None
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        train_total = 0
        train_correct = 0
        n_batches = 0
        for data in trainloader:
            inputs, labels = data[0].to(device), data[1].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # detach() instead of the legacy .data attribute
            predicted = outputs.detach().argmax(dim=1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()
            running_loss += loss.item()
            n_batches += 1

        ## Calculate accuracy for training phase (guarded against an empty loader)
        train_accuracy = train_correct / train_total if train_total else 0.0
        ## Average running_loss over the batches actually seen
        running_loss = running_loss / n_batches if n_batches else 0.0

        ## Calculate accuracy for test phase
        test_accuracy = _evaluate_accuracy(model, testloader, device)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss:.3f}, TrAccuracy: {100 * train_accuracy:.2f}%, TeAccuracy: {100 * test_accuracy:.2f}")

    if test_accuracy is None:
        # No epoch ran: still return a well-defined error rate instead of
        # failing on an unbound local variable.
        test_accuracy = _evaluate_accuracy(model, testloader, device)

    return 1 - test_accuracy


def _evaluate_accuracy(model, loader, device):
    """Return the top-1 accuracy (fraction in [0, 1]) of ``model`` over ``loader``."""
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for data in loader:
            images, labels = data[0].to(device), data[1].to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        raise ValueError("evaluation loader yielded no samples; accuracy is undefined")
    return correct / total

# Define the optimization objective
def objective(trial):
    """Optuna objective: train one sampled configuration and return its error rate.

    Samples the network shape, dropout rate, learning rate and optimizer from
    ``trial``, trains the resulting ``Net`` and returns the value produced by
    ``train_and_evaluate`` (lower is better).

    NOTE(review): the returned error is measured on the CIFAR-100 *test* split,
    so the study selects hyperparameters on test data and the best value is an
    optimistic estimate. Consider holding out a validation split instead.
    """
    # Set up the hyperparameters to optimize
    n_conv_layers = trial.suggest_int('n_conv_layers', 2, 4)
    n_filters = trial.suggest_int('n_filters', 60, 64)
    n_fc_layers = trial.suggest_int('n_fc_layers', 2, 3)
    n_neurons = trial.suggest_int('n_neurons', 60, 64)
    # suggest_float replaces the deprecated suggest_uniform / suggest_loguniform;
    # parameter names and sampling ranges are unchanged.
    dropout_rate = trial.suggest_float('dropout_rate', 0.0, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-3, 1e-2, log=True)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD'])

    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Get data loaders
    trainloader, testloader = get_data_loaders(batch_size=128)

    # Create the model
    model = Net(n_conv_layers, n_filters, n_fc_layers, n_neurons, dropout_rate)

    # Set up the optimizer and loss criterion
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
    else:
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    criterion = nn.CrossEntropyLoss()

    # Train and evaluate the model
    error_rate = train_and_evaluate(model, optimizer, criterion, trainloader, testloader, device)

    return error_rate


In [23]:
# Set up Optuna study (the objective returns an error rate, so minimize it)
study = optuna.create_study(direction='minimize')

# No TrialPruned handler is needed here: study.optimize() records a pruned
# trial as PRUNED and moves on to the next one instead of re-raising.
# `timeout` is in seconds and appears to be checked between trials: the
# recorded run started a third trial just before the hour and let it finish.
study.optimize(objective, timeout=3600, n_jobs=1)


[I 2023-06-30 15:14:12,309] A new study created in memory with name: no-name-746368f9-1105-4cf8-8d70-9cf6c06a0f60
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.0, 0.5)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-3, 1e-2)


Files already downloaded and verified
Files already downloaded and verified
Epoch 1/10, Loss: 4.195, TrAccuracy: 5.44%, TeAccuracy: 7.05
Epoch 2/10, Loss: 3.711, TrAccuracy: 12.15%, TeAccuracy: 17.23
Epoch 3/10, Loss: 3.447, TrAccuracy: 16.32%, TeAccuracy: 19.27
Epoch 4/10, Loss: 3.287, TrAccuracy: 19.35%, TeAccuracy: 23.77
Epoch 5/10, Loss: 3.155, TrAccuracy: 21.86%, TeAccuracy: 23.58
Epoch 6/10, Loss: 3.050, TrAccuracy: 23.79%, TeAccuracy: 26.82
Epoch 7/10, Loss: 2.968, TrAccuracy: 25.27%, TeAccuracy: 27.84
Epoch 8/10, Loss: 2.891, TrAccuracy: 26.46%, TeAccuracy: 29.15
Epoch 9/10, Loss: 2.838, TrAccuracy: 27.58%, TeAccuracy: 29.96


[I 2023-06-30 15:43:13,904] Trial 0 finished with value: 0.7001 and parameters: {'n_conv_layers': 2, 'n_filters': 63, 'n_fc_layers': 2, 'n_neurons': 62, 'dropout_rate': 0.11006712947473624, 'learning_rate': 0.0023250749854313688, 'optimizer': 'RMSprop'}. Best is trial 0 with value: 0.7001.


Epoch 10/10, Loss: 2.766, TrAccuracy: 28.88%, TeAccuracy: 29.99
Files already downloaded and verified
Files already downloaded and verified
Epoch 1/10, Loss: 4.090, TrAccuracy: 6.53%, TeAccuracy: 10.62
Epoch 2/10, Loss: 3.587, TrAccuracy: 14.19%, TeAccuracy: 18.13
Epoch 3/10, Loss: 3.285, TrAccuracy: 19.51%, TeAccuracy: 21.28
Epoch 4/10, Loss: 3.084, TrAccuracy: 23.26%, TeAccuracy: 25.33
Epoch 5/10, Loss: 2.936, TrAccuracy: 26.10%, TeAccuracy: 26.89
Epoch 6/10, Loss: 2.820, TrAccuracy: 28.38%, TeAccuracy: 28.79
Epoch 7/10, Loss: 2.709, TrAccuracy: 30.75%, TeAccuracy: 28.41
Epoch 8/10, Loss: 2.629, TrAccuracy: 32.23%, TeAccuracy: 31.80
Epoch 9/10, Loss: 2.560, TrAccuracy: 33.91%, TeAccuracy: 30.36


[I 2023-06-30 16:13:36,170] Trial 1 finished with value: 0.6736 and parameters: {'n_conv_layers': 3, 'n_filters': 60, 'n_fc_layers': 2, 'n_neurons': 62, 'dropout_rate': 0.012727925703046539, 'learning_rate': 0.0015369773134676792, 'optimizer': 'RMSprop'}. Best is trial 1 with value: 0.6736.


Epoch 10/10, Loss: 2.494, TrAccuracy: 35.40%, TeAccuracy: 32.64
Files already downloaded and verified
Files already downloaded and verified
Epoch 1/10, Loss: 4.306, TrAccuracy: 3.42%, TeAccuracy: 7.51
Epoch 2/10, Loss: 3.993, TrAccuracy: 6.69%, TeAccuracy: 11.05
Epoch 3/10, Loss: 3.843, TrAccuracy: 8.88%, TeAccuracy: 13.07
Epoch 4/10, Loss: 3.742, TrAccuracy: 10.29%, TeAccuracy: 15.03
Epoch 5/10, Loss: 3.655, TrAccuracy: 11.36%, TeAccuracy: 16.51
Epoch 6/10, Loss: 3.584, TrAccuracy: 12.55%, TeAccuracy: 17.41
Epoch 7/10, Loss: 3.536, TrAccuracy: 13.53%, TeAccuracy: 18.78
Epoch 8/10, Loss: 3.482, TrAccuracy: 14.20%, TeAccuracy: 19.11
Epoch 9/10, Loss: 3.457, TrAccuracy: 14.62%, TeAccuracy: 20.55


[I 2023-06-30 16:42:06,410] Trial 2 finished with value: 0.788 and parameters: {'n_conv_layers': 3, 'n_filters': 60, 'n_fc_layers': 2, 'n_neurons': 64, 'dropout_rate': 0.3971681002979143, 'learning_rate': 0.002120029332257018, 'optimizer': 'Adam'}. Best is trial 1 with value: 0.6736.


Epoch 10/10, Loss: 3.426, TrAccuracy: 15.02%, TeAccuracy: 21.20


In [24]:
# Pull the winning trial's hyperparameters and its error rate from the study
best_params, best_error = study.best_params, study.best_value

print(f"Best Hyperparameters: {best_params}")
print(f"Best Error Rate: {best_error}")

Best Hyperparameters: {'n_conv_layers': 3, 'n_filters': 60, 'n_fc_layers': 2, 'n_neurons': 62, 'dropout_rate': 0.012727925703046539, 'learning_rate': 0.0015369773134676792, 'optimizer': 'RMSprop'}
Best Error Rate: 0.6736
