In [None]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchsummary import summary

!pip install optuna
import optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.3-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.3 alembic-1.13.1 colorlog-6.8.2 optuna-3.6.1


In [None]:
# Run on the GPU when one is available; otherwise fall back to the CPU so that
# every later `.to(DEVICE)` call still works on CPU-only machines.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCHSIZE = 128  # mini-batch size used by both data loaders
CLASSES = 10   #CLASSES = 10 for cifar10 and 100 for cifar100
DIR = os.getcwd()  # root directory the dataset is downloaded to / read from
EPOCHS = 10  # epochs per trial and for the final training run
LOG_INTERVAL = 10  # NOTE(review): not referenced by any visible cell
N_TRAIN_EXAMPLES = 1000  # per-epoch cap on training samples (checked per batch)
N_VALID_EXAMPLES = 300  # per-epoch cap on validation samples (checked per batch)

In [None]:
def define_model(trial):
    """Assemble a CNN classifier whose widths and dropout rates come from ``trial``.

    Hyperparameters are requested from the trial in this order:
    ``filters_1``, ``dropout_l``, ``filters_2``, ``dropout_2``, ``linear_1``.
    Note that the first dropout key is spelled ``dropout_l`` (letter "l"); it is
    kept as-is because that is the name recorded in the study.

    The network is four 3x3 convolution stages (strides 1, 2, 2, 2) followed by
    two linear layers. The flattened size ``256*2*2`` matches 32x32 inputs
    (32 -> 30 -> 14 -> 6 -> 2 with the kernel/stride settings used here).

    Args:
        trial: object providing ``suggest_int`` and ``suggest_float``.

    Returns:
        An ``nn.Sequential`` producing raw class scores for ``CLASSES`` classes.
        No softmax is applied because cross-entropy loss is used downstream.
    """
    # Stage 1: RGB input -> sampled number of filters, stride 1.
    n_filters_1 = trial.suggest_int(name="filters_1", low=32, high=64, step=32)
    modules = [
        nn.Conv2d(in_channels=3, out_channels=n_filters_1, kernel_size=3, stride=1),
        nn.BatchNorm2d(n_filters_1),
        nn.ReLU(),
    ]
    modules.append(nn.Dropout(trial.suggest_float(name="dropout_l", low=0.2, high=0.4)))

    # Stage 2: sampled number of filters, stride 2.
    n_filters_2 = trial.suggest_int(name="filters_2", low=64, high=128, step=32)
    modules += [
        nn.Conv2d(in_channels=n_filters_1, out_channels=n_filters_2, kernel_size=3, stride=2),
        nn.BatchNorm2d(n_filters_2),
        nn.ReLU(),
    ]
    modules.append(nn.Dropout(trial.suggest_float(name="dropout_2", low=0.2, high=0.4)))

    # Stages 3 and 4 have fixed widths; the last stage has no dropout.
    modules += [
        nn.Conv2d(in_channels=n_filters_2, out_channels=128, kernel_size=3, stride=2),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Flatten(),
    ]

    # Classifier head: sampled hidden width, then one score per class.
    hidden_units = trial.suggest_int(name="linear_1", low=128, high=512, step=128)
    modules += [
        nn.Linear(256*2*2, hidden_units),
        nn.Dropout(0.2),
        nn.Linear(hidden_units, CLASSES),  # CLASSES = 10 for cifar10 and 100 for cifar100
    ]

    return nn.Sequential(*modules)


In [None]:
def get_cifar10(split_seed=0):
    """Build CIFAR-10 training and validation loaders from the official training split.

    Images are converted to tensors and normalized per channel with mean 0.5
    and std 0.5. The training split is divided 80% / 20% into training and
    validation subsets. The division is drawn from a generator seeded with
    ``split_seed``, so repeated calls (e.g. one per Optuna trial) evaluate on
    the same validation subset and their scores are comparable.

    The official test split is not loaded here.

    Args:
        split_seed: seed for the train/validation partition (default 0).

    Returns:
        ``(train_loader, valid_loader)``; the training loader shuffles, the
        validation loader does not. Both use ``BATCHSIZE`` and 2 workers.
    """
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    trainset = torchvision.datasets.CIFAR10(root=DIR, train=True,
                                            download=True, transform=transform)

    # Derive the validation size as the remainder so the two lengths always
    # sum to len(trainset), which random_split requires.
    n_train = int(0.8 * len(trainset))
    n_valid = len(trainset) - n_train

    # A dedicated, seeded generator keeps the partition identical across calls
    # without touching the global RNG state.
    split_rng = torch.Generator().manual_seed(split_seed)
    train_set, val_set = torch.utils.data.random_split(
        trainset, [n_train, n_valid], generator=split_rng)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCHSIZE,
                                               shuffle=True, num_workers=2)

    valid_loader = torch.utils.data.DataLoader(val_set, batch_size=BATCHSIZE,
                                               shuffle=False, num_workers=2)

    return train_loader, valid_loader

In [None]:
def objective(trial):
    """Optuna objective: briefly train a sampled CNN and return its validation accuracy.

    The model comes from ``define_model(trial)``; the optimizer class
    (``Adam``, ``RMSprop`` or ``SGD``) and the learning rate (log-uniform in
    [1e-5, 1e-1]) are sampled from the trial as well. After every epoch the
    validation accuracy is reported to the trial so it can be pruned early.

    Args:
        trial: the Optuna trial supplying hyperparameters and pruning decisions.

    Returns:
        Validation accuracy after the last epoch, as a fraction in [0, 1].

    Raises:
        optuna.exceptions.TrialPruned: when the trial asks to be pruned.
    """
    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizer.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    # nn.CrossEntropyLoss is a module class: it has to be instantiated once and
    # the instance is then called on (output, target).
    CEloss = nn.CrossEntropyLoss()

    # Get the CIFAR-10 loaders.
    train_loader, valid_loader = get_cifar10()

    # Defined up front so the function still returns a value when EPOCHS == 0.
    accuracy = 0.0

    for epoch in range(EPOCHS):
        # Training of the model.
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            data, target = data.to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()
            output = model(data)
            loss = CEloss(output, target)
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        correct = 0
        n_evaluated = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break
                data, target = data.to(DEVICE), target.to(DEVICE)
                output = model(data)
                # Predicted class = index of the largest score.
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
                n_evaluated += target.size(0)

        # Divide by the number of samples actually evaluated. The cap above is
        # checked per batch, so whole batches are consumed and the count can
        # exceed N_VALID_EXAMPLES (e.g. 3 * 128 = 384 for a cap of 300);
        # dividing by the cap would inflate the accuracy.
        accuracy = correct / max(n_evaluated, 1)

        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

In [None]:
if __name__ == "__main__":
    # `objective` returns an accuracy, so the study searches for the maximum.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=30, timeout=600)

    # Partition the recorded trials by their final state.
    pruned_trials = list(
        filter(lambda t: t.state == optuna.trial.TrialState.PRUNED, study.trials)
    )
    complete_trials = list(
        filter(lambda t: t.state == optuna.trial.TrialState.COMPLETE, study.trials)
    )

    # Summary counts.
    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    # Best trial: its objective value followed by one line per hyperparameter.
    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key in trial.params:
        value = trial.params[key]
        print("    {}: {}".format(key, value))

[I 2024-05-06 03:24:14,123] A new study created in memory with name: no-name-a0c4d402-1751-4d13-9033-016a82c9b587


Files already downloaded and verified


[I 2024-05-06 03:24:22,421] Trial 0 finished with value: 0.5233333333333333 and parameters: {'filters_1': 32, 'dropout_l': 0.3884543192791649, 'filters_2': 128, 'dropout_2': 0.32675975482842734, 'linear_1': 256, 'optimizer': 'Adam', 'lr': 0.0007085570034905948}. Best is trial 0 with value: 0.5233333333333333.


Files already downloaded and verified


[I 2024-05-06 03:24:31,155] Trial 1 finished with value: 0.5666666666666667 and parameters: {'filters_1': 64, 'dropout_l': 0.2525825551345993, 'filters_2': 96, 'dropout_2': 0.20580845049839136, 'linear_1': 512, 'optimizer': 'Adam', 'lr': 0.00034617732389021107}. Best is trial 1 with value: 0.5666666666666667.


Files already downloaded and verified


[I 2024-05-06 03:24:39,876] Trial 2 finished with value: 0.33666666666666667 and parameters: {'filters_1': 32, 'dropout_l': 0.23481806990039553, 'filters_2': 96, 'dropout_2': 0.20999919306901535, 'linear_1': 512, 'optimizer': 'Adam', 'lr': 4.233582036064346e-05}. Best is trial 1 with value: 0.5666666666666667.


Files already downloaded and verified


[I 2024-05-06 03:24:47,475] Trial 3 finished with value: 0.3933333333333333 and parameters: {'filters_1': 32, 'dropout_l': 0.25112905478152847, 'filters_2': 96, 'dropout_2': 0.23758161821668722, 'linear_1': 256, 'optimizer': 'RMSprop', 'lr': 8.069917725107065e-05}. Best is trial 1 with value: 0.5666666666666667.


Files already downloaded and verified


[I 2024-05-06 03:24:56,103] Trial 4 finished with value: 0.44333333333333336 and parameters: {'filters_1': 64, 'dropout_l': 0.23069106251736005, 'filters_2': 96, 'dropout_2': 0.2196062200568732, 'linear_1': 256, 'optimizer': 'RMSprop', 'lr': 5.219319131293276e-05}. Best is trial 1 with value: 0.5666666666666667.


Files already downloaded and verified


[I 2024-05-06 03:25:04,720] Trial 5 finished with value: 0.45 and parameters: {'filters_1': 64, 'dropout_l': 0.3558726431002804, 'filters_2': 64, 'dropout_2': 0.21936492985465694, 'linear_1': 384, 'optimizer': 'Adam', 'lr': 0.0013479500955917607}. Best is trial 1 with value: 0.5666666666666667.


Files already downloaded and verified


[I 2024-05-06 03:25:09,695] Trial 6 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:25:11,267] Trial 7 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:25:12,830] Trial 8 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:25:18,991] Trial 9 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:25:22,688] Trial 10 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:25:24,327] Trial 11 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:25:32,964] Trial 12 finished with value: 0.5333333333333333 and parameters: {'filters_1': 64, 'dropout_l': 0.31448591318053365, 'filters_2': 128, 'dropout_2': 0.3208675323062212, 'linear_1': 128, 'optimizer': 'Adam', 'lr': 0.005478826942061576}. Best is trial 1 with value: 0.5666666666666667.


Files already downloaded and verified


[I 2024-05-06 03:25:34,599] Trial 13 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:25:43,433] Trial 14 finished with value: 0.4766666666666667 and parameters: {'filters_1': 64, 'dropout_l': 0.29349556434517304, 'filters_2': 128, 'dropout_2': 0.32487491521795603, 'linear_1': 512, 'optimizer': 'Adam', 'lr': 0.004556428742264942}. Best is trial 1 with value: 0.5666666666666667.


Files already downloaded and verified


[I 2024-05-06 03:25:45,119] Trial 15 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:25:46,799] Trial 16 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:25:48,495] Trial 17 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:25:50,166] Trial 18 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:25:58,960] Trial 19 finished with value: 0.5833333333333334 and parameters: {'filters_1': 64, 'dropout_l': 0.3009936669709093, 'filters_2': 128, 'dropout_2': 0.26296603931076695, 'linear_1': 128, 'optimizer': 'Adam', 'lr': 0.00212881609086277}. Best is trial 19 with value: 0.5833333333333334.


Files already downloaded and verified


[I 2024-05-06 03:26:00,628] Trial 20 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:26:09,390] Trial 21 finished with value: 0.5533333333333333 and parameters: {'filters_1': 64, 'dropout_l': 0.31197517011270587, 'filters_2': 128, 'dropout_2': 0.297011247151524, 'linear_1': 128, 'optimizer': 'Adam', 'lr': 0.002221138512466591}. Best is trial 19 with value: 0.5833333333333334.


Files already downloaded and verified


[I 2024-05-06 03:26:17,236] Trial 22 finished with value: 0.56 and parameters: {'filters_1': 64, 'dropout_l': 0.26927032960090075, 'filters_2': 128, 'dropout_2': 0.2754613116040491, 'linear_1': 128, 'optimizer': 'Adam', 'lr': 0.002131783419216426}. Best is trial 19 with value: 0.5833333333333334.


Files already downloaded and verified


[I 2024-05-06 03:26:25,999] Trial 23 finished with value: 0.5433333333333333 and parameters: {'filters_1': 64, 'dropout_l': 0.26880464435138396, 'filters_2': 128, 'dropout_2': 0.26641996014686936, 'linear_1': 128, 'optimizer': 'Adam', 'lr': 0.002283025000469792}. Best is trial 19 with value: 0.5833333333333334.


Files already downloaded and verified


[I 2024-05-06 03:26:27,634] Trial 24 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:26:29,249] Trial 25 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:26:31,007] Trial 26 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:26:39,313] Trial 27 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:26:40,927] Trial 28 pruned. 


Files already downloaded and verified


[I 2024-05-06 03:26:42,530] Trial 29 pruned. 


Study statistics: 
  Number of finished trials:  30
  Number of pruned trials:  18
  Number of complete trials:  12
Best trial:
  Value:  0.5833333333333334
  Params: 
    filters_1: 64
    dropout_l: 0.3009936669709093
    filters_2: 128
    dropout_2: 0.26296603931076695
    linear_1: 128
    optimizer: Adam
    lr: 0.00212881609086277


In [None]:
# Show the best trial of the study as the cell output.
study.best_trial

FrozenTrial(number=19, state=TrialState.COMPLETE, values=[0.5833333333333334], datetime_start=datetime.datetime(2024, 5, 6, 3, 25, 50, 167627), datetime_complete=datetime.datetime(2024, 5, 6, 3, 25, 58, 959660), params={'filters_1': 64, 'dropout_l': 0.3009936669709093, 'filters_2': 128, 'dropout_2': 0.26296603931076695, 'linear_1': 128, 'optimizer': 'Adam', 'lr': 0.00212881609086277}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.22666666666666666, 1: 0.30333333333333334, 2: 0.23, 3: 0.38666666666666666, 4: 0.43, 5: 0.4633333333333333, 6: 0.51, 7: 0.5666666666666667, 8: 0.5166666666666667, 9: 0.5833333333333334}, distributions={'filters_1': IntDistribution(high=64, log=False, low=32, step=32), 'dropout_l': FloatDistribution(high=0.4, log=False, low=0.2, step=None), 'filters_2': IntDistribution(high=128, log=False, low=64, step=32), 'dropout_2': FloatDistribution(high=0.4, log=False, low=0.2, step=None), 'linear_1': IntDistribution(high=512, log=False, low=128, step=128), 'o

In [None]:
# Plot the optimization history of the study.
optuna.visualization.plot_optimization_history(study)

In [None]:
optuna.visualization.plot_param_importances(study)  # useful for seeing which hyperparameters matter most

In [None]:
optuna.visualization.plot_slice(study)  # slice plot of the study; gives a clear per-parameter picture

In [None]:
# Plot the study's trials as a parallel-coordinate chart.
optuna.visualization.plot_parallel_coordinate(study)

In [None]:
def define_best_model(filters_1=64, dropout_1=0.3009936669709093,
                      filters_2=128, dropout_2=0.26296603931076695,
                      linear_1=128, num_classes=None):
    """Build the CNN classifier for a fixed set of hyperparameters.

    The defaults are the best values found by the hyperparameter study, so
    ``define_best_model()`` returns the same architecture as before. Passing
    other values rebuilds the network for a different configuration without
    copying this function.

    The layer layout is four 3x3 convolution stages (strides 1, 2, 2, 2)
    followed by two linear layers. The flattened size ``256*2*2`` matches
    32x32 inputs (32 -> 30 -> 14 -> 6 -> 2).

    Args:
        filters_1: output channels of the first convolution.
        dropout_1: dropout probability after the first stage.
        filters_2: output channels of the second convolution.
        dropout_2: dropout probability after the second stage.
        linear_1: width of the hidden linear layer.
        num_classes: number of output scores; defaults to the module-level
            ``CLASSES`` when left as ``None``.

    Returns:
        An ``nn.Sequential`` producing raw class scores. No softmax is applied
        because cross-entropy loss is used downstream.
    """
    if num_classes is None:
        num_classes = CLASSES  # CLASSES = 10 for cifar10 and 100 for cifar100

    layers = [
        # Stage 1
        nn.Conv2d(in_channels=3, out_channels=filters_1, kernel_size=3, stride=1),
        nn.BatchNorm2d(filters_1),
        nn.ReLU(),
        nn.Dropout(dropout_1),
        # Stage 2
        nn.Conv2d(in_channels=filters_1, out_channels=filters_2, kernel_size=3, stride=2),
        nn.BatchNorm2d(filters_2),
        nn.ReLU(),
        nn.Dropout(dropout_2),
        # Stage 3 (fixed width)
        nn.Conv2d(in_channels=filters_2, out_channels=128, kernel_size=3, stride=2),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Dropout(0.2),
        # Stage 4 (fixed width, no dropout)
        nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        # Classifier head
        nn.Flatten(),
        nn.Linear(256*2*2, linear_1),
        nn.Dropout(0.2),
        nn.Linear(linear_1, num_classes),
    ]

    return nn.Sequential(*layers)



In [None]:
 # Note cell: the best hyperparameters printed by the study, kept as a plain string.
 ''' filters_1: 64
  dropout_l: 0.3009936669709093
  filters_2: 128
  dropout_2: 0.26296603931076695
  linear_1: 128
  optimizer: Adam
  lr: 0.00212881609086277 '''

' filters_1: 64\n dropout_l: 0.3009936669709093\n filters_2: 128\n dropout_2: 0.26296603931076695\n linear_1: 128\n optimizer: Adam\n lr: 0.00212881609086277 '

In [None]:
# Train a model built with the best hyperparameters (Adam, lr = 0.00212881609086277)
# using the same capped training/validation loop as the search.
model = define_best_model().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00212881609086277)
# Both names were bound by this cell before; they now share one loss instance.
# nn.CrossEntropyLoss is a module class, so it is instantiated once and the
# instance is called on (output, target).
CEloss = criterion

# Get the CIFAR-10 loaders.
train_loader, valid_loader = get_cifar10()

for epoch in range(EPOCHS):
    # Training of the model.
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Limiting training data for faster epochs.
        if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
            break

        data, target = data.to(DEVICE), target.to(DEVICE)

        optimizer.zero_grad()
        output = model(data)
        loss = CEloss(output, target)
        loss.backward()
        optimizer.step()

    # Validation of the model.
    model.eval()
    correct = 0
    n_evaluated = 0
    with torch.no_grad():
        val_loss_batch = 0  # running sum of the per-batch mean losses
        for batch_idx, (data, target) in enumerate(valid_loader):
            # Limiting validation data.
            if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                break
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)
            # Predicted class = index of the largest score.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            n_evaluated += target.size(0)

            val_loss_batch += CEloss(output, target).item()

    # Divide by the number of samples actually evaluated. The cap above is
    # checked per batch, so whole batches are consumed and the count can exceed
    # N_VALID_EXAMPLES (e.g. 3 * 128 = 384 for a cap of 300); dividing by the
    # cap would inflate the accuracy.
    accuracy = correct / max(n_evaluated, 1)

    # Show the per-epoch result; otherwise this cell produces no visible outcome.
    print("Epoch {}/{} - validation accuracy: {:.4f}".format(epoch + 1, EPOCHS, accuracy))

Files already downloaded and verified



os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.


os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.

