In [1]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchsummary import summary

!pip install optuna
import optuna



In [2]:
DEVICE = torch.device("cuda")  ##'cuda' or 'cpu'
BATCHSIZE = 128
CLASSES = 10   #CLASSES = 10 for cifar10 and 100 for cifar100
DIR = os.getcwd()
EPOCHS = 10
LOG_INTERVAL = 10
N_TRAIN_EXAMPLES = BATCHSIZE * 30
N_VALID_EXAMPLES = BATCHSIZE * 10

In [3]:
def define_model(trial):

    layers = []

    output_channels1 = trial.suggest_int(name="filters_1", low=32, high=64, step=32)
    layers.append(nn.Conv2d(in_channels=3, out_channels=output_channels1, kernel_size=3, stride=1))
    layers.append(nn.BatchNorm2d(output_channels1))
    layers.append(nn.ReLU())
    p1 = trial.suggest_float(name="dropout_l", low=0.2, high=0.4)
    layers.append(nn.Dropout(p1))

    output_channels2 = trial.suggest_int(name="filters_2", low=64, high=128, step=32)
    layers.append(nn.Conv2d(in_channels=output_channels1, out_channels=output_channels2, kernel_size=3, stride=2))
    layers.append(nn.BatchNorm2d(output_channels2))
    layers.append(nn.ReLU())
    p2 = trial.suggest_float(name="dropout_2", low=0.2, high=0.4)
    layers.append(nn.Dropout(p2))

    layers.append(nn.Conv2d(in_channels=output_channels2, out_channels=128, kernel_size=3, stride=2))
    layers.append(nn.BatchNorm2d(128))
    layers.append(nn.ReLU())
    layers.append(nn.Dropout(0.2))

    layers.append(nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2))
    layers.append(nn.BatchNorm2d(256))
    layers.append(nn.ReLU())
    
    layers.append(nn.Flatten())
    output_units1 = trial.suggest_int(name="linear_1", low=128, high=512, step=128)
    layers.append(nn.Linear(256*2*2, output_units1))  #output size found by printing the model detail using summary in torchsummary 
    layers.append(nn.Dropout(0.2))
    layers.append(nn.Linear(output_units1, CLASSES))  #CLASSES = 10 for cifar10 and 100 for cifar100
    #cross entropy loss used as loss function, therefore no softmax layer here

    return nn.Sequential(*layers)


In [4]:
def get_cifar10():
    # Load cifar10 dataset.

    transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    trainset = torchvision.datasets.CIFAR10(root=DIR, train=True,
                                        download=True, transform=transform)
    
    #split training data into training-80% and validation-20%
    train_set, val_set = torch.utils.data.random_split(trainset, [int(0.8*len(trainset)), int(0.2*len(trainset))])

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCHSIZE,
                                          shuffle=True, num_workers=2)
    
    valid_loader = torch.utils.data.DataLoader(val_set, batch_size=BATCHSIZE,
                                         shuffle=False, num_workers=2)
    

    """
    testset = torchvision.datasets.CIFAR10(root=DIR, train=False,
                                       download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCHSIZE,
                                         shuffle=False, num_workers=2)
    """

    return train_loader, valid_loader

In [5]:
def objective(trial):

    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"]) #for hp tuning
    #optimizer_name = "Adam"
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True) #for hp tuning
    #lr = 0.001
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    CEloss = nn.CrossEntropyLoss()  ## this loss object must be used the loop. Directly using nn.CrossEntropyLoss() gives error

    # Get the MNIST dataset.
    train_loader, valid_loader = get_cifar10()

    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            #data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)  ## for mnist
            data, target = data.to(DEVICE), target.to(DEVICE)  ## for cifar 10 and 100

            optimizer.zero_grad()
            output = model(data)
            loss = CEloss(output, target)  ## used cross entropy loss
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        correct = 0
        with torch.no_grad():
            val_loss_batch = 0
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break
                #data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)  ## for mnist
                data, target = data.to(DEVICE), target.to(DEVICE)  ## for cifar 10 and 100
                output = model(data)
                # Get the index of the max log-probability.
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

                val_loss_batch += CEloss(output, target).item()  ## used cross entropy loss

        #accuracy = correct / min(len(valid_loader.dataset), N_VALID_EXAMPLES)
        val_loss_epoch = val_loss_batch / min(len(valid_loader.dataset), N_VALID_EXAMPLES)

        #trial.report(accuracy, epoch)
        trial.report(val_loss_epoch, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_loss_epoch #accuracy 

In [6]:
if __name__ == "__main__":
    #study = optuna.create_study(direction="maximize")  # 'maximize' because objective function is returning accuracy
    study = optuna.create_study(direction="minimize")  # 'minimize' because objective function is returning loss
    study.optimize(objective, n_trials=30, timeout=600)

    pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
    complete_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

[32m[I 2021-01-19 04:35:58,969][0m A new study created in memory with name: no-name-6a7b5d91-60f6-4b08-ba8a-f493d915ef6e[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:36:18,292][0m Trial 0 finished with value: 0.015790327172726392 and parameters: {'filters_1': 32, 'dropout_l': 0.2789879268375691, 'filters_2': 96, 'dropout_2': 0.32544635957156937, 'linear_1': 128, 'optimizer': 'SGD', 'lr': 0.003927883292982739}. Best is trial 0 with value: 0.015790327172726392.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:36:33,325][0m Trial 1 finished with value: 0.017347655817866327 and parameters: {'filters_1': 32, 'dropout_l': 0.3525421775064947, 'filters_2': 96, 'dropout_2': 0.26553517401678667, 'linear_1': 256, 'optimizer': 'SGD', 'lr': 0.0016954978872456329}. Best is trial 0 with value: 0.015790327172726392.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:36:48,643][0m Trial 2 finished with value: 0.018799667619168757 and parameters: {'filters_1': 32, 'dropout_l': 0.2983524035266557, 'filters_2': 96, 'dropout_2': 0.22042273804406629, 'linear_1': 384, 'optimizer': 'RMSprop', 'lr': 0.016688504866225685}. Best is trial 0 with value: 0.015790327172726392.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:37:04,280][0m Trial 3 finished with value: 0.016291816718876362 and parameters: {'filters_1': 32, 'dropout_l': 0.3996832063306419, 'filters_2': 128, 'dropout_2': 0.39568155627241763, 'linear_1': 256, 'optimizer': 'RMSprop', 'lr': 1.2801024210957622e-05}. Best is trial 0 with value: 0.015790327172726392.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:37:19,770][0m Trial 4 finished with value: 0.009796980116516352 and parameters: {'filters_1': 64, 'dropout_l': 0.26639574573833347, 'filters_2': 96, 'dropout_2': 0.2651358979458562, 'linear_1': 128, 'optimizer': 'Adam', 'lr': 0.0009232134728986037}. Best is trial 4 with value: 0.009796980116516352.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:37:35,031][0m Trial 5 finished with value: 0.010554519016295671 and parameters: {'filters_1': 64, 'dropout_l': 0.3527824990674155, 'filters_2': 96, 'dropout_2': 0.36766582005686943, 'linear_1': 256, 'optimizer': 'Adam', 'lr': 0.0006767366576759073}. Best is trial 4 with value: 0.009796980116516352.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:37:49,725][0m Trial 6 finished with value: 0.011075667385011912 and parameters: {'filters_1': 64, 'dropout_l': 0.20068060138500676, 'filters_2': 96, 'dropout_2': 0.22583827532579354, 'linear_1': 384, 'optimizer': 'SGD', 'lr': 0.08234366907653651}. Best is trial 4 with value: 0.009796980116516352.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:38:05,106][0m Trial 7 finished with value: 0.010972072742879391 and parameters: {'filters_1': 32, 'dropout_l': 0.38766255120978566, 'filters_2': 96, 'dropout_2': 0.2859748869758585, 'linear_1': 128, 'optimizer': 'RMSprop', 'lr': 0.0007885358216055516}. Best is trial 4 with value: 0.009796980116516352.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:38:07,384][0m Trial 8 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:38:22,910][0m Trial 9 finished with value: 0.011912293080240489 and parameters: {'filters_1': 64, 'dropout_l': 0.29400261200991956, 'filters_2': 64, 'dropout_2': 0.3640300653236394, 'linear_1': 512, 'optimizer': 'RMSprop', 'lr': 8.831513544588313e-05}. Best is trial 4 with value: 0.009796980116516352.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:38:26,734][0m Trial 10 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:38:42,709][0m Trial 11 finished with value: 0.01133987195789814 and parameters: {'filters_1': 64, 'dropout_l': 0.24765555273285034, 'filters_2': 64, 'dropout_2': 0.37673872533290087, 'linear_1': 256, 'optimizer': 'Adam', 'lr': 0.0002828956679063054}. Best is trial 4 with value: 0.009796980116516352.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:38:58,467][0m Trial 12 finished with value: 0.011118773464113474 and parameters: {'filters_1': 64, 'dropout_l': 0.3388964010604307, 'filters_2': 128, 'dropout_2': 0.31902411157228294, 'linear_1': 128, 'optimizer': 'Adam', 'lr': 0.00643644230182958}. Best is trial 4 with value: 0.009796980116516352.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:39:00,846][0m Trial 13 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:39:03,296][0m Trial 14 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:39:18,179][0m Trial 15 finished with value: 0.01078373584896326 and parameters: {'filters_1': 64, 'dropout_l': 0.3206230570136703, 'filters_2': 64, 'dropout_2': 0.35348360375399945, 'linear_1': 128, 'optimizer': 'Adam', 'lr': 0.0022470194406645993}. Best is trial 4 with value: 0.009796980116516352.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:39:20,584][0m Trial 16 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:39:22,884][0m Trial 17 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:39:28,154][0m Trial 18 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:39:43,741][0m Trial 19 finished with value: 0.010313332080841064 and parameters: {'filters_1': 64, 'dropout_l': 0.36843433951315374, 'filters_2': 128, 'dropout_2': 0.23649427629778488, 'linear_1': 384, 'optimizer': 'Adam', 'lr': 0.00146835100879809}. Best is trial 4 with value: 0.009796980116516352.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:39:59,274][0m Trial 20 finished with value: 0.010031948052346706 and parameters: {'filters_1': 64, 'dropout_l': 0.27955681852867353, 'filters_2': 128, 'dropout_2': 0.23830906745986002, 'linear_1': 384, 'optimizer': 'Adam', 'lr': 0.002487367967407538}. Best is trial 4 with value: 0.009796980116516352.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:40:14,743][0m Trial 21 finished with value: 0.009771103411912918 and parameters: {'filters_1': 64, 'dropout_l': 0.27826978490909676, 'filters_2': 128, 'dropout_2': 0.23467842343788, 'linear_1': 384, 'optimizer': 'Adam', 'lr': 0.0019607598053834477}. Best is trial 21 with value: 0.009771103411912918.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:40:17,081][0m Trial 22 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:40:19,418][0m Trial 23 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:40:35,262][0m Trial 24 finished with value: 0.009824879840016365 and parameters: {'filters_1': 64, 'dropout_l': 0.20637054295180657, 'filters_2': 128, 'dropout_2': 0.21675151312037888, 'linear_1': 384, 'optimizer': 'Adam', 'lr': 0.0029691845677805705}. Best is trial 21 with value: 0.009771103411912918.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:40:37,543][0m Trial 25 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:40:39,984][0m Trial 26 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:40:42,401][0m Trial 27 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:40:44,768][0m Trial 28 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:40:47,088][0m Trial 29 pruned. [0m


Study statistics: 
  Number of finished trials:  30
  Number of pruned trials:  14
  Number of complete trials:  16
Best trial:
  Value:  0.009771103411912918
  Params: 
    filters_1: 64
    dropout_l: 0.27826978490909676
    filters_2: 128
    dropout_2: 0.23467842343788
    linear_1: 384
    optimizer: Adam
    lr: 0.0019607598053834477


In [7]:
study.best_trial

FrozenTrial(number=21, values=[0.009771103411912918], datetime_start=datetime.datetime(2021, 1, 19, 4, 39, 59, 276218), datetime_complete=datetime.datetime(2021, 1, 19, 4, 40, 14, 743183), params={'filters_1': 64, 'dropout_l': 0.27826978490909676, 'filters_2': 128, 'dropout_2': 0.23467842343788, 'linear_1': 384, 'optimizer': 'Adam', 'lr': 0.0019607598053834477}, distributions={'filters_1': IntUniformDistribution(high=64, low=32, step=32), 'dropout_l': UniformDistribution(high=0.4, low=0.2), 'filters_2': IntUniformDistribution(high=128, low=64, step=32), 'dropout_2': UniformDistribution(high=0.4, low=0.2), 'linear_1': IntUniformDistribution(high=512, low=128, step=128), 'optimizer': CategoricalDistribution(choices=('Adam', 'RMSprop', 'SGD')), 'lr': LogUniformDistribution(high=0.1, low=1e-05)}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.015361118875443936, 1: 0.013182415906339884, 2: 0.012221780978143216, 3: 0.011809922475367784, 4: 0.011753248982131481, 5: 0.0107100318185

In [8]:
optuna.visualization.plot_optimization_history(study)

In [9]:
optuna.visualization.plot_param_importances(study) ## this is important to figure out which hp is important

In [10]:
optuna.visualization.plot_slice(study)   ## this gives a clear picture 

In [11]:
optuna.visualization.plot_parallel_coordinate(study)

In [12]:
# SKIP THIS
#### used for testing output sizes of layers in the model
#****important: only change the input filter to maintain the output size of each layer
"""
model = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)
    ,nn.BatchNorm2d(32)
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=32, out_channels=128, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(128) #this must be same as the out_channel of the previous layer
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(128)
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(256)
    ,nn.ReLU()
    ,nn.Flatten()
    ,nn.Linear(256*2*2, 500)  #output size found by printing the model detail using summary in torchsummary 
    ,nn.Dropout(0.2)
    ,nn.Linear(500, CLASSES))  #CLASSES = 10 for cifar10 and 100 for cifar100

print(summary(model,(3,32,32)))
"""

'\nmodel = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)\n    ,nn.BatchNorm2d(32)\n    ,nn.ReLU()\n    ,nn.Dropout(0.2)\n    ,nn.Conv2d(in_channels=32, out_channels=128, kernel_size=3, stride=2)\n    ,nn.BatchNorm2d(128) #this must be same as the out_channel of the previous layer\n    ,nn.ReLU()\n    ,nn.Dropout(0.2)\n    ,nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2)\n    ,nn.BatchNorm2d(128)\n    ,nn.ReLU()\n    ,nn.Dropout(0.2)\n    ,nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2)\n    ,nn.BatchNorm2d(256)\n    ,nn.ReLU()\n    ,nn.Flatten()\n    ,nn.Linear(256*2*2, 500)  #output size found by printing the model detail using summary in torchsummary \n    ,nn.Dropout(0.2)\n    ,nn.Linear(500, CLASSES))  #CLASSES = 10 for cifar10 and 100 for cifar100\n\nprint(summary(model,(3,32,32)))\n'