In [1]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchsummary import summary

!pip install optuna
import optuna

Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/59/b4/a1a80252cef3d8f5a0acdf6e678d6dc07e2e6964ee46d0453a2ae1af1ecb/optuna-2.4.0-py3-none-any.whl (282kB)
[K     |████████████████████████████████| 286kB 14.7MB/s 
[?25hCollecting alembic
[?25l  Downloading https://files.pythonhosted.org/packages/51/10/cd16601106782a879398fb3530388322d0ec293b927634eb94f18c185b14/alembic-1.5.0.tar.gz (1.1MB)
[K     |████████████████████████████████| 1.1MB 51.2MB/s 
[?25hCollecting cmaes>=0.6.0
  Downloading https://files.pythonhosted.org/packages/8d/3c/06c76ec8b54b9b1fad7f35e903fd25010fe3e0d41bd94cea5e6f12e0d651/cmaes-0.7.0-py3-none-any.whl
Collecting colorlog
  Downloading https://files.pythonhosted.org/packages/5e/39/0230290df0519d528d8d0ffdfd900150ed24e0076d13b1f19e279444aab1/colorlog-4.7.2-py2.py3-none-any.whl
Collecting cliff
[?25l  Downloading https://files.pythonhosted.org/packages/0f/8f/3c74fa4b6c3db1051b495385f5302fc5d5aa0f180d40ce3e9a13c82f8c82/cliff-3.6.0-py3-n

In [2]:
DEVICE = torch.device("cuda")  ##'cuda' or 'cpu'
BATCHSIZE = 128
CLASSES = 10   #CLASSES = 10 for cifar10 and 100 for cifar100
DIR = os.getcwd()
EPOCHS = 10
LOG_INTERVAL = 10
N_TRAIN_EXAMPLES = BATCHSIZE * 30
N_VALID_EXAMPLES = BATCHSIZE * 10

In [3]:
def define_model(trial):

    layers = []

    layers.append(nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1))
    layers.append(nn.BatchNorm2d(32))
    layers.append(nn.ReLU())
    layers.append(nn.Dropout(0.2))

    layers.append(nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2))
    layers.append(nn.BatchNorm2d(64))
    layers.append(nn.ReLU())
    layers.append(nn.Dropout(0.2))

    layers.append(nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2))
    layers.append(nn.BatchNorm2d(128))
    layers.append(nn.ReLU())
    layers.append(nn.Dropout(0.2))

    layers.append(nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2))
    layers.append(nn.BatchNorm2d(256))
    layers.append(nn.ReLU())
    
    layers.append(nn.Flatten())
    layers.append(nn.Linear(256*2*2, 500))  #output size found by printing the model detail using summary in torchsummary 
    layers.append(nn.Dropout(0.2))
    layers.append(nn.Linear(500, CLASSES))  #CLASSES = 10 for cifar10 and 100 for cifar100
    #cross entropy loss used as loss function, therefore no softmax layer here

    return nn.Sequential(*layers)


In [4]:
def get_cifar10():
    # Load cifar10 dataset.

    transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    trainset = torchvision.datasets.CIFAR10(root=DIR, train=True,
                                        download=True, transform=transform)
    
    #split training data into training-80% and validation-20%
    train_set, val_set = torch.utils.data.random_split(trainset, [int(0.8*len(trainset)), int(0.2*len(trainset))])

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCHSIZE,
                                          shuffle=True, num_workers=2)
    
    valid_loader = torch.utils.data.DataLoader(val_set, batch_size=BATCHSIZE,
                                         shuffle=False, num_workers=2)
    

    """
    testset = torchvision.datasets.CIFAR10(root=DIR, train=False,
                                       download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCHSIZE,
                                         shuffle=False, num_workers=2)
    """

    return train_loader, valid_loader

In [5]:
def objective(trial):

    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizers.
    #optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"]) #for hp tuning
    optimizer_name = "Adam"
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True) #for hp tuning
    #lr = 0.001
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    CEloss = nn.CrossEntropyLoss()  ## this loss object must be used the loop. Directly using nn.CrossEntropyLoss() gives error

    # Get the MNIST dataset.
    train_loader, valid_loader = get_cifar10()

    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limiting training data for faster epochs.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            #data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)  ## for mnist
            data, target = data.to(DEVICE), target.to(DEVICE)  ## for cifar 10 and 100

            optimizer.zero_grad()
            output = model(data)
            loss = CEloss(output, target)  ## used cross entropy loss
            loss.backward()
            optimizer.step()

        # Validation of the model.
        model.eval()
        correct = 0
        with torch.no_grad():
            val_loss_batch = 0
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limiting validation data.
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break
                #data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)  ## for mnist
                data, target = data.to(DEVICE), target.to(DEVICE)  ## for cifar 10 and 100
                output = model(data)
                # Get the index of the max log-probability.
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

                val_loss_batch += CEloss(output, target).item()  ## used cross entropy loss

        #accuracy = correct / min(len(valid_loader.dataset), N_VALID_EXAMPLES)
        val_loss_epoch = val_loss_batch / min(len(valid_loader.dataset), N_VALID_EXAMPLES)

        #trial.report(accuracy, epoch)
        trial.report(val_loss_epoch, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_loss_epoch #accuracy

In [6]:
if __name__ == "__main__":
    study = optuna.create_study(direction="minimize")  # 'maximize' because objective function is returning accuracy
    study.optimize(objective, n_trials=50, timeout=600) ## link: https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html#optuna.study.Study.optimize

    pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
    complete_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

[32m[I 2021-01-19 04:02:30,021][0m A new study created in memory with name: no-name-7ff94107-d522-4399-a90b-1e9f2f5ff74c[0m


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /content/cifar-10-python.tar.gz to /content


[32m[I 2021-01-19 04:03:00,395][0m Trial 0 finished with value: 0.015555588994175196 and parameters: {'lr': 1.2543880460151144e-05}. Best is trial 0 with value: 0.015555588994175196.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:03:16,945][0m Trial 1 finished with value: 0.011357270926237107 and parameters: {'lr': 0.00011872991255831091}. Best is trial 1 with value: 0.011357270926237107.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:03:33,103][0m Trial 2 finished with value: 0.00995731856673956 and parameters: {'lr': 0.001955574386337098}. Best is trial 2 with value: 0.00995731856673956.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:03:49,401][0m Trial 3 finished with value: 0.014221998862922192 and parameters: {'lr': 3.099368989851889e-05}. Best is trial 2 with value: 0.00995731856673956.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:04:05,689][0m Trial 4 finished with value: 0.010459072515368461 and parameters: {'lr': 0.0004468245824049621}. Best is trial 2 with value: 0.00995731856673956.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:04:08,185][0m Trial 5 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:04:24,862][0m Trial 6 finished with value: 0.0104415662586689 and parameters: {'lr': 0.0012521978970500614}. Best is trial 2 with value: 0.00995731856673956.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:04:28,825][0m Trial 7 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:04:32,821][0m Trial 8 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:04:36,836][0m Trial 9 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:04:39,310][0m Trial 10 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:04:41,780][0m Trial 11 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:04:56,399][0m Trial 12 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:05:12,629][0m Trial 13 finished with value: 0.010476954374462366 and parameters: {'lr': 0.0018123189442231407}. Best is trial 2 with value: 0.00995731856673956.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:05:15,134][0m Trial 14 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:05:17,554][0m Trial 15 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:05:23,127][0m Trial 16 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:05:27,158][0m Trial 17 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:05:32,742][0m Trial 18 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:05:35,240][0m Trial 19 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:05:39,218][0m Trial 20 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:05:41,713][0m Trial 21 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:05:44,188][0m Trial 22 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:00,412][0m Trial 23 finished with value: 0.01093084579333663 and parameters: {'lr': 0.001788846828674164}. Best is trial 2 with value: 0.00995731856673956.[0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:04,409][0m Trial 24 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:06,846][0m Trial 25 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:10,786][0m Trial 26 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:23,028][0m Trial 27 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:25,440][0m Trial 28 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:27,899][0m Trial 29 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:30,383][0m Trial 30 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:35,913][0m Trial 31 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:41,866][0m Trial 32 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:52,248][0m Trial 33 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:06:56,276][0m Trial 34 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:00,399][0m Trial 35 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:02,869][0m Trial 36 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:05,347][0m Trial 37 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:10,827][0m Trial 38 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:14,853][0m Trial 39 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:17,320][0m Trial 40 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:22,898][0m Trial 41 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:26,859][0m Trial 42 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:30,899][0m Trial 43 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:36,372][0m Trial 44 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:38,816][0m Trial 45 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:42,791][0m Trial 46 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:46,837][0m Trial 47 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:52,449][0m Trial 48 pruned. [0m


Files already downloaded and verified


[32m[I 2021-01-19 04:07:55,016][0m Trial 49 pruned. [0m


Study statistics: 
  Number of finished trials:  50
  Number of pruned trials:  42
  Number of complete trials:  8
Best trial:
  Value:  0.00995731856673956
  Params: 
    lr: 0.001955574386337098


In [7]:
study.best_trial

FrozenTrial(number=2, values=[0.00995731856673956], datetime_start=datetime.datetime(2021, 1, 19, 4, 3, 16, 947163), datetime_complete=datetime.datetime(2021, 1, 19, 4, 3, 33, 103052), params={'lr': 0.001955574386337098}, distributions={'lr': LogUniformDistribution(high=0.1, low=1e-05)}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.017669405415654184, 1: 0.013185181841254234, 2: 0.012251102458685636, 3: 0.014407828263938428, 4: 0.011502127069979906, 5: 0.010822379309684039, 6: 0.010874059237539768, 7: 0.0109376085922122, 8: 0.010340017359703779, 9: 0.00995731856673956}, trial_id=2, state=TrialState.COMPLETE, value=None)

In [8]:
optuna.visualization.plot_optimization_history(study)

In [9]:
optuna.visualization.plot_param_importances(study) ## this is important to figure out which hp is important

In [10]:
optuna.visualization.plot_slice(study)   ## this gives a clear picture 

In [11]:
optuna.visualization.plot_parallel_coordinate(study)

In [12]:
# SKIP THIS
#### used for testing output sizes of layers in the model
#****important: only change the input filter to maintain the output size of each layer
"""
model = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)
    ,nn.BatchNorm2d(32)
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=32, out_channels=128, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(128) #this must be same as the out_channel of the previous layer
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(128)
    ,nn.ReLU()
    ,nn.Dropout(0.2)
    ,nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2)
    ,nn.BatchNorm2d(256)
    ,nn.ReLU()
    ,nn.Flatten()
    ,nn.Linear(256*2*2, 500)  #output size found by printing the model detail using summary in torchsummary 
    ,nn.Dropout(0.2)
    ,nn.Linear(500, CLASSES))  #CLASSES = 10 for cifar10 and 100 for cifar100

print(summary(model,(3,32,32)))
"""

'\nmodel = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)\n    ,nn.BatchNorm2d(32)\n    ,nn.ReLU()\n    ,nn.Dropout(0.2)\n    ,nn.Conv2d(in_channels=32, out_channels=128, kernel_size=3, stride=2)\n    ,nn.BatchNorm2d(128) #this must be same as the out_channel of the previous layer\n    ,nn.ReLU()\n    ,nn.Dropout(0.2)\n    ,nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=2)\n    ,nn.BatchNorm2d(128)\n    ,nn.ReLU()\n    ,nn.Dropout(0.2)\n    ,nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2)\n    ,nn.BatchNorm2d(256)\n    ,nn.ReLU()\n    ,nn.Flatten()\n    ,nn.Linear(256*2*2, 500)  #output size found by printing the model detail using summary in torchsummary \n    ,nn.Dropout(0.2)\n    ,nn.Linear(500, CLASSES))  #CLASSES = 10 for cifar10 and 100 for cifar100\n\nprint(summary(model,(3,32,32)))\n'