In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

# Allow up to 120 characters per line when tensors are printed.
torch.set_printoptions(linewidth=120)
# Explicitly switch autograd gradient tracking on for the session.
torch.set_grad_enabled(True)

from MNISTConvNet import MNISTConvNet

In [2]:
import skopt
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_convergence
from skopt.plots import plot_objective, plot_evaluations
from skopt.plots import plot_histogram, plot_objective_2D
from skopt.utils import use_named_args

In [3]:
skopt.__version__

'0.8.1'

In [4]:
# Hyperparameter search space handed to skopt. The position of each entry
# fixes the position of the matching value in every evaluated point.
# NOTE(review): dropout_rate is searched in [1e-5, 1e-2], far below the usual
# 0.1-0.5 range for dropout -- confirm this is intentional.
dimensions = [
    Real(name='learning_rate', low=1e-6, high=1e-2, prior='log-uniform'),
    Integer(name='num_conv_layers', low=1, high=3),
    Integer(name='num_fc_units', low=5, high=512),
    Real(name='dropout_rate', low=1e-5, high=1e-2, prior='log-uniform'),
]

# Keep the per-dimension handles available under their original names.
(dim_learning_rate,
 dim_num_conv_layers,
 dim_num_fc_units,
 dim_dropout_rate) = dimensions

# Starting point for the search, listed in the same order as `dimensions`.
default_parameters = [1e-5, 1, 16, 1e-4]

In [32]:
def _mnist_split(train):
    """Return one MNIST split stored under ./data/MNIST, downloading it if needed.

    ``train=True`` selects the 60,000-image training set, ``train=False`` the
    10,000-image test set. Every image goes through ``ToTensor`` followed by
    ``Normalize(0, 1)``.
    """
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(0, 1),
    ])
    return torchvision.datasets.MNIST(
        root='./data/MNIST',
        train=train,
        download=True,
        transform=preprocessing,
    )


train_set = _mnist_split(train=True)
test_set = _mnist_split(train=False)

# Both loaders serve fixed-order batches of 100 samples.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=100)

In [6]:
def get_num_correct(preds, labels):
    """Return how many rows of ``preds`` have their largest score at the index in ``labels``.

    ``preds`` is reduced with ``argmax`` over dim 1, compared element-wise with
    ``labels``, and the number of matches is returned as a Python number.
    """
    hits = preds.argmax(dim=1) == labels
    return hits.sum().item()

In [7]:
def train(model, lr, num_epoch, train_loader, test_loader):
    """Train ``model`` with Adam and cross-entropy, then measure its test accuracy.

    Args:
        model: ``nn.Module`` that maps a batch of images to per-class scores.
            Its parameters are updated in place.
        lr: learning rate handed to ``optim.Adam``.
        num_epoch: number of full passes over ``train_loader``.
        train_loader: ``DataLoader`` yielding ``(images, labels)`` training batches.
        test_loader: ``DataLoader`` yielding ``(images, labels)`` evaluation batches.

    Returns:
        Test accuracy: correct predictions divided by ``len(test_loader.dataset)``.

    Side effects:
        Prints per-epoch totals, the last epoch's train accuracy and the test
        accuracy. The model's train/eval mode is restored before returning.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Size the accuracies from the loaders that were actually passed in rather
    # than from notebook-level globals.
    num_train = len(train_loader.dataset)
    num_test = len(test_loader.dataset)

    was_training = model.training
    model.train()  # make sure dropout is active while fitting

    total_correct = 0
    for epoch in range(num_epoch):
        total_loss = 0
        total_correct = 0

        for images, labels in train_loader:
            preds = model(images)
            loss = F.cross_entropy(preds, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            total_correct += get_num_correct(preds, labels)

        print('epoch:', epoch, 'total_correct:', total_correct, 'loss:', total_loss)

    # With zero epochs there is no training pass to report on.
    if num_epoch > 0:
        print('Train Accuracy:', total_correct/num_train)

    # Evaluate with dropout disabled and without building autograd graphs.
    test_correct = 0
    model.eval()
    with torch.no_grad():
        for images, labels in test_loader:
            preds = model(images)
            test_correct += get_num_correct(preds, labels)

    # Leave the module in whatever mode the caller handed it to us.
    model.train(was_training)

    test_accuracy = test_correct/num_test
    print('Test Accuracy:', test_accuracy)

    return test_accuracy

In [8]:
# Where the weights of the best model so far are written, and the test
# accuracy that model reached. Re-running this cell resets the running best.
best_model_path = './best_model.pth'
best_accuracy = 0.0


@use_named_args(dimensions=dimensions)
def fitness(learning_rate, num_conv_layers,
            num_fc_units, dropout_rate):
    """Objective for the hyperparameter search: negative test accuracy.

    Builds an ``MNISTConvNet`` from the given hyperparameters, trains it for
    one epoch through ``train`` and, when the resulting test accuracy beats
    the module-level ``best_accuracy``, saves the model's ``state_dict`` to
    ``best_model_path`` and records the new best.

    Returns the accuracy negated. Thanks to ``use_named_args`` the function is
    called with a single list of values ordered like ``dimensions``.
    """
    global best_accuracy

    print('\n\nlearning rate: {0:.1e}'.format(learning_rate))
    print('num_conv_layers:', num_conv_layers)
    print('num_fc_units:', num_fc_units)
    print('dropout_rate:', dropout_rate)

    hyperparameters = dict(num_conv_layers=num_conv_layers,
                           num_fc_units=num_fc_units,
                           dropout_rate=dropout_rate)
    candidate = MNISTConvNet(**hyperparameters)

    test_accuracy = train(candidate, learning_rate, 1, train_loader, test_loader)
    print('Accuracy:', test_accuracy)

    is_new_best = test_accuracy > best_accuracy
    if is_new_best:
        # Save first so the running best only moves once the file is written.
        torch.save(candidate.state_dict(), best_model_path)
        best_accuracy = test_accuracy

    del candidate

    return -test_accuracy

In [9]:
# Try the objective once on the default point before starting the search;
# the decorated fitness takes the whole point as a single list `x`.
fitness(x=default_parameters)



learning rate: 1.0e-05
num_conv_layers: 1
num_fc_units: 16
dropout_rate: 0.0001
epoch: 0 total_correct: 26059 loss: 12387.331575155258
Train Accuracy: 0.4343166666666667
Test Accuracy: 0.5897
Accuracy: 0.5897


-0.5897

In [10]:
# NOTE(review): the saved output of this cell does not match the accuracy
# reported by the preceding fitness() call, and its execution count is
# duplicated -- it looks stale or run out of order. Re-run on a fresh kernel.
best_accuracy

0.3344

In [10]:
# Seed both sources of randomness so the search can be reproduced on a fresh
# kernel: torch's global RNG and skopt's own sampling of points.
SEARCH_SEED = 0
torch.manual_seed(SEARCH_SEED)

# Bayesian optimisation with a Gaussian-process surrogate and Expected
# Improvement; the first evaluated point is `default_parameters`.
search_result = gp_minimize(func=fitness,
                            dimensions=dimensions,
                            acq_func='EI',
                            n_calls=12,
                            x0=default_parameters,
                            random_state=SEARCH_SEED)



learning rate: 1.0e-05
num_conv_layers: 1
num_fc_units: 16
dropout_rate: 0.0001
epoch: 0 total_correct: 15729 loss: 13067.952606797218
Train Accuracy: 0.26215
Test Accuracy: 0.4374
Accuracy: 0.4374


learning rate: 7.6e-06
num_conv_layers: 2
num_fc_units: 242
dropout_rate: 0.0010504029270412094
epoch: 0 total_correct: 24842 loss: 13070.13498198986
Train Accuracy: 0.4140333333333333
Test Accuracy: 0.6666
Accuracy: 0.6666


learning rate: 4.4e-04
num_conv_layers: 3
num_fc_units: 260
dropout_rate: 0.0016246152039815
epoch: 0 total_correct: 45744 loss: 4319.715989653021
Train Accuracy: 0.7624
Test Accuracy: 0.8577
Accuracy: 0.8577


learning rate: 1.2e-06
num_conv_layers: 2
num_fc_units: 384
dropout_rate: 0.005346756084445474
epoch: 0 total_correct: 7529 loss: 13711.089743852615
Train Accuracy: 0.12548333333333334
Test Accuracy: 0.2522
Accuracy: 0.2522


learning rate: 1.6e-04
num_conv_layers: 3
num_fc_units: 140
dropout_rate: 0.003035356507066432
epoch: 0 total_correct: 36893 loss: 7065

In [11]:
from nas_wot import score

In [23]:
# Hard-coded hyperparameters -- presumably the best point found by the search
# (TODO confirm). The weights in best_model.pth are loaded into this model
# next, so the architecture built here has to match that checkpoint.
best_hyperparameters = dict(
    num_conv_layers=2,
    num_fc_units=393,
    dropout_rate=2.1738843582354433e-05,
)
model = MNISTConvNet(**best_hyperparameters)

In [24]:
model

MNISTConvNet(
  (conv1): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1))
  (dropout): Dropout(p=2.1738843582354433e-05, inplace=False)
  (fc1): Linear(in_features=125, out_features=393, bias=True)
  (fc2): Linear(in_features=393, out_features=10, bias=True)
)

In [25]:
# Read the saved weights from disk, copy them into the model, then switch the
# model to evaluation mode (the returned module is displayed as cell output).
saved_weights = torch.load('best_model.pth')
model.load_state_dict(saved_weights)
model.eval()

MNISTConvNet(
  (conv1): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(3, 5, kernel_size=(3, 3), stride=(1, 1))
  (dropout): Dropout(p=2.1738843582354433e-05, inplace=False)
  (fc1): Linear(in_features=125, out_features=393, bias=True)
  (fc2): Linear(in_features=393, out_features=10, bias=True)
)

In [31]:
# Score the restored model with nas_wot.score using the training loader.
# NOTE(review): the third argument (100) presumably mirrors the loaders'
# batch size -- confirm against nas_wot.score.
score(model, train_loader, 100)

-188.19160651956506

In [33]:
# List every state_dict entry by name together with the size of its tensor.
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, "\t", entry_tensor.size())

conv1.weight 	 torch.Size([3, 1, 3, 3])
conv1.bias 	 torch.Size([3])
conv2.weight 	 torch.Size([5, 3, 3, 3])
conv2.bias 	 torch.Size([5])
fc1.weight 	 torch.Size([393, 125])
fc1.bias 	 torch.Size([393])
fc2.weight 	 torch.Size([10, 393])
fc2.bias 	 torch.Size([10])


In [34]:
model.state_dict()

OrderedDict([('conv1.weight',
              tensor([[[[-0.0334,  0.2540,  0.2722],
                        [-0.1481,  0.1374,  0.2096],
                        [-0.0028,  0.0987,  0.1333]]],
              
              
                      [[[ 0.2304,  0.0182,  0.0352],
                        [-0.0602, -0.1293,  0.2283],
                        [ 0.2955,  0.1718,  0.0486]]],
              
              
                      [[[ 0.1488,  0.0918, -0.1624],
                        [-0.1039,  0.0403,  0.1150],
                        [-0.2176, -0.1634, -0.2305]]]])),
             ('conv1.bias', tensor([ 0.3181, -0.3295, -0.2656])),
             ('conv2.weight',
              tensor([[[[ 0.0187, -0.0344,  0.0789],
                        [-0.0025, -0.0050, -0.1529],
                        [ 0.1063, -0.1450,  0.1359]],
              
                       [[ 0.0309, -0.1372, -0.0374],
                        [ 0.1320,  0.0930,  0.0463],
                        [ 0.0705, -0.1357,  0.0