In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import numpy as np

# Set up dataset transforms
transform = torchvision.transforms.Compose([
                           torchvision.transforms.ToTensor(),
                           torchvision.transforms.Normalize(0.1307, 0.3081),])

transform_train = torchvision.transforms.Compose([
                           torchvision.transforms.ToTensor(),
                           torchvision.transforms.Normalize(0.1307, 0.3081),
                           torchvision.transforms.RandomAffine(12, shear=12)])

# Set up datasets and dataloaders
train_data = torchvision.datasets.MNIST('./datafiles/', train=True, download=True, transform=transform_train)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=100, shuffle=True)

test_data = torchvision.datasets.MNIST('./datafiles/', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1000, shuffle=True)

In [14]:
class Network(nn.Module):
    def __init__(self, n_unit=None, nc=10, do=0.5, kernel=3, padding='valid', bn2d=True, mp=3, input_shape=28, pool='max', bs=100):
        super(Network, self).__init__()

        # Enable or disable batchnorm2d layers
        self.bn2d = True if bn2d==1 else False
        
        # Batch size
        self.bs = bs

        # First convolutional layer
        self.h1 = nn.Conv2d(1, nc, kernel_size=kernel, padding=padding)

        # Depending on parameters, use either max or average pooling
        if pool == 'max':
            self.mp1 = nn.MaxPool2d(mp)
            self.mp2 = nn.MaxPool2d(mp)
        elif pool == 'avg':
            self.mp1 = nn.AvgPool2d(mp)
            self.mp2 = nn.AvgPool2d(mp)

        self.bn1 =nn.BatchNorm2d(nc)

        # Second convolutional layer
        self.h2 = nn.Conv2d(nc, nc*5, kernel_size=kernel, padding=padding)
        
        self.bn2 =nn.BatchNorm2d(nc*5)
        
        # First linear layer
        # Depending on type of padding, adjust next layer expected input dimensions accordingly
        if padding=='valid':
            self.h3 = nn.Linear(int(nc*5 * np.floor((np.floor(((input_shape - (kernel-1))/mp))-(kernel-1))/mp)**2), 128)
        else:
            self.h3 = nn.Linear(int(nc*5 * np.floor((np.floor(((input_shape)/mp)))/mp)**2), 128)

        self.bn3 =nn.BatchNorm1d(128)
        
        # Second linear layer
        self.h4 = nn.Linear(128, 100)
        self.bn4 =nn.BatchNorm1d(100)

        # Dropout with dropout rate set by parameters
        self.dropout = nn.Dropout(p=do)
        # Second linear layer
        self.output = nn.Linear(100, 10)

    # Forward pass of model
    def forward(self, x):
        # First convolutional and pooling layer
        x = self.h1(x)
        x = self.mp1(x)

        # If batchnorm2d is enabled and batch size is greater than 1, apply batchnorm
        if self.bn2d and self.bs > 1:
            x = self.bn1(x)

        x = F.relu(x)

        # Second convolutional and pooling layer
        x = self.h2(x)
        x = self.mp2(x)

        # If batchnorm2d is enabled and batch size is greater than 1, apply batchnorm
        if self.bn2d and self.bs > 1:
            x = self.bn2(x)

        x = F.relu(x)

        # Flatten input to linear layers
        x = torch.flatten(x, 1)

        # First linear layer
        x = self.h3(x)

        # If batch size > 1, apply batchnorm
        if self.bs > 1:
            x = self.bn3(x)

        # Second linear layer
        x = self.h4(x)

        # If batch size > 1, apply batchnorm
        if self.bs > 1:
            x = self.bn4(x)
        x = F.relu(x)

        x = self.dropout(x)

        return self.output(x)

# Train the model
def train(model, train_loader, optimizer, epoch):
    model.train()
    total_loss = 0
    correct = 0
    
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = nn.CrossEntropyLoss()(outputs, targets)
        total_loss += loss
        loss.backward()
        optimizer.step()

        predictions = outputs.argmax(dim=1, keepdim=True)
        correct += predictions.eq(targets.view_as(predictions)).sum()

        if batch_idx % 100 == 0:
            print('Epoch: {} {}/{} Training loss: {:.6f}'.format(
                epoch,
                batch_idx * len(inputs),
                len(train_loader.dataset),
                loss))

    print('Training loss: {:.6f}; Training accuracy: {}/{} ({:.1f}%)\n'.format(
        total_loss / len(train_loader.dataset) * len(inputs),
        correct,
        len(train_loader.dataset),
        100. * correct / len(train_loader.dataset)))
    
    return total_loss / len(train_loader.dataset) * len(inputs), 100. * correct / len(train_loader.dataset)

# Test the model
def test(model, test_loader):
    model.eval()
    loss = 0
    correct = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss += nn.CrossEntropyLoss()(outputs, targets)
            predictions = outputs.argmax(dim=1, keepdim=True)
            correct += predictions.eq(targets.view_as(predictions)).sum()

    loss = loss / len(test_loader.dataset) * len(inputs)

    print('Test loss: {:.6f}; Test accuracy: {}/{} ({:.1f}%)\n'.format(
        loss,
        correct,
        len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return loss, 100. * correct / len(test_loader.dataset)


In [15]:
# Run the model
def run(params=None):
    train_data = torchvision.datasets.MNIST('./datafiles/', train=True, download=True, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=params['batch_size'], shuffle=True)

    test_data = torchvision.datasets.MNIST('./datafiles/', train=False, download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=params['batch_size'], shuffle=True)
    
    #model = Network(params['nodes'])
    model = Network(do=params['dropout'], kernel=params['kernel_size'], padding=params['padding'], bn2d=params['batchnorm2d'], mp=params['pool_size'], pool=params['pooling'], bs=params['batch_size'])

    if params:
        optimizer = getattr(optim, params['optimizer'])(model.parameters(), lr= params['learning_rate'])
    else:
        optimizer = optim.Adam(model.parameters())

    train_loss = []
    train_acc = []
    val_loss = []
    val_acc = []

    EPOCHS = 10

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    for epoch in range(0, EPOCHS):
        tr_loss, tr_acc = train(model, train_loader, optimizer, epoch)
        train_loss.append(tr_loss.detach().numpy())
        train_acc.append(tr_acc)

        test_loss, test_acc = test(model, test_loader)
        val_loss.append(test_loss.detach().numpy())
        val_acc.append(test_acc.detach().numpy())

    return val_acc[-1]


In [16]:
import optuna
from optuna import trial

# Set up parameters for Optuna to optimize
def objective(trial):

    params = {
              'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
              'optimizer': trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"]),
              'padding': trial.suggest_categorical("padding", ["valid", "same"]),
              'pooling': trial.suggest_categorical("pooling", ["max", "avg"]),
              'batch_size': trial.suggest_int("batch_size", 1, 100, step=10),
              'pool_size': trial.suggest_int("pool_size", 1, 3),
              'dropout': trial.suggest_float("dropout", 0.2, 0.8),
              'batchnorm2d': trial.suggest_discrete_uniform("batchnorm2d", 0, 1, 1),
              'num_channel': trial.suggest_int("num_channel", 10, 100),
              'kernel_size': trial.suggest_int("kernel_size", 1, 3),
              }
    

    accuracy = run(params)

    return accuracy

# Run Optuna trials
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=30)


[32m[I 2022-11-03 13:10:15,261][0m A new study created in memory with name: no-name-a2a80a60-34e2-43d0-9fbb-7ee63a129838[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
  'batchnorm2d': trial.suggest_discrete_uniform("batchnorm2d", 0, 1, 1),


Epoch: 0 0/60000 Training loss: 2.742836


[33m[W 2022-11-03 13:10:23,168][0m Trial 0 failed because of the following error: KeyboardInterrupt()[0m
Traceback (most recent call last):
  File "c:\Users\Tia\Anaconda3\envs\deepenv\lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\Tia\AppData\Local\Temp\ipykernel_8696\655387829.py", line 20, in objective
    accuracy = run(params)
  File "C:\Users\Tia\AppData\Local\Temp\ipykernel_8696\713351247.py", line 28, in run
    tr_loss, tr_acc = train(model, train_loader, optimizer, epoch)
  File "C:\Users\Tia\AppData\Local\Temp\ipykernel_8696\3290242296.py", line 92, in train
    for batch_idx, (inputs, targets) in enumerate(train_loader):
  File "c:\Users\Tia\Anaconda3\envs\deepenv\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
    data = self._next_data()
  File "c:\Users\Tia\Anaconda3\envs\deepenv\lib\site-packages\torch\utils\data\dataloader.py", line 561, in _next_data
    data = self._d

KeyboardInterrupt: 

In [None]:
import pickle

# Best Optuna trial
best_trial = study.best_trial

for key, value in best_trial.params.items():
    print("{}: {}".format(key, value))

# Save the Optuna trials
with open("study.pkl","wb") as f:
    pickle.dump(study, f)

In [None]:
from torchsummary import summary

params =  {'learning_rate': 0.0018709287702689723, 'optimizer': 'Adam', 'padding': 'same', 'pooling': 'max', 'batch_size': 101, 'pool_size': 2, 'dropout': 0.27587651756009146, 'batchnorm2d': 0.0, 'num_channel': 35, 'kernel_size': 3}

model = Network(do=params['dropout'], kernel=params['kernel_size'], padding=params['padding'], bn2d=params['batchnorm2d'], mp=params['pool_size'], pool=params['pooling'], bs=params['batch_size'])

# Print summary for model with optimized parameters
summary(model, (1,28,28))



----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 28, 28]             100
         MaxPool2d-2           [-1, 10, 14, 14]               0
            Conv2d-3           [-1, 50, 14, 14]           4,550
         MaxPool2d-4             [-1, 50, 7, 7]               0
            Linear-5                  [-1, 128]         313,728
       BatchNorm1d-6                  [-1, 128]             256
            Linear-7                  [-1, 100]          12,900
       BatchNorm1d-8                  [-1, 100]             200
           Dropout-9                  [-1, 100]               0
           Linear-10                   [-1, 10]           1,010
Total params: 332,744
Trainable params: 332,744
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.17
Params size (MB): 1.27
Estimated T