# CIFAR-10 Classification with CNN

In [1]:
import torch
import torchvision
from torchvision import datasets

from torch import nn
from torch import optim

from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor

import math
import numpy as np
import matplotlib.pyplot as plt

import neural_nets
import cnn_utils as util

In [2]:
# Seed PyTorch's default random number generator so repeated runs start from the same state.
torch.manual_seed(seed=42)

<torch._C.Generator at 0x7f63e4d487d0>

## Import Data

### Download Dataset

In [3]:
# Build the training split first, then the test split; the two calls differ
# only in the `train` flag. Images are converted to tensors on access.
training_data, test_data = [
    datasets.CIFAR10(root="data", train=is_train, download=True, transform=ToTensor())
    for is_train in (True, False)
]

Files already downloaded and verified
Files already downloaded and verified


### Create Loaders

This step wraps the datasets in data loaders, which automate loading the images in batches of a fixed size.

In [4]:
# Number of images per mini-batch; used by both the training and the test loader.
batch_size: int = 256

In [5]:
# Training batches are reshuffled at every epoch.
train_loader = DataLoader(dataset=training_data,
                          batch_size=batch_size,
                          shuffle=True)

# Evaluation gains nothing from shuffling: a metric computed over the whole
# test set does not depend on batch order (assuming the training helper
# evaluates on the full loader), and a fixed order keeps test batches repeatable.
test_loader = DataLoader(dataset=test_data,
                         batch_size=batch_size,
                         shuffle=False)

### Set general parameters

In [6]:
# Number of output classes handed to the network constructor.
target_classes = 10
# Spatial size of each input image, handed to the network constructor as `input_dims`.
input_dimensions = (32, 32)

# Model Optimization

### Training hyperparameters

In [7]:
# Step size given to the Adam optimizer.
learning_rate = 1e-3
# Number of epochs passed to the training loop for every configuration.
num_epochs = 30

## 3: VGG Layers

Finally, VGG-style layers (using two consecutive convolutional layers before pooling) will be tried.

### 3 Blocks

In [8]:
# Search grid for the 3-block (6 convolutional layer) architecture.
# Each inner list of filters/kernels has one entry per convolutional layer.
filter_params = [
    [64, 64, 128, 128, 256, 256],
    [64, 128, 128, 256, 256, 512],
]
kernel_params = [[3] * 6]
# One candidate width for the single hidden fully connected layer per entry.
fulcon_params = [[width] for width in (32, 64, 128)]

#### Training

In [9]:
for f in filter_params:
    for k in kernel_params:
        for fci in range(len(fulcon_params)):
            fc = fulcon_params[fci].copy()
            cnn = neural_nets.CNN(n_conv_layers=6,
                                  filters=f,
                                  kernel=k,
                                  activation=['relu', 'relu', 'relu', 'relu', 'relu', 'relu'],
                                  norm=[False, False, False, False, False, False],
                                  pool=[0, 2, 0, 2, 0, 2],
                                  input_channels=3,
                                  fully_connected=fc,
                                  input_dims=input_dimensions,
                                  classes=target_classes)
            cross_entropy = nn.CrossEntropyLoss()
            adam = optim.Adam(cnn.parameters(), lr=learning_rate)
            %time losses, accuracies = util.train_loop(cnn, cross_entropy, adam, train_loader, test_loader, num_epochs, 0)
            
            print("Max accuracy:", max(accuracies), "Epoch:", np.argmax(accuracies)+1, "Params:", f, k, fc)
            print(76*"-")

CPU times: user 11h 2min 42s, sys: 1h 17min 3s, total: 12h 19min 46s
Wall time: 1h 40min 13s
Max accuracy: 0.7875999808311462 Epoch: 12 Params: [64, 64, 128, 128, 256, 256] [3, 3, 3, 3, 3, 3] [4096, 32, 10]
----------------------------------------------------------------------------
CPU times: user 10h 51min 52s, sys: 1h 6min 19s, total: 11h 58min 11s
Wall time: 1h 37min 43s
Max accuracy: 0.7936000227928162 Epoch: 28 Params: [64, 64, 128, 128, 256, 256] [3, 3, 3, 3, 3, 3] [4096, 64, 10]
----------------------------------------------------------------------------
CPU times: user 10h 47min 13s, sys: 1h 15min 47s, total: 12h 3min 1s
Wall time: 1h 38min 14s
Max accuracy: 0.7924000024795532 Epoch: 15 Params: [64, 64, 128, 128, 256, 256] [3, 3, 3, 3, 3, 3] [4096, 128, 10]
----------------------------------------------------------------------------
CPU times: user 18h 51min 29s, sys: 2h 9min 10s, total: 21h 40s
Wall time: 2h 54min 30s
Max accuracy: 0.7885000109672546 Epoch: 10 Params: [64, 12

### 4 Blocks

Different filter counts within the same block (for example `[32, 64, 64, 128, 128, 256, 256, 512]`) were not considered because the previous experiment showed that they perform suboptimally.

In [9]:
# Search grid for the 4-block (8 convolutional layer) architecture.
# Both convolutions of a block use the same number of filters.
filter_params = [[channels for channels in (64, 128, 256, 512) for _ in range(2)]]
kernel_params = [[3] * 8]
# One candidate width for the single hidden fully connected layer per entry.
fulcon_params = [[width] for width in (32, 64, 128, 256)]

#### Training

In [11]:
for f in filter_params:
    for k in kernel_params:
        for fci in range(len(fulcon_params)):
            fc = fulcon_params[fci].copy()
            cnn = neural_nets.CNN(n_conv_layers=8,
                                  filters=f,
                                  kernel=k,
                                  activation=['relu', 'relu', 'relu', 'relu', 'relu', 'relu', 'relu', 'relu'],
                                  norm=[False, False, False, False, False, False, False, False],
                                  pool=[0, 2, 0, 2, 0, 2, 0, 2],
                                  input_channels=3,
                                  fully_connected=fc,
                                  input_dims=input_dimensions,
                                  classes=target_classes)
            cross_entropy = nn.CrossEntropyLoss()
            adam = optim.Adam(cnn.parameters(), lr=learning_rate)
            %time losses, accuracies = util.train_loop(cnn, cross_entropy, adam, train_loader, test_loader, num_epochs, 0)
            
            print("Max accuracy:", max(accuracies), "Epoch:", np.argmax(accuracies)+1, "Params:", f, k, fc)
            print(76*"-")

CPU times: user 15h 24min 22s, sys: 1h 18min 34s, total: 16h 42min 56s
Wall time: 2h 16min 15s
Max accuracy: 0.7842000126838684 Epoch: 27 Params: [64, 64, 128, 128, 256, 256, 512, 512] [3, 3, 3, 3, 3, 3, 3, 3] [2048, 32, 10]
----------------------------------------------------------------------------
CPU times: user 16h 1min 52s, sys: 1h 37min 13s, total: 17h 39min 6s
Wall time: 2h 21min 38s
Max accuracy: 0.7757999897003174 Epoch: 12 Params: [64, 64, 128, 128, 256, 256, 512, 512] [3, 3, 3, 3, 3, 3, 3, 3] [2048, 64, 10]
----------------------------------------------------------------------------
CPU times: user 16h 40min 44s, sys: 1h 38min 31s, total: 18h 19min 16s
Wall time: 2h 21min 49s
Max accuracy: 0.7684999704360962 Epoch: 20 Params: [64, 64, 128, 128, 256, 256, 512, 512] [3, 3, 3, 3, 3, 3, 3, 3] [2048, 128, 10]
----------------------------------------------------------------------------
