# Network in Network on CIFAR-10





### Imports

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader

# When a CUDA device is present, switch on cuDNN's deterministic flag so that
# repeated runs are more reproducible (this setting has no use on CPU-only runs).
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

### Settings and Dataset

In [2]:
# Device: use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Hyperparameters
random_seed = 1
learning_rate = 0.001
num_epochs = 10
batch_size = 128
# Seed torch's global RNG before any DataLoader draws a shuffle order.
torch.manual_seed(random_seed)


# Architecture
# NOTE(review): 784 (= 28 * 28) looks like an MNIST leftover; the batches
# loaded below are 3x32x32 and this name is not referenced in the visible
# cells -- confirm before relying on it.
num_features = 784
num_classes = 10


# Data: CIFAR-10 train/test splits, images converted to tensors by ToTensor().
train_dataset = datasets.CIFAR10(
    root='data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# The test split is read from the same root and does not request a download.
test_dataset = datasets.CIFAR10(
    root='data',
    train=False,
    transform=transforms.ToTensor(),
)


# Only the training loader is shuffled; the test loader keeps dataset order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)


# Checking the dataset: report the shapes of the first training batch only.
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break

Files already downloaded and verified
Image batch dimensions: torch.Size([128, 3, 32, 32])
Image label dimensions: torch.Size([128])


### Model

In [3]:
class NiN(nn.Module):
    """Network-in-Network style CNN for 3-channel image classification.

    The network is three stacks of one spatial convolution followed by two
    1x1 convolutions. The first two stacks end in a stride-2 pooling layer
    and dropout; the last stack ends in an 8x8 average pool that collapses
    each class map to a single value.

    For 32x32 inputs the spatial size goes 32 -> 16 -> 8 -> 1, so the output
    has exactly ``num_classes`` values per image. Other input sizes are not
    handled specially: the fixed 8x8 pool would leave a larger map that
    ``forward`` flattens into more than ``num_classes`` columns.

    Args:
        num_classes: Number of output classes; sets the channel count of the
            final 1x1 convolution.
    """

    def __init__(self, num_classes):
        super(NiN, self).__init__()
        self.num_classes = num_classes
        self.layers = nn.Sequential(
                # Stack 1: 5x5 conv + two 1x1 convs, then downsample by 2.
                nn.Conv2d(3, 192, kernel_size=5, stride=1, padding=2),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 160, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.Conv2d(160,  96, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                nn.Dropout(0.5),

                # Stack 2: 5x5 conv + two 1x1 convs, then downsample by 2.
                nn.Conv2d(96, 192, kernel_size=5, stride=1, padding=2),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.AvgPool2d(kernel_size=3, stride=2, padding=1),
                nn.Dropout(0.5),

                # Stack 3: 3x3 conv + two 1x1 convs producing one map per class.
                nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                # Output channels follow num_classes (previously hard-coded to 10,
                # which silently ignored the constructor argument).
                nn.Conv2d(192, num_classes, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                # Average each class map down to one value (8x8 -> 1x1 for 32x32 input).
                nn.AvgPool2d(kernel_size=8, stride=1, padding=0))


    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Image batch shaped (N, 3, H, W); 32x32 images give one value
                per class.

        Returns:
            Tuple ``(logits, probas)``: the flattened network output and its
            softmax over dim 1. Because the last layers are ReLU followed by
            average pooling, the logits are non-negative.
        """
        x = self.layers(x)
        # Flatten (N, C, 1, 1) -> (N, C).
        logits = x.view(x.size(0), -1)
        probas = F.softmax(logits, dim=1)
        return logits, probas

In [4]:
# Build the network, place it on the selected device, and attach an Adam
# optimizer over all of its parameters.
model = NiN(num_classes)
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

### Training

In [5]:
def compute_accuracy(model, data_loader, device=None):
    """Return the classification accuracy of ``model`` over ``data_loader``, in percent.

    Args:
        model: Module whose forward pass returns a ``(logits, probas)`` pair.
        data_loader: Iterable yielding ``(features, targets)`` batches.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so inputs always land next to the weights.

    Returns:
        A 0-dim float tensor holding ``100 * correct / total``. An empty
        loader yields 0.0 instead of raising.

    Note:
        The train/eval mode of ``model`` is not changed here; call
        ``model.eval()`` beforehand if dropout should be disabled.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    correct_pred, num_examples = 0, 0
    # Inference only: never build an autograd graph, whatever the caller's context.
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)
            logits, probas = model(features)
            # Predicted class = index of the highest probability per row.
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        # No batches seen: correct_pred is still a plain int, and the ratio is undefined.
        return torch.tensor(0.0, device=device)
    return correct_pred.float()/num_examples * 100

    
for epoch in range(num_epochs):
    # Training mode so the dropout layers are active during optimisation.
    model.train()

    for batch_idx, (features, targets) in enumerate(train_loader):
        features, targets = features.to(device), targets.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass and loss on the raw logits.
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)

        # Backpropagate, then update the model parameters.
        cost.backward()
        optimizer.step()

        # Log the current batch loss every 50 batches.
        if batch_idx % 50 == 0:
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                  % (epoch + 1, num_epochs, batch_idx,
                     len(train_loader), cost))

    # End of epoch: switch to eval mode and report accuracy on the training set.
    model.eval()
    with torch.no_grad():
        train_acc = compute_accuracy(model, train_loader)
        print('Epoch: %03d/%03d | Train: %.3f%% ' % (
              epoch + 1, num_epochs, train_acc))

Epoch: 001/010 | Batch 0000/0391 | Cost: 2.3016
Epoch: 001/010 | Batch 0050/0391 | Cost: 2.2997
Epoch: 001/010 | Batch 0100/0391 | Cost: 2.2267
Epoch: 001/010 | Batch 0150/0391 | Cost: 2.1164
Epoch: 001/010 | Batch 0200/0391 | Cost: 2.1267
Epoch: 001/010 | Batch 0250/0391 | Cost: 2.0275
Epoch: 001/010 | Batch 0300/0391 | Cost: 1.8015
Epoch: 001/010 | Batch 0350/0391 | Cost: 1.8745
Epoch: 001/010 | Train: 32.926% 
Epoch: 002/010 | Batch 0000/0391 | Cost: 1.8256
Epoch: 002/010 | Batch 0050/0391 | Cost: 1.7028
Epoch: 002/010 | Batch 0100/0391 | Cost: 1.8782
Epoch: 002/010 | Batch 0150/0391 | Cost: 1.8013
Epoch: 002/010 | Batch 0200/0391 | Cost: 1.7608
Epoch: 002/010 | Batch 0250/0391 | Cost: 1.6350
Epoch: 002/010 | Batch 0300/0391 | Cost: 1.6128
Epoch: 002/010 | Batch 0350/0391 | Cost: 1.6125
Epoch: 002/010 | Train: 46.710% 
Epoch: 003/010 | Batch 0000/0391 | Cost: 1.6045
Epoch: 003/010 | Batch 0050/0391 | Cost: 1.5237
Epoch: 003/010 | Batch 0100/0391 | Cost: 1.5233
Epoch: 003/010 | Batch

### Evaluation

In [6]:
# Accuracy on the held-out test split; gradients are not needed for inference.
with torch.no_grad():
    test_acc = compute_accuracy(model, test_loader)
print('Test accuracy: %.2f%%' % (test_acc))

Test accuracy: 69.91%
