Convolutional Neural Networks for MNIST dataset

Configure training flags

In [1]:
import os
import sys

nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch
import matplotlib.pyplot as plt
import numpy as np

from cnn.mnist.cnn_files import files as _files

plt.ion()

def init_training_flags():
    
    tr_flags = type('TrainingFlags', (object,), {})
    
    tr_flags.batch_size = 64
    tr_flags.test_batch_size = 1000
    tr_flags.epochs = 10
    tr_flags.no_cuda = False
    tr_flags.seed = 1
    tr_flags.log_interval = 10
    tr_flags.weights = _files.model_file('mnist_weights.pth.tar')
    tr_flags.lr = 0.01
    tr_flags.momentum = 0.5
    
    # System configuration
    tr_flags.num_workers=8
    
    tr_flags.cuda = False

    return tr_flags
    


Prepare datasets

In [3]:
from torch import optim
from torch.autograd import Variable
from torchvision import (datasets, transforms)

flags = init_training_flags()

torch.manual_seed(flags.seed)
if flags.cuda:
  torch.cuda.manual_seed(flags.seed)

kwargs = {'num_workers': flags.num_workers, 'pin_memory': True} if flags.cuda else {}
train_loader = torch.utils.data.DataLoader(
  datasets.MNIST(_files.data_dir, train=True, download=True,
                 transform=transforms.Compose([
                     transforms.ToTensor(),
                     transforms.Normalize((0.1307,), (0.3081,))
                 ])),
  batch_size=flags.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
  datasets.MNIST(_files.data_dir, train=False, transform=transforms.Compose([
                     transforms.ToTensor(),
                     transforms.Normalize((0.1307,), (0.3081,))
                 ])),
  batch_size=flags.test_batch_size, shuffle=True, **kwargs)

Initialize model

In [11]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import OrderedDict

from torch import nn

import torch.nn.functional as F

from utils.models.layers import Flatten


class LeNetClassic(nn.Module):
  """Network model without flatten layer
   for character recognition"""
  
  def __init__(self):
    super(LeNetClassic, self).__init__()
    self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
    self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
    self.conv2_drop = nn.Dropout2d()
    self.fc1 = nn.Linear(320, 50)
    self.fc2 = nn.Linear(50, 10)
  
  def forward(self, x):
      
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    x = x.view(x.size(0), 320)
    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    result = F.log_softmax(x)
    
    return result


class LeNet(nn.Module):
  """Network model with flatten layer
   for character recognition"""
  
  def __init__(self):
    super(LeNet, self).__init__()
    self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
    self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
    self.conv2_drop = nn.Dropout2d()
    self.flatten = Flatten(50)
    self.fc2 = nn.Linear(50, 10)
  
  def forward(self, x):
      
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    x = self.flatten(x)
    x = F.relu(x)
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    result = F.log_softmax(x)
    
    return result


class LeNetSequential(nn.Module):
  """Network model with flatten layer
   for character recognition"""
  
  def __init__(self):
    super(LeNetSequential, self).__init__()
    self.conv_part = nn.Sequential(nn.Conv2d(1, 10, kernel_size=5),
                                   nn.MaxPool2d(2, 2),
                                   nn.ReLU(),
                                   nn.Conv2d(10, 20, kernel_size=5),
                                   nn.MaxPool2d(2, 2),
                                   nn.ReLU(),
                                   nn.Dropout2d())
    self.flatten = Flatten(50)
    self.fc2 = nn.Linear(50, 10)
  
  def forward(self, x):
      
    x = self.conv_part(x)
    x = self.flatten(x)
    x = F.relu(x)
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    result = F.log_softmax(x)
    
    return result
  
  
class LeNetSequentialDict(nn.Module):
  """Network model with flatten layer
   for character recognition"""
  
  def __init__(self):
    super(LeNetSequentialDict, self).__init__()
    self.conv_part = nn.Sequential(OrderedDict([
                                   ('conv1', nn.Conv2d(1, 10, kernel_size=5)),
                                   ('mxpl1', nn.MaxPool2d(2, 2)),
                                   ('relu1', nn.ReLU()),
                                   ('conv2', nn.Conv2d(10, 20, kernel_size=5)),
                                   ('mxol2', nn.MaxPool2d(2, 2)),
                                   ('relu2', nn.ReLU()),
                                   ('drop1', nn.Dropout2d())]))
    self.flatten = Flatten(50)
    self.fc2 = nn.Linear(50, 10)
  
  def forward(self, x):
      
    x = self.conv_part(x)
    x = self.flatten(x)
    x = F.relu(x)
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    result = F.log_softmax(x)
    
    return result



Traing the network

In [10]:
def train(epoch, training_config):
  """Train network model
    Args:
      epoch - current epoch
      training_config - training configuration tuple
  """
    
  (train_loader, model, optimizer, flags) = training_config
  model.train()
  for (batch_idx, (data, target)) in enumerate(train_loader):
    if flags.cuda:
        (data, target) = (data.cuda(), target.cusa())
    (data, target) = (Variable(data), Variable(target))
    optimizer.zero_grad()
    output = model(data)
    loss = F.nll_loss(output, target)
    loss.backward()
    optimizer.step()
    if batch_idx % flags.log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
          epoch, batch_idx * len(data), len(train_loader.dataset),
          100. * batch_idx / len(train_loader), loss.data[0]))
  torch.save(model.state_dict(), flags.weights)
  

def test(test_loader, model, flags):
  """Test network
    test_loader - test data loader
    model - network model
    flags - configuration parameters
  """
    
  model.eval()
  test_loss = 0
  correct = 0
  for (data, target) in test_loader:
    if flags.cuda:
        (data, target) = (data.cuda(), target.cusa())
    (data, target) = (Variable(data, volatile=True), Variable(target))
    output = model(data)
    test_loss += F.nll_loss(output, target, size_average=False).data[0]  # sum up batch loss
    pred = output.data.max(1, keepdim=True)[1]  # get the index of the max log-probability
    correct += pred.eq(target.data.view_as(pred)).cpu().sum()

  test_loss /= len(test_loader.dataset)
  print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
      test_loss, correct, len(test_loader.dataset),
      100. * correct / len(test_loader.dataset)))

model = LetterNet()

if flags.cuda:
    model.cuda()

print('End=attachment')
optimizer = optim.SGD(model.parameters(), lr=flags.lr, momentum=flags.momentum)
training_config = (train_loader, model, optimizer, flags)
for epoch in range(1, flags.epochs + 1):
    train(epoch, training_config)
    test(test_loader, model, flags)

End=attachment

Test set: Average loss: 0.7624, Accuracy: 8516/10000 (85%)




Test set: Average loss: 0.5352, Accuracy: 8975/10000 (90%)


Test set: Average loss: 0.4429, Accuracy: 9154/10000 (92%)




Test set: Average loss: 0.3700, Accuracy: 9261/10000 (93%)


Test set: Average loss: 0.3227, Accuracy: 9302/10000 (93%)




Test set: Average loss: 0.2875, Accuracy: 9383/10000 (94%)




Test set: Average loss: 0.2659, Accuracy: 9486/10000 (95%)


Test set: Average loss: 0.2344, Accuracy: 9529/10000 (95%)




Test set: Average loss: 0.2191, Accuracy: 9546/10000 (95%)


Test set: Average loss: 0.2152, Accuracy: 9553/10000 (96%)

