In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable 

In [2]:
batch_size = 64

# Both MNIST splits share the same storage root and preprocessing; only the
# `train` flag differs. Files are fetched into ./data/ when not already there.
_mnist_common = dict(root='./data/', download=True)

train_dataset = datasets.MNIST(train=True,
                               transform=transforms.ToTensor(),
                               **_mnist_common)
test_dataset = datasets.MNIST(train=False,
                              transform=transforms.ToTensor(),
                              **_mnist_common)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!




In [3]:
# Mini-batch iterators over the two dataset splits.
DataLoader = torch.utils.data.DataLoader

# Training batches are drawn in a shuffled order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation keeps the dataset order (no shuffling).
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [14]:
class InceptionA(nn.Module):
    """Inception-style block with four parallel, spatially size-preserving branches.

    Every branch maps the same input to a feature map with the same height and
    width; the results are concatenated along the channel axis, giving
    16 + 24 + 24 + 24 = 88 output channels regardless of ``channels_in``.

    Args:
        channels_in: Number of channels of the incoming feature map.
    """

    def __init__(self, channels_in):
        super(InceptionA, self).__init__()

        # Branch 1: a single 1x1 convolution -> 16 channels.
        self.branch1x1 = nn.Conv2d(channels_in, 16, kernel_size=1)

        # Branch 2: 1x1 reduction, then 5x5 convolution (padding 2) -> 24 channels.
        self.branch5x5_1 = nn.Conv2d(channels_in, 16, kernel_size=1)
        self.branch5x5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2)

        # Branch 3: 1x1 reduction, then two 3x3 convolutions (padding 1) -> 24 channels.
        self.branch3x3_1 = nn.Conv2d(channels_in, 16, kernel_size=1)
        self.branch3x3_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1)

        # Branch 4: 1x1 convolution applied after average pooling -> 24 channels.
        self.branch_pool = nn.Conv2d(channels_in, 24, kernel_size=1)

    def forward(self, x):
        """Apply all four branches to ``x`` and concatenate them on dim 1."""
        out_1x1 = self.branch1x1(x)

        out_5x5 = self.branch5x5_2(self.branch5x5_1(x))

        out_3x3 = self.branch3x3_3(self.branch3x3_2(self.branch3x3_1(x)))

        # 3x3 average pooling with stride 1 and padding 1 keeps height/width.
        pooled = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        out_pool = self.branch_pool(pooled)

        return torch.cat((out_1x1, out_5x5, out_3x3, out_pool), 1)




In [15]:
class FinalInception(nn.Module):
    """Small CNN with two InceptionA blocks that outputs 10-class log-probabilities.

    Shape flow for a (N, 1, 28, 28) input:
        conv1 (5x5)  -> (N, 10, 24, 24) -> max-pool 2 -> (N, 10, 12, 12)
        incept1      -> (N, 88, 12, 12)
        conv2 (5x5)  -> (N, 20, 8, 8)   -> max-pool 2 -> (N, 20, 4, 4)
        incept2      -> (N, 88, 4, 4)   -> flatten    -> (N, 1408)
        fc           -> (N, 10)

    The ``fc`` input size (1408 = 88 * 4 * 4) is hard-coded, so other input
    resolutions will fail at the linear layer.
    """

    def __init__(self):
        super(FinalInception, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        # 88 input channels = channel count produced by an InceptionA block.
        self.conv2 = nn.Conv2d(88, 20, kernel_size=5)

        self.incept1 = InceptionA(channels_in=10)
        self.incept2 = InceptionA(channels_in=20)

        self.maxpool = nn.MaxPool2d(2)
        self.fc = nn.Linear(1408, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 10).

        Args:
            x: Input batch; the first dimension is treated as the batch size.
        """
        in_size = x.size(0)  # batch dimension, preserved by the flatten below
        x = F.relu(self.maxpool(self.conv1(x)))
        x = self.incept1(x)
        x = F.relu(self.maxpool(self.conv2(x)))
        x = self.incept2(x)

        x = x.view(in_size, -1)  # flatten everything except the batch dim
        x = self.fc(x)
        # Normalise over the class dimension explicitly; calling log_softmax
        # without `dim` is deprecated and emits a warning.
        return F.log_softmax(x, dim=1)

In [16]:
# Network to be trained.
model = FinalInception()

# SGD with momentum over all of the model's parameters.
LEARNING_RATE = 0.01
MOMENTUM = 0.5
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

In [19]:
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    Relies on the module-level ``model``, ``optimizer`` and ``train_loader``.
    A progress line with the current batch loss is printed every 10 batches.

    Args:
        epoch: Epoch number; used only in the progress message.
    """
    model.train()
    num_samples = len(train_loader.dataset)
    num_batches = len(train_loader)

    for batch_idx, (data, target) in enumerate(train_loader):
        # Tensors are fed to the model directly: the autograd ``Variable``
        # wrapper is deprecated and simply returns the tensor unchanged.
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), num_samples,
                100. * batch_idx / num_batches, loss.item()))
    


In [21]:
def test():
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    Relies on the module-level ``model`` and ``test_loader``. The loss is
    summed per sample and divided by the dataset size, so the reported
    average does not depend on the batch size.
    """
    model.eval()
    test_loss = 0
    correct = 0
    num_samples = len(test_loader.dataset)

    # `Variable(..., volatile=True)` no longer disables gradient tracking;
    # `torch.no_grad()` is the supported way to skip building autograd graphs.
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            # Sum up batch loss (`reduction='sum'` replaces the deprecated
            # `size_average=False`).
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # Index of the max log-probability is the predicted class.
            pred = output.max(1, keepdim=True)[1]
            # Accumulate as a plain Python int rather than a tensor.
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= num_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))


# Train for nine epochs (numbered 1..9), evaluating on the test set after each.
NUM_EPOCHS = 9
for epoch in range(1, NUM_EPOCHS + 1):
    train(epoch)
    test()






  



Test set: Average loss: 0.0831, Accuracy: 9747/10000 (97%)


Test set: Average loss: 0.0702, Accuracy: 9786/10000 (98%)


Test set: Average loss: 0.0607, Accuracy: 9812/10000 (98%)


Test set: Average loss: 0.0765, Accuracy: 9752/10000 (98%)


Test set: Average loss: 0.0567, Accuracy: 9822/10000 (98%)


Test set: Average loss: 0.0511, Accuracy: 9833/10000 (98%)


Test set: Average loss: 0.0542, Accuracy: 9813/10000 (98%)


Test set: Average loss: 0.0507, Accuracy: 9839/10000 (98%)


Test set: Average loss: 0.0449, Accuracy: 9851/10000 (99%)

