In [2]:
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim

from data_utils import load_mnist

In [8]:
class ConvNet(torch.nn.Module):
    def __init__(self, output_dim):
        super(ConvNet, self).__init__()

        self.conv = torch.nn.Sequential(
                torch.nn.Conv2d(1, 10, kernel_size=5),
                torch.nn.MaxPool2d(kernel_size=2),
                torch.nn.ReLU(),
                torch.nn.Conv2d(10, 20, kernel_size=5),
                torch.nn.Dropout(),
                torch.nn.MaxPool2d(kernel_size=2),
                torch.nn.ReLU()
        )
        
        self.fc = torch.nn.Sequential(
                torch.nn.Linear(320, 50),
                torch.nn.ReLU(),
                torch.nn.Dropout(),
                torch.nn.Linear(50, output_dim)
        )
        
    def forward(self, x):
        x = self.conv.forward(x)
        x = x.view(-1, 320)
        return self.fc.forward(x)

### Training Function

In [4]:
def train(model, loss, optimizer, x_val, y_val):
    x = Variable(x_val, requires_grad=False)
    y = Variable(y_val, requires_grad=False)

    # Reset gradient
    optimizer.zero_grad()

    # Forward
    fx = model.forward(x)
    output = loss.forward(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()


### Prediction Function

In [5]:
def predict(model, x_val):
    x = Variable(x_val, requires_grad=False)
    output = model.forward(x)
    return output.data.numpy().argmax(axis=1)

### Parameters

In [6]:
torch.manual_seed(42)
trX, teX, trY, teY = load_mnist(onehot=False)
trX = trX.reshape(-1, 1, 28, 28)
teX = teX.reshape(-1, 1, 28, 28)

trX = torch.from_numpy(trX).float()
teX = torch.from_numpy(teX).float()
trY = torch.from_numpy(trY).long()

n_examples = len(trX)
n_classes = 10
model = ConvNet(output_dim=n_classes)
loss = torch.nn.CrossEntropyLoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
batch_size = 100



In [10]:
model

ConvNet(
  (conv): Sequential(
    (conv_1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
    (maxpool_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (relu_1): ReLU()
    (conv_2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
    (dropout_2): Dropout(p=0.5, inplace=False)
    (maxpool_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (relu_2): ReLU()
  )
  (fc): Sequential(
    (fc1): Linear(in_features=320, out_features=50, bias=True)
    (relu_3): ReLU()
    (dropout_3): Dropout(p=0.5, inplace=False)
    (fc2): Linear(in_features=50, out_features=10, bias=True)
  )
)

# Training

In [7]:

for i in range(20):
    cost = 0.
    num_batches = n_examples // batch_size
    for k in range(num_batches):
        start, end = k * batch_size, (k + 1) * batch_size
        cost += train(model, loss, optimizer, trX[start:end], trY[start:end])
    predY = predict(model, teX)
    print("Epoch %d, cost = %f, acc = %.2f%%"
          % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))

Epoch 1, cost = 0.730928, acc = 90.86%
Epoch 2, cost = 0.270943, acc = 93.40%
Epoch 3, cost = 0.210942, acc = 94.62%
Epoch 4, cost = 0.177429, acc = 95.24%
Epoch 5, cost = 0.157559, acc = 96.19%
Epoch 6, cost = 0.141248, acc = 96.19%
Epoch 7, cost = 0.132196, acc = 96.64%
Epoch 8, cost = 0.123311, acc = 96.54%
Epoch 9, cost = 0.111277, acc = 97.19%
Epoch 10, cost = 0.109723, acc = 96.95%
Epoch 11, cost = 0.102575, acc = 97.41%
Epoch 12, cost = 0.095494, acc = 97.29%
Epoch 13, cost = 0.094187, acc = 97.16%
Epoch 14, cost = 0.090923, acc = 97.13%
Epoch 15, cost = 0.086524, acc = 97.39%
Epoch 16, cost = 0.085599, acc = 97.27%
Epoch 17, cost = 0.081234, acc = 97.45%
Epoch 18, cost = 0.081445, acc = 97.56%
Epoch 19, cost = 0.082314, acc = 97.72%
Epoch 20, cost = 0.074394, acc = 97.57%
