In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

In [2]:
class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()
    
    def forward(self, X):
        X = X.view(X.size(0), -1)
        return X

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.model = nn.Sequential(nn.Conv2d(1, 32, kernel_size=3),
                                   nn.ReLU(),
                                   nn.Conv2d(32, 64, kernel_size=3),
                                   nn.MaxPool2d(2),
                                   nn.Dropout(0.25),
                                   Flatten(),
                                   nn.Linear(9216, 128),
                                   nn.Dropout(0.5),
                                   nn.Linear(128, 10)
                                  )
    def forward(self, X):
        X = self.model(X)
        return X
    


In [3]:
transform = transforms.Compose([
                                transforms.ToTensor(), 
                                transforms.Normalize((0.5,), (1.0,))
                               ])
train_set = dset.MNIST(root='./data', train=True, 
                       transform=transform, download=True)
test_set = dset.MNIST(root='./data', train=False, transform=transform)

batch_size = 100
train_loader = torch.utils.data.DataLoader(
                 dataset=train_set,
                 batch_size=batch_size,
                 shuffle=True)
test_loader = torch.utils.data.DataLoader(
                dataset=test_set,
                batch_size=batch_size,
                shuffle=False)

model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-2)

def run_model(loader, train=False, optimizer=None):
    avg_loss, avg_acc = 0, 0
    for i, (X, y) in enumerate(loader):
        if train:
            optimizer.zero_grad()
        X, y = Variable(X), Variable(y)

        y_hat = model(X)
        loss = criterion(y_hat, y)
        
        if train:
            loss.backward()
            optimizer.step()

        pred = np.argmax(y_hat.data.numpy(), axis=1)
        acc = float(len(np.where(pred == y.data.numpy())[0]))
        acc /= batch_size

        avg_loss += loss.data[0]
        avg_acc += acc
    
    avg_loss /= (i+1)
    avg_acc /= (i+1)
    return avg_loss, avg_acc
        
train_loss, train_acc = run_model(train_loader, 
                                  train=True, 
                                  optimizer=optimizer)
print("Average train loss and accuracy: %f, %f" 
                      % (train_loss, train_acc))

test_loss, test_acc = run_model(test_loader)
print("Average test loss and accuracy: %f, %f" 
                      % (test_loss, test_acc))

Average train loss and accuracy: 0.453809, 0.915717
Average test loss and accuracy: 0.132329, 0.959500
