In [1]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline

import time

import pandas as pd
import seaborn as sns
import numpy as np
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", reshape=False)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
img_size = (28, 28)
n_classes = 10

imgs_train = mnist.train.images.reshape((-1, 1, *img_size)) / 255
imgs_val = mnist.validation.images.reshape((-1, 1, *img_size)) / 255
imgs_test = mnist.test.images.reshape((-1, 1, *img_size)) / 255

y_train = mnist.train.labels.astype(np.int32)
y_val = mnist.validation.labels.astype(np.int32)
y_test = mnist.test.labels.astype(np.int32)

def make_batch_iter(x, y, batch_size, shuffle=False):
    n = len(x)
    idx = np.arange(n)
    if shuffle:
        np.random.shuffle(idx)
    
    for i in range(0, n, batch_size):
        x_batch = x[i:i+batch_size]
        y_batch = y[i:i+batch_size]
        yield np.array(x_batch, np.float32), np.array(y_batch)

In [3]:
def build_fe(n_layers, n_chans_base, n_chans_mul, kernel_size):
    fe = []
    n_chans_prev = 1
    for i in range(n_layers):
        n_chans = n_chans_base * n_chans_mul ** i
        c = nn.Conv2d(n_chans_prev, n_chans, kernel_size, bias=False)
        bn = nn.BatchNorm2d(n_chans)
        a = nn.ReLU()
        m = nn.MaxPool2d(2)
        fe.extend([c, bn, a, m])
        n_chans_prev = n_chans
    return nn.Sequential(*fe)

def build_clf(in_features):
    n_layers = 2
    n_feat = 1024
    n_feat_prev = in_features
    
    clf = []
    for i in range(n_layers):
        d = nn.Linear(n_feat_prev, n_feat, bias=False)
        bn = nn.BatchNorm1d(n_feat)
        a = nn.ReLU()
        clf.extend([d, bn, a])
        n_feat_prev = n_feat
        
    d = nn.Linear(n_feat_prev, n_classes, bias=False)
    bn = nn.BatchNorm1d(n_classes)
    clf.extend([d, bn, a])
        
    return nn.Sequential(*clf)

class Model(torch.nn.Module):
    def __init__(self, n_layers, n_chans_base, n_chans_mul, kernel_size):
        super().__init__()
        self.fe = build_fe(n_layers, n_chans_base, n_chans_mul, kernel_size)
        self.clf = build_clf(n_chans_base*n_chans_mul**(n_layers-1))
    
    def forward(self, input):
        fm = self.fe(input)
        features = F.avg_pool2d(fm, fm.size()[-2:]).view(fm.size()[0], -1)
        return self.clf(features)

n_layers = 3
kernel_size = 3
n_chans_base = 16
n_chans_mul = 2

model = Model(n_layers, n_chans_base, n_chans_mul, kernel_size).cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model

Model (
  (fe): Sequential (
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU ()
    (3): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
    (6): ReLU ()
    (7): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (10): ReLU ()
    (11): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (clf): Sequential (
    (0): Linear (64 -> 1024)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True)
    (2): ReLU ()
    (3): Linear (1024 -> 1024)
    (4): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True)
    (5): ReLU ()
    (6): Linear (1024 -> 10)
    (7): BatchNorm1d(10, eps=1e-05, momentu

In [4]:
n_epoch = 100
batch_size = 1024

def to_var(x):
    return Variable(torch.from_numpy(x)).cuda()

def to_numpy(x):
    return x.cpu().data.numpy()

for epoch in range(n_epoch):
    train_iter = make_batch_iter(imgs_train, y_train, batch_size=batch_size)
    val_iter = make_batch_iter(imgs_val, y_val, batch_size=batch_size)
    
    start = time.time()
    
    model.train()
    losses = []
    weights = []
    accs = []
    for x_batch, y_batch in train_iter:
        y_pred = model(to_var(x_batch))
        
        optimizer.zero_grad()
        loss = F.cross_entropy(y_pred, to_var(y_batch).long())
        loss.backward()
        optimizer.step()
                    
        acc = np.mean(y_batch == np.argmax(to_numpy(y_pred), axis=1))
        accs.append(acc)
        losses.append(loss.cpu().data.numpy())
        weights.append(len(x_batch))
            
    train_loss = np.average(np.array(losses).flatten(), weights=weights)
    train_acc = np.average(np.array(accs).flatten(), weights=weights)
    
    model.eval()
    losses = []
    weights = []
    accs = []
    for x_batch, y_batch in val_iter:
        y_pred = model(to_var(x_batch))
        
        optimizer.zero_grad()
        loss = F.cross_entropy(y_pred, to_var(y_batch).long())
                
        accs.append(np.mean(y_batch == np.argmax(y_pred.cpu().data.numpy(), axis=1)))
        losses.append(loss.cpu().data.numpy())
        weights.append(len(x_batch))
    
    end = time.time()
    
    val_loss = np.average(np.array(losses).flatten(), weights=weights)
    val_acc = np.average(np.array(accs).flatten(), weights=weights)
    
    print('Epoch {}'.format(epoch))
    print('Train:', train_loss, train_acc)
    print('Val  :', val_loss, val_acc)
    print('Time :', end - start)
    print('\n')

Epoch 0
Train: 1.28114379954 0.8406
Val  : 2.30022509766 0.1126
Time : 4.434333801269531


Epoch 1
Train: 1.02046114307 0.954690909091
Val  : 1.3398828886 0.8804
Time : 1.2668063640594482


Epoch 2
Train: 0.882066148628 0.971927272727
Val  : 0.844975233459 0.9772
Time : 1.273801565170288


Epoch 3
Train: 0.770833467544 0.980618181818
Val  : 0.73166961689 0.9816
Time : 1.2730700969696045


Epoch 4
Train: 0.6775517648 0.985472727273
Val  : 0.636654625416 0.9832
Time : 1.273677110671997


Epoch 5
Train: 0.598426613877 0.988854545455
Val  : 0.56145029726 0.9842
Time : 1.271754503250122


Epoch 6
Train: 0.530443669692 0.991236363636
Val  : 0.49567724824 0.986
Time : 1.2858302593231201


Epoch 7
Train: 0.471396162523 0.993236363636
Val  : 0.434874973869 0.987
Time : 1.2830417156219482


Epoch 8
Train: 0.420592580396 0.9948
Val  : 0.390161210251 0.9872
Time : 1.274703025817871


Epoch 9
Train: 0.376238509703 0.996054545455
Val  : 0.350940081978 0.986
Time : 1.274308204650879


Epoch 10
Train:

KeyboardInterrupt: 