In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torchvision
import torchvision.transforms

In [2]:
# Convenience alias for converting numpy arrays into tensors.
from_numpy = torch.from_numpy

# --- Hardware -------------------------------------------------------------
cuda = torch.cuda.is_available()
print('cuda is available' if cuda else 'cuda is not available')

# --- Optimisation hyper-parameters ----------------------------------------
batch_size = 64
num_epochs = 10
lr0 = 0.02          # initial learning rate

# --- Bookkeeping ----------------------------------------------------------
store_every = 1000  # iterations between two learning-curve measurements

# --- Architecture switch ('MLP' is the alternative value) -----------------
model_type = 'CNN'

cuda is available


In [3]:
# The only preprocessing step is the conversion of each image to a tensor.
_to_tensor = torchvision.transforms.ToTensor()
mnist_transforms = torchvision.transforms.Compose([_to_tensor])

# Both splits live under ./data and are downloaded on first use.
_dataset_options = dict(root='./data', transform=mnist_transforms, download=True)
mnist_train = torchvision.datasets.MNIST(train=True, **_dataset_options)
mnist_test = torchvision.datasets.MNIST(train=False, **_dataset_options)

# Train and test loaders share the same batching configuration.
_loader_options = dict(batch_size=batch_size, shuffle=True, num_workers=2)
train_loader = torch.utils.data.DataLoader(mnist_train, **_loader_options)
test_loader = torch.utils.data.DataLoader(mnist_test, **_loader_options)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [4]:
# building model
class ResLinear(nn.Module):
    """Fully connected layer with a residual (skip) connection.

    The output is ``activation(linear(x)) + skip(x)``. ``skip`` is the
    identity when ``in_features == out_features``; otherwise it is a second,
    learned linear projection so that both terms have matching width.

    Args:
        in_features: size of the last dimension of the input.
        out_features: size of the last dimension of the output.
        activation: callable applied to the output of the main linear layer.
            ``None`` (the default) creates a fresh ``nn.ReLU()`` for this
            instance.
    """

    def __init__(self, in_features, out_features, activation=None):
        super(ResLinear, self).__init__()

        self.in_features = in_features
        self.out_features = out_features
        # A module instance used as a default argument would be created once
        # at definition time and shared by every layer; build it per instance.
        self.activation = nn.ReLU() if activation is None else activation

        self.linear = nn.Linear(in_features, out_features)
        if in_features != out_features:
            # Projection for the skip path, only needed when widths differ.
            self.project_linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return ``activation(linear(x))`` plus the (possibly projected) input."""
        inner = self.activation(self.linear(x))
        if self.in_features != self.out_features:
            skip = self.project_linear(x)
        else:
            skip = x
        return inner + skip

In [5]:
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single dimension."""

    def forward(self, x):
        # Keep the leading dimension and let ``view`` infer the merged size.
        leading = x.size(0)
        return x.view(leading, -1)

In [7]:
# Convolutional front end: two conv(5x5) -> ReLU -> 2x2 max-pool stages.
# For the 1x28x28 inputs used in this notebook this yields 16x4x4 = 256
# features, which is the input width of the first ResLinear layer below.
_conv_stages = [
    nn.Conv2d(1, 16, 5), nn.ReLU(), nn.MaxPool2d(2),
    nn.Conv2d(16, 16, 5), nn.ReLU(), nn.MaxPool2d(2),
]

# Classifier head: flatten, then two residual linear layers down to 10 outputs.
_head = [Flatten(), ResLinear(256, 100), nn.ReLU(), ResLinear(100, 10)]

model = nn.Sequential(*(_conv_stages + _head))

In [8]:
# Use the GPU copy of the network when CUDA was detected; otherwise keep it as is.
model = model.cuda() if cuda else model

In [9]:
# Training objective: cross-entropy between the network outputs and the labels.
criterion = nn.CrossEntropyLoss()
# Plain SGD over all model parameters, starting from the initial rate lr0.
optimizer = optim.SGD(params=model.parameters(), lr=lr0)

In [10]:
def adjust_lr(optimizer, epoch, total_epochs):
    """Set the learning rate of every parameter group of ``optimizer``.

    The rate is ``lr0 * 0.1 ** (epoch / total_epochs)``: it equals the
    module-level ``lr0`` at epoch 0 and ``lr0 / 10`` at ``total_epochs``.
    """
    progress = epoch / float(total_epochs)
    new_lr = lr0 * (0.1 ** progress)
    for group in optimizer.param_groups:
        group['lr'] = new_lr

In [11]:
def accuracy(proba, y):
    """Return the fraction of rows of ``proba`` whose arg-max equals ``y``.

    The prediction for each row is the index of its largest entry along
    dimension 1. The result is a CPU float tensor.
    """
    predicted = proba.max(1)[1]
    hits = torch.eq(predicted, y).sum()
    return hits.type(torch.FloatTensor) / y.size(0)

In [12]:
def evaluate(dataset_loader, criterion):
    """Return the sample-weighted mean of ``criterion`` over ``dataset_loader``.

    Each batch is reshaped according to the module-level ``model_type``
    ('MLP' -> ``(-1, 784)``, 'CNN' -> ``(-1, 1, 28, 28)``), moved to the GPU
    when the module-level ``cuda`` flag is set, and passed through the
    module-level ``model``. The model is put in eval mode and gradient
    tracking is disabled for the duration of the call; the previous
    train/eval mode is restored afterwards.

    Args:
        dataset_loader: iterable yielding ``(x, y)`` batches.
        criterion: callable ``criterion(model(x), y)`` returning a tensor
            whose sum is treated as the per-batch mean value.

    Returns:
        float: ``sum(batch_value * batch_size) / total_samples``.

    Raises:
        ZeroDivisionError: if ``dataset_loader`` yields no samples.
    """
    weighted_sum = 0.0
    sample_count = 0

    was_training = model.training
    model.eval()
    try:
        # No parameter update happens here, so no autograd graph is needed.
        with torch.no_grad():
            for x, y in dataset_loader:
                if model_type == 'MLP':
                    x = x.view(-1, 784)
                    y = y.view(-1)
                elif model_type == 'CNN':
                    x = x.view(-1, 1, 28, 28)
                    y = y.view(-1)
                if cuda:
                    x = x.cuda()
                    y = y.cuda()

                value = criterion(model(x), y)
                n = y.size(0)
                # Weight by batch size so a smaller last batch is not over-counted.
                weighted_sum += value.sum().item() * n
                sample_count += n
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    return weighted_sum / float(sample_count)

In [13]:
def train_model():
    """Train the module-level ``model`` and record learning curves.

    Runs ``num_epochs`` passes over ``train_loader`` using the module-level
    ``criterion`` and ``optimizer``. A running training loss is printed
    several times per ``store_every`` iterations, and every ``store_every``
    iterations the loss and accuracy are measured on the full train and test
    loaders via ``evaluate``. After each epoch the learning rate is updated
    with ``adjust_lr``.

    Returns:
        tuple: four lists, in order: train loss, test loss, train accuracy,
        test accuracy, each with one entry per measurement.
    """
    running_loss = 0.0
    running_count = 0
    iterations = 0

    # Integer logging interval. True division (store_every / 5) gives a float
    # that the iteration counter is never an exact multiple of unless
    # store_every is divisible by 5; max(1, ...) also avoids a zero modulus.
    log_every = max(1, store_every // 5)

    learning_curve_nll_train = list()
    learning_curve_nll_test = list()
    learning_curve_acc_train = list()
    learning_curve_acc_test = list()

    for e in range(num_epochs):
        for x, y in train_loader:
            optimizer.zero_grad()

            if model_type == 'MLP':
                x = x.view(-1, 784)
                y = y.view(-1)
            elif model_type == 'CNN':
                x = x.view(-1, 1, 28, 28)
                y = y.view(-1)
            if cuda:
                x = x.cuda()
                y = y.cuda()

            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

            # Running average of the training loss since the last progress line.
            n = y.size(0)
            running_loss += loss.sum().item() * n
            running_count += n
            iterations += 1

            if iterations % log_every == 0:
                avg_loss = running_loss / float(running_count)
                running_loss = 0.0
                running_count = 0
                print(" Iteration {}: TRAIN {}".format(
                    iterations, avg_loss))

            if iterations % store_every == 0:
                train_loss = evaluate(train_loader, criterion)
                learning_curve_nll_train.append(train_loss)
                test_loss = evaluate(test_loader, criterion)
                learning_curve_nll_test.append(test_loss)

                train_acc = evaluate(train_loader, accuracy)
                learning_curve_acc_train.append(train_acc)
                test_acc = evaluate(test_loader, accuracy)
                learning_curve_acc_test.append(test_acc)

                print(" [NLL] TRAIN {} / TEST {}".format(
                    train_loss, test_loss))
                print(" [ACC] TRAIN {} / TEST {}".format(
                    train_acc, test_acc))

        # Decay the learning rate once per completed epoch.
        adjust_lr(optimizer, e + 1, num_epochs)

    return (learning_curve_nll_train,
            learning_curve_nll_test,
            learning_curve_acc_train,
            learning_curve_acc_test)
           

# Run the full training loop when this code executes as the main module.
# The returned learning curves are bound to `_` and not used further here.
if __name__ == '__main__':
    
    _ = train_model()

 Iteration 200: TRAIN 1.8038890406489372
 Iteration 400: TRAIN 0.45653281316161154
 Iteration 600: TRAIN 0.2718612297810614
 Iteration 800: TRAIN 0.21194624420255423
 Iteration 1000: TRAIN 0.1815738298540426
 [NLL] TRAIN 0.15185546285708745 / TEST 0.14618860087394714
 [ACC] TRAIN 0.95455 / TEST 0.9544
 Iteration 1200: TRAIN 0.14706243999302387
 Iteration 1400: TRAIN 0.13602764839306475
 Iteration 1600: TRAIN 0.12858284506946802
 Iteration 1800: TRAIN 0.11974911754950882
 Iteration 2000: TRAIN 0.10179303056165986
 [NLL] TRAIN 0.0982314137260119 / TEST 0.08828171434402465
 [ACC] TRAIN 0.9697666666666667 / TEST 0.9726
 Iteration 2200: TRAIN 0.09973444210365415
 Iteration 2400: TRAIN 0.09267908608540892
 Iteration 2600: TRAIN 0.09187613438814879
 Iteration 2800: TRAIN 0.09205739006400109
 Iteration 3000: TRAIN 0.07789612462496698
 [NLL] TRAIN 0.07731529317299525 / TEST 0.07236806674003601
 [ACC] TRAIN 0.9763666666666667 / TEST 0.978
 Iteration 3200: TRAIN 0.0873312658071518
 Iteration 3400