In [1]:
import torch
import torchvision
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import random
from torch.utils.data import DataLoader
from torch.utils.data import sampler
from torch.autograd import Variable


In [2]:
# Report whether CUDA is usable, then make GPU 0 the current device.
# set_device raises if no CUDA device is present, so this cell fails fast
# on a CPU-only machine.
print(torch.cuda.is_available())
torch.cuda.set_device(device = 0)

True


In [3]:
# Define a convolutional block
# A single layer of convolution if shortcut == False, and...
# Two layers of convolution and a residual convolution otherwise.

class ConvBlock(nn.Module):
    """Convolutional block with an optional residual (shortcut) branch.

    With ``shortcut`` False the block computes
    ``ReLU(BatchNorm2d(Conv2d(input)))``.

    With ``shortcut`` True the main branch is
    ``Conv2d -> BatchNorm2d -> ReLU -> Conv2d -> BatchNorm2d`` and a parallel
    ``Conv2d -> BatchNorm2d`` branch on the same input is added to it before
    the final ReLU.

    Every convolution in the block shares ``kernelSize``, ``stride`` and
    ``padding``. The main branch applies two convolutions and the shortcut
    branch only one, so with ``shortcut`` True the two branches only have
    matching shapes when a single convolution preserves the spatial size
    (for example ``kernelSize = 3, stride = 1, padding = 1``).

    Args:
        inChannels: number of channels of the input tensor.
        midChannels: channels between the two main-branch convolutions;
            ignored when ``shortcut`` is False.
        outChannels: number of channels of the output tensor.
        kernelSize: kernel size forwarded to every ``nn.Conv2d``.
        stride: stride forwarded to every ``nn.Conv2d``.
        padding: padding forwarded to every ``nn.Conv2d``.
        bias: whether the convolutions learn an additive bias.
        shortcut: build the two-layer residual variant when not False.
    """

    # midChannels will be of no use if shortcut == False
    def __init__(self, inChannels, midChannels, outChannels, 
                 kernelSize, stride = 1, padding = 0, bias = True, shortcut = False):
        super(ConvBlock, self).__init__()

        def makeConv(cIn, cOut):
            # Keyword arguments are required: the sixth positional parameter
            # of nn.Conv2d is `dilation`, not `bias`, so passing `bias`
            # positionally silently configures the wrong option.
            return nn.Conv2d(cIn, cOut, kernelSize,
                             stride = stride, padding = padding, bias = bias)

        # Layers are created in the same order as before so that seeded
        # parameter initialisation is unchanged.
        if shortcut is False:
            self.left = nn.Sequential(
                makeConv(inChannels, outChannels),
                nn.BatchNorm2d(outChannels)
            )
            self.right = None
        else:
            self.left = nn.Sequential(
                makeConv(inChannels, midChannels),
                nn.BatchNorm2d(midChannels),
                nn.ReLU(inplace = True),
                makeConv(midChannels, outChannels),
                nn.BatchNorm2d(outChannels)
            )
            self.right = nn.Sequential(
                makeConv(inChannels, outChannels),
                nn.BatchNorm2d(outChannels)
            )
        
    def forward(self, input):
        """Apply the block and return the ReLU-activated result."""
        out = self.left(input)
        if self.right is not None:
            # Residual connection: add the projected input to the main branch.
            out += self.right(input)
        return F.relu(out)
    

In [4]:
# Define a fully connected block
# A single layer if shortcut == False, and...
# Two layers and a residual layer otherwise.

class FCBlock(nn.Module):
    """Fully connected block with an optional residual (shortcut) branch.

    Without the shortcut the block is a single ``Linear`` followed by ReLU.
    With the shortcut, a ``Linear -> ReLU -> Linear`` main branch is summed
    with a parallel single ``Linear`` branch and the sum is passed through
    ReLU.
    """

    # midNodes is ignored when shortcut is False
    def __init__(self, inNodes, midNodes, outNodes, shortcut = False):
        super(FCBlock, self).__init__()
        if shortcut is not False:
            mainLayers = [
                nn.Linear(inNodes, midNodes),
                nn.ReLU(inplace = True),
                nn.Linear(midNodes, outNodes),
            ]
            self.left = nn.Sequential(*mainLayers)
            self.right = nn.Sequential(nn.Linear(inNodes, outNodes))
        else:
            self.left = nn.Sequential(nn.Linear(inNodes, outNodes))
            self.right = None

    def forward(self, input):
        """Return the ReLU of the main branch plus the shortcut branch, if any."""
        summed = self.left(input)
        if self.right is None:
            return F.relu(summed)
        # In-place accumulation of the shortcut projection.
        summed += self.right(input)
        return F.relu(summed)
        

In [5]:
class Flatten(nn.Module):
    """Reshape a 4-D (N, C, H, W) tensor into a 2-D (N, C * H * W) tensor."""

    def forward(self, x):
        # Unpacking four values enforces that the input really is 4-D.
        batchSize, _channels, _height, _width = x.shape
        # Keep the batch axis; merge everything else into one vector per image.
        return x.view(batchSize, -1)

In [6]:
# Earlier, sequential-range version of ChunkSampler, disabled by wrapping it in
# a string literal (the cell therefore only echoes the string as its output).
# The ChunkSampler class defined in the following cell replaces it.
'''
class ChunkSampler(sampler.Sampler):
    """Samples elements sequentially from some offset. 
    Arguments:
        num_samples: # of desired datapoints
        start: offset where we should start selecting from
    """
    def __init__(self, num_samples, start = 0):
        self.num_samples = num_samples
        self.start = start

    def __iter__(self):
        return iter(range(self.start, self.start + self.num_samples))

    def __len__(self):
        return self.num_samples
'''

'\nclass ChunkSampler(sampler.Sampler):\n    """Samples elements sequentially from some offset. \n    Arguments:\n        num_samples: # of desired datapoints\n        start: offset where we should start selecting from\n    """\n    def __init__(self, num_samples, start = 0):\n        self.num_samples = num_samples\n        self.start = start\n\n    def __iter__(self):\n        return iter(range(self.start, self.start + self.num_samples))\n\n    def __len__(self):\n        return self.num_samples\n'

In [7]:
class ChunkSampler(sampler.Sampler):
    """Sampler that yields the indices of a caller-supplied list, in list order.

    No shuffling is done here: any randomness in the sample order comes from
    how the caller built ``index_list``, and the same order is replayed on
    every iteration.
    """

    def __init__(self, index_list):
        # Keep a reference to the list (not a copy) and cache its size.
        self.index = index_list
        self.length = len(self.index)

    def __len__(self):
        return self.length

    def __iter__(self):
        return iter(self.index)


In [8]:
# Legacy CUDA tensor type (32-bit float); passed to .type(...) below to cast
# tensors and the model to float32 on the GPU.
gpuDtype = torch.cuda.FloatTensor

def _countCorrect(model, loader):
    """Return ``(numCorrect, numSamples)`` for argmax predictions over ``loader``."""
    # Evaluate on whatever device the model lives on instead of assuming CUDA.
    firstParam = next(model.parameters(), None)
    device = firstParam.device if firstParam is not None else torch.device('cpu')

    numCorrect = 0
    numSamples = 0
    with torch.no_grad():
        for x, y in loader:
            scores = model(x.to(device = device, dtype = torch.float32))
            _, preds = scores.cpu().max(1)
            # .item() keeps the running total a plain Python int.
            numCorrect += (preds == y.cpu()).sum().item()
            numSamples += preds.size(0)
    return numCorrect, numSamples


def _printAccuracy(label, numCorrect, numSamples):
    """Print one accuracy line; an empty loader reports 0.00% instead of raising."""
    acc = float(numCorrect) / numSamples if numSamples > 0 else 0.0
    print('%s: %d / %d correct (%.2f%%)' % (label, numCorrect, numSamples, 100 * acc))


def checkAccuracy(model, trainLoader, valLoader):
    """Print classification accuracy of ``model`` on the train and val loaders.

    Args:
        model: module mapping a float batch to per-class scores of shape
            (batch, numClasses).
        trainLoader: iterable of ``(x, y)`` batches reported as "Train".
        valLoader: iterable of ``(x, y)`` batches reported as "Val".

    The model is switched to evaluation mode and is left in that mode when
    the function returns. Nothing is returned.
    """
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    for label, loader in (('Train', trainLoader), ('Val', valLoader)):
        numCorrect, numSamples = _countCorrect(model, loader)
        _printAccuracy(label, numCorrect, numSamples)
    
def train(model, lossFunc, optimizer, numEpochs = 1, 
          l1Lambda = 0.0, l2Lambda = 0.0, printEvery = 100, checkEveryEpoch = True):
    """Train ``model`` for ``numEpochs`` passes over the global ``trainLoader``.

    Args:
        model: module to optimise; batches are moved to the device of its
            first parameter.
        lossFunc: callable ``lossFunc(scores, targets)`` returning a scalar loss.
        optimizer: optimizer already bound to ``model``'s parameters.
        numEpochs: number of passes over ``trainLoader``.
        l1Lambda: weight of the L1-norm penalty summed over every parameter.
        l2Lambda: weight of the (non-squared) L2-norm penalty summed over
            every parameter.
        printEvery: print the loss every this many iterations; values <= 0
            disable the printing.
        checkEveryEpoch: when true, call ``checkAccuracy`` after each epoch.

    Note:
        ``trainLoader`` and ``valLoader`` are not parameters; they are read
        from module scope when the function runs, so the data cell must have
        been executed first.
    """
    # Follow the model's device rather than assuming a CUDA tensor type.
    device = next(model.parameters()).device
    # Skip the per-parameter norm computation entirely when it cannot
    # contribute to the loss.
    useRegularization = l1Lambda != 0 or l2Lambda != 0

    for epoch in range(numEpochs):
        print('Starting epoch %d / %d' % (epoch + 1, numEpochs))
        model.train()
        for t, (x, y) in enumerate(trainLoader):
            xBatch = x.to(device = device, dtype = torch.float32)
            yBatch = y.to(device = device).long()
            scores = model(xBatch)
            loss = lossFunc(scores, yBatch)

            if useRegularization:
                for param in model.parameters():
                    loss += l1Lambda * torch.norm(param, 1) + l2Lambda * torch.norm(param, 2)

            # The reported loss includes the regularisation terms.
            if printEvery > 0 and (t + 1) % printEvery == 0:
                print('t = %d, loss = %.4f' % (t + 1, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if checkEveryEpoch:
            checkAccuracy(model, trainLoader, valLoader)

In [9]:
class ResNet(nn.Module):
    """Small residual classifier: two conv stages followed by three FC blocks.

    Layout of ``self.blocks``:
        ConvBlock(3 -> 8 -> 32) -> MaxPool2d(2) ->
        ConvBlock(32 -> 128 -> 512) -> MaxPool2d(2) -> Flatten ->
        FCBlock(32768 -> 4096 -> 512) -> FCBlock(512 -> 128 -> 32) ->
        FCBlock(32 -> 10)

    The 32768 input width of the first FC block equals 512 * 8 * 8, i.e. it
    assumes 32x32 input images halved twice by the pooling layers.
    """

    def __init__(self):
        super(ResNet, self).__init__()

        featureLayers = [
            ConvBlock(inChannels = 3, midChannels = 8, outChannels = 32,
                      kernelSize = 3, padding = 1, shortcut = True),
            nn.MaxPool2d(kernel_size = 2),
            ConvBlock(inChannels = 32, midChannels = 128, outChannels = 512,
                      kernelSize = 3, padding = 1, shortcut = True),
            nn.MaxPool2d(kernel_size = 2),
        ]

        # NOTE(review): FCBlock ends with a ReLU, so the 10 class scores from
        # the last block are non-negative -- confirm this is intended.
        classifierLayers = [
            Flatten(),
            FCBlock(inNodes = 32768, midNodes = 4096, outNodes = 512,
                    shortcut = True),
            FCBlock(inNodes = 512, midNodes = 128, outNodes = 32,
                    shortcut = True),
            FCBlock(inNodes = 32, midNodes = 0, outNodes = 10,
                    shortcut = False),
        ]

        self.blocks = nn.Sequential(*featureLayers, *classifierLayers)

    def forward(self, input):
        """Run the input batch through every block and return the class scores."""
        return self.blocks(input)

In [11]:
# Size of the index range sampled from (the CIFAR-10 training split).
DATASET_SIZE = 50000

numTrain = 20000
numVal = 1000

# Draw numTrain + numVal distinct indices with a fixed seed, then split them
# so the training and validation subsets never overlap.
random.seed(666)
index_all = random.sample(range(DATASET_SIZE), numTrain + numVal)
index_train, index_val = index_all[:numTrain], index_all[numTrain:]

# Training and validation both read the train = True split; only the index
# lists handed to the samplers differ.
trainData = datasets.CIFAR10(root = './data', train = True,
                             transform = transforms.ToTensor())
trainLoader = DataLoader(dataset = trainData, batch_size = 64,
                         sampler = ChunkSampler(index_train))

valData = datasets.CIFAR10(root = './data', train = True,
                           transform = transforms.ToTensor())
valLoader = DataLoader(dataset = valData, batch_size = 64,
                       sampler = ChunkSampler(index_val))

# Held-out test split, iterated in dataset order.
testData = datasets.CIFAR10(root = './data', train = False,
                            transform = transforms.ToTensor())
testLoader = DataLoader(dataset = testData, batch_size = 64)

In [12]:
# Seed the CPU and CUDA generators so weight initialisation is repeatable.
TORCH_SEED = 668
torch.manual_seed(TORCH_SEED)
torch.cuda.manual_seed(TORCH_SEED)

# Build the network on the GPU, with cross-entropy loss and Adam.
model = ResNet().type(gpuDtype)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params = model.parameters(), lr = 0.003)

# 20 epochs with an L2-norm penalty only; per-iteration loss printing is off
# and accuracy is reported after every epoch.
train(
    model = model,
    lossFunc = criterion,
    optimizer = optimizer,
    numEpochs = 20,
    l1Lambda = 0.0,
    l2Lambda = 0.001,
    printEvery = 0,
    checkEveryEpoch = True,
)

Starting epoch 1 / 20
Train: 5936 / 20000 correct (29.68%)
Val: 276 / 1000 correct (27.60%)
Starting epoch 2 / 20
Train: 7704 / 20000 correct (38.52%)
Val: 366 / 1000 correct (36.60%)
Starting epoch 3 / 20
Train: 10252 / 20000 correct (51.26%)
Val: 492 / 1000 correct (49.20%)
Starting epoch 4 / 20
Train: 12114 / 20000 correct (60.57%)
Val: 569 / 1000 correct (56.90%)
Starting epoch 5 / 20
Train: 12947 / 20000 correct (64.73%)
Val: 610 / 1000 correct (61.00%)
Starting epoch 6 / 20
Train: 14063 / 20000 correct (70.32%)
Val: 629 / 1000 correct (62.90%)
Starting epoch 7 / 20
Train: 14601 / 20000 correct (73.00%)
Val: 632 / 1000 correct (63.20%)
Starting epoch 8 / 20
Train: 15909 / 20000 correct (79.55%)
Val: 652 / 1000 correct (65.20%)
Starting epoch 9 / 20
Train: 16542 / 20000 correct (82.71%)
Val: 671 / 1000 correct (67.10%)
Starting epoch 10 / 20
Train: 16761 / 20000 correct (83.80%)
Val: 668 / 1000 correct (66.80%)
Starting epoch 11 / 20
Train: 17618 / 20000 correct (88.09%)
Val: 691 /