In [1]:
import torch
import torchvision
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import random
from torch.utils.data import DataLoader
from torch.utils.data import sampler
from torch.autograd import Variable


In [2]:
print(torch.cuda.is_available())
torch.cuda.set_device(0)

True


In [3]:
# Define a convolutional block
# A single layer of convolution if shortcut == False, and...
# Two layers of convolution and a residual convolution otherwise.

class ConvBlock(nn.Module):
    
    # midChannels will be of no use if shortcut == False
    def __init__(self, inChannels, midChannels, outChannels, 
                 kernelSize, stride = 1, padding = 0, bias = True, shortcut = False):
        super(ConvBlock, self).__init__()
        if shortcut is False:
            self.left = nn.Sequential(
                nn.Conv2d(inChannels, outChannels, 
                          kernelSize, stride, padding, bias),
                nn.BatchNorm2d(outChannels)
            )
            self.right = None
        else:
            self.left = nn.Sequential(
                nn.Conv2d(inChannels, midChannels, 
                          kernelSize, stride, padding, bias),
                nn.BatchNorm2d(midChannels),
                nn.ReLU(inplace = True),
                nn.Conv2d(midChannels, outChannels, 
                          kernelSize, stride, padding, bias),
                nn.BatchNorm2d(outChannels)
            )
            self.right = nn.Sequential(
                nn.Conv2d(inChannels, outChannels, 
                          kernelSize, stride, padding, bias),
                nn.BatchNorm2d(outChannels)
            )
        
    def forward(self, input):
        out = self.left(input)
        if self.right is not None:
            out += self.right(input)
        return F.relu(out)
    

In [4]:
# Define a fully connected block
# A single layer if shortcut == False, and...
# Two layers and a residual layer otherwise.

class FCBlock(nn.Module):
    
    # midChannels will be of no use if shortcut == False
    def __init__(self, inNodes, midNodes, outNodes, shortcut = False):
        super(FCBlock, self).__init__()
        if shortcut is False:
            self.left = nn.Sequential(
                nn.Linear(inNodes, outNodes)
            )
            self.right = None
        else:
            self.left = nn.Sequential(
                nn.Linear(inNodes, midNodes),
                nn.ReLU(inplace = True),
                nn.Linear(midNodes, outNodes)
            )
            self.right = nn.Sequential(
                nn.Linear(inNodes, outNodes)
            )
            
    def forward(self, input):
        out = self.left(input)
        if self.right is not None:
            out += self.right(input)
        return F.relu(out)
        

In [5]:
class Flatten(nn.Module):
    def forward(self, x):
        N, C, H, W = x.size() # read in N, C, H, W
        return x.view(N, -1)  # "flatten" the C * H * W values into a single vector per image

In [6]:
'''
class ChunkSampler(sampler.Sampler):
    """Samples elements sequentially from some offset. 
    Arguments:
        num_samples: # of desired datapoints
        start: offset where we should start selecting from
    """
    def __init__(self, num_samples, start = 0):
        self.num_samples = num_samples
        self.start = start

    def __iter__(self):
        return iter(range(self.start, self.start + self.num_samples))

    def __len__(self):
        return self.num_samples
'''

In [6]:
class ChunkSampler(sampler.Sampler):
    """Samples elements randomly from the given index list. 
    """
    def __init__(self, index_list):
        self.index = index_list
        self.length = len(index_list)

    def __iter__(self):
        return iter(index)

    def __len__(self):
        return self.length


In [7]:
gpuDtype = torch.cuda.FloatTensor

def checkAccuracy(model, trainLoader, valLoader):
    numCorrect = 0
    numSamples = 0
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    for x, y in trainLoader:
        with torch.no_grad():
            xVar = Variable(x.type(gpuDtype))
            scores = model(xVar)
            _, preds = scores.data.cpu().max(1)
            numCorrect += (preds == y).sum()
            numSamples += preds.size(0)
    acc = float(numCorrect) / numSamples
    print('Train: %d / %d correct (%.2f%%)' % (numCorrect, numSamples, 100 * acc))
    
    numCorrect = 0
    numSamples = 0
    for x, y in valLoader:
        with torch.no_grad():
            xVar = Variable(x.type(gpuDtype))
            scores = model(xVar)
            _, preds = scores.data.cpu().max(1)
            numCorrect += (preds == y).sum()
            numSamples += preds.size(0)
    acc = float(numCorrect) / numSamples
    print('Val: %d / %d correct (%.2f%%)' % (numCorrect, numSamples, 100 * acc))
    
def train(model, lossFunc, optimizer, numEpochs = 1, printEvery = 100, checkEveryEpoch = True):
    for epoch in range(numEpochs):
        print('Starting epoch %d / %d' % (epoch + 1, numEpochs))
        model.train()
        for t, (x, y) in enumerate(trainLoader):
            xVar = Variable(x.type(gpuDtype))
            yVar = Variable(y.type(gpuDtype).long())
            scores = model(xVar)            
            loss = lossFunc(scores, yVar)
            
            if printEvery > 0 and (t + 1) % printEvery == 0:
                print('t = %d, loss = %.4f' % (t + 1, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if checkEveryEpoch:
            checkAccuracy(model, trainLoader, valLoader)

In [8]:
DATASET_SIZE = 50000

numTrain = 49000
numVal = 1000

index_all = random.sample(range(DATASET_SIZE), numTrain + numVal)
print(index_all[ : 200])
index_train = index_all[ : numTrain]
index_val = index_all[numTrain : ]

print(len(index_train), len(index_val))

trainData = datasets.CIFAR10('./data', train = True,
                           transform = transforms.ToTensor())
trainLoader = DataLoader(trainData, batch_size = 64, 
                              sampler = ChunkSampler(index_train)

valData = datasets.CIFAR10('./data', train = True,
                           transform = transforms.ToTensor())
valLoader = DataLoader(valData, batch_size = 64, 
                            sampler = ChunkSampler(index_val))

testData = datasets.CIFAR10('./data', train = False,
                          transform = transforms.ToTensor())
testLoader = DataLoader(testData, batch_size=64)

In [9]:
class ResNet(nn.Module):
    
    def __init__(self):
        super(ResNet, self).__init__()
        
        self.blocks = nn.Sequential(
            ConvBlock(inChannels = 3, midChannels = 8, outChannels = 32, 
                      kernelSize = 3, padding = 1, shortcut = True),
            nn.MaxPool2d(kernel_size = 2),
            ConvBlock(inChannels = 32, midChannels = 128, outChannels = 512, 
                      kernelSize = 3, padding = 1, shortcut = True),
            nn.MaxPool2d(kernel_size = 2),
            
            Flatten(),
            
            FCBlock(inNodes = 32768, midNodes = 4096, outNodes = 512, 
                    shortcut = True),
            FCBlock(inNodes = 512, midNodes = 128, outNodes = 32, 
                    shortcut = True),
            FCBlock(inNodes = 32, midNodes = 0, outNodes = 10, 
                    shortcut = False)
            
        )
    
    def forward(self, input):
        out = self.blocks(input)
        return out

In [10]:
model = ResNet().type(gpuDtype)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.0005, weight_decay = 0.001)

train(model, criterion, optimizer, numEpochs = 10, printEvery = 0, checkEveryEpoch = True)

Starting epoch 1 / 10
Train: 92 / 1000 correct (9.20%)
Val: 103 / 1000 correct (10.30%)
Starting epoch 2 / 10
Train: 99 / 1000 correct (9.90%)
Val: 113 / 1000 correct (11.30%)
Starting epoch 3 / 10
Train: 134 / 1000 correct (13.40%)
Val: 146 / 1000 correct (14.60%)
Starting epoch 4 / 10
Train: 142 / 1000 correct (14.20%)
Val: 155 / 1000 correct (15.50%)
Starting epoch 5 / 10
Train: 144 / 1000 correct (14.40%)
Val: 158 / 1000 correct (15.80%)
Starting epoch 6 / 10
Train: 173 / 1000 correct (17.30%)
Val: 163 / 1000 correct (16.30%)
Starting epoch 7 / 10
Train: 174 / 1000 correct (17.40%)
Val: 173 / 1000 correct (17.30%)
Starting epoch 8 / 10
Train: 172 / 1000 correct (17.20%)
Val: 163 / 1000 correct (16.30%)
Starting epoch 9 / 10
Train: 173 / 1000 correct (17.30%)
Val: 166 / 1000 correct (16.60%)
Starting epoch 10 / 10
Train: 174 / 1000 correct (17.40%)
Val: 174 / 1000 correct (17.40%)
