In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset

In [2]:
import torchvision.models as models
from PIL import Image

In [3]:
import numpy as np
#from skimage import io, transform
import random
import os
import scipy.misc as misc

In [4]:
# define some constants to use
BATCH_SIZE = 4
TEST_BATCH_SIZE = 256
LOG_INTERVAL = 1
LEARNING_RATE = 0.0001
DROPOUT = 0.2
EPOCHS = 1
DATASET = "../sampleData/"

In [5]:
# arguments for training the model
#kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
kwargs = {}

## Model layer sizes

In [6]:
# convolution blocks
INPUT_SIZE = [224, 224]
INPUT_DEPTH = 3
BLOCK1_SIZE = (np.array(INPUT_SIZE) / 2).astype(int).tolist()
BLOCK1_DEPTH = 64
BLOCK2_SIZE = (np.array(BLOCK1_SIZE) / 2).astype(int).tolist()
BLOCK2_DEPTH = BLOCK1_DEPTH
BLOCK3_SIZE = (np.array(BLOCK2_SIZE) / 2).astype(int).tolist()
BLOCK3_DEPTH = BLOCK2_DEPTH * 2
BLOCK4_SIZE = (np.array(BLOCK3_SIZE) / 2).astype(int).tolist()
BLOCK4_DEPTH = BLOCK3_DEPTH * 2
BLOCK5_SIZE = (np.array(BLOCK4_SIZE) / 2).astype(int).tolist()
BLOCK5_DEPTH = BLOCK4_DEPTH * 2
FC_POOL_SIZE = [4, 4]
FC_POOL_DEPTH = BLOCK5_DEPTH

# fully connected sizes
FC1_SIZE = FC_POOL_SIZE[0]*FC_POOL_SIZE[1]*FC_POOL_DEPTH
OUTPUT_SIZE = 3

# check the sizes
print("convolutional layers")
print("input: ({0}, {1}, {2})".format(INPUT_DEPTH, INPUT_SIZE[0], INPUT_SIZE[1]))
print("block1: ({0}, {1}, {2})".format(BLOCK1_DEPTH, BLOCK1_SIZE[0], BLOCK1_SIZE[1]))
print("block2: ({0}, {1}, {2})".format(BLOCK2_DEPTH, BLOCK2_SIZE[0], BLOCK2_SIZE[1]))
print("block3: ({0}, {1}, {2})".format(BLOCK3_DEPTH, BLOCK3_SIZE[0], BLOCK3_SIZE[1]))
print("block4: ({0}, {1}, {2})".format(BLOCK4_DEPTH, BLOCK4_SIZE[0], BLOCK4_SIZE[1]))
print("block5: ({0}, {1}, {2})".format(BLOCK5_DEPTH, BLOCK5_SIZE[0], BLOCK5_SIZE[1]))
print("fcblock: ({0}, {1}, {2})".format(FC_POOL_DEPTH, FC_POOL_SIZE[0], FC_POOL_SIZE[1]))
print("fully connected layers")
print("fc1: ({0}, {1})".format(FC1_SIZE, OUTPUT_SIZE))
print("output: {0}".format(OUTPUT_SIZE))

convolutional layers
input: (3, 224, 224)
block1: (64, 112, 112)
block2: (64, 56, 56)
block3: (128, 28, 28)
block4: (256, 14, 14)
block5: (512, 7, 7)
fcblock: (512, 4, 4)
fully connected layers
fc1: (8192, 3)
output: 3


## Define the Model
18 Layer residual net model inspired by resnet-18

TODO:
* Make sure the reshapes (.view()) are correctly applied
    * Correct dimensions as each argument (depth, width, height) right now

In [7]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # define a dropout layer
        self.drop = nn.Dropout(p=DROPOUT)
        
        # block1
        self.bn0 = nn.BatchNorm2d(INPUT_DEPTH)
        self.layer1 = nn.Conv2d(INPUT_DEPTH, BLOCK1_DEPTH, kernel_size=7, stride=2, padding=3)
        
        # pooling layer
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        # block2
        self.bn1 = nn.BatchNorm2d(BLOCK1_DEPTH)
        self.layer3 = nn.Conv2d(BLOCK1_DEPTH, BLOCK2_DEPTH, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(BLOCK2_DEPTH) 
        self.block2 = nn.Conv2d(BLOCK2_DEPTH, BLOCK2_DEPTH, kernel_size=3, padding=1) 
    
        # block3
        self.layer7 = nn.Conv2d(BLOCK2_DEPTH, BLOCK3_DEPTH, kernel_size=3, padding=1, stride=2)
        self.bn3 = nn.BatchNorm2d(BLOCK3_DEPTH)
        self.block3 = nn.Conv2d(BLOCK3_DEPTH, BLOCK3_DEPTH, kernel_size=3, padding=1)
        #self.block3_res = nn.Linear(BLOCK2_DEPTH*BLOCK2_SIZE[0]*BLOCK2_SIZE[1], 
        #                            BLOCK3_DEPTH*BLOCK3_SIZE[0]*BLOCK3_SIZE[1])
        
        # block4
        self.layer11 = nn.Conv2d(BLOCK3_DEPTH, BLOCK4_DEPTH, kernel_size=3, padding=1, stride=2)
        self.bn4 = nn.BatchNorm2d(BLOCK4_DEPTH)
        self.block4 = nn.Conv2d(BLOCK4_DEPTH, BLOCK4_DEPTH, kernel_size=3, padding=1)
        #self.block4_res = nn.Linear(BLOCK3_DEPTH*BLOCK3_SIZE[0]*BLOCK3_SIZE[1], 
        #                            BLOCK4_DEPTH*BLOCK4_SIZE[0]*BLOCK4_SIZE[1])
    
        # block5
        self.layer15 = nn.Conv2d(BLOCK4_DEPTH, BLOCK5_DEPTH, kernel_size=3, padding=1, stride=2)
        self.bn5 = nn.BatchNorm2d(BLOCK5_DEPTH)
        self.block5 = nn.Conv2d(BLOCK5_DEPTH, BLOCK5_DEPTH, kernel_size=3, padding=1)
        #self.block5_res = nn.Linear(BLOCK4_DEPTH*BLOCK4_SIZE[0]*BLOCK4_SIZE[1], 
        #                            BLOCK5_DEPTH*BLOCK5_SIZE[0]*BLOCK5_SIZE[1])
        
        # fully connected
        self.bn20 = nn.BatchNorm1d(FC1_SIZE)
        self.layer20 = nn.Linear(FC1_SIZE, OUTPUT_SIZE)

    def forward(self, x):
        
        # input
        f = x
        print(f.size())
        
        # block 1
        f = self.layer1(F.relu(self.bn0(self.drop(f))))
        print(f.size())
        
        # pool
        fres = self.pool(f)
        print(fres.size())
        
        # block 2
        f = self.layer3(F.relu(self.bn1(self.drop(fres))))
        fres = self.block2(F.relu(self.bn2(self.drop(f)))) + fres
        f = self.block2(F.relu(self.bn2(self.drop(fres))))
        fres = self.block2(F.relu(self.bn2(self.drop(f)))) + fres
        print(fres.size())
        
        # block 3
        f = self.layer7(F.relu(self.bn2(self.drop(fres))))
        fres = self.block3(F.relu(self.bn3(self.drop(f)))) #+ \
            #self.block3_res(fres.view(-1, BLOCK2_DEPTH*BLOCK2_SIZE[0]*BLOCK2_SIZE[1]))\
            #.view(BATCH_SIZE, BLOCK3_DEPTH, BLOCK3_SIZE[0], BLOCK3_SIZE[1])
        f = self.block3(F.relu(self.bn3(self.drop(fres))))
        fres = self.block3(F.relu(self.bn3(self.drop(f)))) + fres
        print(fres.size())
        
        # block 4
        f = self.layer11(F.relu(self.bn3(self.drop(fres))))
        fres = self.block4(F.relu(self.bn4(self.drop(f)))) #+ \
            #self.block4_res(fres.view(-1, BLOCK3_DEPTH*BLOCK3_SIZE[0]*BLOCK3_SIZE[1]))\
            #.view(BATCH_SIZE, BLOCK4_DEPTH, BLOCK4_SIZE[0], BLOCK4_SIZE[1])
        f = self.block4(F.relu(self.bn4(self.drop(fres))))
        fres = self.block4(F.relu(self.bn4(self.drop(f)))) + fres
        print(fres.size())
        
        # block 5
        f = self.layer15(F.relu(self.bn4(self.drop(fres))))
        f = self.block5(F.relu(self.bn5(self.drop(f)))) #+ \
            #self.block5_res(fres.view(-1, BLOCK4_DEPTH*BLOCK4_SIZE[0]*BLOCK4_SIZE[1]))\
            #.view(BATCH_SIZE, BLOCK5_DEPTH, BLOCK5_SIZE[0], BLOCK5_SIZE[1])
        f = self.block5(F.relu(self.bn5(self.drop(f))))
        f = self.block5(F.relu(self.bn5(self.drop(f))))
        print(f.size())
        
        # pool 
        f = self.pool(f)
        print(f.size())
        
        # fc
        print(f.size())
        print(f.view(BATCH_SIZE, FC1_SIZE).size())
        f = self.layer20(F.relu(self.bn20(f.view(BATCH_SIZE, FC1_SIZE))))
        print(f.size())
        
        # return the softmax of the probability
        return F.log_softmax(x)

## Instantiate the Model

In [8]:
model = Net()

# if we want to use gpu: 
#model.cuda()

## Create the data loaders
Load in the training data and test data from batches

TODO:
* configure the correct batch sizes

In [9]:
class MegaFaceDataset(Dataset):

    """
    All datasets are subclasses of torch.utils.data.Dataset i.e, they have
    __getitem__ and __len__ methods implemented.
    Hence, they can all be passed to a torch.utils.data.DataLoader which can
    load multiple samples parallelly using torch.multiprocessing workers

    Source: http://pytorch.org/docs/master/torchvision/datasets.html

    If you want to use "ImageFolder", the data in the root folder must be
    arranged in this way:

    root/dog/xxx.png
    root/dog/xxy.png
    root/dog/xxz.png

    root/cat/123.png
    root/cat/nsdf3.png
    root/cat/asd932_.png
    """

    def __init__(self, root_dir, transform = None):
        self.transform = transform #tranform the input (image augmentation)
        self.root_dir = root_dir
        self.image_names = os.listdir(root_dir)

    def __getitem__(self, index):
        """__getitem__ supports the indexing such that dataset[i] can be used to
        get the ith sample"""

        img_name = os.path.join(self.root_dir, self.image_names[index])
        image = np.array(Image.open(img_name))
        label = int(img_name.split("_")[1].split(".")[0]) - 1
        #print("index: {2}, img_name: {0}, label: {1}".format(img_name, label, index+1))
        oh_label = np.zeros(OUTPUT_SIZE, dtype=int)
        oh_label[label] = 1
        #print("image shape: {0}, label shape: {1}".format(image.shape, oh_label.shape))
        sample = {'image': image, 'label': oh_label}

        if self.transform:
            sample = self.transform(sample)
            # image = self.transform(image)
            # label = self.transform(label)

        return sample

    def __len__(self):
        """__len__ returns the size of the dataset. Use by calling len(dataset)"""
        return len(self.image_names)

In [19]:
class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, label = sample['image'], sample['label']
        image = misc.imresize(image, (INPUT_SIZE[0], INPUT_SIZE[1]))
        image = image.transpose((2, 0, 1))
        #print(image.shape)
        #print("input size: ({0}, {1}, {2})".format(INPUT_DEPTH, INPUT_SIZE[0], INPUT_SIZE[1]))
        image = torch.from_numpy(image).float()
        label = torch.from_numpy(label).long()
        sample = {'image': image, 'label': label}
        return sample

In [20]:
# read the data
megaface_dataset = MegaFaceDataset(root_dir=DATASET,
                                        transform=ToTensor())

train_loader = DataLoader(megaface_dataset, batch_size=BATCH_SIZE,
                        shuffle=True, num_workers=2)

In [21]:
#for i in train_loader:
#    print("loaded")

In [22]:
#transform = transforms.Compose(
#    [transforms.ToTensor()])

#trainset = torchvision.datasets.STL10(root='./data', split='train',
#                                        download=True, transform=transform)
#train_loader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
#                                          shuffle=True, num_workers=2)

#testset = torchvision.datasets.STL10(root='./data', split='test',
#                                       download=True, transform=transform)
#test_loader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
#                                         shuffle=False, num_workers=2)

#classes = ('plane', 'car', 'bird', 'cat',
#           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [23]:
# train_loader = torch.utils.data.DataLoader(
#     datasets.MNIST('../data', train=True, download=True,
#                    transform=transforms.Compose([
#                        transforms.ToTensor()
#                    ])),
#     batch_size=BATCH_SIZE, shuffle=True, **kwargs)
# test_loader = torch.utils.data.DataLoader(
#     datasets.MNIST('../data', train=False, transform=transforms.Compose([
#                        transforms.ToTensor()
#                    ])),
# batch_size=BATCH_SIZE, shuffle=True, **kwargs)


# print out the data to check
#for batch_idx, (data, target) in enumerate(train_loader):
#    print(data)

## Optimizer

In [24]:
# define the optimizer
"""
params (iterable) – iterable of parameters to optimize or dicts defining parameter groups
lr (float, optional) – learning rate (default: 1e-3)
betas (Tuple[float, float], optional) – coefficients used for computing running averages of gradient and its square (default: (0.9, 0.999))
eps (float, optional) – term added to the denominator to improve numerical stability (default: 1e-8)
weight_decay (float, optional) – weight decay (L2 penalty) (default: 0)
"""
optimizer = optim.Adam(model.parameters())

## Specifiy what training will take place

In [25]:
# define training function
def train(epoch, model):
    """
        Train the model
        Inputs:
            epoch - number of the current epoch
            
        Outputs:
            
    """
    model.train()
    for batch_idx, data in enumerate(train_loader):
        print("batch index: {0}, data: {1}, target: {2}".format(batch_idx, data['image'].shape, data['label'].shape))
        image, target = Variable(data['image']), Variable(data['label'])
        optimizer.zero_grad()
        output = model(image)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % LOG_INTERVAL == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))

## How will we test the model

In [26]:
def test(model):
    """
        Test the model's accuracy
        Inputs:
            None
        Outputs: 
            Prints the test output results
    """
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

## Run the Training & Testing

In [27]:
for epoch in range(1, EPOCHS+1):
    train(epoch, model)
    test(model)

batch index: 0, data: torch.Size([4, 3, 224, 224]), target: torch.Size([4, 3])
torch.Size([4, 3, 224, 224])
torch.Size([4, 64, 112, 112])
torch.Size([4, 64, 56, 56])
torch.Size([4, 64, 56, 56])
torch.Size([4, 128, 28, 28])
torch.Size([4, 256, 14, 14])
torch.Size([4, 512, 7, 7])
torch.Size([4, 512, 4, 4])
torch.Size([4, 512, 4, 4])
torch.Size([4, 8192])
torch.Size([4, 3])


RuntimeError: invalid argument 3: only batches of spatial targets supported (3D tensors) but got targets of dimension: 2 at /opt/conda/conda-bld/pytorch_1503965122592/work/torch/lib/THNN/generic/SpatialClassNLLCriterion.c:40

# Some extra cells to print testing stuff

In [28]:
print(model.parameters())

<generator object Module.parameters at 0x7f21f2de95c8>
