In [1]:
import torch as tc
import numpy as np
from torch.autograd import Variable
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 1. CNN

![optional caption text](Figures/CNNArchitecture.jpg)
(http://parse.ele.tue.nl/cluster/2/CNNArchitecture.jpg)

## 1.2 Convolution

Convolve the filter with the image (i.e., slide over the image spatially, computing dot products).

We look at only a small portion of the image at a time; in other words, we view one **patch** of the image through a **filter (also called a "kernel" or "feature detector")**.
Each element of a filter is a weight, so you can think of a filter as a **weight vector for a patch.**

We slide the filter across the image one step at a time; the distance it moves at each step is called the **stride.**

We can add **padding** by surrounding the image with a border of constant values. It is often called **zero-padding** because zero is the value normally used for the padding.

Applying a filter to an image produces a **feature map (also called an activation map).**




After a convolutional layer, apply a **pooling layer** to reduce the amount of information. There are two main kinds of pooling:
1. Max pooling
2. Average pooling.

### The advantages of using a convolutional neural net:
1. The weight matrix is smaller than in a fully connected net.
2. It is more flexible in handling images.
![optional](Figures/cnn1.png)

In [271]:
class Net(tc.nn.Module):
    """Small two-convolution CNN that maps a batch of images to 10 log-probabilities.

    Pipeline: conv(1->10, k=5) -> max-pool(2) -> ReLU -> conv(10->20, k=5)
    -> max-pool(2) -> ReLU -> flatten -> linear(320->10) -> log-softmax.

    The fully connected layer fixes the expected input to 1x28x28 images
    (see the size derivation in ``__init__``).
    """

    def __init__(self):
        super(Net, self).__init__()  # on Python 3, super().__init__() is equivalent
        self.conv1 = tc.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = tc.nn.Conv2d(10, 20, kernel_size=5)
        self.mp = tc.nn.MaxPool2d(2)

        self.relu = tc.nn.functional.relu
        # Why 320 input features: for a 28x28 input the spatial size goes
        # 28 -> 24 (conv1, k=5) -> 12 (pool) -> 8 (conv2, k=5) -> 4 (pool),
        # and conv2 emits 20 channels, so the flattened size is 20 * 4 * 4 = 320.
        # If the architecture changes, print x.size() just before the flatten in
        # forward() to read off the new value (a wrong value raises a size-mismatch
        # RuntimeError that also reports the actual flattened size).
        self.fc = tc.nn.Linear(320, 10)
        self.log_softmax = tc.nn.functional.log_softmax

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 10) for input batch ``x``."""
        in_size = x.size(0)  # batch size, kept so the flatten below is per-sample
        x = self.conv1(x)
        x = self.relu(self.mp(x))
        x = self.conv2(x)
        x = self.relu(self.mp(x))
        x = x.view(in_size, -1)  # flatten everything except the batch dimension
        x = self.fc(x)
        # dim=1 is the class dimension; omitting dim is deprecated and emits a warning.
        return self.log_softmax(x, dim=1)

In [257]:
# Shape of one input batch: dim 0 = batch size, dim 1 = number of channels,
# dims 2-3 = the 28x28 image.
# NOTE(review): no earlier cell visible here defines `data` at notebook level -
# this looks like leftover interactive state; confirm it survives Restart & Run All.
data.size()

torch.Size([64, 1, 28, 28])

In [272]:
# Shared preprocessing: convert to a tensor, then standardise with the
# (mean,), (std,) pair given to Normalize - presumably the MNIST training-set
# statistics; verify if the dataset changes.
_mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.13066062, ), (0.30810776, )),
])

# download=True fetches the files when they are not already under `root`,
# so this cell also works on a fresh checkout.
train_dataset = datasets.MNIST(root="./Data/MNIST_Data/", train=True,
                               transform=_mnist_transform, download=True)
test_dataset = datasets.MNIST(root="./Data/MNIST_Data/", train=False,
                              transform=_mnist_transform, download=True)

# Shuffle only the training data; evaluation order does not matter.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64, num_workers=2)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=64)

In [273]:
# Seed torch's global RNG so that weight initialisation (and anything else drawn
# from it afterwards) is repeatable from run to run.
tc.manual_seed(0)

model = Net()
# CHOICE OF CRITERION IS VERY IMPORTANT!!!
# Net.forward already returns log-probabilities (log_softmax), so NLLLoss is the
# matching criterion. CrossEntropyLoss would apply log_softmax a second time; that
# yields the same value but hides what the model actually outputs.
# BCELoss does not work here: it expects probabilities and targets of the same shape.
criterion = tc.nn.NLLLoss()
optimizer = tc.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

In [274]:
def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``train_loader``. Prints the current batch loss every 10th batch.

    Args:
        epoch: Epoch index; only used in the progress message.
    """
    model.train()
    n_samples = len(train_loader.dataset)
    n_batches = len(train_loader)
    for batch_idx, (data, label) in enumerate(train_loader):
        # Tensors are fed directly: the Variable wrapper has been a no-op since
        # PyTorch 0.4.
        pred_label = model(data)
        loss = criterion(pred_label, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            # loss is a 0-dim tensor; .item() extracts the Python float
            # (indexing it with [0] raises IndexError on current PyTorch).
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
                  .format(epoch, batch_idx * len(data), n_samples,
                          100. * batch_idx / n_batches, loss.item()))

In [275]:
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, label in test_loader:
        data, label = Variable(data, volatile=True), Variable(label)
        pred_label = model(data)
        test_loss += criterion(pred_label, label).data[0]
#         test_loss += tc.nn.functional.nll_loss(pred_label, label).data[0]
                
        pred = pred_label.data.max(1, keepdim=True)[1]
        correct += pred.eq(label.data.view_as(pred)).cpu().sum()
        
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

In [276]:
# Alternate one training pass with one evaluation pass for a fixed number of epochs.
NUM_EPOCHS = 2
for epoch in range(NUM_EPOCHS):
    train(epoch)
    test()




Test set: Average loss: 0.0017, Accuracy: 9664/10000 (97%)




Test set: Average loss: 0.0013, Accuracy: 9734/10000 (97%)



# 2. Advanced CNN

## 2.1 Inception modules

When you do not know which filter size to choose, **use filters of all the different sizes at the same time.**

We use a $1\times1$ filter because **it greatly reduces the number of operations.**

<img align="left" src="./Figures/noreduce.png" width=450>
<img align="left" src="./Figures/reduced1.png" width=450>

<!-- ![optional](./Figures/noreduce.png)![optional](./Figures/reduced1.png) -->

### Let's make an inception module
<img align="center" src="./Figures/inception_module.png"/>

In [42]:
class InceptionA(tc.nn.Module):
    """Inception-style block: four parallel branches joined along the channel axis.

    Every branch keeps the spatial size of its input (stride 1, padding matched
    to the kernel size), so the outputs can be concatenated. The result has
    24 + 16 + 24 + 24 = 88 channels, ordered as
    [pooling branch, 1x1 branch, 5x5 branch, 3x3 branch].

    Args:
        in_channels: Number of channels of the incoming feature map.
    """

    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        conv = tc.nn.Conv2d

        # Pooling branch: the 3x3 average pool itself is applied in forward().
        self.branch_pool_1 = conv(in_channels, 24, kernel_size=1)

        # Plain 1x1 branch.
        self.branch1x1 = conv(in_channels, 16, kernel_size=1)

        # 5x5 branch: 1x1 convolution to 16 channels, then the 5x5 convolution.
        self.branch5x5_1 = conv(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = conv(16, 24, kernel_size=5, padding=2)

        # 3x3 branch: 1x1 convolution to 16 channels, then two stacked 3x3 convolutions.
        self.branch3x3_1 = conv(in_channels, 16, kernel_size=1)
        self.branch3x3_2 = conv(16, 24, kernel_size=3, padding=1)
        self.branch3x3_3 = conv(24, 24, kernel_size=3, padding=1)

    def forward(self, x):
        """Apply all four branches to ``x`` and concatenate them on dim 1."""
        pooled = tc.nn.functional.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        from_pool = self.branch_pool_1(pooled)

        from_1x1 = self.branch1x1(x)

        from_5x5 = self.branch5x5_1(x)
        from_5x5 = self.branch5x5_2(from_5x5)

        from_3x3 = self.branch3x3_1(x)
        from_3x3 = self.branch3x3_2(from_3x3)
        from_3x3 = self.branch3x3_3(from_3x3)

        return tc.cat([from_pool, from_1x1, from_5x5, from_3x3], 1)

In [43]:
class Net(tc.nn.Module):
    """CNN with two InceptionA blocks that maps image batches to 10 log-probabilities.

    Pipeline: conv(1->10, k=5) -> max-pool(2) -> ReLU -> InceptionA(10)
    -> conv(88->20, k=5) -> max-pool(2) -> ReLU -> InceptionA(20)
    -> flatten -> linear(1408->10) -> log-softmax.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = tc.nn.Conv2d(1, 10, kernel_size=5)
        # 88 input channels = the concatenated output width of an InceptionA block.
        self.conv2 = tc.nn.Conv2d(88, 20, kernel_size=5)

        self.incept1 = InceptionA(in_channels=10)
        self.incept2 = InceptionA(in_channels=20)

        self.mp = tc.nn.MaxPool2d(2)
        # 1408 = 88 channels * 4 * 4 for a 1x28x28 input:
        # 28 -> 24 (conv1) -> 12 (pool) -> 8 (conv2) -> 4 (pool); the inception
        # blocks keep the spatial size.
        self.fc = tc.nn.Linear(1408, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 10) for input batch ``x``."""
        in_size = x.size(0)  # batch size, kept so the flatten below is per-sample
        x = tc.nn.functional.relu(self.mp(self.conv1(x)))
        x = self.incept1(x)
        x = tc.nn.functional.relu(self.mp(self.conv2(x)))
        x = self.incept2(x)
        x = x.view(in_size, -1)  # flatten everything except the batch dimension
        x = self.fc(x)
        # dim=1 is the class dimension; omitting dim is deprecated and emits a warning.
        return tc.nn.functional.log_softmax(x, dim=1)
        

In [44]:
# One preprocessing pipeline for both splits: tensor conversion followed by
# standardisation with the (mean,), (std,) pair handed to Normalize.
_preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.13066062, ), (0.30810776, )),
])

# Only the training split asks for a download; the test split reads from the same root.
train_dataset = datasets.MNIST(
    root="./Data/MNIST_Data/",
    train=True,
    transform=_preprocess,
    download=True,
)
test_dataset = datasets.MNIST(
    root="./Data/MNIST_Data/",
    train=False,
    transform=_preprocess,
)

# Batches of 64, two worker processes each; only the training data is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False, num_workers=2)

In [45]:
# Seed torch's global RNG so that weight initialisation (and anything else drawn
# from it afterwards) is repeatable from run to run.
tc.manual_seed(0)

model = Net()
# CHOICE OF CRITERION IS VERY IMPORTANT!!!
# Net.forward already returns log-probabilities (log_softmax), so NLLLoss is the
# matching criterion. CrossEntropyLoss would apply log_softmax a second time; that
# yields the same value but hides what the model actually outputs.
# BCELoss does not work here: it expects probabilities and targets of the same shape.
criterion = tc.nn.NLLLoss()
optimizer = tc.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

In [46]:
def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``train_loader``. Prints the current batch loss every 10th batch.

    Args:
        epoch: Epoch index; only used in the progress message.
    """
    model.train()
    n_samples = len(train_loader.dataset)
    n_batches = len(train_loader)
    for batch_idx, (data, label) in enumerate(train_loader):
        # Tensors are fed directly: the Variable wrapper has been a no-op since
        # PyTorch 0.4.
        pred_label = model(data)
        loss = criterion(pred_label, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            # loss is a 0-dim tensor; .item() extracts the Python float
            # (indexing it with [0] raises IndexError on current PyTorch).
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'
                  .format(epoch, batch_idx * len(data), n_samples,
                          100. * batch_idx / n_batches, loss.item()))

In [47]:
def test():
    model.eval()
    correct = 0
    test_loss = 0
    for data, label in test_loader:
        data, label = Variable(data, volatile=True), Variable(label)
        pred_label = model(data)
        test_loss += criterion(pred_label, label).data[0]
        
        pred = pred_label.data.max(1, keepdim=True)[1]
        correct += pred.eq(label.data.view_as(pred)).sum()
        
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))

In [48]:
# Alternate one training pass with one evaluation pass for a fixed number of epochs.
NUM_EPOCHS = 2
for epoch in range(NUM_EPOCHS):
    train(epoch)
    test()




Test set: Average loss: 0.0018, Accuracy: 9641/10000 (96%)




Test set: Average loss: 0.0013, Accuracy: 9739/10000 (97%)

