# the (in) famous MNIST

<img src='digit.gif'>

In [None]:
# Create, train and test a FC NN for MNIST images


# import the modules
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


In [None]:
# Setup user defined requirements and dataset + loader
input_size = 28 * 28
hidden1 = 200
hidden2 = 100
num_classes = 10
batch_size = 100
learning_rate = 0.001
num_epochs = 10

train_dataset = dsets.MNIST(root='./data/',
                         train=True,
                         transform=transforms.ToTensor(),
                         download = True)


test_dataset = dsets.MNIST(root='./data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download = True)

# Dataloader 

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                           batch_size=batch_size,
                                           shuffle=False)

In [None]:
class Net(nn.Module):
    def __init__(self, input_size, hidden1, hidden2, num_classes):
        super(Net, self).__init__()
        
        self.fc1 = nn.Linear(input_size, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)
        self.relu = nn.ReLU() 
        
    def forward(self, x):
        
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        
        return out
        

In [None]:
net = Net(input_size, hidden1, hidden2, num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [None]:
for epoch in range(num_epochs):
    for i, (img, labels) in enumerate(train_loader):
        
        img = Variable(img.view(-1, 28*28))
        labels = Variable(labels)
        
        output = net(img)
        
        optimizer.zero_grad()
        
        loss = criterion(output, labels)
        loss.backward()
        
        optimizer.step()
        
        if (i+1)%100 == 0:
            print('Epoch [%d/%d], Step[%d/%d], Loss: %0.4f' 
                  %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

In [None]:
total=0
correct=0
for i, (img, labels) in enumerate(test_loader):
    img = Variable(img.view(-1, 28 * 28))
    
    output = net(img)
    
    _, predicted = torch.max(output.data,1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
    
print('Accuracy of our FC n/w on 10000 test images: %d %%'
     %(100 * correct/total))

# Convolutional Networks

 <img src='./convnet.png'>

# Conv mechanics

<img src='./convanim.gif'>

# Many filters

<img src='conv2filt.png'>

<img src='stride1.gif'>
<img src='stride2.gif'>

# Padding

<img src='stridepad.gif'>

# Implementation

<img src='convnet2.png'>

In [None]:
import torch 
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


# Hyper Parameters
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST Dataset
train_dataset = dsets.MNIST(root='./data/',
                            train=True, 
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data/',
                           train=False, 
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, 
                                          shuffle=False)

In [None]:
# CNN Model (2 conv layer)
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc = nn.Linear(7*7*32, 10)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

    

In [1]:
cnn = CNN()


# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)

# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images)
        labels = Variable(labels)
        
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f' 
                   %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

# Test the Model
cnn.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images)
    outputs = cnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

Epoch [1/5], Iter [100/600] Loss: 0.1554
Epoch [1/5], Iter [200/600] Loss: 0.0746
Epoch [1/5], Iter [300/600] Loss: 0.0373
Epoch [1/5], Iter [400/600] Loss: 0.0765
Epoch [1/5], Iter [500/600] Loss: 0.0813
Epoch [1/5], Iter [600/600] Loss: 0.0739
Epoch [2/5], Iter [100/600] Loss: 0.0505
Epoch [2/5], Iter [200/600] Loss: 0.0169
Epoch [2/5], Iter [300/600] Loss: 0.0317
Epoch [2/5], Iter [400/600] Loss: 0.0294
Epoch [2/5], Iter [500/600] Loss: 0.1262
Epoch [2/5], Iter [600/600] Loss: 0.0164
Epoch [3/5], Iter [100/600] Loss: 0.0806
Epoch [3/5], Iter [200/600] Loss: 0.0496
Epoch [3/5], Iter [300/600] Loss: 0.0259
Epoch [3/5], Iter [400/600] Loss: 0.0649
Epoch [3/5], Iter [500/600] Loss: 0.1099
Epoch [3/5], Iter [600/600] Loss: 0.0525
Epoch [4/5], Iter [100/600] Loss: 0.0059
Epoch [4/5], Iter [200/600] Loss: 0.0632
Epoch [4/5], Iter [300/600] Loss: 0.0114
Epoch [4/5], Iter [400/600] Loss: 0.0152
Epoch [4/5], Iter [500/600] Loss: 0.0084
Epoch [4/5], Iter [600/600] Loss: 0.1671
Epoch [5/5], Ite

In [None]:
import torch 
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable

# Image Preprocessing 
'''
transform = transforms.Compose([
    transforms.Scale(40),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])
'''

# CIFAR-10 Dataset
train_dataset = dsets.MNIST(root='./data/',
                               train=True, 
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = dsets.MNIST(root='./data/',
                              train=False, 
                              transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100, 
                                          shuffle=False)

In [None]:
# 3x3 Convolution
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                     stride=stride, padding=1, bias=False)

# Residual Block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

# ResNet Module
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(1, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)
        self.avg_pool = nn.AvgPool2d(7)
        self.fc = nn.Linear(64, num_classes)
        
    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
 

In [10]:
   
resnet = ResNet(ResidualBlock, [2, 2, 2, 2])


# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
lr = 0.001
optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)

# Training 
for epoch in range(80):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images)
        labels = Variable(labels)
        
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f" %(epoch+1, 80, i+1, 500, loss.data[0]))

    # Decaying Learning Rate
    if (epoch+1) % 20 == 0:
        lr /= 3
        optimizer = torch.optim.Adam(resnet.parameters(), lr=lr) 

# Test
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images)
    outputs = resnet(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Accuracy of the model on the test images: %f %%' % (100 * correct / total))

print('Accuracy of the model on the test images: %f %%' % (100 * correct / total))

Epoch [1/80], Iter [100/500] Loss: 0.1766
Epoch [1/80], Iter [200/500] Loss: 0.1170
Epoch [1/80], Iter [300/500] Loss: 0.1191
Epoch [1/80], Iter [400/500] Loss: 0.0424
Epoch [1/80], Iter [500/500] Loss: 0.0236
Epoch [1/80], Iter [600/500] Loss: 0.0624
Epoch [2/80], Iter [100/500] Loss: 0.0408
Epoch [2/80], Iter [200/500] Loss: 0.0399
Epoch [2/80], Iter [300/500] Loss: 0.0160
Epoch [2/80], Iter [400/500] Loss: 0.0260
Epoch [2/80], Iter [500/500] Loss: 0.0553
Epoch [2/80], Iter [600/500] Loss: 0.0301
Epoch [3/80], Iter [100/500] Loss: 0.0219
Epoch [3/80], Iter [200/500] Loss: 0.0517
Epoch [3/80], Iter [300/500] Loss: 0.0334
Epoch [3/80], Iter [400/500] Loss: 0.0823
Epoch [3/80], Iter [500/500] Loss: 0.0209
Epoch [3/80], Iter [600/500] Loss: 0.0066
Epoch [4/80], Iter [100/500] Loss: 0.0032
Epoch [4/80], Iter [200/500] Loss: 0.0377
Epoch [4/80], Iter [300/500] Loss: 0.0380
Epoch [4/80], Iter [400/500] Loss: 0.0084
Epoch [4/80], Iter [500/500] Loss: 0.0225
Epoch [4/80], Iter [600/500] Loss:

Epoch [33/80], Iter [100/500] Loss: 0.0000
Epoch [33/80], Iter [200/500] Loss: 0.0001
Epoch [33/80], Iter [300/500] Loss: 0.0000
Epoch [33/80], Iter [400/500] Loss: 0.0000
Epoch [33/80], Iter [500/500] Loss: 0.0007
Epoch [33/80], Iter [600/500] Loss: 0.0001
Epoch [34/80], Iter [100/500] Loss: 0.0000
Epoch [34/80], Iter [200/500] Loss: 0.0001
Epoch [34/80], Iter [300/500] Loss: 0.0001
Epoch [34/80], Iter [400/500] Loss: 0.0000
Epoch [34/80], Iter [500/500] Loss: 0.0025
Epoch [34/80], Iter [600/500] Loss: 0.0005
Epoch [35/80], Iter [100/500] Loss: 0.0000
Epoch [35/80], Iter [200/500] Loss: 0.0000
Epoch [35/80], Iter [300/500] Loss: 0.0000
Epoch [35/80], Iter [400/500] Loss: 0.0000
Epoch [35/80], Iter [500/500] Loss: 0.0000
Epoch [35/80], Iter [600/500] Loss: 0.0000
Epoch [36/80], Iter [100/500] Loss: 0.0000
Epoch [36/80], Iter [200/500] Loss: 0.0090
Epoch [36/80], Iter [300/500] Loss: 0.0006
Epoch [36/80], Iter [400/500] Loss: 0.0001
Epoch [36/80], Iter [500/500] Loss: 0.0000
Epoch [36/8

Epoch [64/80], Iter [600/500] Loss: 0.0000
Epoch [65/80], Iter [100/500] Loss: 0.0000
Epoch [65/80], Iter [200/500] Loss: 0.0000
Epoch [65/80], Iter [300/500] Loss: 0.0000
Epoch [65/80], Iter [400/500] Loss: 0.0000
Epoch [65/80], Iter [500/500] Loss: 0.0000
Epoch [65/80], Iter [600/500] Loss: 0.0000
Epoch [66/80], Iter [100/500] Loss: 0.0000
Epoch [66/80], Iter [200/500] Loss: 0.0000
Epoch [66/80], Iter [300/500] Loss: 0.0000
Epoch [66/80], Iter [400/500] Loss: 0.0000
Epoch [66/80], Iter [500/500] Loss: 0.0000
Epoch [66/80], Iter [600/500] Loss: 0.0000
Epoch [67/80], Iter [100/500] Loss: 0.0000
Epoch [67/80], Iter [200/500] Loss: 0.0000
Epoch [67/80], Iter [300/500] Loss: 0.0000
Epoch [67/80], Iter [400/500] Loss: 0.0001
Epoch [67/80], Iter [500/500] Loss: 0.0000
Epoch [67/80], Iter [600/500] Loss: 0.0000
Epoch [68/80], Iter [100/500] Loss: 0.0000
Epoch [68/80], Iter [200/500] Loss: 0.0000
Epoch [68/80], Iter [300/500] Loss: 0.0000
Epoch [68/80], Iter [400/500] Loss: 0.0000
Epoch [68/8