# Sensing and Prediction Learning Task 1:
## MNIST Image Classification using PyTorch Convolutional Neural Network

## 0. Import libraries

In [1]:
import numpy as np
import pandas as pd

import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision.utils import make_grid
from torch.utils.data import TensorDataset, DataLoader

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline

import os
# List the entries of the input directory so the expected CSV files are visible.
input_files = os.listdir("./input")
print(input_files)

['test.csv', 'train.csv']


## 1. Data Acquisition
#### Load the training dataset into Pandas DataFrame

In [2]:
# Read the training and test CSV files (in that order) into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('./input/train.csv', './input/test.csv')
)

# Print the (rows, columns) of the training frame, then preview its first rows.
print(train.shape)
train.head()

(42000, 785)


Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Print the (rows, columns) of the test frame, then preview its first rows.
n_rows, n_cols = test.shape
print((n_rows, n_cols))
test.head()

(28000, 784)


Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Split the training dataset to features and labels

In [4]:
# Column 0 is taken as the label; every column after it is a feature.
label_position = 0
y_train_df = train.iloc[:, label_position]
x_train_df = train.iloc[:, label_position + 1:]

print(x_train_df.shape, y_train_df.shape)

(42000, 784) (42000,)


#### Convert the data to numeric arrays and normalize the features

In [5]:
# Convert the frames to NumPy arrays and divide the features by 255
# (the normalization step announced in the heading above).
scale = 255.0
x_train = np.asarray(x_train_df) / scale
y_train = np.asarray(y_train_df)

x_test = np.asarray(test) / scale

#### Reshape the test and training dataset to (28,28) image arrays

In [6]:
# Turn each flat row into a (1, 28, 28) array; -1 lets NumPy infer the sample count.
image_shape = (-1, 1, 28, 28)
x_train = x_train.reshape(image_shape)
x_test = x_test.reshape(image_shape)

x_train.shape, x_test.shape

((42000, 1, 28, 28), (28000, 1, 28, 28))

#### Split the training dataset into training and validation datasets. Make sure you understand what the training and validation datasets are, and why a validation dataset is used.

In [9]:
# TODO: split the dataset
# hints: use train_test_split() from sklearn.model_selection
# A fixed seed keeps the split reproducible across runs.
random_seed = 234
split = train_test_split(
    x_train,
    y_train,
    test_size=0.4,  # hold out 40% of the samples for validation
    random_state=random_seed,
)
x_train, x_val, y_train, y_val = split


x_train.shape, x_val.shape, y_train.shape, y_val.shape

((25200, 1, 28, 28), (16800, 1, 28, 28), (25200,), (16800,))

#### Helper function to display an array of images

In [None]:
def display(rows, columns, images, values=None, predictions=None):
    """Plot a ``rows`` x ``columns`` grid of images with optional titles.

    Args:
        rows: Number of rows in the grid.
        columns: Number of columns in the grid.
        images: Indexable collection holding at least ``rows * columns``
            images that ``imshow`` can draw.
        values: Optional sequence of true labels, indexed like ``images``.
            ``None`` or an empty sequence omits the "Value:" title line.
        predictions: Optional sequence of predicted labels, indexed like
            ``images``. ``None`` or an empty sequence omits the "Pred:"
            title line.

    When neither ``values`` nor ``predictions`` is supplied, the images are
    drawn with empty titles.
    """
    # ``None`` defaults avoid shared mutable default lists. len() is used
    # instead of truthiness so that NumPy arrays are accepted as well.
    has_values = values is not None and len(values) > 0
    has_predictions = predictions is not None and len(predictions) > 0

    fig = plt.figure(figsize=(9, 11))

    for i in range(rows * columns):
        ax = fig.add_subplot(rows, columns, i + 1)

        # Build the title from whichever label sequences were provided.
        # With neither supplied the title stays empty (previously this case
        # indexed the empty predictions and raised IndexError).
        title_lines = []
        if has_values:
            title_lines.append("Value:" + str(values[i]))
        if has_predictions:
            title_lines.append("Pred:" + str(predictions[i]))

        ax.set_title("\n".join(title_lines))
        ax.imshow(images[i])

    plt.show()
    
# Draw nine random sample positions from 1..999 (repeats are possible).
idx = np.random.randint(1, 1000, size=9)

# Take channel 0 of each chosen sample so every image is a 2-D array,
# together with the matching labels.
images = x_train[idx, 0]
values = y_train[idx]

display(rows=3, columns=3, images=images, values=values, predictions=[])

#### Using the GPU

In [None]:
# Use the first CUDA device when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print(device)

## 2. Define the Convolutional Neural Network Model
#### This link will help you to build the model: https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html#sphx-glr-beginner-blitz-neural-networks-tutorial-py

In [None]:
"""
  In this part, you will design a simple CNN model along with your own 
  convolutional network for a more realistic dataset
"""
# TODO: define the layers and the forward function
class CNN(nn.Module):
    """Convolutional classifier that maps 1-channel images to 10 scores.

    The network is four stages of Conv2d -> BatchNorm2d -> ReLU followed by
    two linear layers. The first linear layer expects 2*2*64 features, which
    is what the convolutions produce for a 28x28 input (spatial size
    28 -> 14 -> 7 -> 4 -> 2).

    ``forward`` returns the raw output of the last linear layer; no softmax
    is applied.
    """

    def __init__(self):
        # nn.Module must be initialised before any sub-module is assigned;
        # without this call the first assignment below raises AttributeError.
        super().__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1,
                      out_channels=16,
                      kernel_size=3,
                      stride=2,
                      padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, 2, 1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, 3, 2, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(64, 64, 2, 2, 0),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        # NOTE(review): there is no activation between mlp1 and mlp2, so the
        # two layers compose into a single affine map. Kept as-is to preserve
        # the model's behaviour.
        self.mlp1 = nn.Linear(2 * 2 * 64, 100)
        self.mlp2 = nn.Linear(100, 10)

    def forward(self, x):
        """Return a (batch, 10) tensor of class scores for input ``x``.

        Args:
            x: Tensor of shape (batch, 1, H, W); H and W must reduce to 2x2
                after the four convolutions (28x28 does).
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = self.mlp1(x.view(x.size(0), -1))
        x = self.mlp2(x)
        return x
    
# Build the model and move its parameters and buffers onto `device`.
# Module.to() returns the module itself, so `net` is the same object.
net = CNN().to(device)

net

#### Define the optimizer and loss function

In [None]:
"""
  Define the optimizer, you can check the documentation to define optimizer and 
  criterion and try to understand why we use optimizer
"""

# TODO: define the loss function and the optimizer
# nn.CrossEntropyLoss() can serve as the criterion and optim.SGD() as the optimizer

# Cross-entropy loss computed on the network's class scores.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over all of the network's parameters.
sgd_settings = {"lr": 0.001, "momentum": 0.9}
optimizer = optim.SGD(net.parameters(), **sgd_settings)


#### Define our tensors and dataloader

In [None]:
# Wrap the NumPy training arrays as tensors: float features, long (int64) labels.
a = torch.from_numpy(x_train).type(torch.FloatTensor)
b = torch.from_numpy(y_train).type(torch.LongTensor)

# NOTE(review): this rebinds `train`, which earlier held the training
# DataFrame, so cells that read that DataFrame cannot be re-run afterwards.
train = torch.utils.data.TensorDataset(a, b)

# Reshuffle the samples at every epoch so SGD does not see identical
# mini-batches in identical order each time.
train_loader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True)

## 3. Model Training and Validation
#### Train for 10 epochs: write the training procedure
#### This link will help you to build the training procedures: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py

In [None]:
%%time

# Seed PyTorch's random number generator.
# NOTE(review): in this notebook's cell order `net` is constructed before this
# line, so the seed does not influence the initial weights.
torch.manual_seed(1234)

# Make sure the model is in training mode, even if an earlier cell left it
# in evaluation mode.
net.train()

for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear gradients left over from the previous step before
        # backpropagating.
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss over the last 500 mini-batches rather than the
        # loss of a single batch, then start a fresh window.
        running_loss += loss.item()
        if i % 500 == 499:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 500))
            running_loss = 0.0

print('Finished Training')


#### Try the model on the validation dataset

In [None]:
# Validate the trained model on the held-out validation split.
x = torch.from_numpy(x_val).type(torch.FloatTensor)
y = torch.from_numpy(y_val).type(torch.LongTensor)

x, y = x.to(device), y.to(device)

# Run inference in evaluation mode so BatchNorm uses its running statistics
# instead of the validation batch's statistics (and does not update them).
# The previous mode is restored afterwards so later training is unaffected.
was_training = net.training
net.eval()
try:
    # no_grad() skips building the autograd graph; processing in chunks keeps
    # peak memory bounded instead of pushing the whole split through at once.
    with torch.no_grad():
        val = torch.cat([net(chunk) for chunk in torch.split(x, 1024)])
finally:
    net.train(was_training)

# Predicted class = index of the highest score in each row.
_, predicted = torch.max(val, 1)

# Accuracy as a percentage (the %d format truncates to a whole number).
correct = (predicted == y).sum().item()
print('Accuracy is %d %%' % (100 * correct / len(y_val)))

#### Let's display a sample of the predictions on the validation dataset

In [None]:
# Draw nine random positions (1..999, repeats possible) into the validation set.
idx = np.random.randint(1, 1000, size=9)

# Channel 0 of each chosen sample, plus its true label.
images = x_val[idx, 0]
values = y_val[idx]

# Move the predictions to the CPU so they can be converted to a NumPy array,
# then pick the entries for the sampled positions.
predicted = predicted.cpu()
predictions = predicted.data.numpy()[idx]

display(rows=3, columns=3, images=images, values=values, predictions=predictions)

## 4. Model Testing
#### Test on the test dataset

In [None]:
# Convert the test images to a float tensor on the selected device.
torch_x_test = torch.from_numpy(x_test).type(torch.FloatTensor)

torch_x_test = torch_x_test.to(device)

# Run inference in evaluation mode so BatchNorm uses its running statistics
# rather than statistics of the test data; restore the previous mode after.
was_training = net.training
net.eval()
try:
    # no_grad() skips building the autograd graph; processing in chunks keeps
    # peak memory bounded instead of pushing the whole test set through at once.
    with torch.no_grad():
        y_test = torch.cat(
            [net(chunk) for chunk in torch.split(torch_x_test, 1024)]
        )
finally:
    net.train(was_training)

# Predicted class = index of the highest score in each row.
_, predicted = torch.max(y_test, 1)

#### Display predictions of the test dataset

In [None]:
# Draw nine random positions (1..999, repeats possible) into the test set.
idx = np.random.randint(1, 1000, size=9)

# Channel 0 of each chosen sample gives a 2-D image.
images = x_test[idx, 0]

# Move the predictions to the CPU so they can be converted to a NumPy array,
# then pick the entries for the sampled positions.
predicted = predicted.cpu()
predictions = predicted.data.numpy()[idx]

# No true labels are passed for the test set, so only predictions are shown.
display(rows=3, columns=3, images=images, values=[], predictions=predictions)

#### Save predictions to submit

In [None]:
# Image ids are 1-based and follow the row order of the test set.
ImageId = np.arange(1, len(x_test) + 1)

# Copy the predictions to the CPU explicitly: converting a CUDA tensor with
# .numpy() fails, and this cell should not depend on an earlier display cell
# having already moved `predicted` to the CPU.
Label = predicted.detach().cpu().numpy()

# Write one (ImageId, Label) row per test image, without the DataFrame index.
sbm = pd.DataFrame({'ImageId': ImageId, 'Label': Label})
sbm.to_csv('submission.csv', index=False)

sbm.head()