In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
import numpy as np 
import pandas as pd 

from matplotlib.gridspec import GridSpec
import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix
import cv2 
import torch
import torch.nn.functional as F

import gc


# Loading Data and Processing the Data

In [None]:
# Parse the Kaggle training CSV into a float array; the header row is skipped.
train_csv_path = "/kaggle/input/digit-recognizer/train.csv"
data = np.genfromtxt(train_csv_path, delimiter=",", skip_header=1)

In [None]:
print(data.shape)
# Column 0 holds the digit label; the remaining columns are the pixel values.
X, y = data[:, 1:], data[:, 0]
# Standardise every image (row) independently to mean 0 and standard deviation 1.
# axis=1 with keepdims=True yields one statistic per image that broadcasts over
# that image's pixels. Using the per-image std (not the global one) keeps the
# training data on the same scale as the test-set preprocessing further down.
X = X - X.mean(axis=1, keepdims=True)
X = X / X.std(axis=1, keepdims=True)
print(f"X-shape: {X.shape}")
print(f"y-shape: {y.shape}")
# Sanity check on a single image: expect a mean of ~0 and a std of ~1.
print(np.mean(X[10, :]))
print(np.std(X[10, :]))

In [None]:
# View each flat pixel row as a 28x28 image (on a copy, so X itself stays flat).
n_images = X.shape[0]
X_transformed = X.copy().reshape(n_images, 28, 28)
print(X_transformed.shape)

# Position of the first sample of every digit 0..9.
indeces = [np.where(y == digit)[0][0] for digit in range(0, 10)]

# 2x5 grid, one panel per digit, filled in row-major order.
gs = GridSpec(2, 5)
axes = [plt.subplot(gs[r, c]) for r in range(0, 2) for c in range(0, 5)]

for panel, sample_idx in zip(axes, indeces):
    panel.imshow(X_transformed[sample_idx], cmap="gray")
    panel.set_xticks([])
    panel.set_yticks([])

        

# Data Augmentation

In [None]:
class DifferentTransformation(object):
    """Affine data-augmentation helpers built on ``cv2.warpAffine``.

    Both helpers return the augmented samples in example-major order: every
    variant of ``X[0]`` first, then every variant of ``X[1]``, and so on.
    """

    @staticmethod
    def rotation(X, y, cols, rows, arr):
        """Rotate every image about its centre by each angle in ``arr``.

        Args:
            X: array of 2-D images, indexed along axis 0.
            y: labels aligned with ``X``.
            cols: output width handed to ``cv2.warpAffine``.
            rows: output height handed to ``cv2.warpAffine``.
            arr: iterable of rotation angles (degrees, as taken by
                ``cv2.getRotationMatrix2D``).

        Returns:
            Tuple ``(X_tr, y_tr)`` of numpy arrays holding one warped image and
            one label per (example, angle) pair.
        """
        # The matrices depend only on the angle, so build them once instead of
        # once per example.
        matrices = [
            cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
            for angle in arr
        ]

        X_tr = []
        y_tr = []
        for idx in range(X.shape[0]):
            for M in matrices:
                X_tr.append(cv2.warpAffine(X[idx], M, (cols, rows)))
                y_tr.append(y[idx])

        return np.asarray(X_tr), np.asarray(y_tr)

    @staticmethod
    def translation(X, y, cols, rows, arr):
        """Shift every image by each ``(tx, ty)`` pair from ``arr`` x ``arr``.

        Args:
            X: array of 2-D images, indexed along axis 0.
            y: labels aligned with ``X``.
            cols: output width handed to ``cv2.warpAffine``.
            rows: output height handed to ``cv2.warpAffine``.
            arr: iterable of pixel offsets, used for both the x and y shift.

        Returns:
            Tuple ``(X_tr, y_tr)`` of numpy arrays holding one warped image and
            one label per (example, tx, ty) combination; ``tx`` varies slowest.
        """
        offsets = list(arr)
        # One matrix per (tx, ty) pair. Both offsets are fixed *before* the
        # warp, so no sample is produced with a stale ty left over from the
        # previous iteration or the previous example.
        matrices = [
            np.float32([[1, 0, tx], [0, 1, ty]])
            for tx in offsets
            for ty in offsets
        ]

        X_tr = []
        y_tr = []
        for idx in range(X.shape[0]):
            for M in matrices:
                X_tr.append(cv2.warpAffine(X[idx], M, (cols, rows)))
                y_tr.append(y[idx])

        return np.asarray(X_tr), np.asarray(y_tr)

In [None]:
# 
# Only modest rotation angles are used so that no digit gets turned into a
# different one (e.g. a 6 becoming a 9).
img_rows, img_cols = X_transformed[0].shape
rotation_angles = [0, 15, 30, 45]
X_augmented, y_augmented = DifferentTransformation.rotation(
    X_transformed, y, img_cols, img_rows, rotation_angles
)
#X_augmented, y_augmented = DifferentTransformation.translation(X_transformed, y, X_transformed[0].shape[1], X_transformed[0].shape[0], list(range(-2, 2, 2)))


In [None]:
# With the rotation call above, each original image yields one sample per angle (4 angles).
X_augmented.shape

In [None]:
# Show the first four augmented samples labelled 6, side by side and without ticks.
fig, axes = plt.subplots(1, 4)
six_rows = np.where(y_augmented == 6)[0]
for k in range(4):
    axes[k].imshow(X_augmented[six_rows[k]])
for panel in axes:
    panel.set_xticks([])
for panel in axes:
    panel.set_yticks([])
plt.show()

# Splitting Data into Train and validation set

In [None]:
# Split on the ORIGINAL images rather than on the augmented samples. Every
# original image contributes several augmented variants; splitting the
# augmented set directly would place variants of the same image in both sets
# and leak training information into the validation score.
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# The augmentation helpers emit the variants of image i contiguously, i.e. at
# rows i*k ... i*k + k - 1 for k variants per image. Verify that layout.
variants_per_image, leftover = divmod(len(X_augmented), len(y))
assert leftover == 0 and variants_per_image > 0, \
    "augmented set is not a whole multiple of the original set"
assert np.array_equal(y_augmented, np.repeat(y, variants_per_image)), \
    "augmented samples are not grouped per original image"
variant_offsets = np.arange(variants_per_image)

# Only y is needed to generate a stratified split; zeros stand in for X.
train_ind, test_ind = next(splitter.split(np.zeros(len(y)), y))

# Expand each original-image index to the rows of all of its variants.
train_rows = (train_ind[:, None] * variants_per_image + variant_offsets).ravel()
validation_rows = (test_ind[:, None] * variants_per_image + variant_offsets).ravel()

# The training loop does not reshuffle, so mix the variants here to avoid
# mini-batches made of consecutive copies of the same image.
np.random.default_rng(42).shuffle(train_rows)

X_train = X_augmented[train_rows]
y_train = y_augmented[train_rows]
X_validation = X_augmented[validation_rows]
y_validation = y_augmented[validation_rows]

print(X_train.shape)
print(X_validation.shape)

In [None]:
# Compare the label distributions of the two splits; a stratified split
# should make them look alike.
fig, (ax1, ax2) = plt.subplots(2, 1)

ax1.hist(y_train, density=True)
ax1.set_title("Training labels")
# The lower panel shows the validation labels (not the training labels again).
ax2.hist(y_validation, density=True)
ax2.set_title("Validation labels")
fig.tight_layout()
plt.show()

# Transforming our data from numpy to tensors

In [None]:
# Wrap the numpy splits as tensors (dtype is inherited from the arrays).
X_train_tensor, y_train_tensor = torch.from_numpy(X_train), torch.from_numpy(y_train)
X_validation_tensor, y_validation_tensor = (
    torch.from_numpy(X_validation),
    torch.from_numpy(y_validation),
)

# Build the (N, 1, 28, 28) float32 inputs for the conv layers: insert a
# singleton channel axis and copy into a fresh float32 tensor.
X_train_tensor_4d = X_train_tensor.unsqueeze(1).to(dtype=torch.float32, copy=True)
X_validation_tensor_4d = X_validation_tensor.unsqueeze(1).to(dtype=torch.float32, copy=True)
X_train_tensor_4d.shape

# Building the LeNet-5 model from scratch

In [None]:
class LeNet5(torch.nn.Module):
    """LeNet-5 style CNN for single-channel 28x28 images with 10 output classes.

    ``forward`` returns raw class scores (logits), not probabilities.
    ``torch.nn.CrossEntropyLoss`` applies log-softmax itself, so feeding it
    already-softmaxed outputs would squash the loss and its gradients. Use
    ``torch.argmax(out, dim=1)`` for predictions (unchanged by softmax) or
    ``F.softmax(out, dim=1)`` when probabilities are needed.
    """

    def __init__(self, *args, **kwargs):
        # *args / **kwargs are accepted for call compatibility but are unused.
        super(LeNet5, self).__init__()
        self.cnn1 = torch.nn.Conv2d(1, 6, (5, 5), 1)
        # The same parameter-free pooling layer is reused after cnn1 and cnn2.
        self.pool = torch.nn.AvgPool2d(kernel_size=(2, 2), stride=2)
        self.cnn2 = torch.nn.Conv2d(6, 16, (5, 5), 1)
        self.cnn3 = torch.nn.Conv2d(16, 120, (5, 5), 1)
        self.fc1 = torch.nn.Linear(120, 84)
        self.fc2 = torch.nn.Linear(84, 10)

    def forward(self, x):
        """Map a batch of shape (N, 1, 28, 28) to logits of shape (N, 10)."""
        # Pad 28x28 -> 32x32 so the first convolution keeps a 28x28 output,
        # as in the original paper.
        x = F.pad(x, (2, 2, 2, 2))
        x = torch.tanh(self.cnn1(x))   # (N, 6, 28, 28)
        x = self.pool(x)               # (N, 6, 14, 14)
        x = torch.tanh(self.cnn2(x))   # (N, 16, 10, 10)
        x = self.pool(x)               # (N, 16, 5, 5)
        x = torch.tanh(self.cnn3(x))   # (N, 120, 1, 1)
        # Flatten everything except the batch axis -> (N, 120).
        x = torch.flatten(x, 1)
        x = torch.tanh(self.fc1(x))
        # No softmax here: return the logits.
        return self.fc2(x)



## Initialize parameters

In [None]:
def init_param(layer):
    """Xavier-initialise the weight of a Linear or Conv2d layer in place.

    Meant to be passed to ``Module.apply``. Linear layers get Xavier-normal
    weights, Conv2d layers Xavier-uniform weights. Layers of any other exact
    type, and all biases, are left untouched.
    """
    initialisers = {
        torch.nn.Linear: torch.nn.init.xavier_normal_,
        torch.nn.Conv2d: torch.nn.init.xavier_uniform_,
    }
    initialise = initialisers.get(type(layer))
    if initialise is not None:
        initialise(layer.weight)


In [None]:
# Seed PyTorch's RNG so the random weight initialisation is repeatable
# across Restart & Run All.
torch.manual_seed(42)
# Module.apply visits every sub-module and returns the module itself.
model = LeNet5().apply(init_param)
# To inspect the initial values, iterate over model.named_parameters().

# Training Model

In [None]:
# Adam optimiser over all model parameters, plus the per-epoch loss histories.
LEARNING_RATE = 0.0005
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# CrossEntropyLoss applies log-softmax internally and takes integer class targets.
loss_fn = torch.nn.CrossEntropyLoss()
training_error, validation_error = [], []
print(model)

In [None]:
import time

batch_size = 256
n_train = len(X_train_tensor_4d)

# CrossEntropyLoss needs int64 class indices; convert the labels once instead
# of once per mini-batch.
y_train_long = y_train_tensor.long()
y_validation_long = y_validation_tensor.long()

begin = time.time()

for epoch in range(0, 5):
    current_error = []
    # Stepping by batch_size also covers the final, smaller batch, so no
    # separate remainder branch is needed.
    for start in range(0, n_train, batch_size):
        stop = start + batch_size
        optimizer.zero_grad()
        y_pred = model(X_train_tensor_4d[start:stop])
        loss = loss_fn(y_pred, y_train_long[start:stop])
        loss.backward()
        optimizer.step()
        current_error.append(loss.item())

    # Validation is evaluation only: no_grad stops autograd from recording a
    # graph (and retaining activations) for the whole validation set.
    with torch.no_grad():
        y_pred = model(X_validation_tensor_4d)
        epoch_validation_error = loss_fn(y_pred, y_validation_long).item()
    epoch_training_error = np.mean(current_error)

    validation_error.append(epoch_validation_error)
    training_error.append(epoch_training_error)
    # Print this epoch's own values rather than indexing the history lists by
    # epoch, which would show stale numbers if the cell is run a second time.
    print(f"#epoch: {epoch} and the -log training error is {epoch_training_error}")
    print(f"#epoch: {epoch} and the -log validation error is {epoch_validation_error}")

# Reported once, after the last epoch.
print(f"Time taken to finish training the model is {time.time() - begin} sec")

# Training vs Validation error curve

In [None]:
# Training vs Validation error
plt.plot(training_error, c="green")
plt.plot(validation_error, c="red")
plt.legend(["Training_error", "Validation_error"])
plt.xlabel("Epochs")
plt.ylabel("-log-likelihood")
plt.show()

# Confusion matrix performance on the training and validation sets

In [None]:
#Training confusion matrix 
y_pred = model(X_train_tensor_4d)
y_p = torch.argmax(y_pred, dim=1)
cnf = confusion_matrix(y_train_tensor, y_p)
# plt.matshow(cnf)
# plt.colorbar()
print(f"Accuracy {np.sum(np.diag(cnf))/len(X_train_tensor_4d)}")
cnf

In [None]:
# Drop the large training-side objects that are no longer needed.
# Note: running this cell a second time raises NameError.
del X_train_tensor_4d, y_train_tensor, cnf, y_pred

In [None]:
#Validation confusion matrix 
y_pred = model(X_validation_tensor_4d)
y_p = torch.argmax(y_pred, dim=1)
cnf = confusion_matrix(y_validation_tensor, y_p)
print(f"Accuracy {np.sum(np.diag(cnf))/len(X_validation_tensor_4d)}")
cnf

In [None]:
# Drop the validation-side objects that are no longer needed.
# Note: running this cell a second time raises NameError.
del X_validation_tensor_4d, y_validation_tensor, cnf, y_pred

# Test Set Prediction

In [None]:
# Read the test CSV (header skipped) and standardise each image (row)
# to mean 0 and standard deviation 1.
test_csv_path = "/kaggle/input/digit-recognizer/test.csv"
data = np.genfromtxt(test_csv_path, delimiter=",", skip_header=1)
row_means = np.mean(data, axis=1).reshape(-1, 1)
data = data - row_means
row_stds = np.std(data, axis=1).reshape(-1, 1)
data = data / row_stds
# Sanity check on the first image: expect ~0 and ~1.
print(np.mean(data[0]))
print(np.std(data[0]))
data.shape

In [None]:
# Reshape the flat test rows to 28x28 images and build the (N, 1, 28, 28)
# float32 tensor the network consumes.
n_test_images = data.shape[0]
X_transformed = data.reshape(n_test_images, 28, 28)
X_tensor = torch.from_numpy(X_transformed)
X_tensor_4d = X_tensor.unsqueeze(1).to(dtype=torch.float32, copy=True)
X_tensor_4d.shape

In [None]:
# Inference only: no_grad avoids recording an autograd graph (and retaining
# activations) for the whole test set.
with torch.no_grad():
    y_pred = model(X_tensor_4d)

In [None]:
# One row per test image; the model's last layer has 10 outputs (one per digit).
y_pred.shape

In [None]:
# Assemble the submission table: 1-based image ids next to the predicted
# digit (index of the highest score in each row), then write it to CSV.
image_ids = torch.IntTensor(list(range(1, len(y_pred) + 1)))
predicted_labels = torch.argmax(y_pred, dim=1)
submission_columns = ["ImageId", "Label"]
results = pd.DataFrame(np.c_[image_ids, predicted_labels], columns=submission_columns)
results.to_csv("submission.csv", index=False)
results.shape

In [None]:
# Show the first 25 test images in a 5x5 grid (row-major order), without ticks.
fig, axes = plt.subplots(5, 5)

for sample_idx, panel in enumerate(axes.flat):
    panel.imshow(X_tensor_4d[sample_idx, 0, :, :])
    panel.set_xticks([])
    panel.set_yticks([])
plt.show()

In [None]:
# Predicted digits for the first 25 test images, in the same order as the 5x5 grid above.
torch.argmax(y_pred[0:25], dim=1)

# Reference 
http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf