In [None]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import random
import pandas as pd

In [None]:
# Load the MNIST training split into `dataset`.
#   root      : directory where the data lives or is downloaded to
#   train     : True selects the training split
#   download  : True downloads MNIST when it is not already present under `root`
#   transform : ToTensor() turns every image into a torch.FloatTensor scaled to [0, 1]
dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

In [None]:
# Inspect the dataset object, then the entry stored at index 0:
# for each of them print its type on one line and its value on the next.
print(type(dataset), dataset, sep="\n")
print(type(dataset[0]), dataset[0], sep="\n")

In [None]:
# Print the image tensor of each of the first 10 samples together with its label.
for position in range(1, 11):
    image, label = dataset[position - 1]
    print(f"tensors for image {position} with label {label} : \n{image} ")

In [None]:
# Print only the labels of the first 10 samples.
for position in range(1, 11):
    _, label = dataset[position - 1]
    print(f"label for image {position} : {label}")

In [None]:
# Render the first 10 images with matplotlib.
# The title shown above each image is the label of that image.
for sample_idx in range(10):
    image, label = dataset[sample_idx]
    pixel_values = image[0]       # index 0 selects the 2-D pixel grid of the sample

    # grayscale rendering, pixel range fixed to [0, 1]
    plt.imshow(pixel_values, cmap='gray_r', vmin=0, vmax=1)
    plt.title(label)
    plt.axis('on')
    plt.show()
    print("----------------------------------------------------")

In [None]:
# flattening the images :

# flatten: turns a single-channel image tensor (1 x H x W) into a 1-D tensor of length H*W
def flatten(x):
    """Flatten a single-channel image tensor into a 1-D tensor.

    Args:
        x: tensor of shape (1, H, W); the notebook passes 1 x 28 x 28 images.

    Returns:
        A new 1-D tensor with H*W elements (784 for a 28 x 28 image) holding
        the rows of ``x[0]`` one after another. The result does not share
        storage with ``x``.
    """
    # A single reshape replaces the row-by-row torch.cat loop and removes the
    # hard-coded 28 rows; clone() keeps the result independent of the input.
    return x[0].reshape(-1).clone()

# X_train is the matrix used to train the model: one flattened image per row,
# built from all 60k samples of `dataset`.
X_train = torch.stack([flatten(dataset[row][0]) for row in range(60000)])

print(X_train.shape)        # expected shape: (60000, 784)

In [None]:
# Show the first 5 datapoints (rows) of X_train.
for row_number, row in enumerate(X_train[:5], start=1):
    print(f"Row {row_number} : {row}")

In [None]:
# y_train stores the correct label for every row of X_train,
# as an int32 column vector.
y_train = torch.tensor(
    [dataset[row][1] for row in range(60000)],
    dtype=torch.int32,
).reshape(60000, 1)

print(y_train.shape)   # expected shape: (60000, 1)

In [None]:
# Max-normalization (disabled): applied to every feature independently.
# Each entry of a feature column (784 columns) is divided by the maximum value that column takes.
# The code is kept inside a string literal so it does not run; to enable it,
# remove the surrounding triple quotes and the leading space of indentation.
# NOTE(review): a column whose maximum is 0 would be divided by 0 — guard against that before enabling.
'''
 for i in range(784) :
     X_train[:,i] /= X_train[:,i].max().item()
'''

In [None]:
# Check X_train and y_train for missing values and drop every affected
# datapoint from BOTH tensors, so that rows and labels stay aligned.
#
# A tensor element is never `None`, so comparing `.item()` with None can not
# detect anything; missing numeric entries show up as NaN and are found with
# torch.isnan. The check is vectorised, so it runs over the whole tensors at
# once instead of visiting every value in a Python loop.
missing_in_X = torch.isnan(X_train).any(dim=1)
missing_in_y = torch.isnan(y_train).reshape(len(y_train), -1).any(dim=1)
rows_to_keep = ~(missing_in_X | missing_in_y)

# Boolean-mask indexing keeps the remaining rows in their original order.
X_train = X_train[rows_to_keep]
y_train = y_train[rows_to_keep]

In [None]:
# X_train.shape == [60000, 784] and y_train.shape == [60000, 1]
# => no datapoint was removed, i.e. no missing values in the data
print(X_train.shape, y_train.shape, sep="\n")

In [None]:
# Distribution of labels: count the datapoints of every digit and the overall total.
Total_count = 0
for digit in range(10):
    count = int((y_train == digit).sum())
    Total_count += count
    print(f"No. of data points with true label {digit} are {count}")
print(f"Total count : {Total_count}")

In [None]:
# Examples of every class, in increasing label order.
# Only the first 50 entries of the dataset are scanned, so 50 images are shown in total.
for digit in range(10):
    print(f"Some samples with label {digit} are : ")
    for sample_idx in range(50):
        image, label = dataset[sample_idx]
        if label != digit:
            continue
        pixel_values = image[0]
        plt.imshow(pixel_values, cmap='gray_r', vmin=0, vmax=1)
        plt.axis('on')
        plt.show()
    print("------------------------------\n")
    

In [None]:
# Display 10 randomly chosen samples along with their labels.
for _ in range(10):
    # randrange(len(dataset)) draws uniformly from the valid indices 0 .. len-1.
    # (A lower bound of -1 would select the last sample twice as often.)
    random_number = random.randrange(len(dataset))
    # Image and label are read from the same dataset entry, so they always
    # match, even after y_train has been extended or reassigned.
    image, label = dataset[random_number]
    pixel_values = image[0]
    plt.imshow(pixel_values, cmap='gray_r', vmin=0, vmax=1)
    plt.axis('on')
    print(f"Label for sample below is : {label}")
    plt.show()

In [None]:
# BAR CHART : distribution of the different digits in the dataset
# a[d] is the number of datapoints in y_train whose label is d
a = np.array(
    [torch.eq(y_train, digit).sum().item() for digit in range(10)],
    dtype=np.int32,
)
print(a)

plt.bar(range(10), a, align='center', alpha=1)
plt.xticks(range(10))
plt.show()

In [None]:
# Shared implementation of the three augmentation functions below.
def _augment(dataset, x, transform, description, preview_every=600):
    """Apply `transform` to `x` randomly selected images of `dataset`.

    Args:
        dataset: indexable collection of (image, label) pairs whose images
            are 1 x 28 x 28 tensors.
        x: number of augmented samples to generate.
        transform: callable applied to each selected image tensor.
        description: word used in the preview message ("rotated", "scaled", ...).
        preview_every: every `preview_every`-th generated sample (starting with
            the first) is printed and displayed.

    Returns:
        Tuple (a, b): `a` has shape (x, 784) and holds the flattened
        transformed images, `b` has shape (x, 1) and holds the matching labels
        (stored with torch's default floating-point dtype).
    """
    a = torch.empty(x, 784)
    b = torch.empty(x, 1)
    for i in range(x):
        # uniform draw over the valid indices 0 .. len(dataset)-1
        rnum = random.randrange(len(dataset))
        image, label = dataset[rnum]
        transformed = transform(image)
        a[i] = flatten(transformed)          # flatten is defined in an earlier cell
        b[i] = label
        # show a few samples of the augmented images
        if i % preview_every == 0:
            print(f"Label corresponding to this image is {int(b[i].item())}")
            print(f"This is a {description} version")
            show_image(transformed)
            print("----------------------------------------------\n")
    return a, b


def rotate(dataset,x,theta = 30) :
    """Rotate `x` randomly selected images by a random angle in [-theta, theta] degrees.

    Returns:
        Tuple (a, b): images of shape (x, 784) and labels of shape (x, 1).
    """
    rotation_transform = torchvision.transforms.RandomAffine(degrees=(-theta,theta),scale=(1,1))
    return _augment(dataset, x, rotation_transform, "rotated")


def scale(dataset,x,scale_min = 0.8,scale_max = 1.2) :
    """Scale `x` randomly selected images by a random factor in [scale_min, scale_max].

    Returns:
        Tuple (a, b): images of shape (x, 784) and labels of shape (x, 1).
    """
    scaling_transform = torchvision.transforms.RandomAffine(degrees=0, scale=(scale_min, scale_max))
    return _augment(dataset, x, scaling_transform, "scaled")


def rotate_scale(dataset,x,theta = 30,scale_min = 0.8,scale_max = 1.2) :
    """Rotate and scale `x` randomly selected images (combination of `rotate` and `scale`).

    Returns:
        Tuple (a, b): images of shape (x, 784) and labels of shape (x, 1).
    """
    transform_ = torchvision.transforms.RandomAffine(degrees=(-theta,theta), scale=(scale_min, scale_max))
    return _augment(dataset, x, transform_, "rotated-scaled")

# show_image: displays the image stored in the given 1 x 28 x 28 tensor
def show_image(tensor):
    """Plot the first channel of `tensor` as an inverted-grayscale image."""
    pixels = tensor.numpy()[0]
    plt.imshow(pixels, cmap='gray_r')
    plt.axis('on')
    plt.show()

In [None]:
# Augment the training set with 3000 rotated, 3000 scaled and 4000
# rotated-and-scaled versions of randomly selected images from `dataset`.
# The new samples are appended to X_train and their labels to y_train.
# NOTE: re-running this cell appends another 10000 samples each time.
rotated_X, rotated_y = rotate(dataset,3000,25)
scaled_X, scaled_y = scale(dataset,3000,0.8,1.2)
combined_X, combined_y = rotate_scale(dataset,4000,25,0.8,1.2)

X_train = torch.cat((X_train, rotated_X, scaled_X, combined_X), dim=0)

# The augmentation functions return their labels as floating-point tensors;
# cast them to y_train's dtype so the concatenation keeps integer labels
# instead of mixing dtypes.
new_labels = [labels.to(y_train.dtype) for labels in (rotated_y, scaled_y, combined_y)]
y_train = torch.cat((y_train, *new_labels), dim=0)

print(X_train.shape)
print(y_train.shape)

In [None]:
# The statement below replaces X_train with 1 - X_train, i.e. every pixel value is
# inverted (0 becomes 1 and 1 becomes 0); it does not subtract 1 from the data.
# Author's rationale: the dataset contained too many zeros and the cross entropy loss
# went to infinity due to operations on these zero-valued entries.
# The scalar 1 is broadcast over the whole tensor.
# NOTE(review): only X_train is changed here (no test tensor), and train_loader is built
# from `dataset`, not from X_train — confirm whether this step still affects training.
X_train = 1 - X_train

In [None]:
# Z-Normalization (disabled; the code is kept inside a string literal so it does not run):
# (x - mu)/std , where mu (mean) and std (standard deviation) are those of the respective feature column
# mu and std are stored per feature column so that the test dataset can be normalized with the same parameters
# NOTE(review): a constant feature column has std == 0 and would be divided by 0 — guard against that before enabling.

'''
mu = torch.empty(784)
std = torch.empty(784)
for i in range(784) :
    mu[i] = X_train[:,i].mean().item();
    std[i] = X_train[:,i].std().item();
    X_train[:,i] = (X_train[:,i]-mu[i])/std[i]
'''

In [None]:
# Disabled train/test split: the code is kept inside a string literal so it does not run.
# When enabled it would split X_train / y_train 85 % / 15 % with scikit-learn and cast y_train to long.
'''
# splitting the data into train and test set :
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size = 0.15, random_state = 41) 
# the dataset made above X_train will be split into 2 parts, 85 % of it will be used to train the model and rest for testing

y_train = y_train.long()   # necessary step, else will give error while using criterion (error calc. step)
'''

In [None]:
# Download (if needed) and load the MNIST test split, converted to tensors like the training split.
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

In [None]:
# Wrap both datasets in loaders that yield mini-batches of 10 images.
# Training batches are reshuffled on every pass; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=10,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=10,
    shuffle=False,
)

In [None]:
# "CNN" class: defines the layers and the forward pass of the neural network
class CNN(torch.nn.Module):
    """Convolutional classifier for single-channel 28 x 28 images.

    Two convolution -> batch-norm -> ReLU -> max-pool stages are followed by
    three fully connected layers; the output is a log-probability for each of
    the 10 classes.
    """

    def __init__(self):
        super().__init__()

        # Convolution layers:
        #   conv1: 1 input channel   -> 16 feature maps, 5x5 kernel, stride 1
        #   conv2: 16 input channels -> 32 feature maps, 3x3 kernel, stride 1
        self.conv1 = torch.nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1)
        self.conv2 = torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)

        # Batch normalization, one layer per convolution / hidden dense layer
        self.bn1 = torch.nn.BatchNorm2d(num_features=16)
        self.bn2 = torch.nn.BatchNorm2d(num_features=32)
        self.bn3 = torch.nn.BatchNorm1d(num_features=120)
        self.bn4 = torch.nn.BatchNorm1d(num_features=70)

        # Fully connected layers:
        #   after the 2nd pooling there are 32 feature maps of size 5x5,
        #   so the first dense layer receives 5*5*32 inputs
        self.fc1 = torch.nn.Linear(in_features=5 * 5 * 32, out_features=120)
        self.fc2 = torch.nn.Linear(in_features=120, out_features=70)
        self.fc3 = torch.nn.Linear(in_features=70, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images `x`."""
        relu = torch.nn.functional.relu
        max_pool = torch.nn.functional.max_pool2d

        # 1st stage: convolution, batch norm, ReLU, then 2x2 pooling with stride 2
        x = max_pool(relu(self.bn1(self.conv1(x))), kernel_size=2, stride=2)
        # 2nd stage: same structure with the second convolution
        x = max_pool(relu(self.bn2(self.conv2(x))), kernel_size=2, stride=2)

        # flatten the feature maps of every sample for the dense layers
        x = x.view(-1, 5 * 5 * 32)

        # fully connected layers with batch norm and ReLU
        x = relu(self.bn3(self.fc1(x)))
        x = relu(self.bn4(self.fc2(x)))

        # log_softmax over the class dimension normalizes the output to a
        # (log) probability distribution over the output classes
        return torch.nn.functional.log_softmax(self.fc3(x), dim=1)
        

In [None]:
# Instantiating the Model
# The torch random generator is seeded first, so the randomly initialised
# weights of the network are the same on every run.
torch.manual_seed(10425)
model = CNN()
model    # last expression of the cell: the notebook displays the model's representation

In [None]:
# Setting the criterion used to measure the error of the model: Cross Entropy loss.
# NOTE(review): CNN.forward already ends with log_softmax, while CrossEntropyLoss is
# documented to expect unnormalized scores — torch.nn.NLLLoss may be the intended pairing; confirm.
criterion = torch.nn.CrossEntropyLoss()
# Using the Adam optimizer to optimize the parameters of the CNN
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001) # learning rate is 0.0001 , can be changed in future
# Here, model.parameters() are the parameters which the model consists of.
# NOTE(review): the line below prints the bound method itself (it is not called);
# use list(model.parameters()) if the parameter values are wanted.
print(model.parameters)

In [None]:
# Training the model.
# Every mini-batch does a full optimisation step: forward pass, loss,
# gradient reset, back-propagation and parameter update. Running the update
# only once after the batch loop would train on the last batch of each pass alone.
num_iterations = 500   # number of passes (epochs) over the whole training set
# NOTE(review): with one update per batch, far fewer passes may be enough — tune as needed.

model.train()          # batch-norm layers must use batch statistics while training
for i in range(num_iterations) :
    netloss = 0.0      # running sum of the batch losses of this pass

    # Batch variables get their own names so the notebook-level
    # X_train / y_train tensors are not overwritten.
    for b, (X_batch, y_batch) in enumerate(train_loader, start=1) :
        # forward propagation :
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)

        # back propagation and update of weights and biases :
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() stores a plain float, so the computation graph of every
        # batch can be released instead of piling up in netloss
        netloss += loss.item()
        if (b%1000 == 0) :
            print(f"iteration number : {i+1}  , batch number : {b},  loss : {netloss}")
    print(f"Net loss after iteration number {i+1} : {netloss}")

In [None]:
# Testing: run the model on the test set, collect true labels and predictions
# (Y_test and Predicted are used for the confusion matrix) and report accuracy.
model.eval()               # batch-norm layers must use their running statistics during evaluation

true_batches = []          # labels of every test batch
predicted_batches = []     # predicted classes of every test batch
correct = 0
total = 0
with torch.no_grad():      # no gradients needed: weights and biases are not updated here
    for X_batch, y_batch in test_loader :
        y_val = model(X_batch)
        predicted = torch.max(y_val, 1)[1]      # index of the highest score = predicted class

        true_batches.append(y_batch)
        predicted_batches.append(predicted)

        correct += (predicted == y_batch).sum().item()
        total += y_batch.size(0)

# Concatenating once keeps the integer dtype of labels and predictions.
Y_test = torch.cat(true_batches, 0)
Predicted = torch.cat(predicted_batches, 0)

# The percentage is computed from the number of evaluated samples rather than a fixed test-set size.
print(f"Accuracy = {100 * correct / total}%")

Achieved an accuracy of 96.85%

In [None]:
# Building the confusion matrix using Scikit-Learn functions.
# Y_test (true labels) is passed first, Predicted (model predictions) second.
# The import is kept in this cell because a later cell calls confusion_matrix again.

from sklearn.metrics import confusion_matrix
confusion_matrix(Y_test,Predicted)

In [None]:
from sklearn.utils.multiclass import unique_labels

In [None]:
# Present the confusion matrix as a table whose rows and columns are named
# after the labels that occur in Y_test.
labels = unique_labels(Y_test)
column = [f"{label}" for label in labels]
row = list(column)
table = pd.DataFrame(
    confusion_matrix(Y_test, Predicted),
    index=row,
    columns=column,
)
print(table)