In [None]:
import torch                      # importing core library for pytorch 
import torchvision                # importing pytorch library used for CV tasks
import numpy as np                # used for addressing array/matrix operations
import matplotlib.pyplot as plt   # used for visualizing data
from torchvision import datasets  # from 'torchvision' package(used for CV tasks) importing module 'datasets' which has various standard datasets used for CV tasks 
import random

# Load the MNIST training data into `train_dataset`.
#   root      : location where the data exists / has to be downloaded to
#   train     : True, as passed below
#   download  : True instructs to download the MNIST dataset if it is not present at `root`
#   transform : ToTensor() turns each image into a torch.FloatTensor with values scaled down to [0,1]
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

In [None]:
# Show the type and contents of the dataset object itself ...
print(type(train_dataset), train_dataset, sep='\n')
# ... and of the element stored at index 0.
print(type(train_dataset[0]), train_dataset[0], sep='\n')

In [None]:
# Print the first 10 image tensors along with their corresponding labels.
for idx in range(10):
    image, label = train_dataset[idx]
    print(f"tensors for image {idx+1} with label {label} : \n{image} ")

In [None]:
# Print the first 10 labels separately (images are numbered from 1).
for position in range(1, 11):
    print(f"label for image {position} : {train_dataset[position-1][1]}")

In [None]:
# Display the first 10 images with matplotlib.
# The value shown as the title of each image is the label for that image.
for idx in range(10):
    image, label = train_dataset[idx]

    # image[0] is the single channel holding the pixel information of this sample;
    # it is drawn as a grayscale picture with the value range fixed to [0, 1].
    plt.imshow(image[0], cmap='gray_r', vmin=0, vmax=1)
    plt.title(label)
    plt.axis('on')
    plt.show()
    print("----------------------------------------------------")

In [None]:
# flattening the images :

def flatten(x) :
    """Flatten the first channel of an image tensor into a 1-D tensor.

    Args:
        x: tensor indexed as (channel, row, column), e.g. a 1 x 28 x 28 MNIST
           image. Only channel 0 is used.

    Returns:
        A new 1-D tensor holding the rows of ``x[0]`` one after another
        (784 values for a 28 x 28 image). Note this is shape (784,), not (1,784).
    """
    # reshape(-1) lays the rows out back to back, which is what concatenating
    # them one at a time produces, but works for any image height/width.
    # clone() guarantees the result never aliases the caller's tensor.
    return x[0].reshape(-1).clone()

# Build X_train: one row per datapoint, each row being the flattened image.
# This is the matrix that will be used to train the model.
# Sizes are taken from the dataset itself instead of being hard-coded.
num_samples = len(train_dataset)
num_features = train_dataset[0][0][0].numel()     # pixels in one channel (784 for 28 x 28)
X_train = torch.empty(num_samples, num_features)

for i in range(num_samples):
    X_train[i] = flatten(train_dataset[i][0])

print(X_train.shape)        # it is supposed to have a shape (60000,784)

In [None]:
# Print the first 5 datapoints/rows of X_train (numbered from 1).
for row_number, row in enumerate(X_train[:5], start=1):
    print(f"Row {row_number} : {row}")

In [None]:
# y_train stores the correct label for every row of X_train as an int32 column vector.
# The labels are read straight from the dataset's `targets` attribute, so no image
# has to be decoded and transformed just to look up its label.
# NOTE(review): assumes `train_dataset.targets` lists the labels in index order,
# as torchvision's MNIST dataset does — confirm if another dataset is plugged in.
y_train = torch.as_tensor(train_dataset.targets, dtype=torch.int32).reshape(-1, 1)

print(y_train.shape)   # it is supposed to have a shape (60000,1)

In [None]:
# Normalization (disabled) : it is done for all features independently
# Each entry of all 784 feature columns is divided by the maximum value which that feature takes.
# The code sits inside a string literal, so it does not run. To enable it, remove the
# triple quotes and the one-space indent in front of the two loop lines.
# NOTE(review): a feature column whose maximum is 0 would be divided by 0 here — guard against
# that before enabling.
'''
 for i in range(784) :
     X_train[:,i] /= X_train[:,i].max().item()
'''

In [None]:
# Checking for missing values in X_train and y_train :
# a datapoint is dropped from BOTH tensors if its label or any of its features is NaN,
# so rows and labels stay aligned.
# Tensors cannot hold None (`.item()` always returns a Python number), so a missing
# value can only show up as NaN; the check is done for all rows at once.
rows_with_missing = torch.isnan(X_train).any(dim=1) | torch.isnan(y_train.to(torch.float32)).any(dim=1)
rows_to_keep = ~rows_with_missing
X_train = X_train[rows_to_keep]
y_train = y_train[rows_to_keep]

In [None]:
# Shapes after the missing-value check :
# X_train.shape : [60000,784] and y_train.shape : [60000,1] => no missing values in data
print(X_train.shape, y_train.shape, sep='\n')

In [None]:
# Distribution of labels : count how many datapoints carry each digit 0..9
# and keep a running total across all digits.
Total_count = 0
for digit in range(10):
    count = (y_train == digit).sum().item()
    print(f"No. of data points with true label {digit} are {count}")
    Total_count = Total_count + count
print(f"Total count : {Total_count}")

In [None]:
# Some examples from each class :
# the first 50 datapoints are scanned once per digit, so 50 images are shown in
# total, grouped by label in increasing order.
for digit in range(10):
    print(f"Some samples with label {digit} are : ")
    for idx in range(50):
        image, label = train_dataset[idx]
        if digit != label:
            continue
        plt.imshow(image[0], cmap='gray_r', vmin=0, vmax=1)
        plt.axis('on')
        plt.show()
    print("------------------------------\n")
    

In [None]:
# Printing 10 randomly chosen samples along with their labels.
for _ in range(10):
    # randrange(len(...)) draws uniformly from the valid indices 0 .. len-1;
    # a lower bound of -1 would also produce index -1, i.e. the last sample a second time.
    random_number = random.randrange(len(train_dataset))
    # Image and label are read from the same dataset entry, so they always match,
    # even if y_train has been extended or re-split by a later cell.
    image, label = train_dataset[random_number]
    plt.imshow(image[0], cmap='gray_r', vmin=0, vmax=1)
    plt.axis('on')
    print(f"Label for sample below is : {label}")
    plt.show()

In [None]:
# BAR CHART : distribution of the different digits in the dataset.
# a[d] holds the number of datapoints whose label is the digit d.
a = np.array(
    [torch.eq(y_train, digit).sum().item() for digit in range(10)],
    dtype=np.int32,
)
print(a)

digits = list(range(10))
plt.bar(digits, a, align='center', alpha=1)
plt.xticks(digits)
plt.show()

In [None]:
def rotate(dataset,x,theta = 30) :
    """Rotate `x` randomly selected images of `dataset` by a random angle in [-theta, theta].

    Args:
        dataset: indexable dataset whose items are (image, label) pairs.
        x: number of rotated samples to generate.
        theta: maximum rotation angle in degrees (applied in both directions).

    Returns:
        Tuple (a, b): `a` has shape (x,784) and holds the flattened rotated images,
        `b` has shape (x,1) and holds the matching labels.
    """
    a = torch.empty(x,784)
    b = torch.empty(x,1)
    rotation_transform = torchvision.transforms.RandomAffine(degrees=(-theta,theta),scale=(1,1))
    for i in range(x) :
        # Draw uniformly from the valid indices 0 .. len(dataset)-1. A lower bound of -1
        # would also yield index -1, i.e. the last sample a second time.
        rnum = random.randrange(len(dataset))
        image, label = dataset[rnum]           # fetch the sample once for both image and label
        rotated_image = rotation_transform(image)
        a[i] = flatten(rotated_image)          # function is defined in some cell above
        b[i] = label
        # printing some samples of rotated images (every 600th one)
        if (i%600==0) :
            print(f"Label corresponding to this image is {int(b[i].item())}")
            print("This is a rotated version")
            show_image(rotated_image)
            print("----------------------------------------------\n")
    return a,b


def scale(dataset,x,scale_min = 0.8,scale_max = 1.2) :
    """Scale `x` randomly selected images of `dataset` by a random factor in [scale_min, scale_max].

    Args:
        dataset: indexable dataset whose items are (image, label) pairs.
        x: number of scaled samples to generate.
        scale_min: smallest scaling factor.
        scale_max: largest scaling factor.

    Returns:
        Tuple (a, b): `a` has shape (x,784) and holds the flattened scaled images,
        `b` has shape (x,1) and holds the matching labels.
    """
    a = torch.empty(x,784)
    b = torch.empty(x,1)
    scaling_transform = torchvision.transforms.RandomAffine(degrees=0, scale=(scale_min, scale_max))
    for i in range(x) :
        # Draw uniformly from the valid indices 0 .. len(dataset)-1. A lower bound of -1
        # would also yield index -1, i.e. the last sample a second time.
        rnum = random.randrange(len(dataset))
        image, label = dataset[rnum]          # fetch the sample once for both image and label
        scaled_image = scaling_transform(image)
        a[i] = flatten(scaled_image)          # function is defined in some cell above
        b[i] = label
        # printing some samples of scaled images (every 600th one)
        if (i%600==0) :
            print(f"Label corresponding to this image is {int(b[i].item())}")
            print("This is a scaled version")
            show_image(scaled_image)
            print("----------------------------------------------\n")
    return a,b


def rotate_scale(dataset,x,theta = 30,scale_min = 0.8,scale_max = 1.2) :
    """Rotate and scale `x` randomly selected images of `dataset` in a single random transform.

    Args:
        dataset: indexable dataset whose items are (image, label) pairs.
        x: number of transformed samples to generate.
        theta: maximum rotation angle in degrees (applied in both directions).
        scale_min: smallest scaling factor.
        scale_max: largest scaling factor.

    Returns:
        Tuple (a, b): `a` has shape (x,784) and holds the flattened transformed images,
        `b` has shape (x,1) and holds the matching labels.
    """
    a = torch.empty(x,784)
    b = torch.empty(x,1)
    transform_ = torchvision.transforms.RandomAffine(degrees=(-theta,theta), scale=(scale_min, scale_max))
    for i in range(x) :
        # Draw uniformly from the valid indices 0 .. len(dataset)-1. A lower bound of -1
        # would also yield index -1, i.e. the last sample a second time.
        rnum = random.randrange(len(dataset))
        source_image, label = dataset[rnum]    # fetch the sample once for both image and label
        image = transform_(source_image)
        a[i] = flatten(image)          # function is defined in some cell above
        b[i] = label
        # printing some samples of rotated-scaled images (every 600th one)
        if (i%600==0) :
            print(f"Label corresponding to this image is {int(b[i].item())}")
            print("This is a rotated-scaled version")
            show_image(image)
            print("----------------------------------------------\n")
    return a,b

def show_image(tensor):
    """Display the first channel of an image tensor (e.g. 1 x 28 x 28) as a grayscale picture."""
    first_channel = tensor.numpy()[0]
    plt.imshow(first_channel, cmap='gray_r')
    plt.axis('on')
    plt.show()

In [None]:
# Data augmentation : 3000 rotated, 3000 scaled and 4000 rotated-scaled versions of
# randomly selected images from the current dataset are generated (in that order) ...
a, b = rotate(train_dataset, 3000, 25)
c, d = scale(train_dataset, 3000, 0.8, 1.2)
e, f = rotate_scale(train_dataset, 4000, 25, 0.8, 1.2)

# ... and appended, in the same order, to the original training set and its labels.
X_train = torch.cat((X_train, a, c, e), axis=0)
y_train = torch.cat((y_train, b, d, f), axis=0)

print(X_train.shape)
print(y_train.shape)

In [None]:
# There were too many zeros in the dataset... the author observed the cross entropy loss
# going to infinity with this zero-heavy data.
# To avoid this, every pixel value v is replaced by 1 - v (the image is inverted).
# X_train still holds all datapoints at this point, so the rows that later become the
# test set are inverted as well.
# Done as one tensor operation instead of an element-by-element Python loop.
# NOTE : running this cell a second time inverts the data back.
X_train = 1 - X_train

In [None]:
# Z-Normalization (disabled) :
# (x - mu)/std , where mu (mean) and std (standard deviation) are those of the respective feature column
# mu and std are stored per feature column so the test dataset can be normalized with the same parameters
# The code sits inside a string literal, so it does not run; remove the triple quotes to enable it.
# NOTE(review): a feature column with zero standard deviation would be divided by 0 here — guard
# against that before enabling.

'''
mu = torch.empty(784)
std = torch.empty(784)
for i in range(784) :
    mu[i] = X_train[:,i].mean().item();
    std[i] = X_train[:,i].std().item();
    X_train[:,i] = (X_train[:,i]-mu[i])/std[i]
'''

In [None]:
import torch.nn as nn

In [None]:
# Creating a Model class that inherits nn.Module
class Model(nn.Module) :
    """Fully connected classifier.

    input layer (784 features) ---> Hidden Layer 1 (25 neurons) ---> Hidden Layer 2 (15 neurons)
    ---> output layer (10 neurons, one raw score per class)

    Args:
        in_features: number of input features per datapoint.
        h1: number of neurons in the first hidden layer.
        h2: number of neurons in the second hidden layer.
        out_features: number of output classes.
    """
    def __init__(self , in_features = 784 , h1 = 25 , h2 = 15 , out_features = 10) :
        super().__init__()
        self.fc1 = nn.Linear(in_features,h1)
        self.fc2 = nn.Linear(h1,h2)
        self.out = nn.Linear(h2,out_features)

    # function for forward propagation :
    def forward_prop(self,x) :
        """Return the raw class scores (logits) for input `x` of shape (..., in_features)."""
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        # No activation on the output layer: nn.CrossEntropyLoss expects unnormalized
        # logits, and a ReLU here would clamp every negative score to 0.
        return self.out(x)

    def forward(self,x) :
        """Standard nn.Module entry point, so `model(x)` works; same as forward_prop."""
        return self.forward_prop(x)
        

In [None]:
# Seed torch's random number generator first, then build the network with its default sizes.
torch.manual_seed(seed=3966)
model = Model()

In [None]:
# Splitting the data into train and test set :
# the X_train built above is split into 2 parts, 85 % of it is used to train the model
# and the remaining 15 % for testing.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    y_train,
    test_size=0.15,
    random_state=41,
)

# necessary step, else will give error while using criterion (error calc. step)
y_train = y_train.long()

In [None]:
# Criterion used to measure the error of the model : Cross Entropy loss.
criterion = nn.CrossEntropyLoss()
# Adam optimizer updates the parameters of the NN; learning rate is 0.003, can be changed in future.
# model.parameters() yields the weights and biases of all layers of the Model instance.
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
# Print the layer structure of the model (printing `model.parameters` without calling it
# would only show the bound method).
print(model)

In [None]:
# Training our model : the whole training set is sent through the NN in every iteration.
num_iterations = 100000   # the num of times we will be sending our data across the model/NN

# The criterion is given the labels as a 1-D tensor; y_train is stored as a column,
# so it is squeezed once here instead of in every iteration.
train_targets = y_train.squeeze(1)

for i in range(num_iterations) :

    # forward propagation :
    y_pred = model.forward_prop(X_train)
    loss = criterion(y_pred,train_targets)

    if i%100 == 0 :
        # .item() prints the plain number instead of the tensor representation
        print(f"iteration number : {i} , loss : {loss.item()}")

    # back propagation and updation of weights and biases :
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [None]:
# Checking the accuracy of the trained model on the test set.
# All test rows are pushed through the model in one batch instead of one at a time.
num_preview = 20    # how many "true label / predicted label" pairs to print; None prints all

with torch.no_grad():
    test_predictions = model.forward_prop(X_test).argmax(dim=1)

# y_test is stored as a column; flatten it so it lines up with the predictions.
test_labels = y_test.reshape(-1)
correct = int((test_predictions == test_labels).sum().item())

rows_to_show = len(test_labels) if num_preview is None else min(num_preview, len(test_labels))
for i in range(rows_to_show) :
    print(f'{i+1}.) {y_test[i].item()} \t {test_predictions[i].item()}')

# accuracy in percent
print(correct*100/y_test.shape[0])

In [None]:
# achieved an accuracy of 89.4 %