In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

In [2]:
"""
    If GPU is available then run on GPU else run on CPU
"""
if torch.cuda.is_available():  
    dev = "cuda:0" 
else:  
    dev = "cpu"  
device = torch.device(dev) 

In [3]:
"""
    . torchvision.datasets.MNIST downloads PIL images 
    . Hence we need to define transform object to convert downloaded MNIST PIL images to Tensors
    . Batch size of 32 was used. 
    
    Reference --> https://pytorch.org/vision/stable/datasets.html#mnist
    
"""
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# Creating Data Loader object for creating the Training set by using the field train=True  

train_data = torchvision.datasets.MNIST("./",train=True,download=True ,transform=transform)
train_data_loaded = torch.utils.data.DataLoader(train_data,
                                          batch_size=32,
                                          shuffle=True)


# Creating Data Loader object for creating the Test set by using the field train=False 
test_data = torchvision.datasets.MNIST("./",train=False,download=True,transform=transform)
test_data_loaded = torch.utils.data.DataLoader(test_data,
                                          batch_size=32,
                                          shuffle=True)

In [4]:
"""
Creating the Model

input (inp) ==> 28*28 neurons
layer1      ==> 1000 neurons
layer2      ==> 1000 neurons
layer3      ==> 500 neurons
layer4      ==> 200 neurons
output(out) ==> 10 neurons  Since number of classes are 10

* Softmax layer is not being used as the final layer becuase Crossentropy loss
  is being used which takes the input as logits and calculates softmax before computing loss. 
  
  Reference --> https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
"""

class Model(torch.nn.Module):
    
    # Intialising the number of neurons in the layers
    def __init__(self):
        super(Model,self).__init__()
        self.inp = nn.Linear(28*28,1000)
        self.layer1 = nn.Linear(1000,1000)
        self.layer2 = nn.Linear(1000,1000)
        self.layer3 = nn.Linear(1000,500)
        self.layer4 = nn.Linear(500,200)
        self.out = nn.Linear(200,10)
    
    # Defining the forward pass of the model
    
    def forward(self,x):
        x = torch.flatten(x,1)
        x = self.inp(x)
        x = F.relu(x)
        x = self.layer1(x)
        x = F.relu(x)
        x = self.layer2(x)
        x = F.relu(x)
        x = self.layer3(x)
        x = F.relu(x)
        x = self.layer4(x)
        x = F.relu(x)
        x = self.out(x)
        #x = F.softmax(x,dim = 1)  
        return x

In [5]:
model = Model()
model.to(torch.device("cuda:0"))                   # Loading the model to GPU is avalible else CPU

Model(
  (inp): Linear(in_features=784, out_features=1000, bias=True)
  (layer1): Linear(in_features=1000, out_features=1000, bias=True)
  (layer2): Linear(in_features=1000, out_features=1000, bias=True)
  (layer3): Linear(in_features=1000, out_features=500, bias=True)
  (layer4): Linear(in_features=500, out_features=200, bias=True)
  (out): Linear(in_features=200, out_features=10, bias=True)
)

In [6]:
"""
   1) Cross Entropy loss is being used because this is a classification model.
   2) Softmax of the ouput logits is first calculated and then classified to calculate the loss 
   3) Adam Optimizer is used. 
"""
criterion = torch.nn.CrossEntropyLoss()            # initializing Loss function
optimizer = torch.optim.Adam(model.parameters())   # initializing Adam Optimizer 

In [7]:
"""
   Train the model for 50 epochs and printing loss for each batch and epoch.
"""
epochs = 50                                        # number of epochs


for epoch in range(epochs):                        # iterate over number of epochs
    running_loss = 0.0
    for i,images in enumerate(train_data_loaded):  # iterate over the batches of training data
        batch_images,batch_labels = images
        batch_images = batch_images.to(device)     
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad()  
        outputs = model(batch_images)              # computing forward pass 
        loss = criterion(outputs, batch_labels)    # Calculating loss
        loss.backward()                            # Calculating the gradients
        optimizer.step()                           # Updating the weights
        
        """
            Code below for printing of Losses for each batch and epoch was refered from the pytorch Documentation
            https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
            
        """
        running_loss += loss.item()
        if i % 200 == 199:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

[1,   200] loss: 0.076
[1,   400] loss: 0.032
[1,   600] loss: 0.026
[1,   800] loss: 0.024
[1,  1000] loss: 0.021
[1,  1200] loss: 0.019
[1,  1400] loss: 0.018
[1,  1600] loss: 0.017
[1,  1800] loss: 0.016
[2,   200] loss: 0.012
[2,   400] loss: 0.012
[2,   600] loss: 0.014
[2,   800] loss: 0.016
[2,  1000] loss: 0.013
[2,  1200] loss: 0.011
[2,  1400] loss: 0.012
[2,  1600] loss: 0.011
[2,  1800] loss: 0.013
[3,   200] loss: 0.009
[3,   400] loss: 0.010
[3,   600] loss: 0.010
[3,   800] loss: 0.008
[3,  1000] loss: 0.010
[3,  1200] loss: 0.010
[3,  1400] loss: 0.009
[3,  1600] loss: 0.010
[3,  1800] loss: 0.008
[4,   200] loss: 0.008
[4,   400] loss: 0.008
[4,   600] loss: 0.007
[4,   800] loss: 0.007
[4,  1000] loss: 0.008
[4,  1200] loss: 0.006
[4,  1400] loss: 0.007
[4,  1600] loss: 0.007
[4,  1800] loss: 0.007
[5,   200] loss: 0.007
[5,   400] loss: 0.007
[5,   600] loss: 0.006
[5,   800] loss: 0.006
[5,  1000] loss: 0.006
[5,  1200] loss: 0.007
[5,  1400] loss: 0.006
[5,  1600] 

[39,   800] loss: 0.002
[39,  1000] loss: 0.001
[39,  1200] loss: 0.001
[39,  1400] loss: 0.002
[39,  1600] loss: 0.003
[39,  1800] loss: 0.002
[40,   200] loss: 0.001
[40,   400] loss: 0.001
[40,   600] loss: 0.001
[40,   800] loss: 0.007
[40,  1000] loss: 0.001
[40,  1200] loss: 0.001
[40,  1400] loss: 0.002
[40,  1600] loss: 0.001
[40,  1800] loss: 0.003
[41,   200] loss: 0.001
[41,   400] loss: 0.001
[41,   600] loss: 0.002
[41,   800] loss: 0.001
[41,  1000] loss: 0.001
[41,  1200] loss: 0.001
[41,  1400] loss: 0.001
[41,  1600] loss: 0.002
[41,  1800] loss: 0.001
[42,   200] loss: 0.001
[42,   400] loss: 0.000
[42,   600] loss: 0.001
[42,   800] loss: 0.000
[42,  1000] loss: 0.000
[42,  1200] loss: 0.001
[42,  1400] loss: 0.002
[42,  1600] loss: 0.001
[42,  1800] loss: 0.000
[43,   200] loss: 0.004
[43,   400] loss: 0.004
[43,   600] loss: 0.002
[43,   800] loss: 0.001
[43,  1000] loss: 0.000
[43,  1200] loss: 0.001
[43,  1400] loss: 0.001
[43,  1600] loss: 0.002
[43,  1800] loss

In [8]:
"""
   Saving the trained model
"""
torch.save(model.state_dict(), "mnist.pth")