<a href="https://colab.research.google.com/github/ram130849/Deep_Learning_Systems_Assignments/blob/main/PyTorch/Nithin/DLS_Assignment_1_nvaradha.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Deep Learning Systems Fall 2022 Assignment 1

### Nithin Varadharajan (nvaradha)

### Problem 1: A Detailed View to MNIST Classification

1. Train a fully-connected net for MNIST classification (sorry, no CNN please, yet). It should
be with 5 hidden layers each of which is with 1024 hidden units. Feel free to use whatever
techniques you learned in class. You should be able to get the test accuracy above 98%.




In [1]:
import torch
import numpy as np
import time
import matplotlib.pyplot as plt

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
print(f"Using {device} device")

Using cuda:0 device


In [4]:
import torchvision
mnist_train=torchvision.datasets.MNIST('mnist',
                                      train=True,
                                      download=True,
                                      transform=torchvision.transforms.Compose([
                                                                                torchvision.transforms.ToTensor(),
                                                                                torchvision.transforms.Normalize((0.1307,), (0.3081,))
                                                                                ]))

mnist_test=torchvision.datasets.MNIST('mnist',
                                      train=False,
                                      download=True,
                                      transform=torchvision.transforms.Compose([
                                                                                torchvision.transforms.ToTensor(),
                                                                                torchvision.transforms.Normalize((0.1307,), (0.3081,))
                                                                                ]))


In [5]:
# some hyperparameters
n_epochs = 50
batch_size_train =7000
batch_size_test = 1000

random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

<torch._C.Generator at 0x7f17d9aeb710>

In [6]:
#Reading number of samples in each set
n_train = len(mnist_train)
n_test = len(mnist_test)

print(n_train)
print(n_test)

60000
10000


In [7]:
# Define data loaders
train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=batch_size_test, shuffle=True)

In [8]:
# Building the network
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [9]:
class Network(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Inputs to hidden layer linear transformation
        self.input = nn.Linear(784, 1024)
        # Output layer, 10 units - one for each digit
        self.fc1 = nn.Linear(1024, 1024)
        self.fc2 = nn.Linear(1024, 1024)
        self.fc3 = nn.Linear(1024, 1024)
        self.fc4 = nn.Linear(1024, 1024)
        self.fc5 = nn.Linear(1024, 1024)
        self.output = nn.Linear(1024, 10)
        
        # Define sigmoid activation and softmax output 
        self.ReLU = nn.ReLU()

        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout()
        
    def forward(self, x):
        
        #x = self.flatten(x)
        #print(x.size())
        # Pass the input tensor through each of our operations
        x = self.input(x)
        x = self.ReLU(x)

        x = self.fc1(x)
        x = self.ReLU(x)

        x = self.fc2(x)
        x = self.ReLU(x)

        x = self.fc3(x)
        x = self.ReLU(x)

        x = self.fc4(x)
        x = self.ReLU(x)

        x = self.fc5(x)
        x = self.ReLU(x)

        x = self.output(x)
        x = F.softmax(x, dim = 1)
        
        return x




In [10]:
#Uniform initialization      
def weights_init_uniform_rule(m):
  classname = m.__class__.__name__
  # for every Linear layer in a model..
  if classname.find('Linear') != -1:
      # get the number of the inputs
      n = m.in_features
      y = 1.0/np.sqrt(n)
      m.weight.data.uniform_(-y, y)
      m.bias.data.fill_(0)

model = Network().to(device)
model.apply(weights_init_uniform_rule)


print(model)

Network(
  (input): Linear(in_features=784, out_features=1024, bias=True)
  (fc1): Linear(in_features=1024, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=1024, bias=True)
  (fc3): Linear(in_features=1024, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=1024, bias=True)
  (fc5): Linear(in_features=1024, out_features=1024, bias=True)
  (output): Linear(in_features=1024, out_features=10, bias=True)
  (ReLU): ReLU()
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dropout): Dropout(p=0.5, inplace=False)
)


In [11]:
# send parameters to device for gpu

optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
loss_func = nn.CrossEntropyLoss()

In [12]:
train_losses = []
train_counter = []
test_losses = []
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]


In [13]:
def train(epoch):
  train_loss= 0
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    images_reshaped = data.reshape(-1,784).to(device)
    labels = target.to(device)


    # Forward pass
    outputs = model(images_reshaped)
    loss = loss_func(outputs, labels)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    train_loss += loss.item()

    # with torch.no_grad():
    #   val_images,val_labels = next(iter(val_loader))
    #   val_images_reshaped = val_images.reshape(-1, input_dm1*input_dm2).to(device)
    #   val_labels_pred = model.forward(val_images_reshaped).argmax(1).cpu().numpy()
    #   performance = eval_func(val_labels.numpy(),val_labels_pred)


    #optimizer.zero_grad()
  print("Average training loss per sample  = "+str(train_loss/n_train))
  train_loss = 0

In [16]:
from sklearn.metrics import accuracy_score
def test():
  model.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      #output = model(data.to(device))
      #test_loss += loss_func(output.to(device), target.to(device))
   
      #pred = (output.data.max(1, keepdim=True)[1]).cpu()
      #correct += pred.eq(target.data.view_as(pred)).cpu().sum()
      
      val_images_reshaped = data.reshape(-1, 784).to(device)
      val_labels_pred = model.forward(val_images_reshaped).argmax(1).cpu().numpy()
      performance = accuracy_score(target.numpy(),val_labels_pred)
      print(performance)
  #test_loss /= len(test_loader.dataset)
  #test_losses.append(test_loss)
  #print(performance)

In [17]:
test()

0.978
0.968
0.978
0.981
0.972
0.979
0.974
0.972
0.976
0.979


In [15]:
test()
for epoch in range(1, n_epochs + 1):
  train(epoch)
  test()

0.112
Average training loss per sample  = 0.0003395279804865519
0.342
Average training loss per sample  = 0.0003020373543103536
0.556
Average training loss per sample  = 0.0002784874657789866
0.633
Average training loss per sample  = 0.0002671024481455485
0.784
Average training loss per sample  = 0.0002490809758504232
0.876
Average training loss per sample  = 0.00023654850721359252
0.901
Average training loss per sample  = 0.00023153042197227478
0.924
Average training loss per sample  = 0.00022921574314435322
0.926
Average training loss per sample  = 0.0002281187335650126
0.938
Average training loss per sample  = 0.00022701450983683268
0.944
Average training loss per sample  = 0.00022617329557736714
0.961
Average training loss per sample  = 0.00022556158105532329
0.95
Average training loss per sample  = 0.00022504740953445434
0.964
Average training loss per sample  = 0.00022437609831492107
0.958
Average training loss per sample  = 0.0002241708258787791
0.968
Average training loss per s