<a href="https://colab.research.google.com/github/lamini-H/DeepLearningProjects/blob/main/Feed_Forward_Neural_Network_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable

In [3]:
input_size = 784 #number of input neurons
hidden_size = 400 #number of hidden neurons
out_size = 10 #number of classes (0-9)
epochs = 10 #How many times we pass our entire dataset into our network
batch_size = 100 #Input size of the data during one iteration
learning_rate = 0.001 #How fast we are learning

In [26]:
train_dataset = datasets.MNIST(root='./data',
                           train=True,
                           transform=transforms.ToTensor(),
                           download=True)

test_dataset = datasets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())

In [27]:
#make data iterable by loading it to a loader. Shuffle the training data to make it independant of the order
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [6]:
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, out_size):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) #First Layer
        self.relu = nn.ReLU() #First Layer Activation
        self.fc2 = nn.Linear(hidden_size, hidden_size) #Second Layer
        self.fc3 = nn.Linear(hidden_size, out_size) #Second Layer Activation

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out

In [7]:
#Create an object of the class to represent the network
net=Net(input_size, hidden_size, out_size)
CUDA = torch.cuda.is_available()
if CUDA:
  net = net.cuda()
#The Loss function. The Cross Ent
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [13]:
#visualize the train loader
for i, (images, labels) in enumerate(train_loader):
  print(images.size())
  images = images.view(-1, 28*28)
  print(images.size())
  break

torch.Size([100, 1, 28, 28])
torch.Size([100, 784])


In [16]:
#Train the network
correct_train =0
total_train =0
for epoch in range(epochs):
  for i, (images, labels) in enumerate(train_loader):
    #Flatten the image from size (batch,1,28,28) --> (100,1,28,28) where 1 represents
    #to size (100,784) and wrap it in a variable
    images = Variable(images.view(-1, 28*28))
    labels = Variable(labels)
    if CUDA:
      images = images.cuda()
      labels = labels.cuda()
      #Clear the param_grad in param = param - lr*param_grad, so it won't be accumulated
    optimizer.zero_grad()
    outputs = net(images)
    _, predicted = torch.max(outputs.data, 1) #Forward pass
    total_train += labels.size(0) #Return the second argument
    if CUDA:
      correct_train += (predicted.cpu() == labels.cpu()).sum().item()
    else:
      correct_train += (predicted == labels).sum().item()
    loss = criterion(outputs, labels) #Difference between the actual and predicted (loss function)
    loss.backward() #Backpropagation
    optimizer.step() #Upadate the weights
  if(i+1) % 100 ==0:
    print("Epoch [{}/{}], Iteration [{}/{}], Training Loss: {}, Training Accurancy: {}%".format(epoch+1, epochs, i+1, len(train_dataset)//batch_size, loss.data[0], (100*correct_train/total_train)))

print("Finished Training")

Epoch [1/10], Iteration [600/600], Training Loss: 0.05147162452340126, Training Accurancy: 97.08833333333334%
Epoch [2/10], Iteration [600/600], Training Loss: 0.05759261175990105, Training Accurancy: 97.57%
Epoch [3/10], Iteration [600/600], Training Loss: 0.04583241418004036, Training Accurancy: 97.89833333333333%
Epoch [4/10], Iteration [600/600], Training Loss: 0.012083543464541435, Training Accurancy: 98.16291666666666%
Epoch [5/10], Iteration [600/600], Training Loss: 0.029212022200226784, Training Accurancy: 98.349%
Epoch [6/10], Iteration [600/600], Training Loss: 0.11228980123996735, Training Accurancy: 98.50944444444444%
Epoch [7/10], Iteration [600/600], Training Loss: 0.005967712961137295, Training Accurancy: 98.63214285714285%
Epoch [8/10], Iteration [600/600], Training Loss: 0.005918025970458984, Training Accurancy: 98.7325%
Epoch [9/10], Iteration [600/600], Training Loss: 0.004324506502598524, Training Accurancy: 98.8212962962963%
Epoch [10/10], Iteration [600/600], Tra

In [28]:
#Test the network (No Loss and weight calculation, no weight update)

correct = 0
total = 0

for images, labels in test_loader:
  images = Variable(images.view(-1, 28*28))
  if CUDA:
    images = images.cuda()
  outputs = net(images)
  _, predicted = torch.max(outputs.data, 1)
  total += labels.size(0)
  if CUDA:
    correct += (predicted.cpu() == labels.cpu()).sum()
  else:
    correct += (predicted == labels).sum().sum()
print("Accuracy of the network on the 10000 test images: {}%".format(100*correct/total))



Accuracy of the network on the 10000 test images: 98.0199966430664%
