<a href="https://colab.research.google.com/github/kaustubh41096/Neural-Networks/blob/main/MLP_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [9]:
# Device configuration: run on the GPU when CUDA is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reproducibility: seed PyTorch's global random number generator so that the
# random weight initialisation and the shuffling order of the training data
# repeat after "Restart Kernel -> Run All". GPU kernels may still introduce
# small run-to-run differences.
seed = 42
torch.manual_seed(seed)

# Hyper-parameters
input_size = 784      # Number of input features: each 28x28 image flattened to a vector
hidden_size1 = 350    # Number of neurons in hidden layer 1
hidden_size2 = 200    # Number of neurons in hidden layer 2
num_classes = 10      # Number of target labels (digits 0 - 9)
num_epochs = 10       # Number of full passes over the training set
batch_size = 100      # Number of samples per mini-batch
learning_rate = 0.001 # Step size the optimizer uses when moving down the gradient slope


In [10]:
# MNIST dataset
# Both splits live under the same root directory and use the same
# image -> tensor conversion, so those two pieces are defined once.
data_root = './data'
to_tensor = transforms.ToTensor()

# Training split: downloaded on first use if it is not already on disk
train_dataset = torchvision.datasets.MNIST(data_root,
                                           train=True,
                                           transform=to_tensor,
                                           download=True)

# Test split: read from the same root directory (no download requested here)
test_dataset = torchvision.datasets.MNIST(data_root,
                                          train=False,
                                          transform=to_tensor)

# Data loaders
# Serve each split in mini-batches of `batch_size`. Only the training data is
# shuffled; the order of the test samples does not affect the accuracy.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [11]:
# Multilayer perceptron: two ReLU-activated hidden layers and a linear output layer
class NeuralNet(nn.Module):
    """Fully connected classifier with two hidden layers.

    Args:
        input_size: number of features in each input vector.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
        num_classes: number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        """Create the three linear layers and the shared ReLU activation."""
        super().__init__()
        self.input_size = input_size
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_size1)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(in_features=hidden_size1, out_features=hidden_size2)
        self.l3 = nn.Linear(in_features=hidden_size2, out_features=num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for the input batch `x`.

        No activation or softmax is applied to the final layer; the
        notebook pairs this model with CrossEntropyLoss, which works on
        unnormalised scores.
        """
        hidden1 = self.relu(self.l1(x))
        hidden2 = self.relu(self.l2(hidden1))
        return self.l3(hidden2)


In [12]:
# Build the network from the configured layer sizes, then move it to the selected device
model = NeuralNet(
    input_size=input_size,
    hidden_size1=hidden_size1,
    hidden_size2=hidden_size2,
    num_classes=num_classes,
)
model = model.to(device)

# Loss and optimizer
# Cross-entropy loss on the raw network outputs, minimised with Adam at the configured learning rate
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)


In [None]:
# Train the model
n_total_steps = len(train_loader)  # Number of mini-batches in one epoch

# Explicitly select training mode. The current Linear/ReLU layers behave the
# same in both modes, but this keeps training correct if the cell is re-run
# after the model was switched to eval mode, or if dropout / batch-norm
# layers are added to the network later.
model.train()

for epoch in range(num_epochs): # One epoch = one full pass over all training batches
    for i, (images, labels) in enumerate(train_loader): # Forward pass and backpropagation for each mini-batch
        # original shape: [batch_size, 1, 28, 28]
        # reshaped:       [batch_size, input_size]
        # The MLP expects flat vectors, so each image is flattened to `input_size`
        # features (labels are only moved to the device, not reshaped).
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad() # Clear gradients left over from the previous batch
        loss.backward() # Backpropagate to compute the gradients of the loss
        optimizer.step() # Update the weights using the freshly computed gradients

        # Report the loss of the current batch every 100 steps
        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')


In [14]:
# Test the model
# Switch to evaluation mode. The current Linear/ReLU layers are unaffected,
# but this keeps the measurement correct if dropout or batch-norm layers are
# added to the network later.
model.eval()

n_correct = 0   # Running count of correctly classified test images
n_samples = 0   # Running count of test images seen so far

# In the test phase we don't need to compute gradients (saves memory and time)
with torch.no_grad():
    for images, labels in test_loader: # The test loader yields one batch of images at a time
        # Flatten each image to a vector of `input_size` features for the MLP
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)   # Class scores for every image in the batch
        # The predicted class is the index of the largest score in each row
        predicted = outputs.argmax(dim=1)
        n_samples += labels.size(0)                       # Add the number of samples in this batch
        n_correct += (predicted == labels).sum().item()   # Add the number of correct predictions in this batch

# Accuracy is computed once, after all batches have been processed
acc = 100.0 * n_correct / n_samples
# Report the number of images actually evaluated instead of a hard-coded count
print(f'Accuracy of the network on the {n_samples} test images: {acc} %')

Accuracy of the network on the 10000 test images: 98.08 %
