<h2 align="center">Codebasics DL Course: Handwritten Digits Classification Using Multilayer Perceptron</h2>

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

from matplotlib import pyplot as plt

ModuleNotFoundError: No module named 'torchvision'

### Load the Dataset

In [None]:
# Load the MNIST dataset.
# Preprocessing applied to every image:
#   1. ToTensor  -> converts the image to a float tensor with values in [0, 1]
#   2. Normalize -> (x - 0.5) / 0.5 on the single channel, i.e. rescales to [-1, 1]
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Training and test splits are stored under ./data and downloaded if missing.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

In [None]:
# Number of samples in the training and test splits, displayed as a (train, test) tuple.
len(train_dataset), len(test_dataset)

In [None]:
# Wrap both splits in mini-batch loaders (64 samples per batch).
# Only the training split is shuffled; the test split keeps its original order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Pull a single training batch so the following cells can inspect
# tensor shapes and preview a sample image.
data_iter = iter(train_loader)
images, labels = next(data_iter)

In [None]:
# Shapes of the image tensor and the label tensor of the sampled mini-batch.
images.shape, labels.shape

In [None]:
# Shape of the first image in the sampled batch.
images[0].shape

In [None]:
# Shape of the same image after squeeze() removes its size-1 dimensions.
images[0].squeeze().shape

In [None]:
# Preview the sample at index 4 of the inspected batch as a small grayscale image.
fig, ax = plt.subplots(figsize=(2, 2))
ax.imshow(images[4].squeeze(), cmap="gray")
plt.show()

In [None]:
# Ground-truth label of the sample at index 4 of the batch (the same index that was plotted).
labels[4]

### Train a Fully Connected Neural Network (FCNN)

In [None]:
class DigitClassifier(nn.Module):
    """Multilayer perceptron that maps an image to one raw score (logit) per class.

    Architecture: Flatten -> [Linear -> ReLU] for every hidden layer -> Linear.
    With the default arguments this is 784 -> 128 -> 64 -> 10, i.e. a
    28x28 image is flattened to 784 values and classified into 10 digits.

    The output layer deliberately has no activation: the network returns raw
    logits, which is what ``nn.CrossEntropyLoss`` expects as input.

    Args:
        input_size: Number of values per sample after flattening
            (default ``28 * 28`` for MNIST images).
        hidden_sizes: Width of each hidden layer, in order
            (default ``(128, 64)``).
        num_classes: Number of output scores (default ``10``).
    """

    def __init__(self, input_size=28 * 28, hidden_sizes=(128, 64), num_classes=10):
        super().__init__()

        # Flatten turns each sample into a single 1-D vector of `input_size` values.
        layers = [nn.Flatten()]

        # Each hidden layer is a Linear projection followed by a ReLU activation.
        in_features = input_size
        for width in hidden_sizes:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width

        # Output layer: one score per class, no activation (raw logits).
        layers.append(nn.Linear(in_features, num_classes))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of inputs ``x``."""
        return self.network(x)

In [None]:
# Instantiate the model, define the loss function and the optimizer.
#
# Choice of loss by problem type:
#   * regression                -> mean squared error (MSE)
#   * binary classification     -> binary cross-entropy
#   * multiclass classification -> cross-entropy (used here: ten classes, digits 0-9)

# Seed PyTorch's global RNG so that weight initialisation and batch shuffling,
# and therefore the losses printed below, are repeatable when this cell is re-run.
torch.manual_seed(42)

model = DigitClassifier()
criterion = nn.CrossEntropyLoss()  # takes raw logits and integer class labels
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam: gradient descent with adaptive step sizes;
                                                      # a common default optimizer

# Training the model
epochs = 5  # number of full passes over the training set (kept small)
model.train()  # make sure the model is in training mode before optimising
for epoch in range(epochs):
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()              # Clear the gradients left over from the previous batch
        outputs = model(images)            # Forward pass: one score per digit for every image
        loss = criterion(outputs, labels)  # Compare predictions (outputs) with the true labels
        loss.backward()                    # Backpropagate, i.e. compute the gradients
        optimizer.step()                   # Update the weights

        running_loss += loss.item()

    # Average of the per-batch losses for this epoch
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss / len(train_loader):.4f}")

In [None]:
# Testing the model
model.eval()  # Switch to evaluation mode

# Counters: correctly classified images and total images seen
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for inference
    for images, labels in test_loader:
        outputs = model(images)
        # Index of the highest score in each row = predicted digit for that image.
        # (argmax replaces the legacy `torch.max(outputs.data, 1)` idiom; `.data`
        # is unnecessary inside no_grad.)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)                        # number of images in this batch
        correct += (predicted == labels).sum().item()  # number of images predicted correctly

print(f'Accuracy on the test set: {100 * correct / total:.2f}%')  # overall accuracy in percent

### Classification Report & Confusion Matrix

In [None]:
# Collect the predicted and true labels for the whole test set
model.eval()  # Switch to evaluation mode

all_predicted = []
all_labels = []

with torch.no_grad():  # no gradients are needed for inference
    for images, labels in test_loader:
        outputs = model(images)
        # `outputs` has one row per image and one column per digit class
        # (10 scores, for digits 0-9). argmax over dim=1 returns, for every row,
        # the column index holding the highest score; that index is the predicted digit.
        # Example row: [0.1, 0.5, -0.2, 3.5, 1.1, -0.7, -0.55, 1.65, 1.8, 1.45]
        #   -> the maximum (3.5) is at index 3, so the prediction is digit 3.
        predicted = outputs.argmax(dim=1)

        # Append this batch's labels and predictions to the running lists
        all_labels.extend(labels.numpy())
        all_predicted.extend(predicted.numpy())

In [None]:
# Number of predictions collected while iterating over test_loader.
len(all_predicted)

In [None]:
from sklearn.metrics import classification_report

# Text summary of per-class metrics, comparing the true labels with the
# predictions collected over the test set.
report = classification_report(y_true=all_labels, y_pred=all_predicted)
print(report)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Confusion matrix: rows are the true digits, columns are the predicted digits.
cm = confusion_matrix(y_true=all_labels, y_pred=all_predicted)
cm

In [None]:
# Plot confusion matrix as an annotated heatmap (one tick per class on both axes)
class_ticks = range(len(cm))

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_ticks,
    yticklabels=class_ticks,
    ax=ax,
)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix")
plt.show()