<a href="https://colab.research.google.com/github/sreenesh-reddy/Infosys-Springboard-Internship/blob/main/MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Define an improved MLP model with batch normalization and dropout for better regularization and stability
class ImprovedMLP(nn.Module):
    """MLP classifier with two hidden layers, batch normalization and dropout.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout. The output
    layer is a plain Linear that returns raw logits, which is what
    ``nn.CrossEntropyLoss`` expects.

    Args:
        input_dim: Number of features per sample once the input is flattened.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of logits produced per sample.
        dropout: Drop probability applied after each hidden stage.

    The defaults reproduce the original fixed architecture
    (784 -> 512 -> 256 -> 10 with dropout 0.3).
    """

    def __init__(self, input_dim=28 * 28, hidden1=512, hidden2=256,
                 num_classes=10, dropout=0.3):
        super().__init__()
        # Fully connected layers (created first, in this order, so that
        # seeded weight initialization matches the original definition)
        self.fc1 = nn.Linear(input_dim, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

        self.relu = nn.ReLU()               # Activation function: ReLU for non-linearity
        self.softmax = nn.Softmax(dim=1)    # Not applied in forward(); kept for callers that want probabilities

        # Regularization techniques
        self.dropout = nn.Dropout(dropout)  # Dropout to prevent overfitting
        self.bn1 = nn.BatchNorm1d(hidden1)  # Batch normalization after the first layer
        self.bn2 = nn.BatchNorm1d(hidden2)  # Batch normalization after the second layer

    def forward(self, x):
        """Return class logits for a batch of inputs.

        Args:
            x: Tensor whose elements can be regrouped into rows of
                ``fc1.in_features`` values, e.g. (batch, 1, 28, 28).

        Returns:
            Tensor of shape (rows, num_classes) holding unnormalized logits.
        """
        # Flatten to (rows, features). reshape() is used instead of view()
        # because view() raises on non-contiguous tensors (e.g. transposed
        # images), while reshape() copies only when it has to.
        x = x.reshape(-1, self.fc1.in_features)

        # First layer: Fully connected -> BatchNorm -> ReLU -> Dropout
        x = self.dropout(self.relu(self.bn1(self.fc1(x))))

        # Second layer: Fully connected -> BatchNorm -> ReLU -> Dropout
        x = self.dropout(self.relu(self.bn2(self.fc2(x))))

        # Output layer: logits only (no softmax when using CrossEntropyLoss)
        return self.fc3(x)

# Choose the compute device: CUDA when a GPU is visible, otherwise the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Preprocessing pipeline: image -> tensor, then shift/scale with mean 0.5 and
# std 0.5 so pixel values end up in the [-1, 1] range
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5,), (0.5,))
transform = transforms.Compose([to_tensor, normalize])

# MNIST train and test splits, stored under ./data and fetched when missing
mnist_kwargs = dict(root='./data', download=True, transform=transform)
train_dataset = datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = datasets.MNIST(train=False, **mnist_kwargs)

# Mini-batch iterators: training batches are reshuffled every epoch, test
# batches keep the dataset order
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model on the selected device, cross-entropy loss on raw logits, and Adam
# with a learning rate of 1e-3
model = ImprovedMLP()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training loop: one full pass over train_loader per epoch
epochs = 10
for epoch in range(epochs):
    model.train()        # training mode for the dropout / batch-norm layers
    running_loss = 0.0   # sum of the per-batch mean losses in this epoch
    correct = 0          # training samples classified correctly so far
    total = 0            # training samples seen so far

    for images, targets in train_loader:
        images = images.to(device)
        targets = targets.to(device)

        # Start from clean gradients, then forward pass and loss
        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, targets)

        # Backpropagate and apply one optimizer update
        loss.backward()
        optimizer.step()

        # Running statistics; the predicted class is the arg-max logit
        running_loss += loss.item()
        preds = logits.argmax(dim=1)
        total += targets.shape[0]
        correct += int((preds == targets).sum())

    # Report mean batch loss and training accuracy for the epoch
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")

# Evaluation on the held-out test split
model.eval()   # evaluation mode for the dropout / batch-norm layers
correct = 0    # test samples classified correctly
total = 0      # test samples seen

# No gradients are needed for inference
with torch.no_grad():
    for images, targets in test_loader:
        images = images.to(device)
        targets = targets.to(device)

        # Predicted class is the index of the largest logit in each row
        preds = model(images).argmax(dim=1)
        total += targets.shape[0]
        correct += int((preds == targets).sum())

# Report overall accuracy as a percentage
print(f"Test Accuracy: {100 * correct / total:.2f}%")


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 15.9MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 524kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 4.35MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 4.47MB/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Epoch 1/10, Loss: 0.2372, Accuracy: 93.00%
Epoch 2/10, Loss: 0.1225, Accuracy: 96.23%
Epoch 3/10, Loss: 0.0980, Accuracy: 96.96%
Epoch 4/10, Loss: 0.0790, Accuracy: 97.52%
Epoch 5/10, Loss: 0.0724, Accuracy: 97.68%
Epoch 6/10, Loss: 0.0627, Accuracy: 97.98%
Epoch 7/10, Loss: 0.0557, Accuracy: 98.22%
Epoch 8/10, Loss: 0.0524, Accuracy: 98.26%
Epoch 9/10, Loss: 0.0470, Accuracy: 98.42%
Epoch 10/10, Loss: 0.0425, Accuracy: 98.53%
Test Accuracy: 98.22%


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Improved MLP model with more layers and batch normalization
class ImprovedMLP(nn.Module):
    """Feed-forward classifier: two hidden layers with batch norm and dropout.

    Layout: input -> [Linear, BatchNorm1d, ReLU, Dropout] x 2 -> Linear.
    ``forward`` returns raw logits, so it pairs directly with
    ``nn.CrossEntropyLoss``.

    Args:
        input_dim: Flattened feature count per sample (default 28 * 28).
        hidden1: Units in the first hidden layer (default 512).
        hidden2: Units in the second hidden layer (default 256).
        num_classes: Number of output classes (default 10).
        dropout: Dropout probability after each hidden layer (default 0.3).
    """

    def __init__(self, input_dim=28 * 28, hidden1=512, hidden2=256,
                 num_classes=10, dropout=0.3):
        super().__init__()
        # Linear layers: input -> hidden1 -> hidden2 -> num_classes.
        # Creation order is kept so seeded initialization is unchanged.
        self.fc1 = nn.Linear(input_dim, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

        self.relu = nn.ReLU()
        # Not used in forward(); available for turning logits into probabilities
        self.softmax = nn.Softmax(dim=1)

        # Adding Dropout for regularization
        self.dropout = nn.Dropout(dropout)

        # Batch normalization to stabilize training
        self.bn1 = nn.BatchNorm1d(hidden1)
        self.bn2 = nn.BatchNorm1d(hidden2)

    def forward(self, x):
        """Compute logits for a batch.

        Args:
            x: Input tensor that can be regrouped into rows of
                ``fc1.in_features`` values, e.g. (batch, 1, 28, 28).

        Returns:
            Logits tensor of shape (rows, num_classes).
        """
        # Flatten each sample. reshape() also handles non-contiguous tensors,
        # for which view() would raise a RuntimeError.
        x = x.reshape(-1, self.fc1.in_features)

        # Hidden stage 1
        x = self.fc1(x)
        x = self.bn1(x)  # Apply batch normalization
        x = self.relu(x)
        x = self.dropout(x)  # Apply dropout

        # Hidden stage 2
        x = self.fc2(x)
        x = self.bn2(x)  # Apply batch normalization
        x = self.relu(x)
        x = self.dropout(x)  # Apply dropout

        # No softmax here: CrossEntropyLoss works on raw logits
        return self.fc3(x)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# MNIST preprocessing: tensor conversion followed by mean/std 0.5 normalization
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training and test splits (downloaded into ./data when not present)
train_dataset = datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
test_dataset = datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Batches of 64; only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Model, loss function and optimizer
model = ImprovedMLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Training loop (10 epochs)
epochs = 10
for epoch in range(epochs):
    model.train()  # Put dropout / batch norm into training mode
    running_loss, correct, total = 0.0, 0, 0

    for batch in train_loader:
        # Unpack the batch and move both tensors to the training device
        inputs, labels = (t.to(device) for t in batch)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss and accuracy counters
        running_loss += loss.item()
        predicted = torch.max(outputs, dim=1).indices
        batch_hits = (predicted == labels).sum().item()
        total += len(labels)
        correct += batch_hits

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")

# Testing loop: measure accuracy on the test split
model.eval()  # Put dropout / batch norm into evaluation mode
correct, total = 0, 0

with torch.no_grad():  # Inference only, so skip gradient tracking
    for batch in test_loader:
        inputs, labels = (t.to(device) for t in batch)
        outputs = model(inputs)
        predicted = torch.max(outputs, dim=1).indices
        total += len(labels)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Epoch 1/10, Loss: 0.2390, Accuracy: 93.02%
Epoch 2/10, Loss: 0.1238, Accuracy: 96.17%
Epoch 3/10, Loss: 0.0988, Accuracy: 96.82%
Epoch 4/10, Loss: 0.0816, Accuracy: 97.45%
Epoch 5/10, Loss: 0.0707, Accuracy: 97.74%
Epoch 6/10, Loss: 0.0632, Accuracy: 97.92%
Epoch 7/10, Loss: 0.0559, Accuracy: 98.16%
Epoch 8/10, Loss: 0.0508, Accuracy: 98.37%
Epoch 9/10, Loss: 0.0468, Accuracy: 98.46%
Epoch 10/10, Loss: 0.0428, Accuracy: 98.54%
Test Accuracy: 98.16%
