In [1]:
import torch  # Import PyTorch for building and training neural networks
import torch.nn as nn  # Import the neural network module
import torch.optim as optim  # Import optimization algorithms
from torch.utils.data import DataLoader  # Import DataLoader for batch processing
from torchvision import datasets, transforms  # Import datasets and transformations

# Define an improved MLP model
class ImprovedMLP(nn.Module):
    """Multi-layer perceptron with two hidden layers.

    Each hidden layer is ``Linear -> BatchNorm1d -> ReLU -> Dropout``; the
    output layer is a plain ``Linear`` that produces unnormalised logits.

    Args:
        input_size: Number of features per sample after flattening
            (default ``28 * 28``).
        hidden_sizes: Pair ``(first, second)`` with the widths of the two
            hidden layers (default ``(512, 256)``). Must contain exactly two
            values.
        num_classes: Number of output logits (default ``10``).
        dropout_p: Dropout probability applied after each hidden layer
            (default ``0.3``).

    With the default arguments the architecture and parameter names
    (``fc1``, ``fc2``, ``fc3``, ``bn1``, ``bn2``) are the same as the original
    fixed-size model.
    """

    def __init__(self, input_size=28 * 28, hidden_sizes=(512, 256), num_classes=10, dropout_p=0.3):
        super().__init__()  # Initialize the base class
        # Unpacking raises ValueError unless exactly two widths are given.
        first_hidden, second_hidden = hidden_sizes
        self.input_size = input_size  # Needed by forward() to flatten inputs

        self.fc1 = nn.Linear(input_size, first_hidden)  # Input -> first hidden layer
        self.fc2 = nn.Linear(first_hidden, second_hidden)  # First -> second hidden layer
        self.fc3 = nn.Linear(second_hidden, num_classes)  # Second hidden layer -> logits

        self.relu = nn.ReLU()  # ReLU activation function
        self.dropout = nn.Dropout(dropout_p)  # Shared dropout layer
        self.bn1 = nn.BatchNorm1d(first_hidden)  # Batch normalization for the first layer
        self.bn2 = nn.BatchNorm1d(second_hidden)  # Batch normalization for the second layer

    def forward(self, x):
        """Flatten ``x`` to ``(-1, input_size)`` and return the class logits."""
        # reshape (rather than view) also accepts non-contiguous tensors, e.g.
        # a transposed image batch; for contiguous input it behaves like view.
        x = x.reshape(-1, self.input_size)
        x = self.dropout(self.relu(self.bn1(self.fc1(x))))  # First hidden layer
        x = self.dropout(self.relu(self.bn2(self.fc2(x))))  # Second hidden layer
        x = self.fc3(x)  # Output layer
        return x  # Return the logits

# Utility function for training
def train_model(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over every batch in ``train_loader``.

    Args:
        model: Network to train; it is switched to training mode.
        train_loader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)`` where ``mean_loss`` is the sum of the
        per-batch losses divided by the number of batches, and ``accuracy``
        is the percentage of correctly classified samples, measured on the
        outputs produced during training.
    """
    model.train()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_inputs, batch_labels in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        # Forward pass and loss for this batch.
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_labels)

        # Reset stale gradients, backpropagate, then apply the update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Book-keeping for the epoch statistics.
        loss_sum += batch_loss.item()
        predicted = logits.max(dim=1).indices
        n_seen += batch_labels.size(0)
        n_correct += predicted.eq(batch_labels).sum().item()

    mean_loss = loss_sum / len(train_loader)
    accuracy = 100 * n_correct / n_seen
    return mean_loss, accuracy

# Utility function for testing
def evaluate_model(model, test_loader, device):
    """Measure classification accuracy of ``model`` on ``test_loader``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Percentage (0-100) of samples whose highest-scoring output index
        equals the label.
    """
    model.eval()
    n_correct, n_seen = 0, 0

    # Gradients are not needed for scoring.
    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            predicted = model(batch_inputs).max(dim=1).indices
            n_seen += batch_labels.size(0)
            n_correct += predicted.eq(batch_labels).sum().item()

    return 100 * n_correct / n_seen

# Main code
if __name__ == "__main__":
    # Tunable settings, collected in one place.
    SEED = 42  # Seed for weight initialisation, shuffling and dropout
    BATCH_SIZE = 64  # Samples per batch for both loaders
    LEARNING_RATE = 0.001  # Adam step size
    DATA_ROOT = './data'  # Directory where MNIST is downloaded/cached
    epochs = 10  # Number of passes over the training set

    # Seed PyTorch's random number generators so that a fresh
    # "Restart & Run All" starts from the same initial weights and batch
    # order. Some GPU kernels may still be non-deterministic.
    torch.manual_seed(SEED)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use the GPU if available, else CPU

    transform = transforms.Compose([  # Define data transformations
        transforms.ToTensor(),  # Convert images to PyTorch tensors
        transforms.Normalize((0.5,), (0.5,))  # Normalize images to [-1, 1] range
    ])

    train_dataset = datasets.MNIST(root=DATA_ROOT, train=True, download=True, transform=transform)  # MNIST training split
    test_dataset = datasets.MNIST(root=DATA_ROOT, train=False, download=True, transform=transform)  # MNIST test split
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)  # Reshuffled every epoch
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)  # Fixed order for evaluation

    model = ImprovedMLP().to(device)  # Initialize model and move it to the device
    criterion = nn.CrossEntropyLoss()  # Loss on raw logits
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)  # Adam optimizer

    for epoch in range(epochs):  # One training pass per epoch
        train_loss, train_accuracy = train_model(model, train_loader, criterion, optimizer, device)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.2f}%")  # Print training statistics

    test_accuracy = evaluate_model(model, test_loader, device)  # Score the trained model on held-out data
    print(f"Test Accuracy: {test_accuracy:.2f}%")  # Print test accuracy

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 17.1MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 496kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 4.52MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 2.33MB/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Epoch 1/10, Loss: 0.2358, Accuracy: 93.02%
Epoch 2/10, Loss: 0.1220, Accuracy: 96.16%
Epoch 3/10, Loss: 0.0963, Accuracy: 96.97%
Epoch 4/10, Loss: 0.0799, Accuracy: 97.39%
Epoch 5/10, Loss: 0.0714, Accuracy: 97.67%
Epoch 6/10, Loss: 0.0612, Accuracy: 98.07%
Epoch 7/10, Loss: 0.0566, Accuracy: 98.17%
Epoch 8/10, Loss: 0.0501, Accuracy: 98.31%
Epoch 9/10, Loss: 0.0481, Accuracy: 98.44%
Epoch 10/10, Loss: 0.0442, Accuracy: 98.52%
Test Accuracy: 98.40%
