In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Data loading and preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize((0.5,), (0.5,))  # Normalize to [-1, 1] range
])

train_set = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_set = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

In [2]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 128)  # Input: flattened image, Hidden: 128 units
        self.fc2 = nn.Linear(128, 10)       # Output: 10 classes (0-9)
    
    def forward(self, x):
        x = x.view(-1, 28 * 28)  # Flatten the image
        x = torch.relu(self.fc1(x))  # ReLU activation
        x = self.fc2(x)  # No activation here, as we'll use CrossEntropyLoss
        return x

In [3]:
class DeepMLP(nn.Module):
    def __init__(self):
        super(DeepMLP, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x

In [4]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  # Input channels: 1 (grayscale)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)  # 2x2 pooling
        self.fc1 = nn.Linear(64 * 7 * 7, 128)  # After two pools: 28/4 = 7
        self.fc2 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)  # Flatten
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [5]:
def train_and_evaluate(model, model_name, epochs=5):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Training loop
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"{model_name} - Epoch {epoch+1}, Loss: {running_loss / len(train_loader):.4f}")
    
    # Evaluation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"{model_name} Test Accuracy: {accuracy:.2f}%")

In [6]:
mlp = MLP(); train_and_evaluate(mlp, 'MLP')

MLP - Epoch 1, Loss: 0.3835
MLP - Epoch 2, Loss: 0.1954
MLP - Epoch 3, Loss: 0.1402
MLP - Epoch 4, Loss: 0.1143
MLP - Epoch 5, Loss: 0.0973
MLP Test Accuracy: 97.16%


In [7]:
dl = DeepMLP(); train_and_evaluate(mlp, 'DL')

DL - Epoch 1, Loss: 0.0883
DL - Epoch 2, Loss: 0.0764
DL - Epoch 3, Loss: 0.0705
DL - Epoch 4, Loss: 0.0634
DL - Epoch 5, Loss: 0.0569
DL Test Accuracy: 97.59%


In [8]:
cnn = DeepMLP(); train_and_evaluate(mlp, 'CNN')

CNN - Epoch 1, Loss: 0.0549
CNN - Epoch 2, Loss: 0.0489
CNN - Epoch 3, Loss: 0.0446
CNN - Epoch 4, Loss: 0.0431
CNN - Epoch 5, Loss: 0.0399
CNN Test Accuracy: 97.24%


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Basic transform for normalization (used for test set)
basic_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Augmented transform for training set
augmented_transform = transforms.Compose([
    transforms.RandomRotation(degrees=10),  # Why this degree range? What if you experimented with 15?
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # How does translation simulate real handwriting shifts?
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load datasets
train_set = datasets.MNIST(root='./data', train=True, download=True, transform=augmented_transform)
test_set = datasets.MNIST(root='./data', train=False, download=True, transform=basic_transform)

# Data loaders
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# MLP Model
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Deep MLP Model
class DeepMLP(nn.Module):
    def __init__(self):
        super(DeepMLP, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x

# CNN Model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Training and evaluation function
def train_and_evaluate(model, model_name, epochs=5, device='cpu'):
    model.to(device)  
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"{model_name} - Epoch {epoch+1}, Loss: {running_loss / len(train_loader):.4f}")
    
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"{model_name} Test Accuracy: {accuracy:.2f}%")

# Main execution - let's run all models
if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.
    print(f"Using device: {device}")
    
    mlp = MLP()
    train_and_evaluate(mlp, 'MLP', epochs=5, device=device)
    
    deep_mlp = DeepMLP()
    train_and_evaluate(deep_mlp, 'DeepMLP', epochs=5, device=device)
    
    cnn = CNN()
    train_and_evaluate(cnn, 'CNN', epochs=5, device=device)

Using device: cpu
MLP - Epoch 1, Loss: 0.9018
MLP - Epoch 2, Loss: 0.4115
MLP - Epoch 3, Loss: 0.3140
MLP - Epoch 4, Loss: 0.2764
MLP - Epoch 5, Loss: 0.2497
MLP Test Accuracy: 96.40%
DeepMLP - Epoch 1, Loss: 0.6386
DeepMLP - Epoch 2, Loss: 0.2738
DeepMLP - Epoch 3, Loss: 0.2164
DeepMLP - Epoch 4, Loss: 0.1797
DeepMLP - Epoch 5, Loss: 0.1656
DeepMLP Test Accuracy: 95.80%
CNN - Epoch 1, Loss: 0.2969
CNN - Epoch 2, Loss: 0.0978
CNN - Epoch 3, Loss: 0.0749
CNN - Epoch 4, Loss: 0.0636
CNN - Epoch 5, Loss: 0.0551
CNN Test Accuracy: 99.00%
