In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

# Configuration
batch_size = 64   # mini-batch size handed to the DataLoaders
num_epochs = 10   # number of passes over the training loader in train_model

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("CUDA is available:", torch.cuda.is_available())
# torch.cuda.get_device_name raises on a machine without CUDA, so only query it
# when a GPU is actually present; the CPU fallback above then stays usable.
print("Device name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")

CUDA is available: True
Device name: NVIDIA GeForce RTX 4060 Laptop GPU


In [8]:
# Convert each image to a tensor, then standardize it.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST pixel mean / std -- confirm.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=(0.1307,), std=(0.3081,))]
)

# Build the train split first, then the test split; both are downloaded into
# ./data if missing and share the same preprocessing pipeline.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(
        root='./data', train=is_train_split, download=True, transform=transform
    )
    for is_train_split in (True, False)
)

# Only the training loader reshuffles its samples; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [9]:
class MLP(nn.Module):
    """Three-layer fully connected classifier: 784 -> 200 -> 50 -> 10 with ReLU in between.

    The final layer returns raw scores (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(784, 200),
            nn.ReLU(),
            nn.Linear(200, 50),
            nn.ReLU(),
            nn.Linear(50, 10)
        )

    def forward(self, x):
        """Flatten ``x`` into rows of 784 features and return the 10 scores per row.

        Args:
            x: Tensor whose total number of elements is a multiple of 784.

        Returns:
            Tensor of shape ``(x.numel() // 784, 10)``.
        """
        # reshape (rather than view) also accepts non-contiguous inputs, e.g. a
        # transposed or sliced batch; for contiguous inputs it is equivalent.
        x = x.reshape(-1, 784)  # Flatten images
        return self.layers(x)

In [4]:
# Training Function with Scheduler Support
def train_model(model, optimizer, scheduler=None):
    """Train ``model`` with cross-entropy loss for ``num_epochs`` epochs.

    Reads the module-level ``device``, ``num_epochs`` and ``train_loader``.

    Args:
        model: Module to train; it is moved to ``device`` first.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called once per batch.
        scheduler: Optional object with a ``step()`` method, called once at the
            end of every epoch when truthy.

    Returns:
        None. One line with the mean per-batch loss is printed for each epoch.
    """
    model.to(device)
    loss_fn = nn.CrossEntropyLoss()

    for epoch_number in range(1, num_epochs + 1):
        model.train()
        running_loss = 0.0

        for batch_images, batch_labels in train_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            # Standard step: clear old gradients, forward, backward, update.
            optimizer.zero_grad()
            batch_loss = loss_fn(model(batch_images), batch_labels)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()

        # The learning-rate schedule advances per epoch, not per batch.
        if scheduler:
            scheduler.step()

        # Average over the number of batches (not the number of samples).
        mean_loss = running_loss / len(train_loader)
        print(f'Epoch [{epoch_number}/{num_epochs}], Loss: {mean_loss:.4f}')

# Evaluation Function
def evaluate_model(model):
    """Return the top-1 accuracy of ``model`` on ``test_loader`` as a percentage.

    Reads the module-level ``device`` and ``test_loader``. Each batch is moved
    to ``device``; the model itself is not moved here.

    Args:
        model: Module mapping a batch of images to per-class scores; it is put
            into eval mode.

    Returns:
        float: ``100 * correct / total`` over all samples yielded by ``test_loader``.

    Raises:
        ZeroDivisionError: if ``test_loader`` yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest score along the class dimension.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

In [5]:
# Label of each optimizer under comparison -> the optimizer class plus the
# hyperparameters it is constructed with.
optimizers = {
    'SGD': dict(optimizer=optim.SGD, lr=0.1),
    'SGD+Momentum': dict(optimizer=optim.SGD, lr=0.01, momentum=0.9),
    'AdaGrad': dict(optimizer=optim.Adagrad, lr=0.01),
    'RMSprop': dict(optimizer=optim.RMSprop, lr=0.001),
    'Adam': dict(optimizer=optim.Adam, lr=0.001),
}

In [6]:
# Training and Evaluation
seed = 42  # shared by every run so the comparison is reproducible
results = {}
for opt_name, config in optimizers.items():
    print(f"\n=== Training with {opt_name} ===")

    # Re-seed before each run so every optimizer starts from the same initial
    # weights (and the same global-RNG stream); otherwise accuracy differences
    # mix optimizer effects with initialization luck.
    torch.manual_seed(seed)

    # Initialize fresh model and optimizer. Every config entry except the
    # optimizer class is forwarded as a keyword argument, so the optimizer is
    # built exactly once and extra hyperparameters are not silently dropped.
    model = MLP().to(device)
    hyperparams = {key: value for key, value in config.items() if key != 'optimizer'}
    optimizer = config['optimizer'](model.parameters(), **hyperparams)

    # Add learning rate scheduler: multiply the learning rate by 0.1 every 5 epochs
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    # Train and evaluate
    train_model(model, optimizer, scheduler)
    acc = evaluate_model(model)
    results[opt_name] = acc
    print(f"{opt_name} Test Accuracy: {acc:.2f}%")

# Print final comparison
print("\n=== Final Results ===")
for opt_name, acc in results.items():
    print(f"{opt_name}: {acc:.2f}%")


=== Training with SGD ===
Epoch [1/10], Loss: 0.2932
Epoch [2/10], Loss: 0.1052
Epoch [3/10], Loss: 0.0711
Epoch [4/10], Loss: 0.0517
Epoch [5/10], Loss: 0.0405
Epoch [6/10], Loss: 0.0195
Epoch [7/10], Loss: 0.0159
Epoch [8/10], Loss: 0.0144
Epoch [9/10], Loss: 0.0133
Epoch [10/10], Loss: 0.0126
SGD Test Accuracy: 98.23%

=== Training with SGD+Momentum ===
Epoch [1/10], Loss: 0.3159
Epoch [2/10], Loss: 0.1091
Epoch [3/10], Loss: 0.0722
Epoch [4/10], Loss: 0.0522
Epoch [5/10], Loss: 0.0398
Epoch [6/10], Loss: 0.0200
Epoch [7/10], Loss: 0.0161
Epoch [8/10], Loss: 0.0145
Epoch [9/10], Loss: 0.0134
Epoch [10/10], Loss: 0.0126
SGD+Momentum Test Accuracy: 98.27%

=== Training with AdaGrad ===
Epoch [1/10], Loss: 0.2455
Epoch [2/10], Loss: 0.1156
Epoch [3/10], Loss: 0.0881
Epoch [4/10], Loss: 0.0718
Epoch [5/10], Loss: 0.0610
Epoch [6/10], Loss: 0.0499
Epoch [7/10], Loss: 0.0487
Epoch [8/10], Loss: 0.0478
Epoch [9/10], Loss: 0.0472
Epoch [10/10], Loss: 0.0466
AdaGrad Test Accuracy: 97.64%

=