In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm  # Third-party progress-bar library (not part of the Python standard library)

# Preprocessing applied to every image: convert it to a tensor, then normalize
# the single channel with the constants below (presumably the MNIST mean and
# standard deviation -- confirm against the dataset statistics).
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

# Build the training split first, then the test split. Both use the same root
# directory and the same preprocessing pipeline.
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Only the training batches are shuffled; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=150)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=1000)

print("Training Examples:", len(train_dataset), "Test Examples:", len(test_dataset))  # 150x more than PA 2

Training Examples: 60000 Test Examples: 10000


In [2]:
class SimpleNN(nn.Module):
    """Fully connected classifier with a single hidden layer.

    The input is flattened, passed through ``fc1`` and a ReLU, and then
    projected by ``fc2`` to one raw score per class (no softmax is applied).

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size: Width of the hidden layer.
        num_classes: Number of scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Submodules are registered in the same order forward() applies them.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the per-class scores for the batch ``x``.

        ``nn.Flatten`` collapses every dimension after the first, so e.g. a
        batch of 28x28 images becomes a batch of 784-element vectors.
        """
        hidden = self.relu(self.fc1(self.flatten(x)))
        return self.fc2(hidden)

In [3]:
def check_accuracy(loader, model):
    """Compute the classification accuracy of ``model`` over ``loader``.

    The model is switched to evaluation mode while scoring and is put back
    into whichever mode (train or eval) it was in when the function was
    called, even if an exception is raised part-way through.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches of tensors.
        model: ``nn.Module`` mapping a batch of inputs to per-class scores
            laid out along dimension 1.

    Returns:
        The accuracy as a float percentage (0-100).

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    num_correct = 0
    num_samples = 0

    # Take the target device from the model itself rather than from a
    # notebook-level global, so the function works regardless of the order
    # in which cells were run. A model without parameters leaves the batches
    # on whatever device they already live on.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    # Remember the caller's mode so evaluation does not silently flip an
    # eval-mode model into training mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if model_device is not None:
                    x, y = x.to(model_device), y.to(model_device)
                scores = model(x)
                # The index of the highest score is the predicted class.
                _, predictions = scores.max(1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    return (float(num_correct) / num_samples) * 100

In [4]:
# Seed PyTorch's global RNG before anything stochastic happens so that the
# weight initialization below (and the shuffling of train_loader, which draws
# from the same global RNG when no generator is passed) repeats on every
# Restart & Run All. Some GPU kernels may still be non-deterministic.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 784 inputs = one flattened 28x28 image; 10 outputs = one score per class.
model = SimpleNN(input_size=784, hidden_size=16, num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Report which GPU is in use (nothing is printed on CPU-only machines).
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

NVIDIA GeForce GTX 1080 Ti


In [5]:
# Train for a fixed number of passes over the training set, reporting the
# training accuracy after each pass and the test accuracy once at the end.
epochs = 3
for epoch in range(epochs):
    # The description is constant for the whole epoch, so it is set once on
    # the bar instead of being re-set on every batch. tqdm takes the total
    # from len(train_loader); leave=False clears the bar when the epoch ends.
    pbar = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{epochs}]", leave=False)
    for data, targets in pbar:
        data, targets = data.to(device), targets.to(device)

        # Forward pass and loss for this batch.
        scores = model(data)
        loss = criterion(scores, targets)

        # Clear gradients left from the previous step, backpropagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        pbar.set_postfix(loss=loss.item())

    # Full pass over the training data to measure accuracy after this epoch.
    train_acc = check_accuracy(train_loader, model)
    print(f"Epoch {epoch+1} completed. Training Accuracy: {train_acc:.2f}%")

test_acc = check_accuracy(test_loader, model)
print(f"\nFinal Test Accuracy: {test_acc:.2f}%")



                                                                          

Epoch 1 completed. Training Accuracy: 92.40%


                                                                          

Epoch 2 completed. Training Accuracy: 93.28%


                                                                           

Epoch 3 completed. Training Accuracy: 93.39%

Final Test Accuracy: 92.81%
