In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Run on the GPU when CUDA is usable in this environment; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

Using device: cpu


In [2]:
torch.manual_seed(42)

# Per-image preprocessing, applied lazily each time a sample is fetched:
#   1. ToTensor   -> tensor of shape [1, 28, 28] with values in [0, 1]
#   2. Normalize  -> (x - 0.5) / 0.5, i.e. values rescaled to [-1, 1]
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# MNIST train and test splits, downloaded into ./data when not already there.
train_dataset = datasets.MNIST("./data", train=True, download=True, transform=transform)
test_dataset = datasets.MNIST("./data", train=False, download=True, transform=transform)

# Mini-batch iterators: training batches of 64 are reshuffled every epoch,
# test batches of 1000 are served in a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

100.0%
100.0%
100.0%
100.0%


In [4]:
class DigitClassifier(nn.Module):
    """Fully connected digit classifier: 784 -> 128 -> 64 -> 10.

    ReLU is applied after the first two linear layers. The last layer's
    output is returned as raw class scores (logits); no softmax is applied
    inside the module.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 128)  # fully connected: 784 -> 128
        self.fc2 = nn.Linear(128, 64)       # fully connected: 128 -> 64
        self.fc3 = nn.Linear(64, 10)        # fully connected: 64 -> 10 (digits 0-9)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor whose elements can be regrouped into rows of
                ``fc1.in_features`` (784) values, e.g. ``[batch, 1, 28, 28]``.
                It does not need to be contiguous in memory.

        Returns:
            Tensor of shape ``[batch, 10]`` holding one raw score per class.

        Raises:
            RuntimeError: If the number of elements in ``x`` is not a
                multiple of 784.
        """
        # Flatten each image to a 784-vector. `reshape` returns a view when
        # the memory layout allows it and copies otherwise, so transposed or
        # permuted inputs work too (`view` raises on those). The width is
        # read from fc1 so the flatten size cannot drift from the layer.
        x = x.reshape(-1, self.fc1.in_features)
        # Hidden layers with ReLU activations.
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # Final layer: logits, left un-normalized.
        return self.fc3(x)

# Build the network, then place its parameters on the selected device (CPU/GPU).
model = DigitClassifier()
model = model.to(device)
print(model)

DigitClassifier(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
)


In [5]:
# Adam updates the model's weights, using a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Cross-entropy loss: scores the model's logits against the true labels.
criterion = nn.CrossEntropyLoss()

In [8]:
epochs = 15
for epoch in range(epochs):
    # Switch the model to training mode at the start of every epoch.
    model.train()

    for batch_idx, (data, target) in enumerate(train_loader):
        # Put the mini-batch on the same device (CPU/GPU) as the model.
        data = data.to(device)
        target = target.to(device)

        # Gradients accumulate across backward() calls, so clear the ones
        # left over from the previous step first.
        optimizer.zero_grad()

        # Forward pass, then the loss of the predictions against the targets.
        output = model(data)
        loss = criterion(output, target)

        # Backward pass computes the gradients; step() applies the update.
        loss.backward()
        optimizer.step()

        # Report progress only on every 100th mini-batch (0, 100, 200, ...).
        if batch_idx % 100 != 0:
            continue
        print(f"Epoch [{epoch+1}/{epochs}], Step [{batch_idx}], Loss: {loss.item():.4f}")


Epoch [1/15], Step [0], Loss: 0.0280
Epoch [1/15], Step [100], Loss: 0.0822
Epoch [1/15], Step [200], Loss: 0.1229
Epoch [1/15], Step [300], Loss: 0.0796
Epoch [1/15], Step [400], Loss: 0.1582
Epoch [1/15], Step [500], Loss: 0.1843
Epoch [1/15], Step [600], Loss: 0.0828
Epoch [1/15], Step [700], Loss: 0.1504
Epoch [1/15], Step [800], Loss: 0.0528
Epoch [1/15], Step [900], Loss: 0.0191
Epoch [2/15], Step [0], Loss: 0.0535
Epoch [2/15], Step [100], Loss: 0.0392
Epoch [2/15], Step [200], Loss: 0.0302
Epoch [2/15], Step [300], Loss: 0.0744
Epoch [2/15], Step [400], Loss: 0.1135
Epoch [2/15], Step [500], Loss: 0.0461
Epoch [2/15], Step [600], Loss: 0.3819
Epoch [2/15], Step [700], Loss: 0.1369
Epoch [2/15], Step [800], Loss: 0.0447
Epoch [2/15], Step [900], Loss: 0.0154
Epoch [3/15], Step [0], Loss: 0.0524
Epoch [3/15], Step [100], Loss: 0.0059
Epoch [3/15], Step [200], Loss: 0.1439
Epoch [3/15], Step [300], Loss: 0.0843
Epoch [3/15], Step [400], Loss: 0.0312
Epoch [3/15], Step [500], Loss:

In [9]:
# Score the model on the test set.
model.eval()  # evaluation mode
total = 0
correct = 0

# Only forward passes are needed here, so gradient tracking is switched off.
with torch.no_grad():
    for data, target in test_loader:
        # Put the batch on the same device as the model.
        data = data.to(device)
        target = target.to(device)

        outputs = model(data)

        # The predicted class is the index of the highest score in each row.
        predicted = outputs.argmax(dim=1)

        # Running tallies: matching predictions and samples seen so far.
        correct += predicted.eq(target).sum().item()
        total += target.size(0)

# Accuracy over the whole test set, as a percentage.
print(f"Test Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 97.80%
