In [4]:
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
print(torch.__version__)
print("CUDA available?" if torch.cuda.is_available() else "Using CPU")

2.7.1
Using CPU


In [None]:
# Transform: normalize MNIST to [0,1]
#this next line converts the MNIST images to tensors
#and normalizes them to the range [0, 1]
transform = transforms.ToTensor()

# Download MNIST
#train_data is the training set, test_data is the test set
#both are downloaded from the 'data' directory

train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data  = datasets.MNIST(root='data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader  = DataLoader(test_data, batch_size=64, shuffle=False)

# Define a simple 2-layer neural net
# this is a feedforward neural network 
model = nn.Sequential(
    nn.Flatten(),# flatten the image to a 1D tensor 28x28 → 784
    nn.Linear(784, 128), # first layer: 784 inputs, 128 outputs
    nn.ReLU(), #activation function that introduces non-linearity
    nn.Linear(128, 10) # second layer: 128 inputs, 10 outputs (one for each digit 0-9)
)

# Loss and optimizer
loss_fn = nn.CrossEntropyLoss() #specifies the loss function 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) #defines the optimizer to update the model's parameters

# Training loop
#makes 5 full passes over the training data
#for each epoch, it iterates over the training data, where X are images and y are labels
# loss computes the loss between the predicted outputs and the true labels
# optimizer.zero_grad() resets the gradients to zero before backpropagation
# loss.backward() computes the gradients of the loss with respect to the model parameters
# optimizer.step() updates the model parameters based on the computed gradients
for epoch in range(5):
    for X, y in train_loader:
        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f"Epoch {epoch+1}: loss = {loss.item():.4f}")

100.0%
100.0%
100.0%
100.0%


Epoch 1: loss = 0.2113
Epoch 2: loss = 0.2457
Epoch 3: loss = 0.0471
Epoch 4: loss = 0.1373
Epoch 5: loss = 0.0351


In [6]:
correct = 0
total = 0

with torch.no_grad():
    for X, y in test_loader:
        pred = model(X)
        predicted = pred.argmax(1)
        correct += (predicted == y).sum().item()
        total += y.size(0)

print(f"Test Accuracy: {correct / total:.2%}")

Test Accuracy: 97.24%
