In [1]:
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor

In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))   # standard MNIST normalization
])
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=transform,
)
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=transform,
)
train_dataloader = DataLoader(training_data, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=False)

In [18]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(28*28, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
model = NeuralNetwork()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)

In [19]:
# training loop
for epoch in range(40):
    model.train()
    for X, y in train_dataloader:
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

# testing loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for X, y in test_dataloader:
        pred = model(X)
        _, predicted = torch.max(pred, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()
print(f"Accuracy: {correct} / {total} ({100 * correct / total:.2f}%)")

Epoch 1, Loss: 0.19595462083816528
Epoch 2, Loss: 0.04651594161987305
Epoch 3, Loss: 0.06910475343465805
Epoch 4, Loss: 0.08368139714002609
Epoch 5, Loss: 0.00503446813672781
Epoch 6, Loss: 0.04005558416247368
Epoch 7, Loss: 0.05618561804294586
Epoch 8, Loss: 0.01764575019478798
Epoch 9, Loss: 0.01573936827480793
Epoch 10, Loss: 0.025243934243917465
Epoch 11, Loss: 0.0033509652130305767
Epoch 12, Loss: 0.001937373774126172
Epoch 13, Loss: 0.00014601129805669188
Epoch 14, Loss: 0.00024934366228990257
Epoch 15, Loss: 0.00017566591850481927
Epoch 16, Loss: 0.0006492267129942775
Epoch 17, Loss: 0.0008105513406917453
Epoch 18, Loss: 0.0004101367958355695
Epoch 19, Loss: 8.217881986638531e-05
Epoch 20, Loss: 0.00019032119598705322
Epoch 21, Loss: 0.0002195156557718292
Epoch 22, Loss: 0.00022732230718247592
Epoch 23, Loss: 0.0009684578981250525
Epoch 24, Loss: 0.0006568054668605328
Epoch 25, Loss: 0.00019421507022343576
Epoch 26, Loss: 0.00016127769777085632
Epoch 27, Loss: 0.0004730390792246

In [None]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # apply padding else size reduces with accuracy
        # Convolution layer 1: input=1 channel (grayscale), output=32 feature maps
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        # Convolution layer 2: input=32 channels, output=64 feature maps
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        # Max pooling layer: reduces width/height by half
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # Apply 1st conv → ReLU → max-pool
        x = self.pool(torch.relu(self.conv1(x)))
        # Apply 2nd conv → ReLU → max-pool
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

cnn_model = CNN()
cnn_optimizer = torch.optim.SGD(cnn_model.parameters(), lr=0.05)
cnn_loss_fn = nn.CrossEntropyLoss()

# CNN training loop
for epoch in range(40):
    cnn_model.train()
    for X, y in train_dataloader:
        pred = cnn_model(X)                         # Forward pass
        loss = cnn_loss_fn(pred, y)
        loss.backward()                             # Backpropagation
        cnn_optimizer.step()                        # Update weights
        cnn_optimizer.zero_grad()
    print(f"CNN Epoch {epoch+1}, Loss: {loss.item()}")

# CNN testing loop
cnn_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for X, y in test_dataloader:
        pred = cnn_model(X)
        _, predicted = torch.max(pred, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()
print(f"CNN Accuracy: {100 * correct / total:.2f}%")

CNN Epoch 1, Loss: 0.24244551360607147
CNN Epoch 2, Loss: 0.012083734385669231
CNN Epoch 3, Loss: 0.0013802343746647239
CNN Epoch 4, Loss: 0.0012967275688424706
CNN Epoch 5, Loss: 0.0007667469326406717
CNN Epoch 6, Loss: 0.03320466727018356
CNN Epoch 7, Loss: 0.00011867890134453773
CNN Epoch 8, Loss: 1.1472442565718666e-05
CNN Epoch 9, Loss: 0.02218550629913807
CNN Epoch 10, Loss: 0.044965535402297974
CNN Epoch 11, Loss: 0.000297695049084723
CNN Epoch 12, Loss: 7.513537639169954e-06
CNN Epoch 13, Loss: 0.0003709682496264577
CNN Epoch 14, Loss: 0.0014930849429219961
CNN Epoch 15, Loss: 1.4454011534326128e-06
CNN Epoch 16, Loss: 7.945905963424593e-05
CNN Epoch 17, Loss: 3.803430445259437e-06
CNN Epoch 18, Loss: 0.00017183736781589687
CNN Epoch 19, Loss: 2.2351740014414645e-08
CNN Epoch 20, Loss: 1.4230334954845603e-06
CNN Epoch 21, Loss: 6.186945392983034e-05
CNN Epoch 22, Loss: 1.2314736522966996e-05
CNN Epoch 23, Loss: 2.1744464902440086e-05
CNN Epoch 24, Loss: 0.00019477022578939795
C