In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define a simple feedforward neural network for classification
class SimpleNN(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    The input is flattened from dimension 1 onward, so any per-sample shape
    whose element count equals ``input_dim`` is accepted. The forward pass
    returns raw, unnormalized scores (logits); no softmax is applied.

    Args:
        input_dim: Number of features per sample after flattening.
        hidden1_dim: Width of the first hidden layer.
        hidden2_dim: Width of the second hidden layer.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, input_dim=28 * 28, hidden1_dim=128, hidden2_dim=64,
                 num_classes=10):
        super().__init__()
        # Attribute names and creation order are kept stable so existing
        # state_dict keys and seeded initialisation stay the same.
        self.layer1 = nn.Linear(input_dim, hidden1_dim)    # Input -> hidden 1
        self.layer2 = nn.Linear(hidden1_dim, hidden2_dim)  # Hidden 1 -> hidden 2
        self.output = nn.Linear(hidden2_dim, num_classes)  # Hidden 2 -> scores

    def forward(self, x):
        """Return a ``(batch, num_classes)`` tensor of logits for ``x``."""
        x = torch.flatten(x, 1)         # Keep dim 0 (batch), merge the rest
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        return self.output(x)           # Raw scores (logits)

# Preprocessing applied to every image: convert to a tensor, then normalize
# with the given mean/std (presumably the MNIST pixel statistics — confirm).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Training split of MNIST, downloaded into the current directory if needed,
# served in shuffled mini-batches of 64.
train_dataset = datasets.MNIST(
    root='.',
    train=True,
    download=True,
    transform=transform,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

# Network, loss function, and optimizer used by the training loop
model = SimpleNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Training function
def train(model, train_loader, criterion, optimizer, epochs=5):
    """Train ``model`` in place for ``epochs`` passes over ``train_loader``.

    Args:
        model: Module to optimize; it is switched to training mode.
        train_loader: Iterable of ``(data, target)`` batches. It must expose
            a ``dataset`` attribute with a length, used for progress output.
        criterion: Callable mapping ``(output, target)`` to a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``train_loader``.

    Returns:
        None. Progress is printed every 100 batches.
    """
    model.train()  # Set the model in training mode

    # Feed batches on whatever device the model lives on; a model without
    # parameters gives no device to infer, so batches are left untouched.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    for epoch in range(epochs):
        seen = 0  # Samples consumed so far in this epoch
        for batch_idx, (data, target) in enumerate(train_loader):
            if device is not None:
                data, target = data.to(device), target.to(device)

            optimizer.zero_grad()  # Zero out previous gradients
            output = model(data)   # Forward pass
            loss = criterion(output, target)  # Compute loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update weights

            if batch_idx % 100 == 0:
                # A running count stays correct even when the logged batch is
                # a short final batch (batch_idx * len(data) would not be).
                print(f'Epoch: {epoch} [{seen}/{len(train_loader.dataset)}] Loss: {loss.item():.6f}')
            seen += len(data)

# Run the training loop with the default number of epochs
train(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
)

# Test split of MNIST with the same preprocessing; batches of 1000 are read
# in a fixed order (no shuffling).
test_dataset = datasets.MNIST(
    root='.',
    train=False,
    download=True,
    transform=transform,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

def evaluate(model, test_loader):
    """Print and return the classification accuracy of ``model``.

    Args:
        model: Module producing one score per class along dimension 1; it is
            switched to evaluation mode and left in that mode.
        test_loader: Iterable of ``(data, target)`` batches. It must expose
            a ``dataset`` attribute with a length, used as the denominator.

    Returns:
        Accuracy as a percentage in ``[0, 100]``; ``0.0`` when the dataset
        is empty.
    """
    model.eval()  # Set the model in evaluation mode

    # Feed batches on whatever device the model lives on; a model without
    # parameters gives no device to infer, so batches are left untouched.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    correct = 0
    with torch.no_grad():  # Disable gradient calculation for evaluation
        for data, target in test_loader:
            if device is not None:
                data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)  # Index of the highest score per sample
            correct += (pred == target.view_as(pred)).sum().item()

    total = len(test_loader.dataset)
    # An empty dataset would otherwise raise ZeroDivisionError.
    accuracy = 100. * correct / total if total else 0.0
    print(f'Test set: Accuracy: {correct}/{total} ({accuracy:.2f}%)')
    return accuracy

# Report accuracy of the trained model on the test loader
evaluate(model, test_loader)

Epoch: 0 [0/60000] Loss: 2.308037
Epoch: 0 [6400/60000] Loss: 0.227940
Epoch: 0 [12800/60000] Loss: 0.246641
Epoch: 0 [19200/60000] Loss: 0.245361
Epoch: 0 [25600/60000] Loss: 0.090768
Epoch: 0 [32000/60000] Loss: 0.211719
Epoch: 0 [38400/60000] Loss: 0.173334
Epoch: 0 [44800/60000] Loss: 0.430613
Epoch: 0 [51200/60000] Loss: 0.147467
Epoch: 0 [57600/60000] Loss: 0.126151
Epoch: 1 [0/60000] Loss: 0.173915
Epoch: 1 [6400/60000] Loss: 0.128406
Epoch: 1 [12800/60000] Loss: 0.075501
Epoch: 1 [19200/60000] Loss: 0.103690
Epoch: 1 [25600/60000] Loss: 0.114390
Epoch: 1 [32000/60000] Loss: 0.072733
Epoch: 1 [38400/60000] Loss: 0.029844
Epoch: 1 [44800/60000] Loss: 0.063114
Epoch: 1 [51200/60000] Loss: 0.061868
Epoch: 1 [57600/60000] Loss: 0.038389
Epoch: 2 [0/60000] Loss: 0.020869
Epoch: 2 [6400/60000] Loss: 0.083799
Epoch: 2 [12800/60000] Loss: 0.051658
Epoch: 2 [19200/60000] Loss: 0.015820
Epoch: 2 [25600/60000] Loss: 0.039515
Epoch: 2 [32000/60000] Loss: 0.093170
Epoch: 2 [38400/60000] Loss