In [16]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [17]:
# A single ToTensor transform (image -> PyTorch tensor) is shared by both
# MNIST splits.
transform = transforms.ToTensor()

# MNIST splits: train=True selects the training set, train=False the test set.
# download=True asks torchvision to fetch the data into `root` when needed.
train_dataset = datasets.MNIST(
    root='/content/data', train=True, transform=transform, download=True
)
test_dataset = datasets.MNIST(
    root='/content/data', train=False, transform=transform, download=True
)

# Batched iterators over the two splits, 64 samples per batch.
# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [18]:
# Define a simple feedforward neural network
class NeuralNet(nn.Module):
    """Feedforward classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features per (flattened) input sample.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output scores produced per sample.

    The defaults (784, 500, 10) reproduce the previously hard-coded layout,
    so ``NeuralNet()`` builds exactly the same architecture as before.
    """

    def __init__(self, input_size: int = 784, hidden_size: int = 500,
                 num_classes: int = 10) -> None:
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)   # input -> hidden
        self.relu = nn.ReLU()                          # hidden activation
        self.l2 = nn.Linear(hidden_size, num_classes)  # hidden -> class scores

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw output scores for ``x``.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``num_classes``. No softmax is applied here.
        """
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)

# Seed PyTorch's global RNG before the model is built so that the random
# weight initialisation is reproducible on a fresh Restart & Run All.
torch.manual_seed(42)

# Initialize the model
model = NeuralNet()

# Cross-entropy loss on the raw scores returned by the model,
# optimised with Adam at a learning rate of 0.001.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [19]:
# Train the model.
n_total_steps = len(train_loader)  # number of batches per epoch
num_epochs = 3

# Put the model in training mode explicitly, regardless of the mode an
# earlier cell may have left it in.
model.train()

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten each image into one row per sample. Keeping the batch
        # dimension explicit (instead of reshape(-1, 28*28)) means an
        # unexpected image size fails in the model rather than silently
        # regrouping pixels across samples.
        images = images.reshape(images.size(0), -1)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization: clear stale gradients,
        # backpropagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print the current batch loss every 100 steps
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/3], Step [100/938], Loss: 0.2941
Epoch [1/3], Step [200/938], Loss: 0.2816
Epoch [1/3], Step [300/938], Loss: 0.4622
Epoch [1/3], Step [400/938], Loss: 0.1265
Epoch [1/3], Step [500/938], Loss: 0.2440
Epoch [1/3], Step [600/938], Loss: 0.2272
Epoch [1/3], Step [700/938], Loss: 0.1318
Epoch [1/3], Step [800/938], Loss: 0.2736
Epoch [1/3], Step [900/938], Loss: 0.1291
Epoch [2/3], Step [100/938], Loss: 0.1216
Epoch [2/3], Step [200/938], Loss: 0.0707
Epoch [2/3], Step [300/938], Loss: 0.0773
Epoch [2/3], Step [400/938], Loss: 0.0832
Epoch [2/3], Step [500/938], Loss: 0.1313
Epoch [2/3], Step [600/938], Loss: 0.0754
Epoch [2/3], Step [700/938], Loss: 0.1244
Epoch [2/3], Step [800/938], Loss: 0.0367
Epoch [2/3], Step [900/938], Loss: 0.0898
Epoch [3/3], Step [100/938], Loss: 0.0373
Epoch [3/3], Step [200/938], Loss: 0.1039
Epoch [3/3], Step [300/938], Loss: 0.0781
Epoch [3/3], Step [400/938], Loss: 0.0509
Epoch [3/3], Step [500/938], Loss: 0.0466
Epoch [3/3], Step [600/938], Loss:

In [20]:
# Evaluate the model on the test dataset.
# Switch to evaluation mode so layers with train/eval-specific behaviour
# (none in the current model, but cheap insurance) run in inference mode.
model.eval()

n_correct = 0
n_samples = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten test images to one row per sample
        images = images.reshape(images.size(0), -1)
        outputs = model(images)

        # Predicted class = index of the highest score for each sample
        predicted = outputs.argmax(dim=1)

        # Count correct predictions
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

# Compute and print accuracy; the image count in the message comes from the
# samples actually evaluated rather than a hard-coded number.
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network on the {n_samples:,} test images: {acc:.2f}%')

Accuracy of the network on the 10,000 test images: 97.75%
