# A first look at a neural network

In [1]:
import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Loading the MNIST dataset from Keras

In [2]:
# Define transformations for the dataset
transform = transforms.Compose([transforms.ToTensor()])

# Load the MNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset  = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Extract images and labels
train_images = train_dataset.data
train_labels = train_dataset.targets
test_images = test_dataset.data
test_labels = test_dataset.targets

"""## Preparing the image data"""
# Flatten the images and normalize
train_images = train_images.view(-1, 28*28).float() / 255
test_images = test_images.view(-1, 28*28).float() / 255

# Create DataLoader for batch processing
train_dataset = TensorDataset(train_images, train_labels)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 11527115.29it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 350762.11it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 3223910.73it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 9247829.50it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [3]:
train_images.shape

torch.Size([60000, 784])

In [4]:
len(train_labels)

60000

In [5]:
train_labels

tensor([5, 0, 4,  ..., 5, 6, 8])

In [6]:
test_images.shape

torch.Size([10000, 784])

In [7]:
len(test_labels)

10000

In [8]:
test_labels

tensor([7, 2, 1,  ..., 4, 5, 6])

## The network architecture

In [9]:
model = nn.Sequential(
    nn.Linear(28*28, 512),
    nn.ReLU(),
    nn.Linear(512, 10)
).to(device)

## The compilation step

In [10]:
optimizer = optim.RMSprop(model.parameters())  # Same as Keras
criterion = nn.CrossEntropyLoss() # Same as sparse_categorical_crossentropy in Keras

## "Fitting" the model

In [11]:
# Training loop
num_epochs = 5
for epoch in range(num_epochs):  # epochs=5
    model.train()
    for batch_images, batch_labels in train_loader:
        batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

        # Forward pass
        outputs = model(batch_images)
        loss = criterion(outputs, batch_labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [1/5], Loss: 0.1617
Epoch [2/5], Loss: 0.0337
Epoch [3/5], Loss: 0.2001
Epoch [4/5], Loss: 0.0524
Epoch [5/5], Loss: 0.0038


## Evaluating the model on new data

In [12]:
with torch.no_grad():
    model.eval()
    test_images, test_labels = test_images.to(device), test_labels.to(device)
    outputs = model(test_images)
    loss = criterion(outputs, test_labels)

    # Get predictions
    _, predicted = torch.max(outputs, 1)
    correct = (predicted == test_labels).sum().item()
    total = test_labels.size(0)
    test_acc = correct / total

print(f"Test Accuracy: {test_acc * 100:.2f}%")

Test Accuracy: 97.38%
