**A Pythonic Guide to PyTorch & Computer Vision**

This notebook will cover:

*   Setting up our environment.

*  The core data pipeline: Dataset, Transform, and DataLoader.

*  Defining a model using nn.Module.


*   The standard training loop (forward pass, loss, backward pass, optimizer step).

*   The standard evaluation loop.


In [None]:
# 1. Setup and Imports
# We'll need torch for all the core deep learning functionality,
# nn (and its functional API, imported as F) for the building blocks of
# networks, optim for optimizers, torchvision for datasets, transforms, and
# pre-trained models, and matplotlib/numpy for displaying images.

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn.functional as F

# Seed PyTorch's global random number generator so that weight initialisation
# and DataLoader shuffling are repeatable after "Restart Kernel -> Run All".
# (Some GPU kernels can still introduce small run-to-run differences.)
SEED = 42
torch.manual_seed(SEED)

# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# On Colab, if this prints "cpu", go to Runtime > Change runtime type and
# select "GPU" to get hardware acceleration.
print(f"Using device: {device}")

## 2. Data (Transforms, Datasets, and DataLoaders)

In [None]:
# Pre-processing applied to every image, in order:
#   1. ToTensor()  - converts a PIL image to a float tensor scaled to [0, 1].
#   2. Normalize() - per channel, computes (x - mean) / std.
# NOTE: mean = std = 0.5 are NOT the measured CIFAR-10 channel statistics;
# they are a simple convention that rescales each channel from [0, 1] to
# [-1, 1]. The `imshow` helper later in this notebook undoes exactly this
# scaling, so the two must be changed together.
#
# This pipeline is shared by the training and the test set, so it contains no
# random augmentation. To experiment with e.g. transforms.RandomRotation or
# transforms.RandomPerspective, build a separate transform for the training
# set only so that evaluation stays deterministic.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Number of images processed together in one forward/backward pass.
batch_size = 64

# Training split.
#   root='./data'       - directory where the dataset is stored.
#   train=True          - select the training images.
#   download=True       - fetch the data only if it is not already in `root`.
#   transform=transform - apply the pre-processing pipeline to every image.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
# shuffle=True draws the training images in a new random order each epoch.
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# Test split: same pre-processing, but iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Human-readable class names; position i is the name for integer label i.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

## **3. Visualize a Batch of Data**

In [None]:
def imshow(img):
    """Show a normalized, channel-first image tensor with matplotlib.

    The tensor is first mapped back from the normalized range to [0, 1]
    (the inverse of subtracting 0.5 and dividing by 0.5), then its axes are
    reordered from (channels, height, width) to (height, width, channels),
    which is the layout ``plt.imshow`` draws.
    """
    restored = img / 2 + 0.5  # undo the normalization
    height_width_channels = restored.numpy().transpose(1, 2, 0)
    plt.imshow(height_width_channels)
    plt.show()

# Get one random batch of training images
dataiter = iter(train_loader)
images, labels = next(dataiter)

# Tile every image of the batch into a single grid and display it
imshow(torchvision.utils.make_grid(images))

# Print the class name of each image, in the same order as the batch.
# Iterating over the labels themselves (instead of range(batch_size)) stays
# correct when a batch holds fewer than batch_size images.
print(' '.join(f'{classes[label]:5s}' for label in labels.tolist()))

## **4. Define the Neural Network (nn.Module)**

In [None]:
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Two (convolution -> ReLU -> 2x2 max-pool) stages are followed by a
    two-layer fully connected head. ``forward`` returns raw, un-normalized
    class scores (logits) of shape (n, 10); no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor. padding=1 with a 3x3 kernel keeps height/width
        # unchanged, so only the pooling layer shrinks the feature maps.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # halves height and width
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)

        # Classifier head. Spatial size goes 32x32 -> 16x16 -> 8x8 over the two
        # pooling steps, and conv2 emits 32 channels, so each flattened sample
        # has 32 * 8 * 8 = 2048 features.
        self.fc1 = nn.Linear(in_features=32 * 8 * 8, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)  # one score per class

    def forward(self, x):
        """Map a batch of shape (n, 3, 32, 32) to logits of shape (n, 10)."""
        features = self.pool(torch.relu(self.conv1(x)))         # (n, 16, 16, 16)
        features = self.pool(torch.relu(self.conv2(features)))  # (n, 32, 8, 8)
        flat = features.flatten(start_dim=1)                    # (n, 2048)
        hidden = torch.relu(self.fc1(flat))                     # (n, 256)
        return self.fc2(hidden)                                 # (n, 10)

## **5. Instantiate Model, Loss Function, and Optimizer**

In [None]:
# Build the network, then place its parameters on the selected device
# (the GPU when one was found, otherwise the CPU).
model = SimpleCNN()
model = model.to(device)

# Loss: cross-entropy between the model's raw class scores and the
# integer class labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all of the model's parameters, learning rate 0.001.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

## **6. The Training Loop**

In [None]:
print("Starting training...")

# Number of full passes through the training data.
num_epochs = 1

# Report the average loss once per this many mini-batches. Keeping the value
# in one place guarantees the reporting condition and the divisor agree.
log_interval = 100

for epoch in range(num_epochs):
    # Put the model in training mode (matters for layers such as dropout or
    # batch normalization that behave differently during evaluation).
    model.train()

    running_loss = 0.0     # loss summed over the current reporting window
    batches_in_window = 0  # number of mini-batches in that window

    for batch_number, (inputs, labels) in enumerate(train_loader, start=1):
        # Move the batch to the same device as the model.
        inputs, labels = inputs.to(device), labels.to(device)

        # 1. Zero the parameter gradients (they accumulate across backward calls)
        optimizer.zero_grad()

        # 2. Forward pass
        outputs = model(inputs)

        # 3. Calculate loss
        loss = criterion(outputs, labels)

        # 4. Backward pass (calculate gradients)
        loss.backward()

        # 5. Update weights (take a step with the optimizer)
        optimizer.step()

        # Accumulate statistics and print them once per full window
        running_loss += loss.item()
        batches_in_window += 1
        if batches_in_window == log_interval:
            print(f'[Epoch {epoch + 1}, Batch {batch_number:5d}] loss: {running_loss / batches_in_window:.3f}')
            running_loss = 0.0
            batches_in_window = 0

    # Report the last, shorter window so the final batches of the epoch are
    # not silently dropped when the batch count is not a multiple of
    # log_interval.
    if batches_in_window:
        print(f'[Epoch {epoch + 1}, Batch {batch_number:5d}] loss: {running_loss / batches_in_window:.3f}')

print('Finished Training')

## **7. The Testing/Evaluation Loop**

In [None]:
correct = 0  # number of test images classified correctly
total = 0    # number of test images seen

# Set the model to evaluation mode
model.eval()

# Since we're not training, we don't need to calculate gradients
with torch.no_grad():
    for images, labels in test_loader:
        # Move the batch to the same device as the model.
        images, labels = images.to(device), labels.to(device)

        # Get model outputs (forward pass): one score per class for each image
        outputs = model(images)

        # The predicted class is the index of the highest score in each row.
        # (argmax replaces the legacy `torch.max(outputs.data, 1)` idiom;
        # `.data` is unnecessary inside torch.no_grad().)
        predicted = outputs.argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
# Report the number of images actually evaluated rather than a hard-coded count.
print(f'Accuracy of the network on the {total} test images: {accuracy:.2f} %')