# Stochastic Gradient Descent

## Setup

We do the same setup that we've done before.

In [1]:
import matplotlib.pyplot as plt
import torchvision
import torch
from PIL import Image

In [2]:
size = (128, 128)

In [3]:
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size),
    torchvision.transforms.ToTensor()
])

In [4]:
train_dataset = list(
    torchvision.datasets.Flowers102(
        "./flowers", "train", transform=transform, download=True
    )
)
test_dataset = list(
    torchvision.datasets.Flowers102(
        "./flowers", "test", transform=transform, download=True
    )
)

In [5]:
train_images = torch.stack([im for im, _ in train_dataset], dim=0)
train_labels = torch.tensor([label for _, label in train_dataset])

## Model setup

In [12]:
model = torch.nn.Linear(128 * 128 * 3, 102)
loss = torch.nn.CrossEntropyLoss()
lr = 0.01
epochs = 10
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

The only thing that we have to change here is our training loop. Instead of just looping across epochs, we loop across all epochs and per epoch, we loop across each sample.

In [13]:
for epoch in range(epochs):
    losses = []
    for i in range(0, len(train_images)):
        # compute forward pass for one image.
        x = train_images[None, i].view(-1, 128 * 128 * 3)
        y = train_labels[None, i]
        y_pred = model(x)

        # compute backward pass
        optimizer.zero_grad()
        l = loss(y_pred, y)
        l.backward()
        optimizer.step()

        losses.append(l.item())
    avg_loss = sum(losses) / len(losses)
    print(f"Epoch {epoch} loss: {avg_loss}")

Epoch 0 loss: 8.152144950877561
Epoch 1 loss: 8.067449410941057
Epoch 2 loss: 6.710525179325545
Epoch 3 loss: 7.233729608679183
Epoch 4 loss: 6.268898064698453
Epoch 5 loss: 5.806133178098848
Epoch 6 loss: 5.861039021034764
Epoch 7 loss: 6.061175036286352
Epoch 8 loss: 5.665907482912174
Epoch 9 loss: 5.536450230343481


We can also add batching as well. Batching will help this run faster and should greatly improve convergence as well (since using individual values (i.e., batch_size = 1) can lead to too many update steps happening).

In [14]:
batch_size = 32

In [15]:
for epoch in range(epochs):
    losses = []
    for i in range(0, len(train_images), batch_size):
        # compute forward pass for one image.
        x = train_images[None, i].view(-1, 128 * 128 * 3)
        y = train_labels[None, i]
        y_pred = model(x)

        # compute backward pass
        optimizer.zero_grad()
        l = loss(y_pred, y)
        l.backward()
        optimizer.step()

        losses.append(l.item())
    avg_loss = sum(losses) / len(losses)
    print(f"Epoch {epoch} loss: {avg_loss}")

Epoch 0 loss: 48.04394211281758
Epoch 1 loss: 29.512721039411876
Epoch 2 loss: 13.354049857065519
Epoch 3 loss: 4.804887838971842
Epoch 4 loss: 3.845917576204579
Epoch 5 loss: 1.657101103170172
Epoch 6 loss: 1.851333588298587e-05
Epoch 7 loss: 1.6722442589767184e-05
Epoch 8 loss: 1.5262842746111005e-05
Epoch 9 loss: 1.404521314363194e-05


We can also add **momentum** as well as decrease the learning rate. If the loss jumps around, that's normally a sign of the learning rate being too high.

In [16]:
model = torch.nn.Linear(128 * 128 * 3, 102)
loss = torch.nn.CrossEntropyLoss()
lr = 0.0001
momentum = 0.9
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

epochs = 10
batch_size = 32

In [17]:
for epoch in range(epochs):
    losses = []
    for i in range(0, len(train_images), batch_size):
        # compute forward pass for one image.
        x = train_images[None, i].view(-1, 128 * 128 * 3)
        y = train_labels[None, i]
        y_pred = model(x)

        # compute backward pass
        optimizer.zero_grad()
        l = loss(y_pred, y)
        l.backward()
        optimizer.step()

        losses.append(l.item())
    avg_loss = sum(losses) / len(losses)
    print(f"Epoch {epoch} loss: {avg_loss}")

Epoch 0 loss: 5.868790969252586
Epoch 1 loss: 3.9322223626077175
Epoch 2 loss: 1.8542299214750528
Epoch 3 loss: 0.8313129461312201
Epoch 4 loss: 0.4318122226977721
Epoch 5 loss: 0.1006411284324713
Epoch 6 loss: 0.05923208067542873
Epoch 7 loss: 0.050101972854463384
Epoch 8 loss: 0.04454981617163867
Epoch 9 loss: 0.040248340694233775
