In [18]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import torch

torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)

<torch._C.Generator at 0x75e1fbd4a4f0>

In [19]:
# load dataset
from torchvision import datasets, transforms
import torch

data_path = '../data/ch7/'

cifar10 = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.ToTensor() 
)

# calculate mean and std for normalization
images = torch.stack([img for img, _ in cifar10], dim=3)

mean = images.view(3, -1).mean(dim=1)
std = images.view(3, -1).std(dim=1)

print("Mean (R, G, B):", mean)
print("Std (R, G, B):", std)

Mean (R, G, B): tensor([0.4914, 0.4822, 0.4465])
Std (R, G, B): tensor([0.2470, 0.2435, 0.2616])


In [20]:
# split data set for training and validation + normalization

cifar10 = datasets.CIFAR10(
    data_path, train=True, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
)

cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
)

In [21]:
# filtering the dataset to include only the classes 0 (airplane) and 2 (bird),
# here we are interested in a binary classification problem: 
# distinguishing birds from airplanes

label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']

# training set
cifar2 = [(img, label_map[label])
          for img, label in cifar10
          if label in [0, 2]]

# validation set
cifar2_val = [(img, label_map[label]) 
              for img, label in cifar10_val
              if label in [0, 2]]

In [22]:
import torch
import torch.nn as nn
import torch.optim as optim

# creates a dataloader:
# - the dataset is divided into batches of 64 images
# - data is shuffled at the beginning of each epoch
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64, shuffle=True)

# defines our model: a feedforward neural network
model = nn.Sequential(
            nn.Linear(3072, 1024), # linear layer: flattened 32x32 RGB image input, 1024 output
            nn.Tanh(),             # hyperbolic tangent activation function to introduce non-linearity
            nn.Linear(1024, 512),  
            nn.Tanh(),             
            nn.Linear(512, 128),   
            nn.Tanh(),              
            nn.Linear(128, 2))     # output size is 2: one output for each class: airplane and bird.

# learning rate controls how much the model's parameters are updated during training
learning_rate = 1e-2

# Stochastic Gradient Descent (SGD) optimizer:
# this is who updates parameters using gradients computed from a batch of data
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# cross-entropy loss function (commonly used for classification tasks)
# it computes the difference between the predicted class probabilities and the true labels
loss_fn = nn.CrossEntropyLoss()

n_epochs = 100

for epoch in range(n_epochs):         
    for imgs, labels in train_loader:                  # iterates over batches of images and labels

        outputs = model(imgs.view(imgs.shape[0], -1))  # compute model's prediction
                                                       # we flatten each 32x32 RGB image into a 3072-dimensional vector
                                                            
        loss = loss_fn(outputs, labels)                # compute loss

        optimizer.zero_grad()                          # reset gradients to prevent accumulation from previous batches
        loss.backward()                                # compute gradients of the loss with respect to the model's params 
        optimizer.step()                               # update params

    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))

Epoch: 0, Loss: 0.543585
Epoch: 1, Loss: 0.436783
Epoch: 2, Loss: 0.369613
Epoch: 3, Loss: 0.525788
Epoch: 4, Loss: 0.380967
Epoch: 5, Loss: 0.403782
Epoch: 6, Loss: 0.343655
Epoch: 7, Loss: 0.523449
Epoch: 8, Loss: 0.534279
Epoch: 9, Loss: 0.469703
Epoch: 10, Loss: 0.544880
Epoch: 11, Loss: 0.323124
Epoch: 12, Loss: 0.413903
Epoch: 13, Loss: 0.323149
Epoch: 14, Loss: 0.392230
Epoch: 15, Loss: 0.381743
Epoch: 16, Loss: 0.240899
Epoch: 17, Loss: 0.400531
Epoch: 18, Loss: 0.333019
Epoch: 19, Loss: 0.402232
Epoch: 20, Loss: 0.358307
Epoch: 21, Loss: 0.117321
Epoch: 22, Loss: 0.271958
Epoch: 23, Loss: 0.201787
Epoch: 24, Loss: 0.073590
Epoch: 25, Loss: 0.167260
Epoch: 26, Loss: 0.109803
Epoch: 27, Loss: 0.376509
Epoch: 28, Loss: 0.273178
Epoch: 29, Loss: 0.056173
Epoch: 30, Loss: 0.154359
Epoch: 31, Loss: 0.327829
Epoch: 32, Loss: 0.192236
Epoch: 33, Loss: 0.243612
Epoch: 34, Loss: 0.070247
Epoch: 35, Loss: 0.080788
Epoch: 36, Loss: 0.090627
Epoch: 37, Loss: 0.145514
Epoch: 38, Loss: 0.284

In [23]:
# evaluate the model's accuracy on the training set

train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64, shuffle=False)

correct = 0
total = 0

with torch.no_grad():
    for imgs, labels in train_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
        
print("Accuracy: %f" % (correct / total))

Accuracy: 0.999900


In [24]:
# evaluate the model's accuracy on the validation set

val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64, shuffle=False)

correct = 0
total = 0

with torch.no_grad():
    for imgs, labels in val_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
        
print("Accuracy: %f" % (correct / total))

Accuracy: 0.806000


Next step: CONVOLUTIONS!