<a href="https://colab.research.google.com/github/tr-dev-bc/Modern_CV_Assignments/blob/main/step_8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt  # used to plot error later
# helps running in jupyter
%matplotlib inline

from torchvision import datasets, transforms
from torch.optim import SGD


In [2]:


# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"  # switch for gpu else cpu

# Load and normalize the Fashion-MNIST dataset
data_folder = '~/FMNIST'
fmnist_train = datasets.FashionMNIST(data_folder, download=True, train=True)
fmnist_valid = datasets.FashionMNIST(data_folder, download=True, train=False)
tr_images = fmnist_train.data
tr_targets = fmnist_train.targets
val_images = fmnist_valid.data
val_targets = fmnist_valid.targets


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/FMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:01<00:00, 13.4MB/s]


Extracting /root/FMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/FMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/FMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 269kB/s]


Extracting /root/FMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/FMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/FMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:00<00:00, 5.06MB/s]


Extracting /root/FMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/FMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/FMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<00:00, 17.2MB/s]

Extracting /root/FMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/FMNIST/FashionMNIST/raw






In [3]:
class FMNISTDataset(Dataset):
    def __init__(self, x, y):
        x = x.float() / 255  # Normalizes images to [0,1]
        x = x.view(-1, 1, 28, 28)
        self.x, self.y = x, y

    def __getitem__(self, ix):
        x, y = self.x[ix], self.y[ix]  # unpack tuple into variables
        return x.to(device), y.to(device)

    def __len__(self):
        return len(self.x)

In [4]:
def get_data():
    train = FMNISTDataset(tr_images, tr_targets)
    trn_dl = DataLoader(train, batch_size=64, shuffle=True)         # batch size is 64
    return trn_dl

def get_val_data():
    valid = FMNISTDataset(val_images, val_targets)
    val_dl = DataLoader(valid, batch_size=1, shuffle=True)         # for our CAM, the batch size is 1
    return val_dl

In [5]:
def get_model():
    class neuralnet(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv_layer = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
            self.pool = nn.MaxPool2d(kernel_size=2)
            self.flatten = nn.Flatten()
            self.input_to_hidden_layer = nn.Linear(32 * 13 * 13, 128)
            self.batch_norm = nn.BatchNorm1d(128)
            self.hidden_layer_activation = nn.ReLU()
            self.hidden_to_output_layer = nn.Linear(128, 10)

        def forward(self, x):
            x = self.conv_layer(x)
            x = self.pool(x)
            x = self.flatten(x)
            x = self.input_to_hidden_layer(x)
            x0 = self.batch_norm(x)
            x1 = self.hidden_layer_activation(x0)
            x2 = self.hidden_to_output_layer(x1)
            return x2

    model = neuralnet().to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=1e-2)
    return model, loss_fn, optimizer

In [6]:
# loading up training data
train_loader = get_data()
val_loader = get_val_data()  # Get validation data
model, loss_fn, optimizer = get_model()

# training loop with tracking of loss
num_epochs = 10
train_losses = []

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        predictions = model(x)
        loss = loss_fn(predictions, y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    average_loss = epoch_loss / len(train_loader)
    train_losses.append(average_loss)
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss:.4f}')

Epoch 1/10, Loss: 0.5125
Epoch 2/10, Loss: 0.3446
Epoch 3/10, Loss: 0.2981
Epoch 4/10, Loss: 0.2646
Epoch 5/10, Loss: 0.2401
Epoch 6/10, Loss: 0.2190
Epoch 7/10, Loss: 0.2017
Epoch 8/10, Loss: 0.1866
Epoch 9/10, Loss: 0.1708
Epoch 10/10, Loss: 0.1592


# ADDING THE CAMS

In [7]:
# Function to generate and display CAMs
def generate_cam(model, images, target_layer, class_index):
    model.eval()
    with torch.no_grad():
        # Forward pass
        output = model(images)
        # Get the class scores
        class_score = output[0, class_index]

        # Backward pass to get gradients
        model.zero_grad()
        class_score.backward()

        # Get the gradients and the activation of the target layer
        gradients = target_layer.grad
        activations = target_layer(images).detach()

        # Compute the weights
        weights = torch.mean(gradients, dim=[0, 2, 3])  # Global Average Pooling on gradients

        # Create the CAM
        cam = torch.zeros(activations.shape[2:], dtype=torch.float32)
        for i in range(weights.shape[0]):
            cam += weights[i] * activations[0, i, :, :]

        # Normalize CAM
        cam = nn.ReLU()(cam)
        cam = cam - cam.min()
        cam = cam / cam.max()
        return cam

# Display the CAMS (4)

In [8]:
target_layer = model.conv_layer  # Using the conv layer for CAM
for images, labels in val_loader:
    for i in range(4):  # choose 4 CAM images
        image = images[i].unsqueeze(0)
        label = labels[i].item()
        cam = generate_cam(model, image, target_layer, label)

        # Plot the image and the CAM
        plt.subplot(1, 2, 1)
        plt.imshow(image[0].cpu().numpy(), cmap='gray')
        plt.title(f'Image: {label}')
        plt.axis('off')

        plt.subplot(1, 2, 2)
        plt.imshow(cam.cpu().numpy(), cmap='jet', alpha=0.5)  # Overlay CAM
        plt.title('Class Activation Map')
        plt.axis('off')

        plt.show()

    break  # Only display for the first batch

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn