# DenseNet121 Grad-CAM

In [24]:
# Libraries
import torch
from torchvision.models import densenet121
import torch.nn as nn
from torchcam.methods import GradCAM
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import pandas as pd
import matplotlib.pyplot as plt
from torchcam.utils import overlay_mask
from torchvision.transforms.functional import to_pil_image

## Model preparation

In [25]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set the model architecture: DenseNet121 with its classifier replaced by a
# 10-way linear head. load_state_dict below needs this to match the checkpoint.
num_classes = 10
model = densenet121()
model.classifier = nn.Linear(model.classifier.in_features, num_classes)

# Load the saved checkpoint (won't work unless you own the file).
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a GPU machine still loads when only the CPU is available.
checkpoint = torch.load('model_23_May_12_07.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
model.eval()

  checkpoint = torch.load('model_23_May_12_07.pth')


DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [26]:
# Build the Grad-CAM extractor.
# It targets conv2 of denselayer16 in denseblock4 -- presumably the last
# convolutional layer of the network.
last_conv_layer = model.features.denseblock4.denselayer16.conv2
cam_extractor = GradCAM(model, target_layer=last_conv_layer)

## Data preparation

In [27]:
# Dataset model
class SpectrogramDataset(Dataset):
    """Dataset of spectrogram images described by an annotation table.

    Column 0 of the table holds the file name and column 1 the label string.
    When no ``label_map`` is supplied, one is built from the sorted unique
    values of the ``genre`` column. Images are read from
    ``root_dir/<subfolder>/<stem>.png``, where ``<subfolder>`` is the part of
    the file name before its first dot and ``<stem>`` is the file name
    without its extension.

    Args:
        dataframe: Annotation table; takes precedence over ``csv_file``.
        csv_file: Path of a CSV file loaded with ``pandas.read_csv``.
        root_dir: Directory that contains the spectrogram subfolders.
        transform: Optional callable applied to every loaded PIL image.
        label_map: Optional mapping from label string to class index.

    Raises:
        ValueError: If neither ``dataframe`` nor ``csv_file`` is given.
    """

    def __init__(self, dataframe=None, csv_file=None, root_dir=None, transform=None, label_map=None):
        if dataframe is not None:
            self.annotations = dataframe.reset_index(drop=True)
        elif csv_file is not None:
            self.annotations = pd.read_csv(csv_file)
        else:
            raise ValueError("This should be either dataframe or csv!")

        self.root_dir = root_dir
        self.transform = transform
        # Test against None explicitly so that a caller-supplied map is never
        # silently replaced just because it happens to be falsy.
        self.label_map = label_map if label_map is not None else self._build_label_map()

    def _build_label_map(self):
        """Map each unique ``genre`` value to its index in sorted order."""
        labels = self.annotations['genre'].unique()
        return {label: idx for idx, label in enumerate(sorted(labels))}

    def _image_path(self, idx):
        """Return ``(path, png_name)`` of the spectrogram for row ``idx``."""
        source_name = self.annotations.iloc[idx, 0]
        subfolder = source_name.split('.')[0]
        # splitext removes the extension whatever its length; a fixed [:-4]
        # slice is only correct for three-character extensions.
        png_name = os.path.splitext(source_name)[0] + '.png'
        return os.path.join(self.root_dir, subfolder, png_name), png_name

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, label, filename)`` for row ``idx``.

        ``image`` is the RGB spectrogram (transformed when a transform was
        given), ``label`` the class index looked up in ``label_map`` and
        ``filename`` the name of the PNG file that was read.
        """
        img_name, filename = self._image_path(idx)

        image = Image.open(img_name).convert('RGB')
        label_str = self.annotations.iloc[idx, 1]
        label = self.label_map[label_str]

        if self.transform is not None:
            image = self.transform(image)

        return image, label, filename

In [28]:
# Define the transformation of images: resize to 224x224 and convert to a tensor
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Read the test set and transform the images
test_df = pd.read_csv('densenet_dataset_split/test.csv')        # !!! update the path after moving the notebook
test_dataset = SpectrogramDataset(
    dataframe=test_df,
    root_dir='../project_data/spectrograms',
    transform=transform
)
# No shuffling: nothing is being trained here, and a fixed order keeps the
# run reproducible from one execution to the next.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

## Grad-CAM algorithm

In [29]:
# Class index -> genre name; a name's position in the tuple is its class index
inv_genre_map = dict(enumerate((
    'blues',
    'classical',
    'country',
    'disco',
    'hip-hop',
    'jazz',
    'metal',
    'pop',
    'reggae',
    'rock',
)))

In [30]:
# The output directory never changes, so create it once rather than once per image
os.makedirs('masks', exist_ok=True)

for images, labels, filenames in test_loader:

    # Only the images are fed to the model. The labels are read back as plain
    # Python ints below, so they are left where the loader put them.
    images = images.to(device)

    batch_size = images.size(0)

    # The images are processed one at a time: a CAM is extracted for each
    # image's own predicted class.
    for i in range(batch_size):
        input_img = images[i].unsqueeze(0)      # single-image batch
        input_label = labels[i].item()
        input_filename = filenames[i]

        # Forward pass. Gradients are deliberately left enabled (no
        # torch.no_grad()), since Grad-CAM needs them for the class score.
        out = model(input_img)
        pred_class = out.squeeze(0).argmax().item()
        activation_map = cam_extractor(pred_class, out)     # Retrieve the CAM
        # NumPy copy of the raw map, used by the optional plot below
        cam_np = activation_map[0].squeeze(0).detach().cpu().numpy()

        # Turn numbers into label names
        pred_label_name = inv_genre_map[pred_class]
        input_label_name = inv_genre_map[input_label]

        # Optional: visualise the raw map
        # plt.imshow(cam_np)
        # plt.axis('off')
        # plt.tight_layout()
        # plt.title(f'Predicted: {pred_label_name} | Actual: {input_label_name}')
        # plt.show()

        # Blend the CAM over the input image
        result = overlay_mask(
            to_pil_image(input_img.squeeze(0)),
            to_pil_image(activation_map[0].squeeze(0), mode='F'),
            alpha=0.5,
        )

        # Optional: visualise the overlay
        # plt.imshow(result)
        # plt.axis('off')
        # plt.tight_layout()
        # plt.show()

        # Save data to file.
        # NOTE(review): input_filename already ends in '.png', so the saved
        # name contains the extension twice. Left unchanged in case other
        # code relies on the current naming -- confirm before changing it.
        output_filename = f'cam_{input_filename}_pred_{pred_label_name}.png'

        output_path = os.path.join('masks', output_filename)
        result.save(output_path)

## CLEAR CACHE

In [31]:
# Drop the references first: empty_cache() can only hand back memory that no
# live tensor is still using. input_img is a view into `images`, so it has to
# be deleted too or the batch storage stays alive.
del images, labels, input_img, out, activation_map, cam_np, result
torch.cuda.empty_cache()