# Class Activation Map

Hi everyone,

here I implemented the paper that introduced the Class Activation Map (CAM): https://arxiv.org/pdf/1512.04150.pdf

This is a simple method that computes a pixel-level heatmap showing where the network is paying attention when it makes its prediction. I think it can be useful for this challenge, since most of the solutions I'm seeing here use spectrograms (and variants such as mel-spectrograms), which can be treated as images. In particular, this tool can be helpful for understanding the data and debugging the model.

This is a PyTorch implementation, hope this can help!

Guglielmo

In [None]:
##################################################
# Imports
##################################################

from PIL import Image
import matplotlib.pyplot as plt
import torch
from torchvision import models, transforms
import torch.nn.functional as F
import numpy as np
import skimage.transform

# Download an example image to cat.jpg (IPython shell escape; needs wget and network access, output is silenced)
!wget https://img.webmd.com/dtmcms/live/webmd/consumer_assets/site_images/article_thumbnails/other/cat_relaxing_on_patio_other/1800x1200_cat_relaxing_on_patio_other.jpg -O cat.jpg >/dev/null 2>&1

In [None]:
# Show the image
# Open the file saved as "cat.jpg"; `image` is kept at module level because
# later cells preprocess it and draw the heatmap on top of it.
image = Image.open("cat.jpg")
# Create the figure first so that imshow draws into a 15x10 inch canvas.
plt.figure(figsize=(15, 10))
plt.imshow(image)
plt.show()

In [None]:
##################################################
# Preprocessing
##################################################

# Per-channel mean/std of ImageNet, used to normalize the three colour channels
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Scale to 224x224, convert to tensor, and normalize with mean/std for ImageNet
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

# The normalization above has exactly three channel statistics, so force the
# picture to RGB first: grayscale or RGBA inputs (e.g. spectrogram PNGs) would
# otherwise fail with a channel mismatch. unsqueeze(0) adds the batch axis.
x_img = preprocess(image.convert("RGB")).unsqueeze(0)

# Using CAM on your model

In [None]:
# Model
# ResNet-18 from torchvision, requested with pretrained weights.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=` — confirm against the installed version before changing it.
model = models.resnet18(pretrained=True)

In [None]:
# Get the features from a model
class SaveFeatures():
    """Capture the output of a module on every forward pass.

    A forward hook is registered on ``module`` at construction time. Each time
    the module runs, its output is stored in ``features`` as a NumPy array.
    Call ``remove()`` to unregister the hook once the features are collected.
    """

    # Output of the most recent forward pass; None until the hook has fired.
    features = None

    def __init__(self, module):
        """Register the forward hook on ``module`` and keep its handle."""
        self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        """Forward-hook callback: store ``output`` as a NumPy array."""
        # detach() drops the autograd graph (replacing the legacy `.data`
        # accessor) and cpu() makes the conversion work when the model runs
        # on an accelerator, where a direct .numpy() call raises.
        self.features = output.detach().cpu().numpy()

    def remove(self):
        """Unregister the hook; ``features`` keeps its last stored value."""
        self.hook.remove()

def getCAM(feature_conv, weight_fc, class_idx):
    """Compute the class activation map for one class.

    The map is the channel-wise weighted sum of the convolutional features,
    using the classifier weights of ``class_idx``, rescaled to ``[0, 1]``.

    Args:
        feature_conv: array of shape ``(1, nc, h, w)``; the reshape to
            ``(nc, h * w)`` below only works for a single-image batch.
        weight_fc: array indexed by class on its first axis, with ``nc``
            weights per class.
        class_idx: index of the class whose map is wanted.

    Returns:
        A one-element list holding the ``(h, w)`` map. A constant (flat) map
        is returned as all zeros.
    """
    _, nc, h, w = feature_conv.shape
    cam = weight_fc[class_idx].dot(feature_conv.reshape((nc, h * w)))
    cam = cam.reshape(h, w)
    cam = cam - np.min(cam)
    peak = np.max(cam)
    # After the shift a flat map is all zeros; dividing by its zero peak would
    # yield NaNs (0/0), so leave it unscaled in that case.
    cam_img = cam / (peak if peak != 0 else 1.0)
    return [cam_img]

In [None]:
# Get features from last conv layer
final_layer = model._modules.get('layer4')
activated_features = SaveFeatures(final_layer)

# Inference
_ = model.eval()
try:
    # No gradients are needed to produce a heatmap, so skip building the graph.
    with torch.no_grad():
        prediction = model(x_img)
finally:
    # Always unregister the hook, even if the forward pass fails, so it does
    # not keep firing on later calls to the model.
    activated_features.remove()

# Softmax over the class axis (dim=1); relying on the implicit dim is deprecated.
pred_probabilities = F.softmax(prediction, dim=1).squeeze()
top1 = torch.topk(pred_probabilities, 1)
print('Top-1 prediction:', top1)

# Take the first parameter of the `fc` layer (the weight matrix of a linear
# layer); detach/cpu keeps the conversion valid wherever the model lives.
weight_softmax_params = list(model._modules.get('fc').parameters())
weight_softmax = np.squeeze(weight_softmax_params[0].detach().cpu().numpy())

# Get the top-1 prediction as a plain int and compute its CAM
class_idx = int(top1[1].item())
overlay = getCAM(activated_features.features, weight_softmax, class_idx)

In [None]:
# Show CAM
# First figure: the raw heatmap returned by getCAM (first list element),
# drawn on its own at the feature-map resolution.
plt.figure(figsize=(5, 5))
plt.title('Class Activation Map', fontweight='bold')
plt.imshow(overlay[0], alpha=0.5, cmap='jet')

# Show CAM on the image
# Second figure: the original picture with the heatmap drawn semi-transparently
# on top. The heatmap is resized to (image.size[1], image.size[0]); PIL reports
# size as (width, height), so the indices are swapped to give (rows, cols).
plt.figure(figsize=(15, 10))
plt.title('Class Activation Map on the Image', fontweight='bold')
plt.imshow(image)
plt.imshow(skimage.transform.resize(overlay[0], (image.size[1], image.size[0])), alpha=0.5, cmap='jet');
plt.show()