# Class Activation Map (CAM)
- 本教學將透過 CAM 來說明影像神經網路模型的可解釋性
- CAM 論文連結：https://arxiv.org/abs/1512.04150
- 本教學程式碼改寫自原作者程式碼
  - https://github.com/zhoubolei/CAM/blob/master/pytorch_CAM.py

In [57]:
# 0. 載入需要的套件

# default 套件
import json
from PIL import Image, ImageOps

# OpenCV (Open Source Computer Vision Library)
import cv2 # cv2 是 Python 中調用 OpenCV 函數的模組名稱

# NumPy & Matplotlib
import numpy as np
import matplotlib.pyplot as plt

# PyTorch
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.nn import functional as F

In [None]:
# 1. Define the image preprocessing pipeline `transform`

# The test image is RGB, so mean and std each carry three values.
# Source of the numbers:
# https://docs.pytorch.org/vision/main/models/generated/torchvision.models.resnet18.html
normalize_step = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

transform = transforms.Compose([
    # ResNet-18 is used below, so resize to its pre-training resolution.
    transforms.Resize((224, 224)),
    # Converts the values to floating point numbers in [0.0, 1.0].
    transforms.ToTensor(),
    normalize_step,
])

In [11]:
# 2. Load the test image

img_path = "shiba_inu.JPG"  # path of the input image

img = Image.open(img_path)
# Undo the EXIF rotation so phone photos are not loaded turned by 90 degrees.
img = ImageOps.exif_transpose(img)
# `transform` normalizes with three-channel statistics, so force three RGB
# channels: grayscale, palette, RGBA or CMYK files would otherwise fail there.
img = img.convert("RGB")
tmp_img_tensor = transform(img)

In [16]:
# 3. Add a batch dimension to obtain `img_tensor`

print(tmp_img_tensor.shape)

# A model normally expects batched input, so prepend a batch axis of size 1.
img_tensor = torch.unsqueeze(tmp_img_tensor, dim=0)
print("維度意義為：(batch, num_channels, height, width)")
print(img_tensor.shape)

torch.Size([3, 224, 224])
維度意義為：(batch, num_channels, height, width)
torch.Size([1, 3, 224, 224])


In [92]:
# 4. Load the pre-trained ResNet-18 model

# `pretrained=True` is deprecated in torchvision (it emits a warning since 0.13);
# the `weights=` enum is its replacement and IMAGENET1K_V1 selects the same
# ImageNet checkpoint that `pretrained=True` used to load.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.eval()  # switch the model to inference mode



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

## AvgPool2d vs. AdaptiveAvgPool2d
- [`AvgPool2d`](https://docs.pytorch.org/docs/stable/generated/torch.nn.AvgPool2d.html) 需要指定 kernel size、stride 和 padding，輸出尺寸由輸入尺寸與這些參數共同決定
- [`AdaptiveAvgPool2d`](https://docs.pytorch.org/docs/stable/generated/torch.nn.AdaptiveAvgPool2d.html) 只需要指定 output size，PyTorch 會依照輸入尺寸自動決定每個池化視窗的範圍，因此不論輸入大小為何都能得到固定尺寸的輸出
- https://discuss.pytorch.org/t/adaptive-avg-pool2d-vs-avg-pool2d/27011


In [None]:
# 5. Set `final_conv_name` (chosen by inspecting the structure of `model`)
# `layer4` is the last block of ResNet-18; the structure printed further down
# shows it as a Sequential holding BasicBlock (0) and BasicBlock (1).

final_conv_name = 'layer4'

In [None]:
# 6. Define the hook function that lets us read the output of a chosen layer

# Collected layer outputs (named `features_blobs` in the original author's code).
features_maps = []


def hook_feature(module, input, output):
    """Forward hook: store a NumPy copy of `output` in `features_maps`.

    `module` and `input` are part of the hook signature but are not used.
    Nothing is returned.
    """
    # .detach() cuts the tensor off from graph tracking, so requires_grad
    # becomes False; .cpu() then moves it to host memory for NumPy.
    captured = output.detach()
    captured = captured.cpu()
    features_maps.append(captured.numpy())

In [None]:
# 7. Register the hook function on the `final_conv_name` layer

# Look the layer up by attribute instead of through the private `_modules`
# dict: a wrong name then raises an AttributeError naming the missing layer
# rather than failing on `None.register_forward_hook`.
# The returned handle is kept because running this cell again registers one
# more hook; `hook_handle.remove()` detaches the hook registered here.
hook_handle = getattr(model, final_conv_name).register_forward_hook(hook_feature)
# Effect of this line:
# once the whole layer4 module has finished computing, hook_feature() captures
# the output of the whole layer4 (i.e. the output of its block 1)
# and stores it in features_maps.
hook_handle

<torch.utils.hooks.RemovableHandle at 0x790a0db6a850>

In [96]:
# `_modules` maps each child name (e.g. 'conv1', 'layer1') to its sub-module.
child_modules = model._modules
print(type(child_modules))
print(child_modules)

<class 'dict'>
{'conv1': Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False), 'bn1': BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 'relu': ReLU(inplace=True), 'maxpool': MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), 'layer1': Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64

In [97]:
# Display the sub-module stored under `final_conv_name`, i.e. the layer the
# forward hook was registered on.
model._modules.get(final_conv_name)

Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (downsample): Sequential(
      (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): BasicBlock(
    (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(512, eps=1

In [None]:
# 8. Fetch the softmax weights
# params[-2] is the second-to-last parameter tensor of the model. Its printed
# shape is (1000, 512): one 512-value weight vector per class. These are the
# weights CAM applies to the feature maps, not the pooled values themselves.
# NOTE(review): presumably this is the final classifier weight, with its bias
# as params[-1] - confirm against the printed model structure.

params = list(model.parameters())
# .cpu() mirrors hook_feature, so the conversion also works if the model has
# been moved to an accelerator; convert once and reuse the array.
last_layer_weight = params[-2].detach().cpu().numpy()
weight_softmax = np.squeeze(last_layer_weight)

print(last_layer_weight.shape)
print(weight_softmax.shape)

(1000, 512)
(1000, 512)


In [None]:
# 9. Get the model's output probabilities (the actual inference step)

# Drop feature maps captured by earlier forward passes, so that
# features_maps[0] belongs to this run even when the cell is executed again.
features_maps.clear()

# Inference needs no gradients; skipping graph construction saves memory.
with torch.no_grad():
    logit = model(img_tensor)
h_x = F.softmax(logit, dim=1).detach().squeeze()
print(h_x.shape)

torch.Size([1000])


In [None]:
# 10. Inspect features_maps (the hook has already captured the output by now)

captured_count = len(features_maps)
print(features_maps)
print(captured_count)

first_capture = features_maps[0]
print(first_capture.shape)

[array([[[[0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
          0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
         [0.0000000e+00, 1.0926756e+00, 0.0000000e+00, ...,
          0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
         [1.0094099e+00, 4.0737429e+00, 2.5078511e+00, ...,
          1.1662819e+00, 2.6986521e-01, 0.0000000e+00],
         ...,
         [1.2592449e+00, 3.8847704e+00, 5.1532283e+00, ...,
          2.1587372e+00, 3.3041552e-02, 0.0000000e+00],
         [2.2102252e-01, 1.2333813e+00, 2.6796930e+00, ...,
          1.5236278e+00, 0.0000000e+00, 2.9857203e-01],
         [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ...,
          0.0000000e+00, 0.0000000e+00, 7.4208416e-02]],

        [[0.0000000e+00, 9.2013490e-01, 1.5779635e+00, ...,
          3.9511982e-01, 0.0000000e+00, 0.0000000e+00],
         [1.0944923e+00, 2.9729476e+00, 3.5193918e+00, ...,
          1.0731202e+00, 0.0000000e+00, 0.0000000e+00],
         [4.9499720e-01, 1.2646437e+00, 2.7977865e+00, 

In [None]:
# 11. Sort the output probabilities from highest to lowest

sorted_output = torch.sort(h_x, dim=0, descending=True)
# `values` holds the sorted probabilities, `indices` the matching class ids.
probs = sorted_output.values.numpy()
idx = sorted_output.indices.numpy()

In [None]:
# 12. Load the 1000 ImageNet class names

LABELS_file = "imagenet-simple-labels.json"

# Load the ImageNet category list. The encoding is given explicitly so the
# file is decoded the same way on every platform instead of with the
# locale-dependent default.
with open(LABELS_file, encoding="utf-8") as f:
    classes = json.load(f)

In [None]:
# 13. Print the five classes with the highest probability

top_k = 5
for rank in range(top_k):
    probability = probs[rank]
    class_name = classes[idx[rank]]
    print('{:.3f} -> {}'.format(probability, class_name))

0.339 -> Pembroke Welsh Corgi
0.210 -> Chihuahua
0.059 -> Cardigan Welsh Corgi
0.032 -> vacuum cleaner
0.019 -> Labrador Retriever


## Pembroke Welsh Corgi from Wikipedia
![img](https://upload.wikimedia.org/wikipedia/commons/9/99/Welsh_Pembroke_Corgi.jpg)

In [None]:
# 14. Compute the CAM output (function)

def returnCAM(feature_conv, weight_softmax, class_idx: list, size_upsample=(256, 256)):
    """Build one class activation map (CAM) image per requested class.

    Args:
        feature_conv: Array of shape (batch, channels, height, width). Only a
            batch of one is supported, because the array is reshaped to
            (channels, height * width).
        weight_softmax: Array indexed by class; row `i` holds one weight per
            channel of `feature_conv`.
        class_idx: Class indices to produce a CAM for.
        size_upsample: Target size handed to `cv2.resize` for every CAM.
            Defaults to (256, 256), the size that used to be hard-coded.

    Returns:
        A list with one uint8 image (values 0-255) per entry of `class_idx`,
        in the same order.

    Raises:
        ValueError: If `feature_conv` holds more than one sample.
    """
    # bs: batch size, nc: number of channels, h: height, w: width
    bs, nc, h, w = feature_conv.shape
    if bs != 1:
        raise ValueError(
            'returnCAM expects a single-sample batch, got batch size {}'.format(bs)
        )

    # Loop-invariant: flatten the spatial axes once for all classes.
    flat_features = feature_conv.reshape((nc, h * w))

    output_cam = []  # collects the generated CAM images
    for class_id in class_idx:
        # Weighted sum over channels, then back to the spatial layout.
        cam = weight_softmax[class_id].dot(flat_features).reshape(h, w)

        # Normalize to [0, 1] so the map can be turned into an image.
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            # A flat map has peak == 0; dividing would yield NaN, so such a
            # map is left as all zeros instead.
            cam = cam / peak

        # Convert to an 8-bit image and upsample.
        cam_img = np.uint8(255 * cam)
        output_cam.append(cv2.resize(cam_img, size_upsample))
    return output_cam

In [None]:
# 15. Compute the CAM output (execution)
# features_maps is a list holding a single element, so features_maps[0] is
# passed; [idx[0]] requests the CAM of the top-1 predicted class only.

CAMs = returnCAM(features_maps[0], weight_softmax, [idx[0]])

In [None]:
# 16. Show the CAM output (build the heat map)

print('output CAM.jpg for the top1 prediction: %s'%classes[idx[0]])
img = cv2.imread(img_path)
if img is None:
    # cv2.imread reports an unreadable file by returning None, not by raising.
    raise FileNotFoundError('cv2.imread could not read {}'.format(img_path))
height, width, _ = img.shape

# Resize the CAM to the size of the original image and apply the colour map.
heatmap = cv2.applyColorMap(cv2.resize(CAMs[0], (width, height)), cv2.COLORMAP_JET)

# Blend the heat map with the original image (both are in BGR order here).
result = heatmap * 0.3 + img * 0.5

# The blend is a float array; turn it into a real 8-bit image in [0, 255]
# before handing it to OpenCV and Matplotlib.
result_uint8 = np.clip(np.rint(result), 0, 255).astype(np.uint8)

# Save the result.
cv2.imwrite('CAM.jpg', result_uint8)

# Show the blended image.
# OpenCV uses BGR channel order while Matplotlib expects RGB. Convert the
# finished blend, so the heat map and the photo are both shown correctly;
# converting only the heat map leaves the photo with swapped channels.
plt.imshow(cv2.cvtColor(result_uint8, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.tight_layout()
plt.show()

output CAM.jpg for the top1 prediction: Pembroke Welsh Corgi


True