### reference baseline code
https://github.com/Caoliangjie/pytorch-gradcam-resnet50/blob/bfbd6d2fa6f6c490eaba1232b226ca4b09fe5fc1/grad-cam.py#L80

# Loading pretrained model
### every models are pre-trained 1000-class Imagenet datasets
### reference: https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html

In [65]:
IMG_PATH = "cat.jpg"
MODEL_NAME = "resnet18"
TARGET_CLASS = 1 # None is highest confidence
TARGET_LAYER_NAMES = ["layer4"]

IMAGE_SIZE = (224,224)  # later, automatically determined

In [66]:
# first get pretrained model
import torch
import torch.nn as nn
import torchvision.models as models

print("==========================================")
print(f"torch version: {torch.__version__}")
print(f"cuda available: {torch.cuda.is_available()}")
print("==========================================")

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# load pretrained model
if MODEL_NAME == "resnet18":
    model = models.resnet18(pretrained=True, progress=True)
if MODEL_NAME == "resnet50":
    model = models.resnet50(pretrained=True, progress=True)

model.eval()
model.to(device=device)

torch version: 1.8.0+cu111
cuda available: True


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

## test whether it works well

In [67]:
data = torch.rand(1, 3, 64, 64).to(device) # you need to select input size of the model
labels = torch.rand(1, 1000).to(device) # you need to select the class you want to predict

prediction = model(data)

loss = (prediction - labels).sum()
loss.backward() # backward pass

# Load image for testing
### preprocess image to be fitted pretrained model

In [68]:
import cv2
import numpy as np

# normalized image
origin_img = cv2.imread(IMG_PATH, 1)
rgb_img = np.float32(origin_img) / 255

IMAGE_SIZE = (rgb_img.shape[0], rgb_img.shape[1]) # determine image size

In [69]:
from torchvision.transforms import Compose, Normalize, ToTensor

image_transformer = Compose([ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
input_tensor = image_transformer(rgb_img).unsqueeze(0).to(device)
input_tensor.requires_grad = True

# Implementation Grad-CAM
## -Extract gradient
### using gradient hook, get gradient layers what we want

## -GradCAM
### Wrapping extract gradient

In [70]:
class ExtractGradient():
    def __init__(self, model, target_layers, device = 'cpu'):

        # gradient저장할 변수 지정
        self.model = model
        self.model.eval()
        self.model.to(device)
        
        self.target_layers = target_layers
        self.device = device

        self.gradients = []

        print("============Module name============")
        for name, module in self.model._modules.items():
            print(name)
            if name in self.target_layers:
                print("register hook")
                module.register_full_backward_hook(self.gradient_hook_function)

        print("===================================")

    def forward(self, x):
        return self.model(x)

    # Implement hookup function
    def gradient_hook_function(self, module, grad_in, grad_out):
        # grad_in : [batch_size, num_features, height, width]        
        self.gradients.append(grad_in[0])   # No need batch

    # get backwards gradient
    def backward_gradient(self, output, target_label):

        # make target label's one-hot vector
        one_hot = np.zeros((1, output.size()[-1]), dtype = np.float32)
        one_hot[0][target_label] = 1
        one_hot = torch.from_numpy(one_hot).to(self.device)
        one_hot.requires_grad = True

        one_hot = torch.sum(one_hot * output)
        
        # hookup function 발동
        self.model.zero_grad()
        one_hot.backward(retain_graph=True)

    def get_gradient(self):

        output = self.gradients
        self.gradients = []

        return output

In [71]:
class GradCam(ExtractGradient):
    def __init__(self, model, target_layers, device = 'cpu'):

        super().__init__(model, target_layers, device)

    def make_grad_cam_image(self, gradients, image_size=(224,224)):

        grad_cam_image = np.zeros(image_size, dtype=np.float32)

        for cam in gradients:

            cam = cam.cpu().data.numpy().squeeze(axis=0) # [num_features, height, width]
            weights = np.mean(cam, axis=(1,2))

            tmp_grad = np.zeros_like(cam[0,:,:])

            for i, w in enumerate(weights):
                tmp_grad += w * cam[i, :, :]

            tmp_grad = cv2.resize(tmp_grad, (image_size[1],image_size[0]))

            grad_cam_image += tmp_grad
            
        
        grad_cam_image = np.maximum(grad_cam_image, 0)  # like relu
        
        ## normalize
        grad_cam_image = grad_cam_image - np.min(grad_cam_image)
        grad_cam_image = grad_cam_image / (1e-7 + np.max(grad_cam_image)) # prevent from zero division

        return grad_cam_image
    
    def __call__(self, x, target_label=None):

        # forward pass
        prediction = self.forward(x)

        # make gradient
        if target_label is None:
            target_label = np.argmax(prediction.cpu().data.numpy())

        print(f"target label: {target_label}")

        self.backward_gradient(prediction, target_label)

        # get gradient
        gradients = self.get_gradient()

        # make grad cam image
        grad_cam_image = self.make_grad_cam_image(gradients, image_size=IMAGE_SIZE)

        return grad_cam_image, target_label, gradients

In [80]:
import copy # gpu make error, so copying but it doesn't work
grad_cam = GradCam(copy.deepcopy(model), TARGET_LAYER_NAMES, device)

conv1
bn1
relu
maxpool
layer1
layer2
layer3
layer4
register hook
avgpool
fc


In [81]:
grad_cam_iamge, target_label, gradients = grad_cam(input_tensor, TARGET_CLASS)

target label: 1


RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED
You can try to repro this exception using the following code snippet. If that doesn't trigger the error, please include your original repro script when reporting this issue.

import torch
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.allow_tf32 = True
data = torch.randn([1, 64, 102, 153], dtype=torch.float, device='cuda', requires_grad=True)
net = torch.nn.Conv2d(64, 64, kernel_size=[3, 3], padding=[1, 1], stride=[1, 1], dilation=[1, 1], groups=1)
net = net.cuda().float()
out = net(data)
out.backward(torch.randn_like(out))
torch.cuda.synchronize()

ConvolutionParams 
    data_type = CUDNN_DATA_FLOAT
    padding = [1, 1, 0]
    stride = [1, 1, 0]
    dilation = [1, 1, 0]
    groups = 1
    deterministic = false
    allow_tf32 = true
input: TensorDescriptor 0x7f39a80ecde0
    type = CUDNN_DATA_FLOAT
    nbDims = 4
    dimA = 1, 64, 102, 153, 
    strideA = 998784, 15606, 153, 1, 
output: TensorDescriptor 0x7f39a80e9d30
    type = CUDNN_DATA_FLOAT
    nbDims = 4
    dimA = 1, 64, 102, 153, 
    strideA = 998784, 15606, 153, 1, 
weight: FilterDescriptor 0x7f39a80e36e0
    type = CUDNN_DATA_FLOAT
    tensor_format = CUDNN_TENSOR_NCHW
    nbDims = 4
    dimA = 64, 64, 3, 3, 
Pointer addresses: 
    input: 0xb2c43d800
    output: 0xb320cf600
    weight: 0xb31cb0000
Additional pointer addresses: 
    grad_output: 0xb320cf600
    grad_weight: 0xb31cb0000
Backward filter algorithm: 5


In [74]:
print(f"grad_cam_iamge shape: {grad_cam_iamge.shape}")
print(f"grad_cam_iamge min: {grad_cam_iamge.min()}")
print(f"grad_cam_iamge max: {grad_cam_iamge.max()}")

grad_np = gradients[0].cpu().data.numpy()
print(f"grad_np shape: {grad_np.shape}")
print(f"grad_np min: {grad_np.min()}")
print(f"grad_np max: {grad_np.max()}")

grad_cam_iamge shape: (408, 612)
grad_cam_iamge min: 0.0
grad_cam_iamge max: 0.9996618032455444
grad_np shape: (1, 256, 26, 39)
grad_np min: -0.025650158524513245
grad_np max: 0.027240999042987823


# visualization

### get imagenet labels
### reference: https://discuss.pytorch.org/t/imagenet-classes/4923/2

In [75]:
import json
class_idx = json.load(open("imagenet_class_index.json"))

In [76]:
def show_cam_on_image(img: np.ndarray,
                      mask: np.ndarray,
                      use_rgb: bool = False,
                      colormap: int = cv2.COLORMAP_JET) -> np.ndarray:
    """ This function overlays the cam mask on the image as an heatmap.
    By default the heatmap is in BGR format.

    :param img: The base image in RGB or BGR format.
    :param mask: The cam mask.
    :param use_rgb: Whether to use an RGB or BGR heatmap, this should be set to True if 'img' is in RGB format.
    :param colormap: The OpenCV colormap to be used.
    :returns: The default image with the cam overlay.
    """
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
    if use_rgb:
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    heatmap = np.float32(heatmap) / 255

    if np.max(img) > 1:
        raise Exception(
            "The input image should np.float32 in the range [0, 1]")

    cam = heatmap + img
    cam = cam / np.max(cam)
    return np.uint8(heatmap*255), np.uint8(255 * cam)


In [77]:
heatmap, mix_image = show_cam_on_image(rgb_img, grad_cam_iamge)

cv2.imwrite("grad_cam_gray.jpg", np.uint8(grad_cam_iamge*255))
cv2.imwrite("grad_cam_heatmap.jpg", heatmap)
cv2.imwrite("grad_cam_image.jpg", mix_image)

print(f"target label: {target_label}, {class_idx[str(target_label)][1]}")

target label: 1, goldfish


# Guided Backpropagation
### using https://github.com/Caoliangjie/pytorch-gradcam-resnet50/blob/bfbd6d2fa6f6c490eaba1232b226ca4b09fe5fc1/grad-cam.py#L80

In [78]:
class GuidedBackpropReLUModel:
	def __init__(self, model, device):
		self.model = model
		self.model.eval()
		self.model.to(device)

		self.device = device

		for module in self.model.named_modules():
			module[1].register_backward_hook(self.bp_relu)

	def bp_relu(self, module, grad_in, grad_out):
		if isinstance(module, nn.ReLU):
			return (torch.clamp(grad_in[0], min=0.0),)
	def forward(self, input):
		return self.model(input)

	def __call__(self, input, index = None):
		input = input.to(self.device)

		output = self.forward(input)

		if index == None:
			index = np.argmax(output.cpu().data.numpy())
		one_hot = np.zeros((1, output.size()[-1]), dtype = np.float32)
		one_hot[0][index] = 1
		one_hot = torch.from_numpy(one_hot)
		one_hot.requires_grad = True
		one_hot = one_hot.to(self.device)

		one_hot = torch.sum(one_hot * output)
		one_hot.backward(retain_graph=True)
		output = input.grad.cpu().data.numpy()
		output = output[0,:,:,:]

		return output

In [79]:
gb_model = GuidedBackpropReLUModel(model = copy.deepcopy(model), device=device)
gb = gb_model(input_tensor, index=TARGET_CLASS)

weights = np.mean(gb, axis=(1,2))

gb_img = np.zeros_like(gb[0,:,:])

for i, w in enumerate(weights):
    gb_img += w * gb[i, :, :]

## normalize
gb_img = np.maximum(gb_img, 0)
gb_img = gb_img - np.min(gb_img)
gb_img = gb_img / (1e-7 + np.max(gb_img)) # prevent from zero division

cv2.imwrite("guided_backpropagation.jpg", gb_img*255)

RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED
You can try to repro this exception using the following code snippet. If that doesn't trigger the error, please include your original repro script when reporting this issue.

import torch
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.allow_tf32 = True
data = torch.randn([1, 64, 102, 153], dtype=torch.float, device='cuda', requires_grad=True)
net = torch.nn.Conv2d(64, 64, kernel_size=[3, 3], padding=[1, 1], stride=[1, 1], dilation=[1, 1], groups=1)
net = net.cuda().float()
out = net(data)
out.backward(torch.randn_like(out))
torch.cuda.synchronize()

ConvolutionParams 
    data_type = CUDNN_DATA_FLOAT
    padding = [1, 1, 0]
    stride = [1, 1, 0]
    dilation = [1, 1, 0]
    groups = 1
    deterministic = false
    allow_tf32 = true
input: TensorDescriptor 0x7f39a80e07d0
    type = CUDNN_DATA_FLOAT
    nbDims = 4
    dimA = 1, 64, 102, 153, 
    strideA = 998784, 15606, 153, 1, 
output: TensorDescriptor 0x7f39a8038ad0
    type = CUDNN_DATA_FLOAT
    nbDims = 4
    dimA = 1, 64, 102, 153, 
    strideA = 998784, 15606, 153, 1, 
weight: FilterDescriptor 0x7f39a80cbbd0
    type = CUDNN_DATA_FLOAT
    tensor_format = CUDNN_TENSOR_NCHW
    nbDims = 4
    dimA = 64, 64, 3, 3, 
Pointer addresses: 
    input: 0xb1f6dec00
    output: 0xb2552ae00
    weight: 0xb1ca30000
Additional pointer addresses: 
    grad_output: 0xb2552ae00
    grad_weight: 0xb1ca30000
Backward filter algorithm: 5


# Guided Grad-CAM

In [None]:
ggc = grad_cam_iamge * gb_img
ggc = ggc - np.min(ggc)
ggc = ggc / (1e-7 + np.max(ggc)) # prevent from zero division

cv2.imwrite("guided_grad_cam.jpg", ggc*255)