## nn.Module

In [45]:
from torchvision import models

In [46]:
# Build torchvision's ResNet-18 with pretrained weights. This `model` is only
# inspected in this section; the Class Activation Map section rebinds the name.
model = models.resnet18(pretrained=True)

In [47]:
# named_parameters() yields (name, parameter) pairs, so turning it into a dict
# lets the `fc` layer's weight be looked up by its dotted name.
dict(model.named_parameters())['fc.weight'].data.shape

torch.Size([1000, 512])

In [48]:
# Positional lookup: the second-to-last entry of parameters(). The output below
# shows it has the same shape as 'fc.weight' for this model.
list(model.parameters())[-2].data.shape

torch.Size([1000, 512])

In [50]:
# Child modules are reachable through the `_modules` mapping, keyed by name;
# note that the nested child is addressed with the string key '0', not an int.
model._modules['layer4']._modules['0']

BasicBlock(
  (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (downsample): Sequential(
    (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

## Class Activation Map

In [11]:
import cv2

def preprocess(image, image_size=(224, 224), device='cpu'):
    """Resize an image and turn it into a standardized single-image batch.

    Args:
        image: Image array, e.g. the result of ``cv2.imread``. Assumed to be
            laid out as H x W x C (the permute below needs three dims after
            resizing) -- TODO confirm for grayscale inputs.
        image_size: Target size handed to ``cv2.resize`` as ``dsize``; OpenCV
            interprets ``dsize`` as (width, height).
        device: Device the tensor is moved to.

    Returns:
        tuple: ``(image, sample)``. ``image`` is the input, returned
        unchanged. ``sample`` is a float tensor of shape (1, C, H, W),
        standardized with the mean and standard deviation of the sample
        itself.

    Raises:
        ValueError: If ``image`` is None, which is what ``cv2.imread``
            returns when it cannot read a file.
    """
    if image is None:
        # Fail early with a clear message instead of an opaque OpenCV error.
        raise ValueError('image is None; check that the image file was read correctly')

    sample = cv2.resize(image, dsize=image_size)
    sample = torch.from_numpy(sample).to(device).to(torch.float)
    # HWC -> 1HWC -> 1CHW
    sample = sample.unsqueeze(dim=0).permute(0, 3, 1, 2)

    # A constant image has zero std; clamp so the result is all zeros rather
    # than NaN. For any non-degenerate image the clamp is a no-op.
    std = sample.std().clamp(min=1e-8)
    sample = (sample - sample.mean()) / std

    return image, sample

In [9]:
# Checkpoint location and inference settings.
weight_path = 'weights/hole_detection/transformer_resnet18/2103191437/best_model_83_loss=-0.1259.pt'
device = 'cpu'
num_classes = 2

# Build the network, restore the saved weights (deserialized onto the CPU),
# then move it to `device` and put it in evaluation mode.
model = TransformerResnet18(num_classes)
model.load_state_dict(torch.load(weight_path, map_location='cpu'))
model = model.to(device).eval()

In [24]:
def get_feature_map(model, sample, last_layer_name='self_attention'):
    """Return the output of the named layer laid out as a (B, C, H, W) map.

    ``model._modules['resnet18_conv']`` is applied to ``sample``. Its 4-D
    output is flattened over the spatial dims and reordered to (H*W, B, C)
    before being passed to ``model._modules[last_layer_name]``; the result is
    then reordered and reshaped back to (B, C, H, W).

    Note: for a model without the transformer stage, the alternative left by
    the original author was to call ``model._modules[last_layer_name](sample)``
    directly.

    Args:
        model: Module exposing ``resnet18_conv`` and ``last_layer_name`` in
            its ``_modules`` mapping.
        sample: Input batch fed to the convolutional backbone.
        last_layer_name: Key of the layer applied to the token sequence.
            The layer is assumed to preserve the (H*W, B, C) shape --
            otherwise the final reshape fails.

    Returns:
        Tensor of shape (B, C, H, W).
    """
    conv_out = model._modules['resnet18_conv'](sample)
    batch, channels, height, width = conv_out.shape

    # (B, C, H, W) -> (B, C, H*W) -> (H*W, B, C)
    tokens = conv_out.reshape(batch, channels, height * width)
    tokens = tokens.permute(2, 0, 1).contiguous()

    last_layer = model._modules[last_layer_name]
    layer_out = last_layer(tokens)

    # (H*W, B, C) -> (B, C, H*W) -> (B, C, H, W)
    restored = layer_out.permute(1, 2, 0).contiguous()
    return restored.reshape(batch, channels, height, width)

In [25]:
# Read the test image with OpenCV.
image_path = '../../ID_CARD/hole_classification/dataset_update/test/hole/CMQD_A/2_05003827_GTTT-page0.jpg'
image = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file is missing or
# unreadable; fail here rather than deep inside a later cell.
if image is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')

In [26]:
# preprocess returns the input image unchanged together with the standardized
# batch tensor, so `image` keeps its original resolution for the overlay later.
image, sample = preprocess(image)
print(sample.shape)

torch.Size([1, 3, 224, 224])


In [27]:
# Inference only: skip autograd bookkeeping, as the class-prediction cell does.
with torch.no_grad():
    feature_map = get_feature_map(model, sample)
print(feature_map.shape)

torch.Size([1, 512, 7, 7])


In [141]:
# Get the weight matrix of the fully connected (classifier) layer.
# NOTE(review): it is picked positionally as the second-to-last parameter,
# which assumes the model's last two parameters are the classifier weight and
# bias -- confirm against the model definition.
fc_classes_weights = list(model.parameters())[-2]
print(fc_classes_weights.shape)  # [n_classes, feature_map_channels]

torch.Size([2, 512])


In [142]:
# Class prediction: forward pass without gradient tracking.
with torch.no_grad():
    preds = model(sample)

# Softmax over dim 1, then drop the leading batch dim (the batch holds one image).
pred = preds.softmax(dim=1).squeeze(dim=0)
class_idx = pred.argmax().item()
# NOTE(review): `classes` is not defined in the visible cells; presumably a
# sequence of class names indexed by label -- confirm where it comes from.
class_name = classes[class_idx]
class_score = pred[class_idx].item()
print(class_name, class_score)

hole 0.9997307658195496


In [143]:
# Get the weights of the predicted class: the row of the classifier weight
# matrix selected by class_idx.
fc_class_weights = fc_classes_weights[class_idx]

In [144]:
# Class activation map: weight every feature-map channel by the predicted
# class's classifier weight and sum over channels.
B, C, H, W = feature_map.shape
# The reshape to (C, H * W) below is only valid for a single-image batch.
assert B == 1, f'CAM expects a single-image batch, got B={B}'
# Bind the flattened view to a new name instead of overwriting `feature_map`,
# so this cell can be re-run without the 4-way shape unpacking failing.
feature_map_flat = feature_map.reshape(C, H * W)
CAM = torch.matmul(fc_class_weights, feature_map_flat).reshape(H, W)
print(CAM.shape)

torch.Size([7, 7])


In [145]:
# Min-max normalize the map to [0, 1]. The range is clamped away from zero so a
# constant map becomes all zeros instead of NaN (0 / 0).
cam_min, cam_max = CAM.min(), CAM.max()
CAM = (CAM - cam_min) / (cam_max - cam_min).clamp(min=1e-12)
# Scale to 0..255 and convert to a uint8 NumPy array for OpenCV.
# Note: this rebinds CAM from a tensor to an ndarray, so the cell cannot be
# re-run on its own without first recomputing CAM.
CAM = (CAM * 255).to(torch.uint8).cpu().detach().numpy()
print(CAM)

[[235 197 204 221 238 225 243]
 [240 175 195 205 230 210 233]
 [232 202 216 235 246 255 253]
 [170  77 121 171 241 236 248]
 [123  55  79 151 213 200 225]
 [ 81   0   8  83 175 171 216]
 [109  42  50 132 185 179 207]]


In [146]:
# Upsample the coarse map to the original image resolution. cv2.resize takes
# dsize as (width, height), hence shape[1] (columns) before shape[0] (rows).
CAM = cv2.resize(CAM, dsize=(image.shape[1], image.shape[0]))
print(CAM.shape)

(492, 733)


In [147]:
# Colorize the single-channel uint8 map with OpenCV's JET colormap.
CAM_heatmap = cv2.applyColorMap(CAM, cv2.COLORMAP_JET)

In [149]:
# Blend heatmap and original image with equal weights; the float result is
# cast back to uint8 for display.
# NOTE(review): `np` is not imported in the visible cells -- confirm numpy is
# imported elsewhere in the notebook.
heatmap = (CAM_heatmap * 0.5 + image * 0.5).astype(np.uint8)
# Show the overlay in an OpenCV window, wait for a key press, then close all
# OpenCV windows. NOTE(review): this needs a GUI-capable environment.
cv2.imshow('heatmap', heatmap)
cv2.waitKey()
cv2.destroyAllWindows()