In [1]:
from __future__ import print_function, division

import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from models.models import SqueezeNet, ResNet
from matplotlib.image import imread
from PIL import Image

import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
import matplotlib.pyplot as plt
import time
import pandas as pd
import os
from pathlib import Path
import copy
from utils import get_dataloaders, get_default_parser, load_sketchy_images, get_loss_fn
import cv2



In [2]:

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = ResNet()
model.to(device)

# map_location remaps the tensors stored in the checkpoint onto `device`, so a
# checkpoint that was saved from a GPU run can still be loaded on a CPU-only machine.
CHECKPOINT_PATH = "ckpts/resnet_classifier_lr1e-2_wd0/531838_3.3481321117504925"
# Alternative checkpoint kept for reference:
# CHECKPOINT_PATH = "ckpts/resnet_binary_classifier_wd5e-3_lr1e-4_epoch5-10_alpha.5_bs20/531735_1000.5626831054688"
model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=device))

# Switch to inference mode (the network contains BatchNorm layers); being the
# last expression of the cell, this also displays the model structure.
model.eval()

ResNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (

In [3]:
# Activations captured by the forward hook, one entry per forward pass.
features = []

def hook_feature(module, input_, output):
    """Forward hook: store the hooked layer's output as a NumPy array in `features`."""
    # detach() is the supported way to drop the autograd graph before converting.
    features.append(output.detach().cpu().numpy())

# Re-running this cell must not stack a second hook on the same layer, otherwise
# every forward pass would append twice and the features[...] indices used in
# later cells would no longer line up with the photo / sketch passes.
_previous_hook_handle = globals().get('hook_handle')
if _previous_hook_handle is not None:
    _previous_hook_handle.remove()

# NOTE(review): features[7][1] is presumably the last residual block feeding the
# classifier - confirm against the full model printout.
hook_handle = model.features[7][1].register_forward_hook(hook_feature)
hook_handle


<torch.utils.hooks.RemovableHandle at 0x7f8cf79e8cf8>

In [4]:
# Materialise every model parameter, in registration order, so it can be indexed.
params = [parameter for parameter in model.parameters()]

In [5]:
# Second-to-last parameter as a NumPy array with singleton dimensions removed.
# NOTE(review): presumably the weight matrix of the final classification layer
# (with its bias being params[-1]) - confirm against the model definition.
weight_softmax = params[-2].cpu().data.numpy().squeeze()

In [6]:
def returnCAM(feature_conv, weight_softmax, class_idx, size_upsample=(256, 256)):
    """Build class activation maps (CAMs) from one convolutional feature map.

    Args:
        feature_conv: array of shape (1, c, h, w) holding the activations of a
            single image.
        weight_softmax: 2-D array indexed as [class, channel]; each row must
            have `c` entries.
        class_idx: iterable of class indices to build a map for.
        size_upsample: (width, height) passed to cv2.resize for every map.
            Defaults to (256, 256).

    Returns:
        A list with one uint8 array per entry of `class_idx`, each scaled to
        the 0..255 range and resized to `size_upsample`.

    Raises:
        ValueError: if `feature_conv` does not contain exactly one sample.
    """
    b, c, h, w = feature_conv.shape
    if b != 1:
        # The (c, h*w) reshape below is only meaningful for a single sample;
        # fail with an explicit message instead of an opaque reshape error.
        raise ValueError(
            'returnCAM expects a single-sample batch, got batch size {}'.format(b))
    flat_features = feature_conv.reshape((c, h * w))

    output_cam = []
    for idx in class_idx:
        # Weighted sum over channels -> one activation value per spatial location.
        cam = weight_softmax[idx].dot(flat_features).reshape(h, w)
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            cam = cam / peak
        # else: the map is constant (all zeros after the shift); skipping the
        # division avoids 0/0 -> NaN, which has no defined uint8 value.
        cam_img = np.uint8(255 * cam)
        output_cam.append(cv2.resize(cam_img, size_upsample))
    return output_cam

# Zero mean and unit std per channel: this normalisation leaves pixel values unchanged.
# NOTE(review): presumably matches how the checkpoint was trained - confirm.
normalize = T.Normalize(mean=[0, 0, 0], std=[1, 1, 1])

# Input pipeline: resize to 256x256, convert the PIL image to a tensor, normalise.
_preprocess_steps = [T.Resize((256, 256)), T.ToTensor(), normalize]
preprocess = T.Compose(_preprocess_steps)

In [7]:
# Root directory of the Sketchy dataset. The original location stays the default;
# set the SKETCHY_DATA_DIR environment variable to run the notebook on another machine.
# NOTE(review): later cells take path component 7 of 'Photo Path', so the paths stored
# in the CSV are presumably tied to the original directory depth - confirm before
# relocating the data.
PREFIX = os.environ.get("SKETCHY_DATA_DIR", "/home/robincheong/data/sketchy")

# NOTE: despite the variable name, this loads the validation split (valset.csv).
train_set = pd.read_csv(os.path.join(PREFIX, 'valset.csv'))

In [8]:
# Map each value of the 'Label' column to a category name, taken from path
# component 7 of the matching 'Photo Path' (later rows overwrite earlier ones).
# NOTE(review): the fixed index 7 assumes a specific directory depth - confirm.
classes = {
    label: photo_path.split('/')[7]
    for label, photo_path in zip(train_set['Label'], train_set['Photo Path'])
}

In [9]:
# Materialise the (index, row) pairs so individual examples can be picked by position.
train_list = [indexed_row for indexed_row in train_set.iterrows()]

In [10]:
# Example pair used for the visualisation: the row at position 232.
example_row = train_list[232][1]
ex_photo = example_row['Photo Path']
ex_sketch = example_row['Sketch Path']
# Category name = path component 7 of the photo path (same convention as `classes`).
cat = ex_photo.split('/')[7]

In [11]:
# Drop activations captured by earlier forward passes so the photo's feature
# map is always features[0], even when this cell is re-run.
del features[:]

# convert('RGB') guarantees three channels even for grayscale / RGBA files.
photo = preprocess(Image.open(ex_photo).convert('RGB'))
photo = photo.unsqueeze(0)  # add the batch dimension

# Inference only: no autograd graph is needed.
with torch.no_grad():
    logit = model(photo.to(device))
    h_x = F.softmax(logit, dim=1).cpu().squeeze()

# Sort class probabilities in descending order.
probs, idx = h_x.sort(0, True)
probs = probs.numpy()
idx = idx.numpy()


# output the prediction (top 5, or fewer if the model has fewer classes)
for i in range(min(5, len(probs))):
    print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))

1.000 -> rifle
0.000 -> guitar
0.000 -> sword
0.000 -> violin
0.000 -> racket


In [12]:
# generate class activation mapping for the top1 prediction
CAMs = returnCAM(features[0], weight_softmax, [idx[0]])

# render the CAM and output
print('output CAM.jpg for the top1 prediction: %s'%classes[idx[0]])
img = cv2.imread(str(ex_photo))
height, width, _ = img.shape
heatmap = cv2.applyColorMap(cv2.resize(CAMs[0],(width, height)), cv2.COLORMAP_JET)
result = heatmap * 0.3 + img * 0.5
cv2.imwrite('cams/photo_{}_untrained.jpg'.format(cat), result)

output CAM.jpg for the top1 prediction: rifle


True

In [13]:
# Keep only the photo activation (features[0]) so the sketch pass below always
# lands in features[1], even when this cell is re-run.
del features[1:]

# convert('RGB') guarantees three channels even for grayscale / RGBA files.
sketch = preprocess(Image.open(ex_sketch).convert('RGB'))
sketch = sketch.unsqueeze(0)  # add the batch dimension

# Inference only: no autograd graph is needed.
with torch.no_grad():
    logit = model(sketch.to(device))
    h_x = F.softmax(logit, dim=1).cpu().squeeze()

# Sort class probabilities in descending order.
probs, idx = h_x.sort(0, True)
probs = probs.numpy()
idx = idx.numpy()


# output the prediction (top 5, or fewer if the model has fewer classes)
for i in range(min(5, len(probs))):
    print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))

0.982 -> rifle
0.009 -> crocodilian
0.002 -> rocket
0.001 -> knife
0.001 -> fish


In [14]:
# generate class activation mapping for the top1 prediction
CAMs = returnCAM(features[1], weight_softmax, [idx[0]])
print(len(features))
# render the CAM and output
print('output CAM.jpg for the top1 prediction: %s'%classes[idx[0]])
img = cv2.imread(str(ex_sketch))
height, width, _ = img.shape
heatmap = cv2.applyColorMap(cv2.resize(CAMs[0],(width, height)), cv2.COLORMAP_JET)
result = heatmap * 0.3 + img * 0.5
cv2.imwrite('cams/sketch_{}_untrained.jpg'.format(cat), result)

2
output CAM.jpg for the top1 prediction: rifle


True