In [3]:
import torch
from torch.autograd import Variable
import torchvision
import torchvision.transforms as T  # 对PIL.Image进行变换
from torchsummary import summary
import random
import os

import numpy as np
from scipy.ndimage.filters import gaussian_filter1d
import matplotlib.pyplot as plt
from PIL import Image

%matplotlib inline
# Notebook-wide matplotlib defaults, applied in a single update.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),     # default size of plots
    'image.interpolation': 'nearest',  # default interpolation used when drawing images
    'image.cmap': 'gray',              # default colormap used when drawing images
})


In [4]:
def load_imagenet_val(num=None, imagenet_fn='./imagenet_val_25.npz'):
    """Load a handful of validation images from ImageNet.

    Inputs:
    - num: Number of images to load (the bundled archive holds at most 25).
      None returns everything stored in the archive.
    - imagenet_fn: Path of the .npz archive to read. It must contain the
      entries 'X', 'y' and 'label_map'.

    Returns:
    - X: numpy array with shape [num, 224, 224, 3]
    - y: numpy array of integer image labels, shape [num]
    - class_names: dict mapping integer label to class name

    Raises:
    - FileNotFoundError: if imagenet_fn does not exist.
    """
    # NOTE(security): allow_pickle=True is needed because 'label_map' is stored
    # as a pickled Python object. Unpickling can execute arbitrary code, so only
    # load archives that come from a trusted source.
    # The context manager closes the underlying file handle once the arrays
    # have been read into memory.
    with np.load(imagenet_fn, allow_pickle=True) as f:
        X = f['X']
        y = f['y']
        # 'label_map' is a 0-d object array; .item() unwraps the stored object.
        class_names = f['label_map'].item()
    if num is not None:
        X = X[:num]
        y = y[:num]
    return X, y, class_names

In [5]:
# Per-channel (3 values each) mean and standard deviation handed to T.Normalize.
mean = np.asarray([0.485, 0.456, 0.406], dtype=np.float32)
std = np.asarray([0.229, 0.224, 0.225], dtype=np.float32)


def preprocess(img, size=224):
    """Convert a PIL image into a normalized tensor with a leading batch axis.

    The torchvision transforms used here operate on PIL.Image.Image objects,
    so `img` has to be a PIL.Image.Image (e.g. the result of
    Image.fromarray(x)).

    Inputs:
    - img: PIL.Image.Image to convert.
    - size: Value forwarded to T.Resize.

    Returns:
    - The output of ToTensor + Normalize(mean, std) with one extra leading
      dimension added in front.
    """
    def _add_batch_axis(tensor):
        # Indexing with None prepends a new axis of length 1.
        return tensor[None]

    steps = [
        T.Resize(size),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std),
        T.Lambda(_add_batch_axis),
    ]
    pipeline = T.Compose(steps)
    return pipeline(img)


In [7]:
# Load a pretrained model.
model = torchvision.models.vgg16_bn(pretrained=True)  # last layer linear(fc) , pretrained: If True, returns a model pre-trained on ImageNet
# model = torchvision.models.alexnet(pretrained=True)   # last layer linear(fc) 
# model = torchvision.models.squeezenet1_1(pretrained=True)   # AdaptiveAvgPool2d 


# Freeze all model parameters: only gradients w.r.t. the input image are needed.
for param in model.parameters():
    param.requires_grad = False

# Switch to inference mode BEFORE calling summary(): summary() runs a forward
# pass on random inputs, and in training mode that forward pass would update
# the BatchNorm running statistics (and apply dropout), silently altering the
# pretrained model.
model.eval()

# Keep the model on the GPU whenever one is available. torchsummary's default
# is to feed CUDA inputs when CUDA is available, which fails against a
# CPU-resident model.
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')

summary(model, (3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
       BatchNorm2d-2         [-1, 64, 224, 224]             128
              ReLU-3         [-1, 64, 224, 224]               0
            Conv2d-4         [-1, 64, 224, 224]          36,928
       BatchNorm2d-5         [-1, 64, 224, 224]             128
              ReLU-6         [-1, 64, 224, 224]               0
         MaxPool2d-7         [-1, 64, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]          73,856
       BatchNorm2d-9        [-1, 128, 112, 112]             256
             ReLU-10        [-1, 128, 112, 112]               0
           Conv2d-11        [-1, 128, 112, 112]         147,584
      BatchNorm2d-12        [-1, 128, 112, 112]             256
             ReLU-13        [-1, 128, 112, 112]               0
        MaxPool2d-14          [-1, 128,

In [8]:
def compute_saliency_maps(X, y, model):
    """
    Compute a class saliency map using the model for images X and labels y.

    The saliency of a pixel is the absolute value of the gradient of the
    correct-class score with respect to that pixel, maximized over the
    channel dimension.

    Input:
    - X: Input images; Tensor of shape (N, 3, H, W)
    - y: Labels for X; LongTensor of shape (N,)
    - model: A pretrained CNN that will be used to compute the saliency map.
      It is switched to eval mode as a side effect.

    Returns:
    - saliency: A Tensor of shape (N, H, W) giving the saliency maps for the input
    images. It lives on the device the model's parameters are on.
    """
    # Make sure the model is in 'test' mode.
    model.eval()

    # Run on the device the model lives on so inputs and weights always agree.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else X.device

    # Work on a detached leaf tensor so the caller's X is never modified and
    # no gradient accumulates across repeated calls.
    X_in = X.detach().to(device).requires_grad_(True)
    y_idx = y.to(device).reshape(-1, 1)

    # Forward pass.
    scores = model(X_in)

    # Pick, for every image, the score of its correct class: gather along
    # dim=1 using the labels as column indices. squeeze(1) removes only the
    # gathered column, so a batch of one image keeps shape (1,).
    correct_scores = scores.gather(1, y_idx).squeeze(1)

    # Differentiating the sum of the per-image scores is the same as a
    # backward pass seeded with a vector of ones, but works for any batch
    # size and on any device. autograd.grad returns the input gradient
    # directly without populating .grad on the model parameters.
    grad, = torch.autograd.grad(correct_scores.sum(), X_in)

    # Collapse the channel dimension: (N, 3, H, W) -> (N, H, W).
    saliency, _ = grad.abs().max(dim=1)
    return saliency

In [10]:
def show_saliency_maps(X,y):
    """Plot each image in X above its saliency map.

    Reads the module-level names `model` (the network used for the saliency
    computation) and `class_names` (label -> name mapping used for titles).

    Input:
    - X: Array of images; each X[i] is passed to Image.fromarray.
    - y: Integer labels for X, one per image.
    """
    # Use the device the model lives on instead of assuming a GPU, so this
    # works on CPU-only machines and never mismatches the model's weights.
    device = next(model.parameters()).device

    # torch.cat joins the per-image (1, C, H, W) tensors along dim 0 into one batch.
    X_tensor = torch.cat([preprocess(Image.fromarray(x)) for x in X], dim=0).to(device)
    y_tensor = torch.LongTensor(y).to(device)

    # Compute saliency maps for images in X
    saliency = compute_saliency_maps(X_tensor, y_tensor, model)

    # Convert the saliency map from Torch Tensor to numpy array and show images
    # and saliency maps together.
    saliency = saliency.detach().cpu().numpy()
    N = X.shape[0]

    # Top row: input images with class names; bottom row: saliency maps.
    # The figure size is set once here rather than on every loop iteration.
    fig, axes = plt.subplots(2, N, figsize=(12, 5), squeeze=False)
    for i in range(N):
        image_ax = axes[0, i]
        image_ax.imshow(X[i])
        image_ax.axis('off')
        image_ax.set_title(class_names[y[i]])

        saliency_ax = axes[1, i]
        saliency_ax.imshow(saliency[i], cmap=plt.cm.hot)
        saliency_ax.axis('off')
        
# Load the first five validation images and plot each one above its saliency map.
# `class_names` must remain a module-level name: show_saliency_maps reads it as a global.
X, y, class_names = load_imagenet_val(num=5)       
show_saliency_maps(X,y)

FileNotFoundError: [Errno 2] No such file or directory: './imagenet_val_25.npz'