# Obtain importance score estimates

With this notebook we obtain the importance scores for all the considered dataset and importance estimators. Results are stored in the estimators/ folder.

This notebook assumes that the models are already trained and with the weights stored in the weights/ folder.

First choose for which model you want to obtain the importance scores:

In [1]:
models = {
        0: "cifar",
        1: "food101",
        2: "imgnet"
}
model_name = models[0]

### Imports

In [2]:
from random import randrange
import torch
import seaborn as sns
from matplotlib_inline.backend_inline import set_matplotlib_formats
import saliency.core as saliency
import os
from torchvision import transforms
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from pytorch_lightning import LightningModule, Trainer
import pickle
import glob
import scipy
from sklearn.model_selection import train_test_split

data_dir = os.environ.get("PATH_DATASETS", ".")

In [2]:
%load_ext autoreload
%autoreload 2
from lightning_models.model_cifar_resnet import CIFAR_ResNet
from lightning_models.model_imagenet import ImgNet_ResNet
from lightning_models.model_food101 import Food101_ResNet

from torchvision.datasets import CIFAR10
from torchvision.datasets import ImageNet
from torchvision.datasets import Food101

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Global seed set to 7
Global seed set to 7


### Define evaluation datasets

In [4]:
device = 'cuda:0'

In [5]:
transform3d = transforms.Compose(
    [
        transforms.ToTensor(),
        # normalizes images to [-1,1]
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

transform_imgnet = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.ConvertImageDtype(torch.float),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

transform_food101 = transforms.Compose(
        [
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize((0.561, 0.440, 0.312), (0.252, 0.256, 0.259)),
        ]
)

cifar = CIFAR10('./data', train=False, transform=transform3d, download=True)
imgnet = ImageNet(root='/home/lbrocki/AugmentData/data', 
                  split='val', 
                  transform=transform_imgnet)
food101 = Food101("/home/lbrocki/AugmentData/data/", split="test", transform=transform_food101)

Files already downloaded and verified


In [6]:
# load the same images that were used in validation during training
imgnet_test_indices = np.load('test_idx_val_imgnet.npy')

FileNotFoundError: [Errno 2] No such file or directory: 'test_idx_val_imgnet.npy'

In [7]:
# define subset for food101 that is used for evaluations
_, food_test_indices = train_test_split(
    np.arange(0,len(food101)), 
    test_size=5000, random_state=42
)

In [8]:
imgnet_test = torch.utils.data.Subset(imgnet, imgnet_test_indices)
food101_test = torch.utils.data.Subset(food101, food_test_indices)

NameError: name 'imgnet_test_indices' is not defined

In [9]:
if(model_name == "cifar"):
    data = cifar
elif(model_name == "food101"):
    data = food101_test
elif(model_name == "imgnet"):
    data = imgnet_test
else:
    print("model name error")

In [10]:
# transform to np array to be compatible with pip saliency
imgs = np.stack([np.array(i[0]) for i in data])
labels = [i[1] for i in data]

### Set up importance estimators using the pip saliency package

In [11]:
def preprocess(img):
    img = torch.tensor(img, dtype=torch.float32)
    img = img.to(device)
    img = img.requires_grad_(True)
    return img

In [12]:
class_idx_str = 'class_idx_str'
def call_model_function(images, call_model_args=None, expected_keys=None):
    target_class_idx = call_model_args[class_idx_str]
    images = preprocess(images)
    # use pre-softmax layer for creation of saliency maps
    output = model.model(images)
    if saliency.base.INPUT_OUTPUT_GRADIENTS in expected_keys:
        outputs = output[:,target_class_idx]
        grads = torch.autograd.grad(outputs, images, grad_outputs=torch.ones_like(outputs))
        gradients = grads[0].cpu().detach().numpy()
        return {saliency.base.INPUT_OUTPUT_GRADIENTS: gradients}

In [13]:
gradient_saliency = saliency.GradientSaliency()
ig = saliency.IntegratedGradients()

### Apply importance estimators on the dataset

In [14]:
def setup(model_path):
    if(model_name == "cifar"):
        model = CIFAR_ResNet().to(device)
    elif(model_name == "food101"):
        model = Food101_ResNet().to(device)
    elif(model_name == "imgnet"):
        model = ImgNet_ResNet().to(device)
    else:
        print("model name error")
        
    model.load_state_dict(torch.load(f"weights/{model_path}"))
    
    model.eval()

    estimator_path = 'estimators/'+model_path+'/' 

    if(random_label):
        str_rand = '_rl'
    else:
        str_rand = ''
    #create directories to save importance estimators, if they don't already exist
    for e in estimators:
        try:
            os.makedirs(estimator_path + e + str_rand)
        except:
            continue
    
    return model, estimator_path

In [17]:
def create_estimators(imgs, labels, estimator_path, model):
    model = model
    shape = imgs.shape[1:]
    baseline = np.zeros(shape)
    estim_dict = {}
    num = 10
    for i, (img, label) in tqdm(enumerate(zip(imgs[:num], labels[:num])),total=len(imgs[:num])):
            #use predicted class as label
            predicted_label = model(torch.tensor(img).unsqueeze(0).to(device)).argmax()
            call_model_args = {class_idx_str: predicted_label}
            
            if('vanilla' in estimators):
                vanilla = gradient_saliency.GetMask(img, call_model_function, call_model_args)
                estim_dict = {'vanilla': vanilla}

            if('smooth_sq' in estimators):
                smooth_sq = gradient_saliency.GetSmoothedMask(img, 
                                                              call_model_function, 
                                                              call_model_args, 
                                                              nsamples=50, 
                                                              magnitude=True)
                estim_dict['smooth_sq'] = smooth_sq
            if('smooth' in estimators):
                smooth = gradient_saliency.GetSmoothedMask(img, 
                                                           call_model_function, 
                                                           call_model_args, 
                                                           nsamples=50, 
                                                           magnitude=False)
                estim_dict['smooth'] = smooth

            if('intgrad' in estimators):
                intgrad = ig.GetMask(img, 
                             call_model_function, 
                             call_model_args, 
                             x_steps=200, 
                             x_baseline=baseline, 
                             batch_size=200)
                estim_dict['intgrad'] = intgrad
                            
            for e in estimators:
                savepath = estimator_path+e+'/img'+str(i).zfill(5)+'.npy'
                np.save(savepath, estim_dict[e])

In [18]:
# choose estimators to create importance scores for
# estimators = ["intgrad", "vanilla",  "smooth", "smooth_sq"]
estimators = ["intgrad"]

perturb = ["rand", "rect", "none"]
model_paths = [f"{model_name}/{p}.pt" for p in perturb]
    
random_label=False
for model_path in model_paths:
    model, estim_path = setup(model_path)
    create_estimators(imgs, labels, estim_path, model)

100%|██████████| 10/10 [00:00<00:00, 17.91it/s]
100%|██████████| 10/10 [00:00<00:00, 18.62it/s]
100%|██████████| 10/10 [00:00<00:00, 18.91it/s]
