In [1]:
! pip install opencv-python
! pip install ftfy regex tqdm
! pip install streamlit
! pip install torchray
! pip install torchvision
!pip install ttach

Collecting opencv-python
  Downloading opencv_python-4.5.5.62-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (60.4 MB)
[K     |████████████████████████████████| 60.4 MB 6.1 MB/s eta 0:00:01
Installing collected packages: opencv-python
Successfully installed opencv-python-4.5.5.62
Collecting streamlit
  Downloading streamlit-1.3.1-py2.py3-none-any.whl (9.2 MB)
[K     |████████████████████████████████| 9.2 MB 5.6 MB/s eta 0:00:01
Collecting astor
  Downloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Collecting pyarrow
  Downloading pyarrow-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25.6 MB)
[K     |████████████████████████████████| 25.6 MB 81.5 MB/s eta 0:00:01
[?25hCollecting validators
  Downloading validators-0.18.2-py3-none-any.whl (19 kB)
Collecting pydeck>=0.1.dev5
  Downloading pydeck-0.7.1-py2.py3-none-any.whl (4.3 MB)
[K     |████████████████████████████████| 4.3 MB 74.9 MB/s eta 0:00:01
[?25hCollecting blinker
  Downloading blinker-1.4.tar

In [2]:
import os
import sys
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"
sys.path.append("/scratch/students/2021-fall-sp-mamooler/sem_project_IVRL/code/")

import torch
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
import numpy as np
import cv2
import json
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

from model import build_model
from clip_ import load, tokenize, similarity_score
from vit_cam import interpret_vit, interpret_vit_overlapped, vit_relevance
from rn_cam import interpret_rn, interpret_rn_overlapped, rn_relevance
sys.path.append("/scratch/students/2021-fall-sp-mamooler/sem_project_IVRL/code/pytorch-grad-cam")
from pytorch_grad_cam import GradCAM, \
    ScoreCAM, \
    GradCAMPlusPlus, \
    AblationCAM, \
    XGradCAM, \
    EigenCAM, \
    EigenGradCAM, \
    LayerCAM, \
    FullGrad

from pickle_loader import Pickle_data_loader

In [3]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [4]:
# RN50 cannot be used here because the memotions embeddings have dimension 512,
# which does not match what RN50 expects (the original note said 1028;
# presumably 1024 was meant -- TODO confirm).
# Each load() call returns its own preprocessing transform; keep both instead of
# letting the second call silently overwrite the first.
model_rn, preprocess_rn = load("RN101", device=device, jit=False)
model_vit, preprocess_vit = load("ViT-B/32", device=device, jit=False)
# Backward compatibility: `preprocess` used to end up bound to the ViT transform.
preprocess = preprocess_vit

In [7]:
# Model families compared in the visualizations below.
model_types = ["ViT", "RN101"]

# Keyword arguments forwarded to every plt.title(...) call.
font = {
    'size': 15,
}

# Resize + center-crop to 224x224 and convert to a tensor (no normalization step).
ori_preprocess = Compose([
        Resize(224, interpolation=Image.BICUBIC),
        CenterCrop(size=(224, 224)),
        ToTensor()])

# Emotion name -> feature vector (later cells wrap each value in torch.HalfTensor).
# Use a context manager so the file handle is closed deterministically.
with open("ARTEMIS_EMOTIONS.json") as emotion_file:
    emotion_dict = json.load(emotion_file)
artemis_labels = emotion_dict.keys()

# Emotion name -> column index into each row of the label tensor.
# NOTE(review): later cells index scores computed in emotion_dict order with
# these indices, so the JSON key order presumably matches -- confirm.
ARTEMIS_EMOTIONS = { 'amusement':0,
                     'awe':1,
                     'contentment':2,
                     'excitement':3,
                     'anger':4,
                     'disgust':5,
                     'fear':6,
                     'sadness':7,
                     'something else':8}

root = "../../Images/"
res_dir = "../../results/artemis_emotions/"

# Create the results directory together with any missing parents; exist_ok makes
# the cell safe to re-run and avoids the check-then-create race.
os.makedirs(res_dir, exist_ok=True)

# Saliency visualization for Artemis images 

In [8]:
# Load the wikiart batches and merge them into one image tensor and one label tensor.
# NOTE(review): the second argument (40) is presumably a batch size or sample
# count -- confirm against Pickle_data_loader.
image_loader = Pickle_data_loader(root+"wikiart/", 40)

# Collect the batches first and concatenate once: calling torch.cat inside the
# loop re-copies everything accumulated so far on every iteration.
image_batches = []
label_batches = []
for batch in image_loader:
    image_batches.append(batch["image"])
    label_batches.append(batch["label"])

# torch.cat raises on an empty list, so an empty loader fails loudly here instead
# of leaving image_list/label_list undefined or stale from an earlier run.
image_list = torch.cat(image_batches, dim=0)
label_list = torch.cat(label_batches, dim=0)

In [22]:
pp = PdfPages(res_dir+'artemis.pdf')

def tensor2img(tensor):
    """Min-max scale a tensor to [0, 1] and move its first axis to the end.

    The input is detached and copied to the CPU, shifted so its minimum is 0,
    divided by its maximum, and permuted from axis order (0, 1, 2) to
    (1, 2, 0) -- i.e. channel-first to channel-last for a 3-D image tensor.

    A constant input (max == min) is returned as all zeros rather than NaN,
    because the division is skipped when the shifted maximum is 0.
    """
    shifted = tensor.detach().cpu()
    shifted = shifted - shifted.min()
    peak = shifted.max()
    # Guard against 0/0 for constant images; out-of-place division also works
    # for integer tensors, where in-place true division raises.
    if peak > 0:
        shifted = shifted / peak
    return shifted.permute(1,2,0)

# Write one PDF page per (image, model): the input image in the first panel,
# followed by one saliency panel per entry of emotion_dict, with the ground-truth
# and predicted emotion distributions printed underneath.
# try/finally guarantees the PDF is finalized even if a page fails to render.
try:
    for id_, image in enumerate(image_list):

        image = image.unsqueeze(0).to(device)
        # Ground-truth value per emotion for this image, rounded for display.
        label_dict = {
            key: round(float(label_list[id_][val].item()), 4)
            for key, val in ARTEMIS_EMOTIONS.items()
        }

        # neg selects the saliency variant: False -> positive, True -> negative,
        # None -> overlapped. Only the positive variant is rendered at present.
        for neg in [False]:

            for model_type in model_types:

                fig = plt.figure(figsize=(16, 16))
                plt.subplot(1,10,1)
                plt.imshow(tensor2img(image[0]))
                plt.axis('off')

                if neg is None: title = "\n overlapped  \n blue:neg, red: pos"
                elif neg: title = "\n negative \n blue: high, red: low"
                else: title = "\n positive \n blue:low, red:high"
                plt.title(model_type+title, **font, y=-0.15)

                subplot = 2
                scores = []

                for emotion, emotion_feature in emotion_dict.items():

                    emotion_feature = torch.HalfTensor(emotion_feature).unsqueeze(dim=0).to(device)

                    # The interpret_* helpers are called right after selecting the
                    # panel; presumably they draw onto the current axes.
                    plt.subplot(1,10, subplot)
                    subplot+=1

                    if model_type=="ViT":
                        score = similarity_score(model_vit, image, emotion_feature)
                        if neg is not None:
                            interpret_vit(image.type(model_vit.dtype), emotion_feature, model_vit.visual, device, neg_saliency=neg)
                        else:
                            interpret_vit_overlapped(image.type(model_vit.dtype), emotion_feature, model_vit.visual, device)

                    else:
                        score = similarity_score(model_rn, image, emotion_feature)
                        if neg is not None:
                            interpret_rn(image.type(model_rn.dtype), emotion_feature, model_rn.visual, GradCAM, device, neg_saliency=neg)
                        else:
                            interpret_rn_overlapped(image.type(model_rn.dtype), emotion_feature, model_rn.visual, GradCAM, device)

                    scores.append(score.item())

                    plt.axis('off')
                    plt.title(emotion, **font,y=-0.15)

                # Lay out once, after all panels exist, instead of once per panel.
                plt.tight_layout()

                # Explicit dim avoids the deprecated implicit-dimension softmax;
                # scores is 1-D, so dim=0 is the only axis.
                scores = torch.nn.functional.softmax(torch.tensor(scores), dim=0)
                # NOTE(review): scores are in emotion_dict iteration order but are
                # indexed with ARTEMIS_EMOTIONS positions -- confirm both orders agree.
                preds = {
                    key: round(float(scores[val].item()), 4)
                    for key, val in ARTEMIS_EMOTIONS.items()
                }

                plt.figtext(0.5, 0.1, f'ground truth:{label_dict}\npreds:{preds}', wrap=True, verticalalignment='top', horizontalalignment='center', fontsize=10)
                pp.savefig()
                # Release the figure once it is on disk; otherwise every page
                # stays alive in memory for the whole run.
                plt.close(fig)
finally:
    pp.close()

torch.Size([40, 3, 224, 224])
torch.Size([40, 9])
Compose(
    Resize(size=224, interpolation=PIL.Image.BICUBIC)
    CenterCrop(size=(224, 224))
    <function _transform.<locals>.<lambda> at 0x7f752018cb00>
    ToTensor()
    Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
)


# Attention mean and variance for each emotion

In [16]:
# Reload the wikiart data with a larger setting (1000) for the aggregate
# statistics below, merging all batches into one image and one label tensor.
# NOTE(review): the second argument is presumably a batch size or sample
# count -- confirm against Pickle_data_loader.
image_loader = Pickle_data_loader(root+"wikiart/", 1000)

# Collect the batches first and concatenate once: calling torch.cat inside the
# loop re-copies everything accumulated so far on every iteration.
image_batches = []
label_batches = []
for batch in image_loader:
    image_batches.append(batch["image"])
    label_batches.append(batch["label"])

# torch.cat raises on an empty list, so an empty loader fails loudly here instead
# of silently reusing image_list/label_list from an earlier cell.
image_list = torch.cat(image_batches, dim=0)
label_list = torch.cat(label_batches, dim=0)

In [None]:
# For every emotion, average the relevance map over all loaded images for each
# model and plot the two normalized mean maps side by side. Each figure is saved
# both as '<emotion>_mean.png' and as a page of artemis_bias.pdf.
# The context manager closes the PDF even if a page fails part-way through.
with PdfPages(res_dir+'artemis_bias.pdf') as pp:
    for emotion, emotion_features in emotion_dict.items():

        emotion_features = torch.HalfTensor(emotion_features).unsqueeze(dim=0).to(device)

        fig = plt.figure(figsize=(16, 16))
        for model_id, model_type in enumerate(model_types):

            # Fresh list per (emotion, model) so nothing leaks from a previous
            # pass; concatenated once below instead of growing an array per image.
            relevance_maps = []
            for image in image_list:

                image = image.unsqueeze(0).to(device)

                if model_type=="ViT":
                    relevance, _ = vit_relevance(image.type(model_vit.dtype), emotion_features, model_vit.visual, device)

                else:
                    relevance, _ = rn_relevance(image.type(model_rn.dtype), emotion_features, model_rn.visual, GradCAM, device)

                relevance_maps.append(np.expand_dims(relevance, axis=0))

            # np.concatenate raises on an empty list, i.e. when no images were loaded.
            attentions = np.concatenate(relevance_maps, axis=0)

            mean = np.mean(attentions, axis=0)
            # Scale so the maximum is 1; skip the division for an all-zero
            # maximum to avoid filling the map with NaN/inf.
            peak = np.max(mean)
            if peak != 0:
                mean = mean/peak
            mean = np.float32(mean)

            plt.subplot(1,2,1+model_id)
            plt.imshow(mean)
            plt.axis('off')
            plt.tight_layout()
            plt.title(emotion +" "+ model_type +" attention mean", **font,y=-0.10)

        plt.savefig(res_dir+emotion+'_mean.png')
        pp.savefig()
        # Release the figure once it has been written to both outputs.
        plt.close(fig)

  "See the documentation of nn.Upsample for details.".format(mode))
