In [21]:
import torch
import torchvision
import os
import sys
import time
import cv2
import numpy as np
import pytorch_grad_cam
import pickle
import json
import json
%matplotlib inline
import matplotlib.pyplot as plt
from metric import *

In [59]:

def cosine_similarity(gt, mask):
    """Return the cosine similarity between two maps.

    Both inputs are flattened first, so only their total number of elements
    has to match. The measure is symmetric, so the argument order does not
    affect the result.

    Args:
        gt: array-like ground-truth map.
        mask: array-like predicted map.

    Returns:
        float: dot(gt, mask) / (||gt|| * ||mask||). When either input is all
        zeros the similarity is undefined; 0.0 is returned in that case so a
        single empty map cannot turn a running average into NaN.

    Raises:
        ValueError: if the two inputs do not have the same number of elements.
    """
    # Work in float64 so integer inputs (e.g. uint8 images) cannot overflow
    # while the products are accumulated.
    gt_flattened = np.asarray(gt, dtype=np.float64).ravel()
    mask_flattened = np.asarray(mask, dtype=np.float64).ravel()
    if gt_flattened.size != mask_flattened.size:
        raise ValueError(
            "cosine_similarity needs inputs of equal size, got {0} and {1}".format(
                gt_flattened.size, mask_flattened.size))

    denominator = np.linalg.norm(gt_flattened) * np.linalg.norm(mask_flattened)
    if denominator == 0.0:
        return 0.0
    return float(np.dot(mask_flattened, gt_flattened) / denominator)


def load_RGB(img_path):
    """Read an image with OpenCV and return it with channels in RGB order.

    Args:
        img_path: path of the image file to read.

    Returns:
        The array read by ``cv2.imread(..., cv2.IMREAD_UNCHANGED)`` after a
        ``cv2.COLOR_BGR2RGB`` conversion.

    Raises:
        IOError: if OpenCV cannot read the file. ``cv2.imread`` signals that
            by returning ``None`` instead of raising, which would otherwise
            surface as an opaque error inside ``cv2.cvtColor``.
    """
    bgr = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    if bgr is None:
        raise IOError("Could not read image: {0}".format(img_path))
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
def load_gray(img_path):
    """Read the image at ``img_path`` exactly as stored (``cv2.IMREAD_UNCHANGED``).

    No colour conversion is applied; the value returned is whatever
    ``cv2.imread`` produces for that path.
    """
    image = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    return image
def get_images(dataset, video_index, image_index):
    """Load the five images stored for one frame of one video.

    Args:
        dataset: root folder of the dataset.
        video_index: integer video number; values up to 9 get a leading "0"
            in the folder name.
        image_index: frame index, used both as a sub-folder name and as the
            file stem inside the ``garmin`` and ``saliency`` folders.

    Returns:
        Tuple ``(final, box, mask, garmin_image, saliency_image)``:
        ``final`` and ``box`` are ``final.jpg`` / ``box.jpg`` from the frame
        folder in RGB, ``mask`` is ``mask.npy`` multiplied by 255 and cast to
        uint8, ``garmin_image`` is the RGB frame from ``garmin`` and
        ``saliency_image`` is the frame from ``saliency`` converted from BGR
        to grayscale.
    """
    video_name = "0" + str(video_index) if video_index <= 9 else str(video_index)
    frame_name = str(image_index)

    video_directory = os.path.join(dataset, video_name)
    image_directory = os.path.join(video_directory, frame_name)

    # Stored mask is rescaled by 255 before the uint8 cast.
    stored_mask = np.load(os.path.join(image_directory, "mask.npy"), allow_pickle=True)
    mask = (255 * stored_mask).astype(np.uint8)

    final = load_RGB(os.path.join(image_directory, "final.jpg"))
    box = load_RGB(os.path.join(image_directory, "box.jpg"))

    garmin_directory = os.path.join(video_directory, "garmin")
    saliency_directory = os.path.join(video_directory, "saliency")
    garmin_image = load_RGB(os.path.join(garmin_directory, frame_name + ".jpg"))

    saliency_bgr = cv2.imread(
        os.path.join(saliency_directory, frame_name + ".jpg"),
        cv2.IMREAD_UNCHANGED,
    )
    saliency_image = cv2.cvtColor(saliency_bgr, cv2.COLOR_BGR2GRAY)

    return final, box, mask, garmin_image, saliency_image
def show_images(images):
    """Draw every image of ``images`` in a two-column grid on one 40x40 figure.

    For each image its shape and its max/min values are printed before it is
    drawn. Two-dimensional arrays are rendered with the gray colormap, all
    others with matplotlib's default handling.
    """
    # Two images per row, rounding up when the count is odd.
    n_rows = (len(images) + 1) // 2
    fig = plt.figure(figsize = (40,40))
    for position, img in enumerate(images, start = 1):
        fig.add_subplot(n_rows, 2, position)
        print(img.shape)
        print(img.max(),img.min())
        imshow_options = {'cmap': 'gray'} if len(img.shape) == 2 else {}
        plt.imshow(img, **imshow_options)
            
def combine_images(images, gap = 50):
    """Paste the given images side by side on a white uint8 RGB canvas.

    Images whose height is 540 rows are first scaled by 0.5 in both
    directions; every other image is used as is. The (possibly resized)
    first image defines the tile height and width. Each tile is followed by
    ``gap`` white columns, so the canvas is ``len(images) * (width + gap)``
    columns wide. For five images and the default gap this is the
    ``5 * width + 250`` layout that was previously hard-coded.

    Args:
        images: sequence of arrays, either 2-D (grayscale) or 3-channel.
            After the optional resize they must all match the first image's
            height and width.
        gap: number of white columns placed after every tile.

    Returns:
        uint8 array of shape ``(height, len(images) * (width + gap), 3)``.

    Raises:
        ValueError: if ``images`` is empty.
    """
    new_images = []
    for im in images:
        print(im.max(),im.min(),im.dtype)
        if im.shape[0] == 540:
            im = cv2.resize(im,dsize = None,fx = 0.5, fy = 0.5)
        new_images.append(im)
    if not new_images:
        raise ValueError("combine_images needs at least one image")

    height, width = new_images[0].shape[0], new_images[0].shape[1]
    stride = width + gap
    # Size the canvas from the number of images actually supplied instead of
    # assuming exactly five.
    final_image = np.full((height, len(new_images) * stride, 3), 255, dtype = np.uint8)
    for slot, im in enumerate(new_images):
        if len(im.shape) == 2:
            # Grayscale tiles are expanded to three channels to fit the canvas.
            im = cv2.cvtColor(im,cv2.COLOR_GRAY2RGB)
        start = slot * stride
        final_image[0:height, start:start + width, :] = im
    return final_image
        
    
            
def generate_image(video_index,img_index):
    """Build the side-by-side overview picture for one frame of one video.

    Loads the frame's five images through ``get_images`` from
    ``./../DREYEVE_DATA_OUTPUT/`` and returns the canvas produced by
    ``combine_images`` for them, in the order final, box, mask, garmin,
    saliency.
    """
    dataset_directory = "./../DREYEVE_DATA_OUTPUT/"
    panels = get_images(dataset_directory, video_index, img_index)
    final, box, mask, garmin_image, saliency_image = panels
    return combine_images([final, box, mask, garmin_image, saliency_image])
    
def save_RGB(path,img):
    """Write an RGB image to ``path`` with OpenCV.

    The array is converted from RGB to BGR before being handed to
    ``cv2.imwrite``. Nothing is returned.
    """
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    cv2.imwrite(path, bgr)
    

In [75]:
# Each video folder has an info data file (info_data.json).
# The data file is indexed by image index, for example {0 : obj, 5 : obj, 10 : obj}.
# Each obj has an index and a size. If size == 0 the entry is empty; otherwise it has obj['objects'], where object[3] contains the details of each object.
# Root folder of the dataset; per-video folders are joined onto this path.
dataset_path = "./../DREYEVE_DATA_OUTPUT"
def generate_all_relevant_image_paths(video_index, dataset_root = None, index_limit = 7500):
    """List the mask / ground-truth file pairs of one video that contain objects.

    Reads ``<dataset_root>/<video>/info_data.json`` and keeps every entry
    whose ``"size"`` is greater than 0 and whose key, read as an integer, is
    below ``index_limit``.

    Args:
        video_index: integer video number; values below 10 get a leading "0"
            in the folder name.
        dataset_root: dataset root folder. ``None`` (the default) uses the
            notebook-level ``dataset_path``.
        index_limit: exclusive upper bound on the frame index.

    Returns:
        list of ``[mask_path, gt_path, classes]`` where ``mask_path`` is
        ``<video>/<index>/mask.npy``, ``gt_path`` is
        ``<video>/saliency/<index>.jpg`` and ``classes`` lists the ``'class'``
        value of every object of that entry (duplicates kept).
    """
    if dataset_root is None:
        dataset_root = dataset_path
    video_name = str(video_index)
    if video_index < 10:
        video_name = "0" + video_name
    video_path = os.path.join(dataset_root, video_name)
    dict_path = os.path.join(video_path,"info_data.json")
    # Context manager so the file handle is released even if parsing fails.
    with open(dict_path,"r") as f:
        data = json.load(f)

    paths = []
    for index, entry in data.items():
        if entry["size"] > 0 and int(index) < index_limit:
            mask_path = os.path.join(video_path, index, "mask.npy")
            gt_path = os.path.join(video_path, "saliency", str(index) + ".jpg")
            classes = [obj['class'] for obj in entry['objects'].values()]
            paths.append([mask_path,gt_path,classes])
    return paths
def compute_metrics(paths,class_data):
    """Accumulate mask-vs-ground-truth cosine similarity over a list of frames.

    For every ``[mask_path, gt_path, classes]`` entry the mask is loaded with
    ``np.load`` and the ground-truth image is read with OpenCV, scaled to half
    size, converted to float32, divided by 255 and turned into grayscale. The
    cosine similarity of the two maps is added to the running total and to
    the per-class totals of every distinct class listed for that frame.

    Args:
        paths: iterable of ``[mask_path, gt_path, classes]`` entries.
        class_data: dict mapping class name to ``{'distance': sum, 'cnt': n}``.
            It is updated in place. Despite the key name, ``'distance'``
            accumulates the cosine *similarity*.

    Returns:
        Tuple ``(class_data, mean_similarity, cnt)`` where ``cnt`` is the
        number of frames processed and ``mean_similarity`` is the plain mean
        over those frames (0.0 when there were none).

    Raises:
        IOError: if a ground-truth image cannot be read.
    """
    similarity = 0
    cnt = 0
    for path in paths:
        mask_path, gt_path, classes = path[:3]
        # NOTE(review): allow_pickle=True can execute arbitrary code when the
        # .npy file comes from an untrusted source.
        mask = np.load(mask_path,allow_pickle = True)
        gt = cv2.imread(gt_path,cv2.IMREAD_UNCHANGED)
        if gt is None:
            # cv2.imread returns None rather than raising on a bad path.
            raise IOError("Could not read ground-truth image: {0}".format(gt_path))
        # Half-size resize; presumably this matches the mask resolution --
        # TODO confirm against the code that writes mask.npy.
        gt = cv2.resize(gt, dsize = None , fx = 0.5, fy = 0.5)
        # float32 in [0, 1], then a single grayscale channel
        gt = gt.astype(np.float32) / 255.0
        gt = cv2.cvtColor(gt, cv2.COLOR_BGR2GRAY)
        cosine_sim = cosine_similarity(gt,mask)
        similarity = similarity + cosine_sim
        cnt += 1
        # Count a frame once per class, however many objects of that class it
        # holds; dict.fromkeys drops duplicates and keeps first-seen order.
        for clas in dict.fromkeys(classes):
            stats = class_data.setdefault(clas, {'distance' : 0.0 , 'cnt' : 0})
            stats['distance'] += cosine_sim
            stats['cnt'] += 1

    print(cnt)
    # Exact mean: the former "cnt + 0.001" denominator avoided a division by
    # zero but skewed every average slightly.
    mean_similarity = similarity / cnt if cnt else 0.0
    return class_data , mean_similarity, cnt

        
        
    

In [76]:
# Evaluate videos 1..20: per-video mean similarity plus per-class totals.
# class_data starts empty and is filled across all videos by compute_metrics.
class_data = {}
for i in range(1,21):
    paths = generate_all_relevant_image_paths(i)
    class_data , similarity , cnt = compute_metrics(paths, class_data)
    print("Video Index : {0}  Similarity : {1}   Count : {2}".format(i, similarity, cnt))
# Per-class average: accumulated similarity (stored under 'distance') divided
# by the number of frames in which the class appeared.
for clas in class_data.keys():
    avg_similarity = class_data[clas]['distance'] / class_data[clas]['cnt']
    print("class : {0}   average similarity : {1} number of frames : {2} ".format(clas, avg_similarity, class_data[clas]['cnt']))

383
Video Index : 1  Similarity : 0.08171698864513298   Count : 383
704
Video Index : 2  Similarity : 0.04092719086055858   Count : 704
750
Video Index : 3  Similarity : 0.07168294688028039   Count : 750
669
Video Index : 4  Similarity : 0.1478240183300658   Count : 669
410
Video Index : 5  Similarity : 0.03234396465798317   Count : 410
750
Video Index : 6  Similarity : 0.12071778733429155   Count : 750
749
Video Index : 7  Similarity : 0.04705952011568003   Count : 749
481
Video Index : 8  Similarity : 0.047783176741826895   Count : 481
369
Video Index : 9  Similarity : 0.09820599672958094   Count : 369
750
Video Index : 10  Similarity : 0.07320889107224644   Count : 750
750
Video Index : 11  Similarity : 0.06075114894907288   Count : 750
648
Video Index : 12  Similarity : 0.12195357452806325   Count : 648
460
Video Index : 13  Similarity : 0.09389401106003642   Count : 460
733
Video Index : 14  Similarity : 0.06446290630713913   Count : 733
232
Video Index : 15  Similarity : 0.066047

In [71]:
# Same per-video loop as the `In [76]` cell: resets class_data and re-evaluates
# videos 1..20, printing the mean similarity and frame count of each.
class_data = {}
for i in range(1,21):
    paths = generate_all_relevant_image_paths(i)
    class_data , similarity , cnt = compute_metrics(paths, class_data)
    print("Video Index : {0}  Similarity : {1}   Count : {2}".format(i, similarity, cnt))



383
Video Index : 1  Similarity : 0.08171698864513298   Count : 383
704
Video Index : 2  Similarity : 0.04092719086055858   Count : 704
750
Video Index : 3  Similarity : 0.07168294688028039   Count : 750
669
Video Index : 4  Similarity : 0.1478240183300658   Count : 669
410
Video Index : 5  Similarity : 0.03234396465798317   Count : 410
750
Video Index : 6  Similarity : 0.12071778733429155   Count : 750
749
Video Index : 7  Similarity : 0.04705952011568003   Count : 749
481
Video Index : 8  Similarity : 0.047783176741826895   Count : 481
369
Video Index : 9  Similarity : 0.09820599672958094   Count : 369
750
Video Index : 10  Similarity : 0.07320889107224644   Count : 750
750
Video Index : 11  Similarity : 0.06075114894907288   Count : 750
648
Video Index : 12  Similarity : 0.12195357452806325   Count : 648
460
Video Index : 13  Similarity : 0.09389401106003642   Count : 460
733
Video Index : 14  Similarity : 0.06446290630713913   Count : 733
232
Video Index : 15  Similarity : 0.066047

In [73]:
# Per-class report; needs class_data filled by a previous per-video loop cell.
# 'distance' holds the summed similarity, 'cnt' the number of contributing frames.
for clas in class_data.keys():
    avg_similarity = class_data[clas]['distance'] / class_data[clas]['cnt']
    print("class : {0}   average similarity : {1} number of frames : {2} ".format(clas, avg_similarity, class_data[clas]['cnt']))

class : airplane   average similarity : 0.07018202177451013 number of frames : 164 
class : bench   average similarity : 0.0709808556741338 number of frames : 185 
class : traffic light   average similarity : 0.1024950851796641 number of frames : 7189 
class : car   average similarity : 0.0725900096547534 number of frames : 41292 
class : person   average similarity : 0.13966597424596633 number of frames : 3273 
class : cup   average similarity : 0.07155556394982027 number of frames : 29 
class : boat   average similarity : 0.07208738203261482 number of frames : 43 
class : truck   average similarity : 0.06186837976094926 number of frames : 1605 
class : clock   average similarity : 0.08795790682281356 number of frames : 131 
class : baseball bat   average similarity : 0.019363563507795334 number of frames : 1 
class : potted plant   average similarity : 0.04361278039254731 number of frames : 96 
class : train   average similarity : 0.06888602962835956 number of frames : 118 
class : u

0.00853986601633272
