In [1]:
#hide
%load_ext autoreload
%autoreload 2
%matplotlib inline

# Results

> This module contains all the code for running our experiments for Tango.

In [2]:
#hide
import cv2
import pickle
import random
import time

from pathlib import Path

# tango
from tango.prep import *
from tango.features import *
from tango.eval import *
from tango.model import *
from tango.approach import *

from tqdm.auto import tqdm

In [3]:
# Seed Python's built-in RNG. Only the `random` module is seeded here;
# no other library's generator is seeded in this cell.
random.seed(42)
# Root directory; datasets, codebooks and results are addressed relative to it below.
path = Path("/tf/data")

# TODO: Check whether any apps overlap between the RICO dataset and our validation set / user data.

# Data Setup
### Description:
* Number of Participants: 14 (10 students and 4 authors)
* Number of Applications: 6
* Number of Bug Reports per Application: 10

In [4]:
# Frame rate handed to VideoDataset.from_path (as `fr`) and to generate_rankings.
fps = 30

In [5]:
# Dataset tag embedded in the result file names written by generate_rankings.
ds_val = 'val'
# Load the validation videos; labels come from `label_from_paths()`
# (presumably derived from the directory layout -- confirm in tango.prep).
vid_val_ds = VideoDataset.from_path(path/"datasets/validation_set/", fr = fps).label_from_paths()
vid_val_ds.get_labels()

['car_report', 'king', 'tasty']

In [5]:
# Dataset tag embedded in the result file names written by generate_rankings.
ds_user = 'user'
# Load the user-study videos; labels come from `label_from_paths()`
# (presumably derived from the directory layout -- confirm in tango.prep).
vid_user_ds = VideoDataset.from_path(path/"datasets/user_data/", fr = fps).label_from_paths()
vid_user_ds.get_labels()

['APOD', 'GROW', 'TIME', 'TOK', 'DROID', 'GNU']

In [None]:
# TODO run evaluation both ways with all videos and then one where some bug reports don't have duplicates

In [None]:
# TODO look at original fivr paper and bovw papers for finding configuration of models

# Model Setup
### Configurations:
* Number of Visual Words: 1,000, 5,000, 10,000
* Codebook Number of Image Samples: MAX ~50,000
* Number of frames kept: 1, 5
* Model + Bag of Visual Words
* Model + Bag of Visual Words + Fuzzy LCS
* **Potential:** Model + Bag of Visual Words + Fuzzy LCS + Weighting scheme to weight end of video more
* **Potential:** Model + Bag of Visual Words + Fuzzy LCS + Weighting scheme to weight end of video more + V2S selection of important frames (touch indicator appears)

For a total of 12 different configurations per model

# SIFT - M00

In [6]:
# Model id used by generate_rankings in the codebook and results paths.
model_00 = 'M00'
# NOTE(review): `cv2.xfeatures2d` is only available in opencv-contrib builds -- confirm the environment.
M00 = SIFTExtractor(cv2.xfeatures2d.SIFT_create(nfeatures = 10)) # limit SIFT features to top 10

# SimCLR - M01

In [7]:
# Model id used by generate_rankings in the codebook and results paths.
model_01 = 'M01'
# Restore the SimCLR checkpoint and call .eval() on it before wrapping it in an extractor.
simclr = SimCLRModel.load_from_checkpoint(checkpoint_path = str(path/'models/simclr/checkpointepoch=98.ckpt')).eval()
M01 = SimCLRExtractor(simclr)

In [8]:
# Codebook sizes (number of visual words) to evaluate.
vwords = [1_000, 5_000, 10_000]
# Number of images passed to gen_codebooks; None means use the entire RICO dataset
# (the size of the full dataset is not recorded in this notebook).
n_imgs = 15_000
# "Number of frames kept" settings to evaluate.
n_frames_to_keep = [1, 5]
# (model id, extractor) pairs handed to gen_codebooks.
models = [(model_00, M00), (model_01, M01)]

In [None]:
# Build the codebooks for every model / vocabulary size.
# Presumably these are the files generate_rankings later loads from
# path/models/codebooks/<model>/ -- confirm in tango.
gen_codebooks(path, models, vwords, n_imgs)

In [None]:
def generate_rankings(
    path, vid_ds, ds_name, model_name, model, sim_func, vwords, n_imgs,
    n_frames_to_keep, fps
):
    """Rank the videos of `vid_ds` for every configuration and pickle the results.

    For each combination of `vwords` x `n_frames_to_keep` this loads the
    matching codebook, extracts features, computes the BoVW and LCS
    similarities, runs `approach` in its default mode and in `'lcs'` mode,
    evaluates both rankings and writes two pickle files under
    `path/results/<model_name>/`.

    NOTE(review): this notebook defines `generate_rankings` in more than one
    cell; whichever cell ran last is the one in effect.

    Args:
        path: `pathlib.Path` root holding `models/codebooks/` and `results/`.
        vid_ds: Video dataset forwarded to the tango helpers.
        ds_name: Dataset tag embedded in the output file names.
        model_name: Model id used in the codebook and results paths.
        model: Feature extractor forwarded to the tango helpers.
        sim_func: Frame similarity function forwarded to
            `gen_lcs_similarity` and `approach`.
        vwords: Iterable of codebook sizes (number of visual words).
        n_imgs: Only used to tag the output file names.
        n_frames_to_keep: Iterable of frames-to-keep settings.
        fps: Frame rate forwarded to `gen_extracted_features` and `approach`.

    Returns:
        None. Results are written to disk.
    """
    results_dir = path/f'results/{model_name}'
    # Create the output directory up front so a long run cannot fail at save time.
    results_dir.mkdir(parents = True, exist_ok = True)

    for vw in tqdm(vwords):
        for ftk in tqdm(n_frames_to_keep):
            rankings = {}
            evaluation_metrics = {}
            fname = path/f'models/codebooks/{model_name}/cookbook_{model_name}_{vw}vw.model'
            # NOTE: pickle can execute arbitrary code; only load codebooks produced by this project.
            with open(fname, 'rb') as f:
                codebook = pickle.load(f)

            # The timer starts before feature extraction, so both timings below
            # include feature extraction and both similarity computations.
            start = time.time()
            vid_ds_features = gen_extracted_features(vid_ds, model, fps, ftk)
            df, bovw_vid_ds_sims = gen_bovw_similarity(vid_ds, vid_ds_features, model, codebook, vw, ftk)
            # Fixed: the original passed the undefined name `mdl` and stored the
            # LCS similarities over the BoVW ones.
            lcs_vid_ds_sims = gen_lcs_similarity(vid_ds, vid_ds_features, sim_func, model, codebook, df, vw, ftk)

            # Fixed: the original passed the undefined name `vid_ds_sims`.
            # NOTE(review): confirm against tango.approach that the default mode
            # expects the BoVW similarities and 'lcs' mode the LCS similarities.
            rankings['bovw'] = approach(
                vid_ds, vid_ds_features, bovw_vid_ds_sims, model, sim_func,
                codebook, df, vw, fps = fps, ftk = ftk
            )
            end_bovw = time.time()
            rankings['bovw_time'] = end_bovw - start
            evaluation_metrics['bovw'] = evaluate(
                rankings['bovw']
            )

            rankings['lcs'] = approach(
                vid_ds, vid_ds_features, lcs_vid_ds_sims, model, sim_func,
                codebook, df, vw, fps = fps, ftk = ftk, mode = 'lcs'
            )
            end_lcs = time.time()
            # Measured from the same `start`, so this is cumulative over the BoVW step.
            rankings['lcs_time'] = end_lcs - start
            evaluation_metrics['lcs'] = evaluate(
                rankings['lcs']
            )

            # TODO: the combined mode (`mode = 'bovw_lcs'`, stored under
            # rankings['bovw_lcs'] / rankings['bovw_lcs_time']) is not enabled yet.

            id_name = f'{ds_name}_{n_imgs}n_{vw}vw_{ftk}ftk'
            with open(results_dir/f'rankings_{id_name}_lcs.pkl', 'wb') as f:
                pickle.dump(rankings, f, protocol=pickle.HIGHEST_PROTOCOL)

            with open(results_dir/f'evaluation_metrics_{id_name}_lcs.pkl', 'wb') as f:
                pickle.dump(evaluation_metrics, f, protocol=pickle.HIGHEST_PROTOCOL)

In [9]:
def generate_rankings(
    path, vid_ds, ds_name, model_name, model, sim_func, vwords, n_imgs,
    n_frames_to_keep, fps
):
    """Rank the videos of `vid_ds` for every configuration and pickle the results.

    For each combination of `vwords` x `n_frames_to_keep` this loads the
    matching codebook, extracts features, computes the BoVW and LCS
    similarities, runs `approach` in its default mode and in `'lcs'` mode,
    evaluates both rankings and writes two pickle files under
    `path/results/<model_name>/`.

    Args:
        path: `pathlib.Path` root holding `models/codebooks/` and `results/`.
        vid_ds: Video dataset forwarded to the tango helpers.
        ds_name: Dataset tag embedded in the output file names.
        model_name: Model id used in the codebook and results paths.
        model: Feature extractor forwarded to the tango helpers.
        sim_func: Frame similarity function forwarded to
            `gen_lcs_similarity` and `approach`.
        vwords: Iterable of codebook sizes (number of visual words).
        n_imgs: Only used to tag the output file names.
        n_frames_to_keep: Iterable of frames-to-keep settings.
        fps: Frame rate forwarded to `gen_extracted_features` and `approach`.

    Returns:
        None. Results are written to disk.
    """
    results_dir = path/f'results/{model_name}'
    # Create the output directory up front so a long run cannot fail at save time.
    results_dir.mkdir(parents = True, exist_ok = True)

    for vw in tqdm(vwords):
        for ftk in tqdm(n_frames_to_keep):
            rankings = {}
            evaluation_metrics = {}
            fname = path/f'models/codebooks/{model_name}/cookbook_{model_name}_{vw}vw.model'
            # NOTE: pickle can execute arbitrary code; only load codebooks produced by this project.
            with open(fname, 'rb') as f:
                codebook = pickle.load(f)

            # The timer starts before feature extraction, so both timings below
            # include feature extraction and both similarity computations.
            start = time.time()
            vid_ds_features = gen_extracted_features(vid_ds, model, fps, ftk)
            df, bovw_vid_ds_sims = gen_bovw_similarity(vid_ds, vid_ds_features, model, codebook, vw, ftk)
            # Fixed: the original passed the undefined name `mdl` instead of `model`.
            lcs_vid_ds_sims = gen_lcs_similarity(vid_ds, vid_ds_features, sim_func, model, codebook, df, vw, ftk)

            # Fixed: the original passed the undefined name `vid_ds_sims`.
            # NOTE(review): confirm against tango.approach that the default mode
            # expects the BoVW similarities and 'lcs' mode the LCS similarities.
            rankings['bovw'] = approach(
                vid_ds, vid_ds_features, bovw_vid_ds_sims, model, sim_func,
                codebook, df, vw, fps = fps, ftk = ftk
            )
            end_bovw = time.time()
            rankings['bovw_time'] = end_bovw - start
            evaluation_metrics['bovw'] = evaluate(
                rankings['bovw']
            )

            rankings['lcs'] = approach(
                vid_ds, vid_ds_features, lcs_vid_ds_sims, model, sim_func,
                codebook, df, vw, fps = fps, ftk = ftk, mode = 'lcs'
            )
            end_lcs = time.time()
            # Measured from the same `start`, so this is cumulative over the BoVW step.
            rankings['lcs_time'] = end_lcs - start
            evaluation_metrics['lcs'] = evaluate(
                rankings['lcs']
            )

            # TODO: the combined mode (`mode = 'bovw_lcs'`, stored under
            # rankings['bovw_lcs'] / rankings['bovw_lcs_time']) is not enabled yet.

            id_name = f'{ds_name}_{n_imgs}n_{vw}vw_{ftk}ftk'
            with open(results_dir/f'rankings_{id_name}_lcs.pkl', 'wb') as f:
                pickle.dump(rankings, f, protocol=pickle.HIGHEST_PROTOCOL)

            with open(results_dir/f'evaluation_metrics_{id_name}_lcs.pkl', 'wb') as f:
                pickle.dump(evaluation_metrics, f, protocol=pickle.HIGHEST_PROTOCOL)

# Validation Set

In [11]:
# Validation set, SIFT extractor (M00): runs every vwords x n_frames_to_keep
# combination and pickles the rankings / metrics under path/results/M00/.
generate_rankings(
    path, vid_val_ds, ds_val, model_00, M00, sift_frame_sim, vwords, n_imgs,
    n_frames_to_keep, fps
)

NameError: name 'vid_val_ds' is not defined

In [14]:
# Validation set, SimCLR extractor (M01): runs every vwords x n_frames_to_keep
# combination and pickles the rankings / metrics under path/results/M01/.
generate_rankings(
    path, vid_val_ds, ds_val, model_01, M01, simclr_frame_sim, vwords, n_imgs,
    n_frames_to_keep, fps
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


car_report Elapsed Time in Seconds 63.44844651222229
car_report σ Rank 1.7204650534085255
car_report μ Rank 1.8
car_report Median Rank 1.0
car_report mRR: 0.8229166666666667
car_report mAP: 0.7260905736563632
car_report Hit@1: 0.75
car_report Hit@5: 0.95
car_report Hit@10: 1.0
king Elapsed Time in Seconds 104.19167971611023
king σ Rank 1.0198039027185568
king μ Rank 1.6
king Median Rank 1.0
king mRR: 0.7933333333333333
king mAP: 0.6480639268139268
king Hit@1: 0.65
king Hit@5: 1.0
king Hit@10: 1.0
tasty Elapsed Time in Seconds 78.35678124427795
tasty σ Rank 0.6403124237432849
tasty μ Rank 1.3
tasty Median Rank 1.0
tasty mRR: 0.8833333333333332
tasty mAP: 0.8186261423761423
tasty Hit@1: 0.8
tasty Hit@5: 1.0
tasty Hit@10: 1.0


HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))


car_report Elapsed Time in Seconds 70.80435085296631
car_report σ Rank 0.6782329983125267
car_report μ Rank 1.2
car_report Median Rank 1.0
car_report mRR: 0.9375
car_report mAP: 0.8619172932330826
car_report Hit@1: 0.9
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 117.23649311065674
king σ Rank 1.061838029079765
king μ Rank 1.65
king Median Rank 1.0
king mRR: 0.7849999999999999
king mAP: 0.652819264069264
king Hit@1: 0.65
king Hit@5: 1.0
king Hit@10: 1.0
tasty Elapsed Time in Seconds 85.77620220184326
tasty σ Rank 0.4330127018922193
tasty μ Rank 1.25
tasty Median Rank 1.0
tasty mRR: 0.875
tasty mAP: 0.828989898989899
tasty Hit@1: 0.75
tasty Hit@5: 1.0
tasty Hit@10: 1.0


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


car_report Elapsed Time in Seconds 305.5726613998413
car_report σ Rank 0.7810249675906654
car_report μ Rank 1.3
car_report Median Rank 1.0
car_report mRR: 0.9041666666666666
car_report mAP: 0.8371001221001222
car_report Hit@1: 0.85
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 518.4577212333679
king σ Rank 1.1575836902790224
king μ Rank 1.6
king Median Rank 1.0
king mRR: 0.8308333333333333
king mAP: 0.7070472582972583
king Hit@1: 0.75
king Hit@5: 1.0
king Hit@10: 1.0
tasty Elapsed Time in Seconds 375.53088760375977
tasty σ Rank 0.5099019513592785
tasty μ Rank 1.2
tasty Median Rank 1.0
tasty mRR: 0.9166666666666666
tasty mAP: 0.8701785714285715
tasty Hit@1: 0.85
tasty Hit@5: 1.0
tasty Hit@10: 1.0


HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))


car_report Elapsed Time in Seconds 464.14875078201294
car_report σ Rank 0.0
car_report μ Rank 1.0
car_report Median Rank 1.0
car_report mRR: 1.0
car_report mAP: 0.9554761904761906
car_report Hit@1: 1.0
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 811.3421664237976
king σ Rank 0.9733961166965892
king μ Rank 1.45
king Median Rank 1.0
king mRR: 0.8516666666666666
king mAP: 0.7042826617826617
king Hit@1: 0.75
king Hit@5: 1.0
king Hit@10: 1.0
tasty Elapsed Time in Seconds 535.4938454627991
tasty σ Rank 0.47696960070847283
tasty μ Rank 1.15
tasty Median Rank 1.0
tasty mRR: 0.9416666666666667
tasty mAP: 0.8994246031746032
tasty Hit@1: 0.9
tasty Hit@5: 1.0
tasty Hit@10: 1.0



HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


car_report Elapsed Time in Seconds 63.505577087402344
car_report σ Rank 0.7399324293474372
car_report μ Rank 1.55
car_report Median Rank 1.0
car_report mRR: 0.775
car_report mAP: 0.6573383278085292
car_report Hit@1: 0.6
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 104.1245505809784
king σ Rank 2.0099751242241775
king μ Rank 2.4
king Median Rank 1.5
king mRR: 0.6675595238095238
king mAP: 0.6095583093377211
king Hit@1: 0.5
king Hit@5: 0.9
king Hit@10: 1.0
tasty Elapsed Time in Seconds 77.87287330627441
tasty σ Rank 0.3570714214271425
tasty μ Rank 1.15
tasty Median Rank 1.0
tasty mRR: 0.925
tasty mAP: 0.8265873015873015
tasty Hit@1: 0.85
tasty Hit@5: 1.0
tasty Hit@10: 1.0


HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))


car_report Elapsed Time in Seconds 70.31286430358887
car_report σ Rank 0.7348469228349535
car_report μ Rank 1.4
car_report Median Rank 1.0
car_report mRR: 0.85
car_report mAP: 0.7136117123617124
car_report Hit@1: 0.75
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 116.2100682258606
king σ Rank 2.0024984394500787
king μ Rank 2.3
king Median Rank 1.0
king mRR: 0.6947222222222222
king mAP: 0.6263686748980867
king Hit@1: 0.55
king Hit@5: 0.95
king Hit@10: 1.0
tasty Elapsed Time in Seconds 84.73524451255798
tasty σ Rank 0.3
tasty μ Rank 1.1
tasty Median Rank 1.0
tasty mRR: 0.95
tasty mAP: 0.8338789682539682
tasty Hit@1: 0.9
tasty Hit@5: 1.0
tasty Hit@10: 1.0


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


car_report Elapsed Time in Seconds 307.9623382091522
car_report σ Rank 1.0677078252031311
car_report μ Rank 1.4
car_report Median Rank 1.0
car_report mRR: 0.8975
car_report mAP: 0.7357682726200528
car_report Hit@1: 0.85
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 519.2798550128937
king σ Rank 2.080264406271472
king μ Rank 2.15
king Median Rank 1.0
king mRR: 0.7555555555555555
king mAP: 0.65692606005106
king Hit@1: 0.65
king Hit@5: 0.9
king Hit@10: 1.0
tasty Elapsed Time in Seconds 375.72621512413025
tasty σ Rank 0.0
tasty μ Rank 1.0
tasty Median Rank 1.0
tasty mRR: 1.0
tasty mAP: 0.9569444444444445
tasty Hit@1: 1.0
tasty Hit@5: 1.0
tasty Hit@10: 1.0


HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))


car_report Elapsed Time in Seconds 466.4981746673584
car_report σ Rank 0.8874119674649424
car_report μ Rank 1.25
car_report Median Rank 1.0
car_report mRR: 0.9349999999999999
car_report mAP: 0.7973933422385435
car_report Hit@1: 0.9
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 811.8493564128876
king σ Rank 1.5960889699512368
king μ Rank 1.95
king Median Rank 1.0
king mRR: 0.7613095238095238
king mAP: 0.6506891025641026
king Hit@1: 0.65
king Hit@5: 0.95
king Hit@10: 1.0
tasty Elapsed Time in Seconds 535.5874562263489
tasty σ Rank 0.0
tasty μ Rank 1.0
tasty Median Rank 1.0
tasty mRR: 1.0
tasty mAP: 0.945138888888889
tasty Hit@1: 1.0
tasty Hit@5: 1.0
tasty Hit@10: 1.0



HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


car_report Elapsed Time in Seconds 63.44441246986389
car_report σ Rank 0.0
car_report μ Rank 1.0
car_report Median Rank 1.0
car_report mRR: 1.0
car_report mAP: 0.884920634920635
car_report Hit@1: 1.0
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 104.51212310791016
king σ Rank 1.57797338380595
king μ Rank 1.9
king Median Rank 1.0
king mRR: 0.7696428571428571
king mAP: 0.7025694444444442
king Hit@1: 0.65
king Hit@5: 0.95
king Hit@10: 1.0
tasty Elapsed Time in Seconds 78.18621182441711
tasty σ Rank 0.7810249675906654
tasty μ Rank 1.3
tasty Median Rank 1.0
tasty mRR: 0.9041666666666666
tasty mAP: 0.7999806187964082
tasty Hit@1: 0.85
tasty Hit@5: 1.0
tasty Hit@10: 1.0


HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))


car_report Elapsed Time in Seconds 70.26576256752014
car_report σ Rank 0.0
car_report μ Rank 1.0
car_report Median Rank 1.0
car_report mRR: 1.0
car_report mAP: 0.9258333333333333
car_report Hit@1: 1.0
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 116.6252670288086
king σ Rank 1.4352700094407325
king μ Rank 1.8
king Median Rank 1.0
king mRR: 0.7958333333333334
king mAP: 0.708668990323402
king Hit@1: 0.7
king Hit@5: 0.95
king Hit@10: 1.0
tasty Elapsed Time in Seconds 85.07738757133484
tasty σ Rank 0.47696960070847283
tasty μ Rank 1.15
tasty Median Rank 1.0
tasty mRR: 0.9416666666666667
tasty mAP: 0.8274500962000962
tasty Hit@1: 0.9
tasty Hit@5: 1.0
tasty Hit@10: 1.0


HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


car_report Elapsed Time in Seconds 308.94186758995056
car_report σ Rank 0.21794494717703372
car_report μ Rank 1.05
car_report Median Rank 1.0
car_report mRR: 0.975
car_report mAP: 0.9564484126984126
car_report Hit@1: 0.95
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 519.4086105823517
king σ Rank 2.118962010041709
king μ Rank 2.1
king Median Rank 1.0
king mRR: 0.7591666666666667
king mAP: 0.6873954517704518
king Hit@1: 0.65
king Hit@5: 0.95
king Hit@10: 1.0
tasty Elapsed Time in Seconds 377.120591878891
tasty σ Rank 0.3
tasty μ Rank 1.1
tasty Median Rank 1.0
tasty mRR: 0.95
tasty mAP: 0.927142857142857
tasty Hit@1: 0.9
tasty Hit@5: 1.0
tasty Hit@10: 1.0


HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))


car_report Elapsed Time in Seconds 467.0696225166321
car_report σ Rank 0.0
car_report μ Rank 1.0
car_report Median Rank 1.0
car_report mRR: 1.0
car_report mAP: 0.9833333333333332
car_report Hit@1: 1.0
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 810.8539173603058
king σ Rank 1.5960889699512368
king μ Rank 1.95
king Median Rank 1.0
king mRR: 0.7613095238095238
king mAP: 0.6880222555222554
king Hit@1: 0.65
king Hit@5: 0.95
king Hit@10: 1.0
tasty Elapsed Time in Seconds 535.9586563110352
tasty σ Rank 0.21794494717703372
tasty μ Rank 1.05
tasty Median Rank 1.0
tasty mRR: 0.975
tasty mAP: 0.9142015392015391
tasty Hit@1: 0.95
tasty Hit@5: 1.0
tasty Hit@10: 1.0




# User Data

In [None]:
# User data, SIFT extractor (M00): runs every vwords x n_frames_to_keep
# combination and pickles the rankings / metrics under path/results/M00/.
generate_rankings(
    path, vid_user_ds, ds_user, model_00, M00, sift_frame_sim, vwords, n_imgs,
    n_frames_to_keep, fps
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))


APOD Elapsed Time in Seconds 137.18566870689392
APOD σ Rank 6.72053238631848
APOD μ Rank 7.033333333333333
APOD Median Rank 4.0
APOD mRR: 0.3394523877183724
APOD mAP: 0.258917949980462
APOD Hit@1: 0.13333333333333333
APOD Hit@5: 0.6
APOD Hit@10: 0.7666666666666667
GROW Elapsed Time in Seconds 181.32364177703857
GROW σ Rank 4.689528263929048
GROW μ Rank 4.516129032258065
GROW Median Rank 2.0
GROW mRR: 0.5774402403859109
GROW mAP: 0.447443430985815
GROW Hit@1: 0.4838709677419355
GROW Hit@5: 0.6774193548387096
GROW Hit@10: 0.9032258064516129
TIME Elapsed Time in Seconds 154.77173161506653
TIME σ Rank 6.728050733062784
TIME μ Rank 10.0
TIME Median Rank 9.5
TIME mRR: 0.18574943704768268
TIME mAP: 0.15163777994158123
TIME Hit@1: 0.03333333333333333
TIME Hit@5: 0.36666666666666664
TIME Hit@10: 0.5666666666666667
TOK Elapsed Time in Seconds 131.85000610351562
TOK σ Rank 6.89895322172542
TOK μ Rank 11.733333333333333
TOK Median Rank 12.0
TOK mRR: 0.14707765280416377
TOK mAP: 0.1307391999991247

HBox(children=(FloatProgress(value=0.0, max=435.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=465.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=435.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=435.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=435.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=435.0), HTML(value='')))


APOD Elapsed Time in Seconds 111.38591718673706
APOD σ Rank 6.556082841317842
APOD μ Rank 6.533333333333333
APOD Median Rank 4.0
APOD mRR: 0.3178536262308192
APOD mAP: 0.2318390767154601
APOD Hit@1: 0.1
APOD Hit@5: 0.6
APOD Hit@10: 0.8666666666666667
GROW Elapsed Time in Seconds 191.25272369384766
GROW σ Rank 7.244072534745746
GROW μ Rank 8.67741935483871
GROW Median Rank 7.0
GROW mRR: 0.3540689167646514
GROW mAP: 0.28471266207628854
GROW Hit@1: 0.22580645161290322
GROW Hit@5: 0.4838709677419355
GROW Hit@10: 0.5806451612903226
TIME Elapsed Time in Seconds 160.0667724609375
TIME σ Rank 6.3962662719919825
TIME μ Rank 9.433333333333334
TIME Median Rank 8.5
TIME mRR: 0.20449145864264542
TIME mAP: 0.1591668891284138
TIME Hit@1: 0.06666666666666667
TIME Hit@5: 0.3333333333333333
TIME Hit@10: 0.7
TOK Elapsed Time in Seconds 118.49521207809448
TOK σ Rank 7.008011288676855
TOK μ Rank 11.233333333333333
TOK Median Rank 11.0
TOK mRR: 0.1851730466890313
TOK mAP: 0.15000096682229513
TOK Hit@1: 0.0

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

In [None]:
# User data, SimCLR extractor (M01): runs every vwords x n_frames_to_keep
# combination and pickles the rankings / metrics under path/results/M01/.
generate_rankings(
    path, vid_user_ds, ds_user, model_01, M01, simclr_frame_sim, vwords, n_imgs,
    n_frames_to_keep, fps
)

# Scratch

In [None]:
def generate_rankings(
    path, vid_ds, ds_name, model_name, model, sim_func, vwords, n_imgs,
    n_frames_to_keep, fps
):
    """Rank the videos of `vid_ds` for every configuration and pickle the results.

    For each combination of `vwords` x `n_frames_to_keep` this loads the
    matching codebook, extracts features, computes the BoVW and LCS
    similarities, runs `approach` in its default mode and in `'lcs'` mode,
    evaluates both rankings and writes two pickle files under
    `path/results/<model_name>/`.

    NOTE(review): this notebook defines `generate_rankings` in more than one
    cell; whichever cell ran last is the one in effect.

    Args:
        path: `pathlib.Path` root holding `models/codebooks/` and `results/`.
        vid_ds: Video dataset forwarded to the tango helpers.
        ds_name: Dataset tag embedded in the output file names.
        model_name: Model id used in the codebook and results paths.
        model: Feature extractor forwarded to the tango helpers.
        sim_func: Frame similarity function forwarded to
            `gen_lcs_similarity` and `approach`.
        vwords: Iterable of codebook sizes (number of visual words).
        n_imgs: Only used to tag the output file names.
        n_frames_to_keep: Iterable of frames-to-keep settings.
        fps: Frame rate forwarded to `gen_extracted_features` and `approach`.

    Returns:
        None. Results are written to disk.
    """
    results_dir = path/f'results/{model_name}'
    # Create the output directory up front so a long run cannot fail at save time.
    results_dir.mkdir(parents = True, exist_ok = True)

    for vw in tqdm(vwords):
        for ftk in tqdm(n_frames_to_keep):
            rankings = {}
            evaluation_metrics = {}
            fname = path/f'models/codebooks/{model_name}/cookbook_{model_name}_{vw}vw.model'
            # NOTE: pickle can execute arbitrary code; only load codebooks produced by this project.
            with open(fname, 'rb') as f:
                codebook = pickle.load(f)

            # The timer starts before feature extraction, so both timings below
            # include feature extraction and both similarity computations.
            start = time.time()
            vid_ds_features = gen_extracted_features(vid_ds, model, fps, ftk)
            df, bovw_vid_ds_sims = gen_bovw_similarity(vid_ds, vid_ds_features, model, codebook, vw, ftk)
            # Fixed: the original passed the undefined name `mdl` and stored the
            # LCS similarities over the BoVW ones.
            lcs_vid_ds_sims = gen_lcs_similarity(vid_ds, vid_ds_features, sim_func, model, codebook, df, vw, ftk)

            # Fixed: the original passed the undefined name `vid_ds_sims`.
            # NOTE(review): confirm against tango.approach that the default mode
            # expects the BoVW similarities and 'lcs' mode the LCS similarities.
            rankings['bovw'] = approach(
                vid_ds, vid_ds_features, bovw_vid_ds_sims, model, sim_func,
                codebook, df, vw, fps = fps, ftk = ftk
            )
            end_bovw = time.time()
            rankings['bovw_time'] = end_bovw - start
            evaluation_metrics['bovw'] = evaluate(
                rankings['bovw']
            )

            rankings['lcs'] = approach(
                vid_ds, vid_ds_features, lcs_vid_ds_sims, model, sim_func,
                codebook, df, vw, fps = fps, ftk = ftk, mode = 'lcs'
            )
            end_lcs = time.time()
            # Measured from the same `start`, so this is cumulative over the BoVW step.
            rankings['lcs_time'] = end_lcs - start
            evaluation_metrics['lcs'] = evaluate(
                rankings['lcs']
            )

            # TODO: the combined mode (`mode = 'bovw_lcs'`, stored under
            # rankings['bovw_lcs'] / rankings['bovw_lcs_time']) is not enabled yet.

            id_name = f'{ds_name}_{n_imgs}n_{vw}vw_{ftk}ftk'
            with open(results_dir/f'rankings_{id_name}_lcs.pkl', 'wb') as f:
                pickle.dump(rankings, f, protocol=pickle.HIGHEST_PROTOCOL)

            with open(results_dir/f'evaluation_metrics_{id_name}_lcs.pkl', 'wb') as f:
                pickle.dump(evaluation_metrics, f, protocol=pickle.HIGHEST_PROTOCOL)

## Setup data

In [None]:
# NOTE(review): import placed mid-notebook; `random` is already imported at the top.
from random import sample

# Directory globbed below for the RICO screenshots (*.jpg).
rico_path = Path('/tf/data/combined/data')
# Sorted so that, for a fixed seed, `sample` draws from a stable ordering.
img_paths = sorted(rico_path.glob('*.jpg'))

# Number of screenshots to draw; `sample` raises ValueError if fewer are available.
n = 1_000
# NOTE(review): `Image` is not imported in any visible cell -- presumably PIL.Image
# arriving through one of the `from tango... import *` lines; confirm.
# NOTE(review): the opened images are never explicitly closed.
sampled_imgs = [Image.open(img) for img in sample(img_paths, n)]
len(sampled_imgs)

In [None]:
# NOTE(review): this rebinds the global `path` that the earlier cells set to
# Path("/tf/data"); any generate_rankings(path, ...) call executed after this
# cell will resolve codebooks/results under the validation_videos directory.
path = Path("/tf/data/datasets/validation_videos")
# No `fr` argument is passed here, unlike the dataset cells near the top.
vid_ds = VideoDataset.from_path(path).label_from_paths()
vid_ds.get_labels()

## Define different models to run experiments on:
- SIFT baseline model - (M01)
- CNN non-layered non-finetuned baseline model - (M02)
- CNN non-layered finetuned baseline model - (M03)
- CNN layered non-finetuned baseline model - (M04)
- CNN layered finetuned baseline model - (M05)

In [None]:
# SIFT based models:
M00 = SIFTExtractor(cv2.xfeatures2d.SIFT_create())

# CNN based models:
model = SimCLRModel.load_from_checkpoint(checkpoint_path='/tf/data/models/simclr/checkpointepoch=98.ckpt').eval()
M01 = SimCLRExtractor(model)
# M01 = CNNExtractor(createExtractor(None, None, 'resnet50', False))
# learn, linear_output_layer = cnn.trainPetsModel() # Need to change to training on android screenshots
# M02 = CNNExtractor(cnn.createExtractor(learn, linear_output_layer, 'resnet50', True))
# M03 = CNNExtractor(createLayeredExtractor()) # TODO: Rego over implementation as it doesn't seem to generate multiple visual codebooks
# Need to create code for using finetuned layered cnn model M05

models_under_study = [M00, M01] # [M01, M02, M04]

In [None]:
def gen_all_codebooks(imgs, models, vw):
    """Generate one visual codebook per model and pickle each under /tf/data/models.

    Args:
        imgs: Images passed to `gen_vcodebook`; `len(imgs)` is embedded in the file name.
        models: Sequence of feature extractors; position `i` determines the `M{i:02}` tag.
        vw: Codebook size, forwarded to `gen_vcodebook` and embedded in the file name.

    Returns:
        None. Each codebook is written to disk.
    """
    for i, model in enumerate(models):
        codebook = gen_vcodebook(imgs, model, vw)
        fname = f'/tf/data/models/cookbook_M{i:02}_{len(imgs)}n_{vw}vw.model'
        # Context manager guarantees the file is flushed and closed
        # (the original left the handle open).
        with open(fname, 'wb') as f:
            pickle.dump(codebook, f)

In [None]:
# Codebook size used throughout the scratch experiment below.
vw = 100
gen_all_codebooks(sampled_imgs, models_under_study, vw)

## Experiment 01
Evaluate each model's ability to detect duplicate bug report videos.

**TODO:** Vary the hyperparameters, such as the number of visual words included in the codebooks.

In [None]:
def gen_tfidfs(vid_ds, mdl, vw, codebook, df, ftk):
    """Compute the TF-IDF weighted bag of visual words for every video.

    Args:
        vid_ds: Dataset whose `labels` maps app -> report -> list of videos.
        mdl: Feature extractor forwarded to `new_get_bovw`.
        vw: Codebook size forwarded to `new_get_bovw`.
        codebook: Visual codebook forwarded to `new_get_bovw`.
        df: Document-frequency data forwarded to `calc_tf_idf`.
        ftk: Forwarded to `new_get_bovw` as `frames_to_keep`.

    Returns:
        Nested defaultdict: app -> report -> list with one `calc_tf_idf`
        result per video, in dataset order.
    """
    tfidfs_by_app = defaultdict(lambda: defaultdict(list))
    for app, reports in tqdm(vid_ds.labels.items()):
        for report, videos in reports.items():
            for video in videos:
                histogram = new_get_bovw(video, mdl, codebook, vw, frames_to_keep = ftk)
                weighted = calc_tf_idf(histogram, df)
                tfidfs_by_app[app][report].append(weighted)

    return tfidfs_by_app

In [None]:
def get_results(vid_ds, imgs, mdls, vw, ftk):
    """Compute pairwise cosine similarities between all videos of each app.

    For every model the matching pickled codebook is loaded, TF-IDF vectors
    are computed for each video, and every (video, video) pair within an app
    is scored with cosine similarity.

    Args:
        vid_ds: Dataset whose `labels` maps app -> report -> list of videos.
        imgs: Images used for `get_df`; `len(imgs)` also selects the codebook file.
        mdls: Sequence of feature extractors; position `m` selects the `M{m:02}` codebook.
        vw: Codebook size.
        ftk: Frames-to-keep setting forwarded to `gen_tfidfs`.

    Returns:
        Nested dict:
        results['M{m:02}-{vw}'][app][report_i][report_j]['vid_k']['vid_l'] = similarity.
    """
    results = {}

    for m, mdl in enumerate(mdls):
        fname = f'/tf/data/models/cookbook_M{m:02}_{len(imgs)}n_{vw}vw.model'
        # Context manager closes the codebook file (the original leaked the handle).
        # NOTE: pickle can execute arbitrary code; only load files produced by this project.
        with open(fname, 'rb') as f:
            codebook = pickle.load(f)

        model_results = results[f'M{m:02}-{vw}'] = {}
        df = get_df(imgs, mdl, codebook, vw)
        print(df)
        vid_tfids = gen_tfidfs(vid_ds, mdl, vw, codebook, df, ftk)
        for app, reports in vid_ds.labels.items():
            app_results = model_results[app] = {}
            for report_i in reports:
                row = app_results[report_i] = {}
                for report_j in reports:
                    pair = row[report_j] = {}
                    for k, vec_k in enumerate(vid_tfids[app][report_i]):
                        sims = pair[f'vid_{k}'] = {}
                        for l, vec_l in enumerate(vid_tfids[app][report_j]):
                            # Cosine similarity between the two TF-IDF vectors.
                            sims[f'vid_{l}'] = np.dot(vec_k, vec_l) / (np.linalg.norm(vec_k) * np.linalg.norm(vec_l))

    return results

In [None]:
# Last positional argument is `ftk` (frames to keep) = 1.
results = get_results(vid_ds, sampled_imgs, models_under_study, vw, 1)

In [None]:
def flatten_dict(d_in, d_out, parent_key):
    """Flatten a nested dict into `d_out`, keyed by the tuple of keys leading to each leaf.

    Args:
        d_in: Possibly nested dict to flatten.
        d_out: Dict that receives the flattened entries (mutated in place).
        parent_key: Tuple of keys already traversed; pass `tuple()` at the top level.

    Returns:
        None. Entries are added to `d_out` in depth-first order.
    """
    for key, value in d_in.items():
        key_path = parent_key + (key,)
        if not isinstance(value, dict):
            d_out[key_path] = value
            continue
        # Descend into the nested dict, carrying the path so far.
        flatten_dict(value, d_out, key_path)

In [None]:
def sort_results(results):
    """Flatten each app's nested scores and order them from highest to lowest.

    Args:
        results: Nested dict, model -> app -> nested dict of scores.

    Returns:
        Dict model -> app -> OrderedDict mapping the flattened key tuple to
        its score, sorted by score in descending order.
    """
    sorted_results = {}
    for model_key, per_app in results.items():
        ranked_apps = sorted_results[model_key] = {}
        for app, nested_scores in per_app.items():
            flat_scores = {}
            flatten_dict(nested_scores, flat_scores, tuple())
            by_score_desc = sorted(flat_scores.items(), key=lambda item: item[1], reverse = True)
            ranked_apps[app] = OrderedDict(by_score_desc)

    return sorted_results

In [None]:
sorted_results = sort_results(results)
# get_results keys its output as 'M{m:02}-{vw}', so with vw = 100 the key
# 'M01-10' used previously does not exist; build the key from `vw` instead.
pprint(sorted_results[f'M01-{vw}'])

In [None]:
evaluate(sorted_results)

In [None]:
# Build one codebook per (vocabulary size, model), keyed 'M{i+1:02}-{vwords}'.
model_codebooks = {}
num_vwords = [100,]
# NOTE(review): `imgs` is not assigned in any visible cell (only `sampled_imgs` is) -- confirm.
# NOTE(review): the loop rebinds the globals `vwords` (a list in the config cell) and
# `model`; the 2-argument gen_tfidfs defined below reads `model`, `codebook` and
# `vwords` from the global namespace, i.e. the values left by the last iteration.
for vwords in tqdm(num_vwords):
    for i, model in tqdm(enumerate(models_under_study), total = len(models_under_study)):
        codebook = gen_vcodebook(imgs, model, vwords)
        model_codebooks[f'M{i + 1:02}-{vwords}'] = codebook

In [None]:
def gen_tfidfs(vid_ds, df):
    """Compute the TF-IDF weighted bag of visual words for every video.

    NOTE(review): `model`, `codebook` and `vwords` are read from the notebook's
    global namespace rather than passed in, so the result depends on whatever
    those globals hold when this is called. This definition also replaces the
    earlier 6-argument `gen_tfidfs` once its cell is run.

    Args:
        vid_ds: Dataset whose `labels` maps app -> report -> list of videos.
        df: Document-frequency data forwarded to `calc_tf_idf`.

    Returns:
        Nested defaultdict: app -> report -> list with one `calc_tf_idf`
        result per video.
    """
    vid_tfids = defaultdict(
        lambda: defaultdict(list)
    )
    for app, reports in tqdm(vid_ds.labels.items()):
        for report, vids in reports.items():
            for vid in vids:
                bovw = new_get_bovw(vid.vid_path, model, codebook, vwords, n = 500)
                # Fixed: the original passed the undefined name `hist` here.
                vid_tfids[app][report].append(calc_tf_idf(bovw, df))

    return vid_tfids

In [None]:
def get_results(vid_ds, imgs, models, model_codebooks, num_vwords):
    """Compute pairwise cosine similarities between all videos of each app.

    NOTE(review): models are paired with codebooks positionally (zip over the
    mapping's items), not by the mapping's keys; with more than one entry in
    `num_vwords` the same leading codebooks are reused for every size -- confirm
    this is intended.

    Args:
        vid_ds: Dataset whose `labels` maps app -> report -> list of videos.
        imgs: Images forwarded to `get_df`.
        models: Sequence of feature extractors.
        model_codebooks: Mapping whose values are codebooks, in model order.
        num_vwords: Iterable of codebook sizes.

    Returns:
        Nested dict:
        results['M{m+1:02}-{vwords}'][app][report_i][report_j]['vid_k']['vid_l'] = similarity.
    """
    results = {}

    for vwords in num_vwords:
        pairs = zip(models, model_codebooks.items())
        for m, (model, (_, codebook)) in enumerate(pairs):
            tag = f'M{m + 1:02}-{vwords}'
            tagged = results[tag] = {}
            df = get_df(imgs, model, codebook, vwords)
            vid_tfids = gen_tfidfs(vid_ds, df)
            for app, reports in vid_ds.labels.items():
                app_scores = tagged[app] = {}
                for report_i in reports:
                    row = app_scores[report_i] = {}
                    for report_j in reports:
                        pair_scores = row[report_j] = {}
                        for k, vec_k in enumerate(vid_tfids[app][report_i]):
                            sims = pair_scores[f'vid_{k}'] = {}
                            for l, vec_l in enumerate(vid_tfids[app][report_j]):
                                # Cosine similarity between the two TF-IDF vectors.
                                norms = np.linalg.norm(vec_k) * np.linalg.norm(vec_l)
                                sims[f'vid_{l}'] = np.dot(vec_k, vec_l) / norms

    return results

In [None]:
# NOTE(review): `imgs` is not assigned in any visible cell (only `sampled_imgs` is) -- confirm.
results = get_results(vid_ds, imgs, models_under_study, model_codebooks, num_vwords)

In [None]:
def flatten_dict(d_in, d_out, parent_key):
    """Flatten a nested dict into `d_out`, keyed by the tuple of keys leading to each leaf.

    Args:
        d_in: Possibly nested dict to flatten.
        d_out: Dict that receives the flattened entries (mutated in place).
        parent_key: Tuple of keys already traversed; pass `tuple()` at the top level.

    Returns:
        None. Entries are added to `d_out` in depth-first order.
    """
    for key, value in d_in.items():
        key_path = parent_key + (key,)
        if not isinstance(value, dict):
            d_out[key_path] = value
            continue
        # Descend into the nested dict, carrying the path so far.
        flatten_dict(value, d_out, key_path)

In [None]:
def sort_results(results):
    """Flatten each app's nested scores and order them from highest to lowest.

    Args:
        results: Nested dict, model -> app -> nested dict of scores.

    Returns:
        Dict model -> app -> OrderedDict mapping the flattened key tuple to
        its score, sorted by score in descending order.
    """
    sorted_results = {}
    for model_key, per_app in results.items():
        ranked_apps = sorted_results[model_key] = {}
        for app, nested_scores in per_app.items():
            flat_scores = {}
            flatten_dict(nested_scores, flat_scores, tuple())
            by_score_desc = sorted(flat_scores.items(), key=lambda item: item[1], reverse = True)
            ranked_apps[app] = OrderedDict(by_score_desc)

    return sorted_results

In [None]:
sorted_results = sort_results(results)
# The 5-argument get_results keys its output as 'M{m+1:02}-{vwords}'; with
# num_vwords = [100] the key 'M01-10' used previously does not exist.
pprint(sorted_results['M01-100'])

In [None]:
evaluate(sorted_results)

In [None]:
# For every model, build one binary relevance list per app and print mAP / mRR.
# NOTE(review): the loop variable rebinds the global `model`, and `rs` / `r`
# stay bound afterwards (later cells read them).
for model in sorted_results:
    rs = []
    for app in sorted_results[model]:
        r = []
        # Keys are the flattened (report_i, report_j, 'vid_k', 'vid_l') tuples from
        # sort_results, iterated from highest to lowest similarity.
        for labels, score in sorted_results[model][app].items():
            # Only pairs whose first report is 'S01' are used as queries.
            if labels[0] == 'S01':
                # Skips pairs whose two video indices are equal.
                # NOTE(review): this also drops pairs from *different* reports
                # that happen to share the same index -- confirm intended.
                if labels[2] != labels[3]:
                    # 1 when both videos belong to the same report, else 0.
                    if labels[0] == labels[1]: r.append(1)
                    else: r.append(0)
        rs.append(r)
    
    # NOTE(review): `mean_average_precision` is not defined in the visible cells above -- confirm its source.
    print(f'{model} mAP:', mean_average_precision(rs))
    print(f'{model} mRR:', mean_reciprocal_rank(rs))

In [None]:
mean_reciprocal_rank(rs)

In [None]:
mean_reciprocal_rank(rs)

In [None]:
mean_average_precision(rs)

In [None]:
mean_average_precision(rs)

In [None]:
# Relevance list for report 'S01' of app 'car_part' under model 'M01-100'.
# NOTE(review): only `r1` is filled in this cell; `r2` and `r3` stay empty.
r1, r2, r3 = [], [], []
for labels, score in sorted_results['M01-100']['car_part'].items():
    if labels[0] == 'S01':
        # Skip pairs whose two video indices are equal.
        if labels[2] != labels[3]:
            # 1 when both videos belong to the same report, else 0.
            if labels[0] == labels[1]: r1.append(1)
            else: r1.append(0)

In [None]:
mean_average_precision([r1, r2, r3])

In [None]:
average_precision(r1), average_precision(r2), average_precision(r3)

In [None]:
average_precision(r)

In [None]:
r2, r3

In [None]:
for labels, score in sorted_results['M01-100']['car_part'].items():
    if labels[0] == 'S02':
        print(labels)

In [None]:
sorted_results['M01-100']['car_part']

In [None]:
# export
def mean_reciprocal_rank(rs):
    """Mean of the reciprocal rank of the first relevant item in each ranking.

    Ranks are 1-based (the first element is rank 1) and relevance is binary:
    any nonzero entry counts as relevant. A ranking without any relevant item
    contributes 0 to the mean.

    Example from http://en.wikipedia.org/wiki/Mean_reciprocal_rank
    >>> rs = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.61111111111111105
    >>> rs = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0]])
    >>> mean_reciprocal_rank(rs)
    0.5
    >>> rs = [[0, 0, 0, 1], [1, 0, 0], [1, 0, 0]]
    >>> mean_reciprocal_rank(rs)
    0.75

    Args:
        rs: Iterable of relevance-score sequences (list or numpy), each in
            rank order (first element is the top-ranked item)

    Returns:
        Mean reciprocal rank
    """
    reciprocal_ranks = []
    for ranking in rs:
        # Positions (0-based) of all relevant items in this ranking.
        hit_positions = np.asarray(ranking).nonzero()[0]
        if hit_positions.size:
            reciprocal_ranks.append(1. / (hit_positions[0] + 1))
        else:
            reciprocal_ranks.append(0.)
    return np.mean(reciprocal_ranks)

def r_precision(r):
    """Precision measured at the rank of the last relevant item.

    Relevance is binary (any nonzero entry is relevant). Returns 0 when the
    ranking contains no relevant item.

    >>> r = [0, 0, 1]
    >>> r_precision(r)
    0.33333333333333331
    >>> r = [0, 1, 0]
    >>> r_precision(r)
    0.5
    >>> r = [1, 0, 0]
    >>> r_precision(r)
    1.0

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the top-ranked item)

    Returns:
        R Precision
    """
    relevant = np.asarray(r) != 0
    hit_positions = relevant.nonzero()[0]
    if hit_positions.size == 0:
        return 0.
    # Keep everything up to and including the last relevant item.
    cutoff = hit_positions[-1] + 1
    return np.mean(relevant[:cutoff])


def precision_at_k(r, k):
    """Fraction of relevant items among the first k ranked items.

    Relevance is binary (any nonzero entry is relevant).

    >>> r = [0, 0, 1]
    >>> precision_at_k(r, 1)
    0.0
    >>> precision_at_k(r, 2)
    0.0
    >>> precision_at_k(r, 3)
    0.33333333333333331
    >>> precision_at_k(r, 4)
    Traceback (most recent call last):
        File "<stdin>", line 1, in ?
    ValueError: Relevance score length < k

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the top-ranked item)
        k: Cut-off rank; must be >= 1 (checked with an assert)

    Returns:
        Precision @ k

    Raises:
        ValueError: when fewer than k relevance scores are available
    """
    assert k >= 1
    top_k = np.asarray(r)[:k]
    if top_k.size == k:
        return np.mean(top_k != 0)
    # Slicing silently truncates, so a short result means len(r) < k.
    raise ValueError('Relevance score length < k')


def average_precision(r):
    """Score is average precision (area under PR curve)

    Relevance is binary (nonzero is relevant). The result is the mean of
    precision@k taken at every rank k that holds a relevant item, or 0 when
    there is no relevant item (including an empty ranking).

    The precisions are computed in a single pass instead of re-scanning the
    prefix of the ranking for every relevant rank.

    >>> r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
    >>> delta_r = 1. / sum(r)
    >>> sum([sum(r[:x + 1]) / (x + 1.) * delta_r for x, y in enumerate(r) if y])
    0.7833333333333333
    >>> average_precision(r)
    0.78333333333333333

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Average precision
    """
    relevant = np.asarray(r) != 0
    hit_positions = np.flatnonzero(relevant)
    if not hit_positions.size:
        return 0.
    # The i-th relevant item (1-based) sits at rank hit_positions[i - 1] + 1,
    # so precision at that rank is i / rank.
    hits_so_far = np.arange(1, hit_positions.size + 1)
    precisions = hits_so_far / (hit_positions + 1)
    return np.mean(precisions)


def mean_average_precision(rs):
    """Mean of `average_precision` over a collection of rankings.

    Relevance is binary (any nonzero entry is relevant).

    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]]
    >>> mean_average_precision(rs)
    0.78333333333333333
    >>> rs = [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]
    >>> mean_average_precision(rs)
    0.39166666666666666

    Args:
        rs: Iterable of relevance-score sequences (list or numpy), each in
            rank order (first element is the top-ranked item)

    Returns:
        Mean average precision
    """
    per_ranking = []
    for ranking in rs:
        per_ranking.append(average_precision(ranking))
    return np.mean(per_ranking)


In [None]:
# View the nested dict stored at results['M02-100']['car_part']['S01'] as a DataFrame.
pd.DataFrame.from_dict(results['M02-100']['car_part']['S01'])

In [None]:
# Same entry as the preceding cell, shown as the raw nested dict.
results['M02-100']['car_part']['S01']

In [None]:
# TODO: Reduce the frames per second (try different values)

In [None]:
# Re-sorting is disabled here; the cell reuses the existing `sorted_results`.
# sorted_results = sort_results(results)
# Pretty-print the ranking stored for 'M01-100' / 'car_part'.
pprint(sorted_results['M01-100']['car_part'])

In [None]:
# Pretty-print every ranking stored under 'M02-100'.
pprint(sorted_results['M02-100'])

In [None]:
def _video_tf_idf(vid, model, codebook, vwords, df, cache):
    """Return (tf-idf vector, its L2 norm) for `vid`, computed at most once per `cache`."""
    vid_path = vid.vid_path
    if vid_path not in cache:
        hist, _ = get_bovw(vid_path, model, codebook, vwords, n = 100)
        tf_idf = calc_tf_idf(hist, df)
        cache[vid_path] = (tf_idf, np.linalg.norm(tf_idf))
    return cache[vid_path]


def get_results(vid_ds, imgs, models, model_codebooks, num_vwords):
    """Compute the cosine similarity between every pair of videos of each app.

    For every visual-word count and every (model, codebook) pair, each video's
    histogram is turned into a tf-idf vector and compared with every video of
    the same app (including itself).

    Each video's tf-idf vector and norm are computed once per configuration and
    reused for all pairs, instead of being recomputed for both videos of every
    pair. This assumes `get_bovw` / `calc_tf_idf` return the same output for the
    same video and configuration (NOTE(review): confirm they are deterministic).

    Args:
        vid_ds: Dataset whose `labels` attribute maps app -> report -> list of
            videos exposing a `vid_path` attribute.
        imgs: Images forwarded to `get_df` to build the document frequencies.
        models: Feature extractors, paired positionally with the codebooks.
        model_codebooks: Mapping whose values are the codebooks, in the same
            order as `models`.
        num_vwords: Iterable of visual-word counts to evaluate.

    Returns:
        Nested defaultdict indexed as
        results['M<idx>-<vwords>'][app][report_i][report_j]['vid_<j>']['vid_<k>']
        holding the cosine similarity of the two videos' tf-idf vectors.
    """
    results = defaultdict(
        lambda: defaultdict(
            lambda: defaultdict(
                lambda: defaultdict(
                    lambda: defaultdict(
                        lambda: defaultdict(float)
                    )
                )
            )
        )
    )

    for vwords in num_vwords:
        # NOTE(review): models and codebooks are zipped positionally and
        # independently of `vwords`; if model_codebooks holds one codebook per
        # (model, vwords) combination this pairs the wrong codebook — confirm.
        for i, (model, codebook) in enumerate(zip(models, model_codebooks.values())):
            df = get_df(imgs, model, codebook, vwords)
            config = f'M{i + 1:02}-{vwords}'
            # Per-configuration cache: video path -> (tf-idf vector, norm).
            cache = {}
            for app, reports in vid_ds.labels.items():
                for report_i, vids_i in reports.items():
                    for report_j, vids_j in reports.items():
                        for j, vid_i in enumerate(vids_i):
                            tf_idf_1, norm_1 = _video_tf_idf(vid_i, model, codebook, vwords, df, cache)
                            for k, vid_j in enumerate(vids_j):
                                tf_idf_2, norm_2 = _video_tf_idf(vid_j, model, codebook, vwords, df, cache)
                                similarity = np.dot(tf_idf_1, tf_idf_2) / (norm_1 * norm_2)
                                results[config][app][report_i][report_j][f'vid_{j}'][f'vid_{k}'] = similarity

    return results

In [None]:
# Compute the similarity scores for all configurations and display them.
# NOTE(review): earlier cells already read `results`, so they depend on this
# cell (or another definition of `results`) having been executed first.
results = get_results(vid_ds, imgs, models_under_study, model_codebooks, num_vwords); results

In [None]:
# get_results builds `results` from nested defaultdicts, so a key that does not
# exist is created as an empty entry here instead of raising KeyError.
results['M01-100']['king_james']['S01']

In [None]:
# Run get_bovw for the first 'S01' video of 'king_james' with model M01 and the
# '00-100' codebook; the two trailing 100s are presumably the number of visual
# words and `n`, mirroring the get_bovw calls in get_results — confirm.
hist, bovw = get_bovw(vid_ds['king_james']['S01'][0].vid_path, M01, model_codebooks['00-100'], 100, 100)

In [None]:
#hide
# Bar chart of the histogram returned by get_bovw.
# NOTE(review): `vwords` is not assigned at notebook level in the visible cells
# (it is only a loop variable inside get_results); it must equal len(hist).
plt.bar(range(vwords), hist)
plt.show()

In [6]:
# Load the stored validation rankings of one model from the results folder.
path2 = Path("/tf/data/results")
model_name = 'M01'
fname = path2/f'{model_name}/val/rankings_val_15000n_10000vw_5ftk.pkl'
# Open the file in a context manager so the handle is closed once loading is
# done (the bare open() call previously left it open).
# NOTE: unpickling can execute arbitrary code; only load trusted result files.
with fname.open('rb') as rankings_file:
    rankings = pickle.load(rankings_file)