In [None]:
#default_exp approach

In [1]:
#hide
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# Approach


> This module contains all the code for defining the various approaches

In [2]:
#export
import copy
import cv2
import multiprocessing
import pickle
import time

import numpy as np

from collections import defaultdict, OrderedDict
from itertools import combinations, combinations_with_replacement, permutations
from joblib import Parallel, delayed
from pathlib import Path

# tango
from tango.eval import *
from tango.features import *
from tango.model import *
from tango.prep import *

from tqdm.auto import tqdm

from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# export
def flatten_dict(d_in, d_out, parent_key):
    for k, v in d_in.items():
        if isinstance(v, dict):
            flatten_dict(v, d_out, parent_key + (k,))
        else:
            d_out[parent_key + (k,)] = v

In [4]:
# export
def gen_extracted_features(vid_ds, mdl, fps, ftk):
    vid_ds_features = {}
    for app in tqdm(vid_ds.labels):
        start = time.time()
        vid_ds_features[app] = {}
        for bug in vid_ds[app]:
            vid_ds_features[app][bug] = {}
            for report in vid_ds[app][bug]:
                vid_ds_features[app][bug][report] = {
                    'features': extract_features(vid_ds[app][bug][report], mdl, fps, frames_to_keep = ftk)
                }
        end = time.time()
        vid_ds_features[app]['elapsed_time'] = end - start
        
    return vid_ds_features

In [5]:
# export
def gen_tfidfs(vid_ds_features, vw, codebook, df, ftk):
    vid_tfids = defaultdict(
        lambda: defaultdict(dict)
    )
    
    for app in vid_ds_features:
        for bug in vid_ds_features[app]:
            if bug == 'elapsed_time': continue
            for report in vid_ds_features[app][bug]:
                bovw = new_get_bovw(
                    vid_ds_features[app][bug][report]['features'],
                    codebook, vw
                )
                vid_tfids[app][bug][report] = calc_tf_idf(bovw, df)
    
    return vid_tfids

In [6]:
# export
def gen_bovw_similarity(vid_ds, vid_ds_features, mdl, codebook, vw, ftk):
    results = defaultdict(
        lambda: defaultdict(
            lambda: defaultdict(
                lambda: defaultdict(
                    lambda: defaultdict(
                        lambda: defaultdict(float)
                    )
                )
            )
        )
    )
    
    vid_ds_features = copy.deepcopy(vid_ds_features)
    df = np.histogram(codebook.labels_, bins = range(vw + 1))[0]
    vid_tfids = gen_tfidfs(vid_ds_features, vw, codebook, df, ftk)
    for app, bugs in vid_ds.labels.items():
        start = time.time()
        l = [(bug, report) for bug in bugs for report in bugs[bug] if bug != 'elapsed_time']
        pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])
        for (bug_i, report_i), (bug_j, report_j) in pairs:
            results[app][bug_i][report_i][bug_j][report_j]['bovw'] = np.dot(vid_tfids[app][bug_i][report_i], vid_tfids[app][bug_j][report_j]) / (np.linalg.norm(vid_tfids[app][bug_i][report_i]) * np.linalg.norm(vid_tfids[app][bug_j][report_j]))
        end = time.time()
        results[app]['elapsed_time'] = end - start + vid_ds_features[app]['elapsed_time']
            
    return df, results

In [8]:
# export
# Modified from geeksforgeeks: https://www.geeksforgeeks.org/longest-common-substring-dp-29/
def fuzzy_LCS(X, Y, m, n, sim_func, codebook, df, vw, mdl_frame_threshold = 0.0):
    LCSuff = [[0 for k in range(n + 1)] for l in range(m + 1)]
    LCSuff_weighted = [[0 for k in range(n + 1)] for l in range(m + 1)]
      
    # To store the length of  
    # longest common substring 
    result = result_weighted = 0 
  
    # Following steps to build 
    # LCSuff[m+1][n+1] in bottom up fashion 
    for i in range(0, m + 1):
        for j in range(0, n + 1):
            if (i == 0 or j == 0): 
                LCSuff[i][j] = 0
                LCSuff_weighted[i][j] = 0
                continue
            
            sim = sim_func(X[i - 1], Y[j - 1], codebook, df, vw)
            if sim > mdl_frame_threshold:
                LCSuff[i][j] = LCSuff[i - 1][j - 1] + sim
                LCSuff_weighted[i][j] = LCSuff_weighted[i - 1][j - 1] + sim * (i / m) * (j / n)
                if LCSuff[i][j] > result:
                    result = LCSuff[i][j]
                    result_weighted = LCSuff_weighted[i][j]
            else: 
                LCSuff[i][j] = 0
                LCSuff_weighted[i][j] = 0
        
    mini, maxi = min(m, n), max(m, n)
    sum_w = 0
    max_v = maxi + 1
    for i in reversed(range(1, mini + 1)):
        sum_w += (i / mini) * (max_v / maxi)
        max_v -= 1
    return result / min(m, n), result_weighted / sum_w

In [357]:
X = [2, 3, 4, 5, 6, 11, 7, 8, 9]
Y = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10]
test_sim_func = lambda *args: int(args[0] == args[1])

In [359]:
lcs, weighted_lcs = fuzzy_LCS(X, Y, len(X), len(Y), test_sim_func, None, None, None)
lcs, weighted_lcs
assert abs(lcs - 0.556) <= 1e-1 and abs(weighted_lcs - 0.183) <= 1e-1

In [9]:
# export
def gen_lcs_similarity(vid_ds, vid_ds_features, sim_func, mdl, codebook, df, vw, ftk):
    results = defaultdict(
        lambda: defaultdict(
            lambda: defaultdict(
                lambda: defaultdict(
                    lambda: defaultdict(
                        lambda: defaultdict(float)
                    )
                )
            )
        )
    )
    
    vid_ds_features = copy.deepcopy(vid_ds_features)
    for app, bugs in vid_ds.labels.items():
        start = time.time()
        l = [(bug, report) for bug in bugs for report in bugs[bug] if bug != 'elapsed_time']
        pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])
        for (bug_i, report_i), (bug_j, report_j) in tqdm(pairs):
            lcs_sim, weighted_lcs_sim = fuzzy_LCS(
                vid_ds_features[app][bug_i][report_i]['features'],
                vid_ds_features[app][bug_j][report_j]['features'],
                len(vid_ds_features[app][bug_i][report_i]['features']),
                len(vid_ds_features[app][bug_j][report_j]['features']),
                sim_func, codebook, df, vw
            )
            results[app][bug_i][report_i][bug_j][report_j]['lcs'] = lcs_sim
            results[app][bug_i][report_i][bug_j][report_j]['weighted_lcs'] = weighted_lcs_sim

        end = time.time()
        results[app]['elapsed_time'] = end - start
        
    return results

In [22]:
# export
def fix_sims(vid_sims, vid_ds):
    for sim_type in vid_sims:
        for app in vid_sims[sim_type]:
            l = [(bug, report) for bug in vid_ds[app] for report in vid_ds[app][bug] if bug != 'elapsed_time']
            pairs = reversed(list(x for x in permutations(l, 2) if x[0] != x[1]))
            for (bug_i, report_i), (bug_j, report_j) in pairs:
                if (bug_i, report_i) == (bug_j, report_j): continue
                vid_sims[sim_type][app][bug_i][report_i][bug_j][report_j] = vid_sims[sim_type][app][bug_j][report_j][bug_i][report_i]
    
    return vid_sims

In [23]:
# export
def sort_rankings(vid_sims):
    sorted_rankings = {}
    for sim_type in vid_sims:
        sorted_rankings[sim_type] = {}
        for app in vid_sims[sim_type]:
            sorted_rankings[sim_type][app] = {'elapsed_time': vid_sims[sim_type][app][f'elapsed_time']}
            for bug in vid_sims[sim_type][app]:
                if bug == 'elapsed_time': continue
                sorted_rankings[sim_type][app][bug] = {}
                for report in vid_sims[sim_type][app][bug]:
                    sorted_rankings[sim_type][app][bug][report] = []
                    d_out = {}
                    flatten_dict(vid_sims[sim_type][app][bug][report], d_out, tuple())
                    sorted_rankings[sim_type][app][bug][report] = OrderedDict(
                        sorted(d_out.items(), key = lambda x: str(x[1]), reverse = True)
                    )
    
    return sorted_rankings

In [24]:
# export
def approach(
    vid_ds, vid_ds_features, bovw_vid_ds_sims, lcs_vid_ds_sims,
    mdl, sim_func, codebook, df, vw, fps = 30, ftk = 1
):
    vid_ds_sims = defaultdict(
        lambda: defaultdict(
            lambda: defaultdict(
                lambda: defaultdict(
                    lambda: defaultdict(
                        lambda: defaultdict(
                            lambda: defaultdict(float)
                        )
                    )
                )
            )
        )
    )
    
    vid_ds_features = copy.deepcopy(vid_ds_features)
    bovw_vid_ds_sims = copy.deepcopy(bovw_vid_ds_sims)
    lcs_vid_ds_sims = copy.deepcopy(lcs_vid_ds_sims)
    for app, bugs in vid_ds.labels.items():
        l = [(bug, report) for bug in bugs for report in bugs[bug] if bug != 'elapsed_time']
        pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])
        for (bug_i, report_i), (bug_j, report_j) in tqdm(pairs):
            lcs = lcs_vid_ds_sims[app][bug_i][report_i][bug_j][report_j]['lcs']
            weighted_lcs = lcs_vid_ds_sims[app][bug_i][report_i][bug_j][report_j]['weighted_lcs']
            vid_ds_sims['lcs'][app][bug_i][report_i][bug_j][report_j] = lcs
            vid_ds_sims['weighted_lcs'][app][bug_i][report_i][bug_j][report_j] = weighted_lcs
            
            bovw = bovw_vid_ds_sims[app][bug_i][report_i][bug_j][report_j]['bovw']
            vid_ds_sims['bovw'][app][bug_i][report_i][bug_j][report_j] = bovw
            vid_ds_sims['bovw_lcs'][app][bug_i][report_i][bug_j][report_j] = (bovw + lcs) / 2
            vid_ds_sims['bovw_weighted_lcs'][app][bug_i][report_i][bug_j][report_j] = (bovw + weighted_lcs) / 2

        bovw_time = bovw_vid_ds_sims[app]['elapsed_time']
        lcs_time = lcs_vid_ds_sims[app]['elapsed_time']
        
        vid_ds_sims['bovw'][app]['elapsed_time'] = bovw_time
        vid_ds_sims['lcs'][app]['elapsed_time'] = lcs_time
        vid_ds_sims['weighted_lcs'][app]['elapsed_time'] = lcs_time
        vid_ds_sims['bovw_lcs'][app]['elapsed_time'] = bovw_time + lcs_time
        vid_ds_sims['bovw_weighted_lcs'][app]['elapsed_time'] = bovw_time + lcs_time

    fixed_vid_ds_sims = fix_sims(vid_ds_sims, vid_ds)
    rankings = sort_rankings(fixed_vid_ds_sims)
    return rankings

In [13]:
path = Path("/tf/data/datasets/validation_set")
vid_ds = VideoDataset.from_path(path).label_from_paths()
vid_ds.get_labels()

['car_report', 'king', 'tasty']

In [14]:
fps = 30
ftk = 1
vw = 1_000
model_01 = 'M01'
simclr = SimCLRModel.load_from_checkpoint(checkpoint_path = str('/tf/data/models/simclr/checkpointepoch=98.ckpt')).eval()
M01 = SimCLRExtractor(simclr)

fname = f'/tf/data/models/codebooks/M01/cookbook_M01_{vw}vw.model'
codebook_01 = pickle.load(open(fname, 'rb'))

In [15]:
vid_ds_features = gen_extracted_features(vid_ds, M01, fps, ftk)
df, bovw_vid_ds_sims = gen_bovw_similarity(vid_ds, vid_ds_features, M01, codebook_01, vw, ftk)
lcs_vid_ds_sims = gen_lcs_similarity(vid_ds, vid_ds_features, simclr_frame_sim, M01, codebook_01, df, vw, ftk)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




In [25]:
# vid_ds_features = gen_extracted_features(vid_ds, M01, fps, ftk)
# df, vid_ds_sims = gen_similarity(vid_ds, vid_ds_features, M01, codebook_01, vw, ftk)
rankings_01 = approach(
    vid_ds, vid_ds_features, bovw_vid_ds_sims, lcs_vid_ds_sims, M01, simclr_frame_sim,
    codebook_01, df, vw, fps = fps, ftk = ftk,
)

HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




In [26]:
rankings_01['weighted_lcs']

{'car_report': {'elapsed_time': 7.567551136016846,
  'S0': {'carlos': OrderedDict([(('S0', 'kevin'), 0.8500608567268618),
                (('S2', 'nathan'), 0.7135313603255126),
                (('S0', 'nathan'), 0.6998692607241017),
                (('S0', 'oscar'), 0.6837033876350945),
                (('S2', 'oscar'), 0.6440685623357086),
                (('S2', 'kevin'), 0.6033778326851981),
                (('S1', 'carlos'), 0.5922630589741926),
                (('S1', 'nathan'), 0.5835621754328408),
                (('S1', 'kevin'), 0.5584753714501858),
                (('S4', 'carlos'), 0.4330672498544056),
                (('S4', 'nathan'), 0.41162325412034984),
                (('S4', 'oscar'), 0.3914870790732923),
                (('S4', 'kevin'), 0.3626835550448691),
                (('S3', 'nathan'), 0.3620487858158673),
                (('S3', 'oscar'), 0.35599820962568535),
                (('S3', 'carlos'), 0.325695841815196),
                (('S1', 'oscar'), 0.31760603

In [27]:
evals = evaluate(rankings_01['lcs'])

car_report Elapsed Time in Seconds 7.567551136016846
car_report σ Rank 0.66332495807108
car_report μ Rank 1.4
car_report Median Rank 1.0
car_report mRR: 0.8333333333333333
car_report mAP: 0.7386541005291004
car_report Hit@1: 0.7
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king Elapsed Time in Seconds 13.515673160552979
king σ Rank 3.201171660501823
king μ Rank 3.05
king Median Rank 2.0
king mRR: 0.6076556776556776
king mAP: 0.5153041351493363
king Hit@1: 0.4
king Hit@5: 0.85
king Hit@10: 0.95
tasty Elapsed Time in Seconds 7.720335960388184
tasty σ Rank 1.423903086589814
tasty μ Rank 1.85
tasty Median Rank 1.0
tasty mRR: 0.7866666666666667
tasty mAP: 0.694087555962556
tasty Hit@1: 0.7
tasty Hit@5: 1.0
tasty Hit@10: 1.0


In [28]:
evals = evaluate(rankings_01['weighted_lcs'])

car_report Elapsed Time in Seconds 7.567551136016846
car_report σ Rank 2.342541355024496
car_report μ Rank 2.25
car_report Median Rank 1.0
car_report mRR: 0.7538095238095238
car_report mAP: 0.5611230898730898
car_report Hit@1: 0.65
car_report Hit@5: 0.9
car_report Hit@10: 1.0
king Elapsed Time in Seconds 13.515673160552979
king σ Rank 2.342541355024496
king μ Rank 2.25
king Median Rank 1.0
king mRR: 0.7583333333333333
king mAP: 0.5491309126603244
king Hit@1: 0.65
king Hit@5: 0.9
king Hit@10: 1.0
tasty Elapsed Time in Seconds 7.720335960388184
tasty σ Rank 3.057368149242089
tasty μ Rank 2.05
tasty Median Rank 1.0
tasty mRR: 0.8844017094017094
tasty mAP: 0.6918369334913452
tasty Hit@1: 0.85
tasty Hit@5: 0.9
tasty Hit@10: 0.95


In [18]:
evals = evaluate(rankings_01['bovw'])

car_report Elapsed Time in Seconds 75.09627985954285
car_report σ Rank 1.7204650534085255
car_report μ Rank 1.8
car_report Median Rank 1.0
car_report mRR: 0.8229166666666667
car_report mAP: 0.7260905736563632
car_report Hit@1: 0.75
car_report Hit@5: 0.95
car_report Hit@10: 1.0
king Elapsed Time in Seconds 119.46583867073059
king σ Rank 1.0198039027185568
king μ Rank 1.6
king Median Rank 1.0
king mRR: 0.7933333333333333
king mAP: 0.6480639268139268
king Hit@1: 0.65
king Hit@5: 1.0
king Hit@10: 1.0
tasty Elapsed Time in Seconds 90.12990093231201
tasty σ Rank 0.6403124237432849
tasty μ Rank 1.3
tasty Median Rank 1.0
tasty mRR: 0.8833333333333332
tasty mAP: 0.8186261423761423
tasty Hit@1: 0.8
tasty Hit@5: 1.0
tasty Hit@10: 1.0


In [152]:
rankings_01.keys()

dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])

In [168]:
rankings_01['weighted_lcs']

{'car_report': {'elapsed_time': 6.928661823272705,
  'S0': {'carlos': OrderedDict([(('S1', 'kevin'), 1.881180198569047),
                (('S2', 'oscar'), 1.661018923918406),
                (('S1', 'carlos'), 1.6209304771925273),
                (('S2', 'kevin'), 1.5560796737670894),
                (('S2', 'nathan'), 1.3895084385286298),
                (('S0', 'kevin'), 1.3869413978175114),
                (('S1', 'nathan'), 1.3206933444006401),
                (('S0', 'nathan'), 1.1197908171585627),
                (('S0', 'oscar'), 1.0075628870411921),
                (('S1', 'oscar'), 0.9695342268860132),
                (('S3', 'oscar'), 0.9181006458767673),
                (('S4', 'carlos'), 0.9117205260092751),
                (('S4', 'oscar'), 0.8859970736921879),
                (('S4', 'nathan'), 0.8665752718323156),
                (('S3', 'kevin'), 0.8463253951909249),
                (('S3', 'carlos'), 0.839952434154979),
                (('S2', 'carlos'), 0.821359382386

In [155]:
evaluate(
    rankings_01['weighted_lcs']
)

car_report Elapsed Time in Seconds 7.055278778076172
car_report σ Rank 4.552746423863293
car_report μ Rank 8.35
car_report Median Rank 7.5
car_report mRR: 0.16373644329526688
car_report mAP: 0.17915332875472195
car_report Hit@1: 0.0
car_report Hit@5: 0.45
car_report Hit@10: 0.7
king Elapsed Time in Seconds 12.535198211669922
king σ Rank 3.667083309661781
king μ Rank 5.05
king Median Rank 4.0
king mRR: 0.35958333333333325
king mAP: 0.31649562884083005
king Hit@1: 0.15
king Hit@5: 0.7
king Hit@10: 0.95
tasty Elapsed Time in Seconds 7.070803880691528
tasty σ Rank 3.8506492958980307
tasty μ Rank 6.85
tasty Median Rank 6.0
tasty mRR: 0.22198412698412695
tasty mAP: 0.22947251471438776
tasty Hit@1: 0.05
tasty Hit@5: 0.35
tasty Hit@10: 0.9


{'car_report': {'S0': {'carlos': {'ranks': [(('S3', 'kevin'),
      2.6354802673323117),
     (('S1', 'kevin'), 2.4786101291054177),
     (('S1', 'oscar'), 2.281448429300074),
     (('S2', 'carlos'), 2.1744416947950396),
     (('S3', 'oscar'), 1.9783615953043887),
     (('S2', 'kevin'), 1.9498842858431629),
     (('S2', 'oscar'), 1.946714142749184),
     (('S1', 'carlos'), 1.9439649970907915),
     (('S3', 'carlos'), 1.9241713021930893),
     (('S4', 'oscar'), 1.9161375894881132),
     (('S4', 'kevin'), 1.837574443064238),
     (('S4', 'nathan'), 1.716967439233211),
     (('S3', 'nathan'), 1.7122483291124042),
     (('S4', 'carlos'), 1.6449110907420779),
     (('S1', 'nathan'), 1.6209021313148635),
     (('S2', 'nathan'), 1.4331826824890939),
     (('S0', 'kevin'), 1.3869413978175114),
     (('S0', 'nathan'), 1.2385220480816703),
     (('S0', 'oscar'), 1.1624756332029376)],
    'rank': 17,
    'average_precision': 0.10927645912166035},
   'kevin': {'ranks': [(('S3', 'kevin'), 1.9680475

In [107]:
rankings_01.keys()

dict_keys(['lcs', 'weighted_lcs', 'bovw', 'bovw_lcs', 'bovw_weighted_lcs'])

In [137]:
rankings_01#['bovw']['car_report']['elapsed_time']

{'lcs': {'car_report': {'elapsed_time': defaultdict(<function __main__.approach.<locals>.<lambda>.<locals>.<lambda>.<locals>.<lambda>.<locals>.<lambda>()>,
               {}),
   'S0': {'carlos': OrderedDict([(('S4', 'carlos'), 0.9466408358679878),
                 (('S0', 'kevin'), 0.9419205453660753),
                 (('S0', 'nathan'), 0.9379733119692121),
                 (('S3', 'oscar'), 0.937370604938931),
                 (('S4', 'oscar'), 0.933237738079495),
                 (('S4', 'nathan'), 0.9232356879446242),
                 (('S1', 'carlos'), 0.909669988685184),
                 (('S3', 'carlos'), 0.9089838332600064),
                 (('S0', 'oscar'), 0.9043333596653409),
                 (('S1', 'kevin'), 0.9031474921438429),
                 (('S3', 'kevin'), 0.9030871921115451),
                 (('S3', 'nathan'), 0.8960431880421109),
                 (('S2', 'kevin'), 0.894526117377811),
                 (('S4', 'kevin'), 0.8895332283443875),
                 (('S2

In [205]:
def tst_evaluate(rankings, top_k = [1, 5, 10]):
    output = {}
    for app in rankings:
        output[app] = {}
        app_rs = []
        for bug in rankings[app]:
            if bug == 'elapsed_time': continue
            output[app][bug] = {}
            bug_rs = []
            for report in rankings[app][bug]:
                output[app][bug][report] = {'ranks': []}
                r = []
                for labels, score in rankings[app][bug][report].items():
                    output[app][bug][report]['ranks'].append((labels, score))
                    if labels[0] == bug: r.append(1)
                    else: r.append(0)
                r = np.asarray(r)
                output[app][bug][report]['rank'] = r.nonzero()[0][0] + 1
                output[app][bug][report]['average_precision'] = average_precision(r)
                bug_rs.append(r)

            bug_rs_std, bug_rs_mean, bug_rs_med, bug_mRR = rank_stats(bug_rs)
            bug_mAP = mean_average_precision(bug_rs)

            output[app][bug]['Bug std rank'] = bug_rs_std
            output[app][bug]['Bug mean rank'] = bug_rs_mean
            output[app][bug]['Bug median rank'] = bug_rs_med
            output[app][bug]['Bug mRR'] = bug_mRR
            output[app][bug]['Bug mAP'] = bug_mAP
            for k in top_k:
                bug_hit_rate = hit_rate_at_k(bug_rs, k)
                output[app][f'Bug Hit@{k}'] = bug_hit_rate
            app_rs.extend(bug_rs)

        app_rs_std, app_rs_mean, app_rs_med, app_mRR = rank_stats(app_rs)
        app_mAP = mean_average_precision(app_rs)

        output[app]['App std rank'] = app_rs_std
        output[app]['App mean rank'] = app_rs_mean
        output[app]['App median rank'] = app_rs_med
        output[app]['App mRR'] = app_mRR
        output[app]['App mAP'] = app_mAP
        print(f'{app} Elapsed Time in Seconds', rankings[app]['elapsed_time'])
        print(f'{app} σ Rank', app_rs_std)
        print(f'{app} μ Rank', app_rs_mean)
        print(f'{app} Median Rank', app_rs_med)
        print(f'{app} mRR:', app_mRR)
        print(f'{app} mAP:', app_mAP)
        for k in top_k:
            app_hit_rate = hit_rate_at_k(app_rs, k)
            output[app][f'App Hit@{k}'] = app_hit_rate
            print(f'{app} Hit@{k}:', app_hit_rate)

In [206]:
tst_evaluate(rankings_01)

car_report Elapsed Time in Seconds 64.54483795166016
car_report σ Rank 3.6318039594669758
car_report μ Rank 2.9
car_report Median Rank 1.0
car_report mRR: 0.7559294871794873
car_report mAP: 0.6176476198380222
car_report Hit@1: 0.7
car_report Hit@5: 0.85
car_report Hit@10: 0.9
king Elapsed Time in Seconds 107.04503345489502
king σ Rank 0.6538348415311012
king μ Rank 1.15
king Median Rank 1.0
king mRR: 0.9625
king mAP: 0.8069936058093953
king Hit@1: 0.95
king Hit@5: 1.0
king Hit@10: 1.0
tasty Elapsed Time in Seconds 80.55947613716125
tasty σ Rank 0.3
tasty μ Rank 1.1
tasty Median Rank 1.0
tasty mRR: 0.95
tasty mAP: 0.8740277777777777
tasty Hit@1: 0.9
tasty Hit@5: 1.0
tasty Hit@10: 1.0


In [43]:
from nbdev.export import notebook2script
notebook2script()

Converted 00_prep.ipynb.
Converted 01_features.ipynb.
Converted 02_eval.ipynb.
Converted 03_cnn.ipynb.
This cell doesn't have an export destination and was ignored:
 
Converted 04.0_experiments.ipynb.
Converted 04.1_experiments.codebooks.ipynb.
Converted 05_model.ipynb.
Converted 06_approach.ipynb.
This cell doesn't have an export destination and was ignored:
 
Converted 07_results.ipynb.
Converted SimCLR.old.ipynb.
Converted [Scratch 1] Tango SimCLR.ipynb.
Converted [Scratch 2] Tango SimCLR.ipynb.
Converted index.ipynb.
Converted lesson1-pets.ipynb.


# SCRATCH WORK

In [112]:
# def fix_sims(vid_sims, vid_ds):
#     fixed_vid_sims = defaultdict(
#         lambda: defaultdict(
#             lambda: defaultdict(
#                 lambda: defaultdict(
#                     lambda: defaultdict(
#                         lambda: defaultdict(
#                             lambda: defaultdict(int)
#                         )
#                     )
#                 )
#             )
#         )
#     )
    
#     for app in vid_sims:
#         l = [(bug, report) for bug in vid_ds[app] for report in vid_ds[app][bug] if bug != 'elapsed_time']
#         pairs = reversed(list(x for x in permutations(l, 2) if x[0] != x[1]))
#         for (bug_i, report_i), (bug_j, report_j) in pairs:
#             if (bug_i, report_i) == (bug_j, report_j): continue
#             vid_sims[app][bug_i][report_i][bug_j][report_j] = vid_sims[app][bug_j][report_j][bug_i][report_i]
#     return vid_sims

In [None]:
def sort_rankings(vid_sims):
    sorted_rankings = {}
    for app in vid_sims:
        sorted_rankings[app] = {'elapsed_time': vid_sims[app]['elapsed_time']}
        for bug in vid_sims[app]:
            if bug == 'elapsed_time' or bug == 'bovw_elapsed_time' \
            or bug == 'lcs_elapsed_time' or bug == 'bovw_lcs_elapsed_time': continue
            sorted_rankings[app][bug] = {}
            for report in vid_sims[app][bug]:
                sorted_rankings[app][bug][report] = []
                d_out = {}
                flatten_dict(vid_sims[app][bug][report], d_out, tuple())
                sorted_rankings[app][bug][report] = OrderedDict(
                    sorted(d_out.items(), key = lambda x: str(x[1]), reverse = True)
                )
    
    return sorted_rankings

In [None]:
def approach(
    vid_ds, vid_ds_features, vid_ds_sims, mdl, sim_func,
    codebook, df, vw, fps = 30, ftk = 1, mode = 'bovw'
):
    vid_ds_features = copy.deepcopy(vid_ds_features)
    vid_ds_sims = copy.deepcopy(vid_ds_sims)
    if 'lcs' in mode:
        for app, bugs in vid_ds.labels.items():
            start = time.time()
            l = [(bug, report) for bug in bugs for report in bugs[bug] if bug != 'elapsed_time']
            pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])
            for (bug_i, report_i), (bug_j, report_j) in tqdm(pairs):
                tfidf_sim = vid_ds_sims[app][bug_i][report_i][bug_j][report_j]
                lcs_sim = fuzzy_LCS(
                    vid_ds_features[app][bug_i][report_i]['features'],
                    vid_ds_features[app][bug_j][report_j]['features'],
                    len(vid_ds_features[app][bug_i][report_i]['features']),
                    len(vid_ds_features[app][bug_j][report_j]['features']),
                    sim_func, codebook, df, vw
                )
                if 'bovw_lcs' == mode:
                    vid_ds_sims[app][bug_i][report_i][bug_j][report_j] = (tfidf_sim + lcs_sim) / 2
                elif 'lcs' == mode:
                    vid_ds_sims[app][bug_i][report_i][bug_j][report_j] = lcs_sim
            end = time.time()
            if 'bovw_lcs' == mode:
                vid_ds_sims[app]['elapsed_time'] = end - start + vid_ds_sims[app]['elapsed_time']
            elif 'lcs' == mode:
                vid_ds_sims[app]['elapsed_time'] = end - start

    vid_ds_sims = fix_sims(vid_ds_sims, vid_ds)
    rankings = sort_rankings(vid_ds_sims)
    return rankings

In [5]:
def gen_all_codebooks(imgs, models, vw):
    for i, model in enumerate(models):
        codebook = gen_vcodebook(imgs, model, vw)
        fname = f'/tf/data/models/cookbook_M{i:02}_{len(imgs)}n_{vw}vw.model'
        pickle.dump(codebook, open(fname, 'wb'))

In [6]:
# def gen_tfidfs(vid_ds, mdl, vw, codebook, df, ftk):
#     vid_tfids = defaultdict(
#         lambda: defaultdict(list)
#     )
#     for app, reports in tqdm(vid_ds.labels.items()):
# #         if app != 'car_report': continue
#         print(app)
#         for i, (report, vids) in enumerate(reports.items()):
#             for vid in vids:
#                 bovw = new_get_bovw(vid, mdl, codebook, vw, frames_to_keep = ftk)
#                 vid_tfids[app][report].append(calc_tf_idf(bovw, df))
    
#     return vid_tfids

In [7]:
def gen_tfidfs(vid_ds_features, vw, codebook, df, ftk):
    vid_tfids = defaultdict(
        lambda: defaultdict(list)
    )
    
    for app in vid_ds_features:
        for report in vid_ds_features[app]:
            for vid in vid_ds_features[app][report]:
                bovw = new_get_bovw(
                    vid_ds_features[app][report][vid]['features'],
                    codebook, vw
                )
                vid_tfids[app][report].append(calc_tf_idf(bovw, df))
    
#     for app, reports in tqdm(vid_ds.labels.items()):
# #         if app != 'car_report': continue
#         print(app)
#         for i, (report, vids) in enumerate(reports.items()):
#             for vid in vids:
# #                 bovw = new_get_bovw(vid, mdl, codebook, vw, frames_to_keep = ftk)
#                 bovw = new_get_bovw(features, codebook, vw, frames_to_keep = ftk)
#                 vid_tfids[app][report].append(calc_tf_idf(bovw, df))
    
    return vid_tfids

In [8]:
def gen_extracted_features(vid_ds, mdl, fps, ftk):
    vid_ds_features = {}
    for app in tqdm(vid_ds.labels):
        start = time.time()
        vid_ds_features[app] = {}
        for report in vid_ds[app]:
            vid_ds_features[app][report] = {}
            for i in range(len(vid_ds[app][report])):
                vid_ds_features[app][report][f'vid_{i}'] = {
                    'features': extract_features(vid_ds[app][report][i], mdl, fps, frames_to_keep = ftk)
                }
        end = time.time()
        vid_ds_features[app]['elapsed_time'] = end - start
        
    return vid_ds_features

In [9]:
# def gen_similarity(vid_ds, mdl, codebook, vw, ftk):
#     results = {}
    
#     df = np.histogram(codebook.labels_, bins = range(vw + 1))[0]
#     vid_tfids = gen_tfidfs(vid_ds, mdl, vw, codebook, df, ftk)
#     for app, reports in vid_ds.labels.items():
#         if app != 'car_report': continue
#         results[app] = {}
#         for report_i in reports:
#             results[app][report_i] = {}
#             for report_j in reports:
#                 results[app][report_i][report_j] = {}
#                 for k in range(len(vid_tfids[app][report_i])):
#                     results[app][report_i][report_j][f'vid_{k}'] = {}
#                     for l in range(len(vid_tfids[app][report_j])):
#                         results[app][report_i][report_j][f'vid_{k}'][f'vid_{l}'] = np.dot(vid_tfids[app][report_i][k], vid_tfids[app][report_j][l]) / (np.linalg.norm(vid_tfids[app][report_i][k]) * np.linalg.norm(vid_tfids[app][report_j][l]))
    
#     return results

In [10]:
# def gen_similarity(vid_ds, vid_ds_features, mdl, codebook, vw, ftk):
#     results = defaultdict(
#         lambda: defaultdict(
#             lambda: defaultdict(
#                 lambda: defaultdict(
#                     lambda: defaultdict(int)
#                 )
#             )
#         )
#     )
    
#     df = np.histogram(codebook.labels_, bins = range(vw + 1))[0]
#     vid_tfids = gen_tfidfs(vid_ds_features, vw, codebook, df, ftk)
#     for app, reports in vid_ds.labels.items():
# #         if app != 'car_report': continue
#         l = [(report, i) for report in reports for i in range(len(reports[report]))]
#         pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])
#         for (report_i, i), (report_j, j) in pairs:
#             results[app][report_i][f'vid_{i}'][report_j][f'vid_{j}'] = np.dot(vid_tfids[app][report_i][i], vid_tfids[app][report_j][j]) / (np.linalg.norm(vid_tfids[app][report_i][i]) * np.linalg.norm(vid_tfids[app][report_j][j]))

#     return df, results

In [None]:
def gen_similarity(vid_ds, vid_ds_features, mdl, codebook, vw, ftk):
    results = defaultdict(
        lambda: defaultdict(
            lambda: defaultdict(
                lambda: defaultdict(
                    lambda: defaultdict(int)
                )
            )
        )
    )
    
    df = np.histogram(codebook.labels_, bins = range(vw + 1))[0]
    vid_tfids = gen_tfidfs(vid_ds_features, vw, codebook, df, ftk)
    for app, reports in vid_ds.labels.items():
        start = time.time()
        l = [(report, i) for report in reports for i in range(len(reports[report]))]
        pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])
        for (report_i, i), (report_j, j) in pairs:
            results[app][report_i][f'vid_{i}'][report_j][f'vid_{j}'] = np.dot(vid_tfids[app][report_i][i], vid_tfids[app][report_j][j]) / (np.linalg.norm(vid_tfids[app][report_i][i]) * np.linalg.norm(vid_tfids[app][report_j][j]))
        end = time.time()
        results[app]['elapsed_time'] = end - start + vid_ds_features[app]['time']
            
    return df, results

In [11]:
def filter_vids(vid_similarities, mdl_vid_threshold = 0.8):
    vid_dict = copy.deepcopy(vid_similarities)
    for app in vid_dict:
        for report_i in vid_dict[app]:
            for report_j in vid_dict[app][report_i]:
                for vid_i in vid_dict[app][report_i][report_j]:
                    for vid_j in vid_dict[app][report_i][report_j][vid_i]:
                        if vid_dict[app][report_i][report_j][vid_i] < mdl_threshold:
                            del vid_dict[app][report_i][report_j][vid_i]
    
    return vid_dict

In [12]:
# def sift_frame_sim(mdl, codebook, frame_i, frame_j, vw, mdl_frame_threshold):
#     if frame_i is None or frame_j is None: return 0
#     features_i = mdl.extract(frame_i)
#     vws_i = np.expand_dims(codebook.predict(features_i), axis=0)
#     bowv_i = np.expand_dims(np.histogram(vws_i, bins = range(vw + 1))[0], axis=0)
    
#     features_j = mdl.extract(frame_j)
#     vws_j = np.expand_dims(codebook.predict(features_j), axis=0)
#     bowv_j = np.expand_dims(np.histogram(vws_j, bins = range(vw + 1))[0], axis=0)
    
#     sim = cosine_similarity(bowv_i, bowv_j)[0][0]
#     return sim

In [13]:
def sift_frame_sim(features_i, features_j, codebook, df, vw):
    vws_i = np.expand_dims(codebook.predict(features_i), axis=0)
    bowv_i = np.expand_dims(np.histogram(vws_i, bins = range(vw + 1))[0], axis=0)
#     tf_idf_i = calc_tf_idf(bowv_i, df)
    
    vws_j = np.expand_dims(codebook.predict(features_j), axis=0)
    bowv_j = np.expand_dims(np.histogram(vws_j, bins = range(vw + 1))[0], axis=0)
#     tf_idf_j = calc_tf_idf(bowv_j, df)
#     sim = np.dot(tf_idf_i, tf_idf_j) / (np.linalg.norm(tf_idf_i) * np.linalg.norm(tf_idf_j))
    # TODO Add getting tfidf and then performing cosine...
    sim = cosine_similarity(bowv_i, bowv_j)[0][0]
    return sim

In [14]:
# def simclr_frame_sim(mdl, codebook, frame_i, frame_j, vw, mdl_frame_threshold):
#     if frame_i is None or frame_j is None: return 0
#     features_i = mdl.extract(frame_i)
#     features_j = mdl.extract(frame_j)
    
#     sim = cosine_similarity(features_i, features_j)[0][0]
#     return sim

In [15]:
def simclr_frame_sim(features_i, features_j, codebook, df, vw):
    sim = cosine_similarity(features_i, features_j)[0][0]
    return sim

In [16]:
# # Modified from geeksforgeeks: https://www.geeksforgeeks.org/longest-common-substring-dp-29/
# def fuzzy_LCS(X, Y, m, n, sim_func, mdl, codebook, vw, mdl_frame_threshold, fps = 30, frames_to_keep = 1):
#     LCSuff = [[0 for k in range(n + 1)] for l in range(m + 1)] 
      
#     # To store the length of  
#     # longest common substring 
#     result = 0 
  
#     # Following steps to build 
#     # LCSuff[m+1][n+1] in bottom up fashion 
#     for i in range(0, m + 1):
#         for j in range(0, n + 1):
#             if (i == 0 or j == 0): 
#                 LCSuff[i][j] = 0
#                 continue
#             sim = sim_func(mdl, codebook, X[i * int(fps / frames_to_keep) -1], Y[j * int(fps / frames_to_keep) -1], vw, mdl_frame_threshold)
# #             print('SIM:', sim)
#             if sim > mdl_frame_threshold: 
#                 LCSuff[i][j] = LCSuff[i-1][j-1] + sim
#                 result = max(result, LCSuff[i][j]) 
#             else: 
#                 LCSuff[i][j] = 0
# #     print('Fuzzy:', result, min(m, n))
#     return result / min(m, n)

In [17]:
# Modified from geeksforgeeks: https://www.geeksforgeeks.org/longest-common-substring-dp-29/
def fuzzy_LCS(X, Y, m, n, sim_func, codebook, df, vw, mdl_frame_threshold = 0.0):
    LCSuff = [[0 for k in range(n + 1)] for l in range(m + 1)] 
      
    # To store the length of  
    # longest common substring 
    result = 0 
  
    # Following steps to build 
    # LCSuff[m+1][n+1] in bottom up fashion 
    for i in range(0, m + 1):
        for j in range(0, n + 1):
            if (i == 0 or j == 0): 
                LCSuff[i][j] = 0
                continue
            sim = sim_func(X[i - 1], Y[j - 1], codebook, df, vw)
#             print('SIM:', sim)
            if sim > mdl_frame_threshold:
                LCSuff[i][j] = LCSuff[i-1][j-1] + sim
                result = max(result, LCSuff[i][j]) 
            else: 
                LCSuff[i][j] = 0
#     print('Fuzzy:', result, min(m, n))
    return result / min(m, n)

In [18]:
def flatten_dict(d_in, d_out, parent_key):
    for k, v in d_in.items():
        if isinstance(v, dict):
            flatten_dict(v, d_out, parent_key + (k,))
        else:
            d_out[parent_key + (k,)] = v

In [19]:
def fix_corpus(results, corpus_size):
    
    for app in results:
#         corpus_size = len(results[app])
        s0 = 0
        v0 = 1
        for i in range(corpus_size):
            if v0 % 4 == 0: s0 += 1
            s1 = 0
            for v1 in range(i + 1):
                if v1 % 4 == 0: s1 += 1
                results[app][f'S{s0}'][f'vid_{v0 % 4}'][f'S{s1 - 1}'][f'vid_{v1 % 4}'] = results[app][f'S{s1 - 1}'][f'vid_{v1 % 4}'][f'S{s0}'][f'vid_{v0 % 4}']
#                 results[app][f'S{s0}'][f'vid_{v0 % 4}'][(f'S{s1 - 1}', f'vid_{v1 % 4}')] = results[app][f'S{s1 - 1}'][f'vid_{v1 % 4}'][(f'S{s0}', f'vid_{v0 % 4}')]
                
#             print('-' * 5)
            v0 += 1
    return results
#     print(i + 1)

In [20]:
def sort_results(results):
    sorted_results = {}
    for app in results:
        sorted_results[app] = {}
        for report in results[app]:
            sorted_results[app][report] = {}
            for vid in results[app][report]:
                sorted_results[app][report][vid] = []
                d_out = {}
                flatten_dict(results[app][report][vid], d_out, tuple())
                sorted_results[app][report][vid] = OrderedDict(
                    sorted(d_out.items(), key = lambda x: str(x[1]), reverse = True)
                )
    
    return sorted_results

In [21]:
def approach(
    vid_ds, mdl, mdl_vid_threshold, sim_func,
    mdl_frame_threshold, codebook, vw, corpus_size,
    fps = 30, ftk = 1, align_sim = False
):
    vid_ds_sims = gen_similarity(vid_ds, mdl, codebook, vw, ftk)
    if align_sim:
        for app, reports in vid_ds.labels.items():
    #         if app != 'car_report': continue
            l = [(report, i) for report in reports for i in range(len(reports[report]))]
            pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])
            for (report_i, i), (report_j, j) in tqdm(pairs):
                tfidf_sim = vid_ds_sims[app][report_i][f'vid_{i}'][report_j][f'vid_{j}']
                lcs_sim = fuzzy_LCS(
                    vid_ds[app][report_i][i], vid_ds[app][report_j][j],
                    int(len(vid_ds[app][report_i][i]) / int(fps / ftk)),
                    int(len(vid_ds[app][report_j][j]) / int(fps / ftk)),
                    sim_func, mdl, codebook, vw, mdl_frame_threshold, fps, ftk
                )
                vid_ds_sims[app][report_i][f'vid_{i}'][report_j][f'vid_{j}'] = (tfidf_sim + lcs_sim) / 2
#     for app, reports in vid_ds.labels.items():
#         if app != 'car_report': continue
#         for i, report_i in tqdm(enumerate(reports), total = len(reports)):
# #             if i > 1: break
#             for j, report_j in tqdm(enumerate(reports), total = len(reports)):
# #                 if j > 1: break
#                 for k in range(len(vid_ds[app][report_i])):
# #                     if k > 1: break
#                     for l in range(len(vid_ds[app][report_j])):
# #                         if l > 1: break
#                         tfidf_sim = vid_ds_sims[app][report_i][report_j][f'vid_{k}'][f'vid_{l}']
#                         lcs_sim = fuzzy_LCS(
#                             vid_ds[app][report_i][k], vid_ds[app][report_j][l],
#                             int(len(vid_ds[app][report_i][k]) / int(fps / ftk)), int(len(vid_ds[app][report_i][l]) / int(fps / ftk)),
#                             sim_func, mdl, codebook, vw, mdl_frame_threshold, fps, ftk
#                         )
#                         print(tfidf_sim, lcs_sim)
#                         vid_ds_sims[app][report_i][report_j][f'vid_{k}'][f'vid_{l}'] = (tfidf_sim + lcs_sim) / 2
#     print(vid_ds_sims)

    vid_ds_sims = fix_corpus(vid_ds_sims, corpus_size)
    results = sort_results(vid_ds_sims)
    return results

In [22]:
def cached_approach(
    vid_ds, mdl, mdl_vid_threshold, sim_func,
    mdl_frame_threshold, codebook, vw, corpus_size,
    fps = 30, ftk = 1, lcs = False
):
    vid_ds_features = gen_extracted_features(vid_ds, mdl, fps, ftk)
    df, vid_ds_sims = gen_similarity(vid_ds, vid_ds_features, mdl, codebook, vw, ftk)
    if lcs:
        for app, reports in vid_ds.labels.items():
    #         if app != 'car_report': continue
            l = [(report, i) for report in reports for i in range(len(reports[report]))]
            pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])
            for (report_i, i), (report_j, j) in tqdm(pairs):
                tfidf_sim = vid_ds_sims[app][report_i][f'vid_{i}'][report_j][f'vid_{j}']
                lcs_sim = fuzzy_LCS(
                    vid_ds_features[app][report_i][f'vid_{i}']['features'],
                    vid_ds_features[app][report_j][f'vid_{j}']['features'],
                    int(len(vid_ds[app][report_i][i]) / int(fps / ftk)),
                    int(len(vid_ds[app][report_j][j]) / int(fps / ftk)),
#                     len(vid_ds_features[app][report_i][f'vid_{i}']['features']),
#                     len(vid_ds_features[app][report_j][f'vid_{j}']['features']),
                    sim_func, codebook, df, vw, mdl_frame_threshold
                )
                vid_ds_sims[app][report_i][f'vid_{i}'][report_j][f'vid_{j}'] = (tfidf_sim + lcs_sim) / 2

    vid_ds_sims = fix_corpus(vid_ds_sims, corpus_size)
    results = sort_results(vid_ds_sims)
    return results

In [24]:
def new_cached_approach(
    vid_ds, vid_ds_features, vid_ds_sims, mdl, mdl_vid_threshold, sim_func,
    mdl_frame_threshold, codebook, df, vw, corpus_size,
    fps = 30, ftk = 1, lcs = False
):
    if lcs:
        for app, reports in vid_ds.labels.items():
            l = [(report, i) for report in reports for i in range(len(reports[report]))]
            pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])
            for (report_i, i), (report_j, j) in tqdm(pairs):
                tfidf_sim = vid_ds_sims[app][report_i][f'vid_{i}'][report_j][f'vid_{j}']
                lcs_sim = fuzzy_LCS(
                    vid_ds_features[app][report_i][f'vid_{i}']['features'],
                    vid_ds_features[app][report_j][f'vid_{j}']['features'],
                    len(vid_ds_features[app][report_i][f'vid_{i}']['features']),
                    len(vid_ds_features[app][report_j][f'vid_{j}']['features']),
                    sim_func, codebook, df, vw, mdl_frame_threshold
                )
                vid_ds_sims[app][report_i][f'vid_{i}'][report_j][f'vid_{j}'] = (tfidf_sim + lcs_sim) / 2

    vid_ds_sims = fix_corpus(vid_ds_sims, corpus_size)
    results = sort_results(vid_ds_sims)
    return results

In [25]:
def get_eval_results(evals, app, item):
    for s in evals[app]:
        for vid in evals[app][s]:
            try:
                print(evals[app][s][vid][item])
            except: continue

In [21]:
l = [(f'S{i}', f'vid{j}') for i in range(5) for j in range(4)]
pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])

In [38]:
path = Path("/tf/data/datasets/validation_set")
vid_ds = VideoDataset.from_path(path).label_from_paths()
vid_ds.get_labels()

['car_report', 'king', 'tasty']

In [27]:
mdl_00_vid_threshold = 0.9
mdl_00_frame_threshold = 0.9
vw = 100

M00 = SIFTExtractor(cv2.xfeatures2d.SIFT_create())
fname = f'/tf/data/models/cookbook_M00_1000n_100vw.model'
codebook_00 = pickle.load(open(fname, 'rb'))

In [None]:
results_00 = approach(
    vid_ds, M00, mdl_00_vid_threshold, sift_frame_sim,
    mdl_00_frame_threshold, codebook_00, vw, 19, fps = 30, ftk = 1
)

In [None]:
results_00_bovw_align = approach(
    vid_ds, M00, mdl_00_vid_threshold, sift_frame_sim,
    mdl_00_frame_threshold, codebook_00, vw, 19, fps = 30, ftk = 1,
    align_sim = True
)

In [35]:
evals_00 = evaluate(results_00) # ftk = 1

car_report σ Rank 2.277608394786075
car_report μ Rank 2.25
car_report Median Rank 1.5
car_report mRR: 0.6937121212121212
car_report mAP: 0.5326424678262913
king σ Rank 4.363484845854286
king μ Rank 3.4
king Median Rank 1.0
king mRR: 0.6980373303167421
king mAP: 0.4601591918716531
tasty σ Rank 0.7810249675906654
tasty μ Rank 1.3
tasty Median Rank 1.0
tasty mRR: 0.9041666666666666
tasty mAP: 0.8303373015873016


In [60]:
get_eval_results(evals_00, 'tasty', 'average_precision')

0.3222222222222222
0.29444444444444445
0.7916666666666666
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.7916666666666666
0.5555555555555555
0.8666666666666667
0.625
0.7666666666666666
0.8095238095238094
0.9166666666666666
0.8666666666666667


TypeError: 'numpy.float64' object is not iterable

In [69]:
results_00_cache = cached_approach(
    vid_ds, M00, mdl_00_vid_threshold, simclr_frame_sim,
    mdl_00_frame_threshold, codebook_00, vw, 19, fps = 30, ftk = 1
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




In [70]:
evaluate(results_00_cache) # ftk = 1

car_report σ Rank 2.277608394786075
car_report μ Rank 2.25
car_report Median Rank 1.5
car_report mRR: 0.6937121212121212
car_report mAP: 0.5326424678262913
car_report Hit@5: 0.95
king σ Rank 4.363484845854286
king μ Rank 3.4
king Median Rank 1.0
king mRR: 0.6980373303167421
king mAP: 0.4601591918716531
king Hit@5: 0.8
tasty σ Rank 0.7810249675906654
tasty μ Rank 1.3
tasty Median Rank 1.0
tasty mRR: 0.9041666666666666
tasty mAP: 0.8303373015873016
tasty Hit@5: 1.0


{'car_report': {'S0': {'vid_0': {'ranks': [(('S4', 'vid_0'),
      0.9954718971541773),
     (('S4', 'vid_2'), 0.9945964770493874),
     (('S3', 'vid_0'), 0.9939976838697654),
     (('S3', 'vid_2'), 0.9931883158114038),
     (('S0', 'vid_2'), 0.9929479446868809),
     (('S4', 'vid_3'), 0.9928277720624915),
     (('S2', 'vid_0'), 0.9924880112472121),
     (('S0', 'vid_3'), 0.9916407619640583),
     (('S3', 'vid_3'), 0.9916281614374629),
     (('S3', 'vid_1'), 0.9908729735712648),
     (('S0', 'vid_1'), 0.9904742624406746),
     (('S4', 'vid_1'), 0.9896292570686976),
     (('S1', 'vid_2'), 0.9869854517117607),
     (('S1', 'vid_0'), 0.9865547211253137),
     (('S1', 'vid_3'), 0.986118447341411),
     (('S2', 'vid_1'), 0.9850550846216436),
     (('S2', 'vid_3'), 0.983298468501515),
     (('S1', 'vid_1'), 0.9793310935304032),
     (('S2', 'vid_2'), 0.9771318857464869)],
    'rank': 5,
    'average_precision': 0.24090909090909088},
   'vid_1': {'ranks': [(('S3', 'vid_1'), 0.9939760064728501

In [76]:
results_00_cache_bovw_align = cached_approach(
    vid_ds, M00, mdl_00_vid_threshold, simclr_frame_sim,
    mdl_00_frame_threshold, codebook_00, vw, 19, fps = 30, ftk = 1,
    align_sim = True
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




In [39]:
results_00_cache_bovw_align = cached_approach(
    vid_ds, M00, mdl_00_vid_threshold, sift_frame_sim,
    0.9, codebook_00, vw, 19, fps = 30, ftk = 1,
    lcs = True
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




In [41]:
evals_00_bovw_align = evaluate(results_00_cache_bovw_align) # ftk = 1, threshold = 0.9

car_report σ Rank 1.061838029079765
car_report μ Rank 1.65
car_report Median Rank 1.0
car_report mRR: 0.7849999999999999
car_report mAP: 0.5764307424601541
car_report Hit@1: 0.65
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king σ Rank 1.5898113095584645
king μ Rank 1.85
king Median Rank 1.0
king mRR: 0.7958333333333333
king mAP: 0.563292371328362
king Hit@1: 0.7
king Hit@5: 0.9
king Hit@10: 1.0
tasty σ Rank 3.0409702399069944
tasty μ Rank 2.95
tasty Median Rank 1.5
tasty mRR: 0.6344047619047618
tasty mAP: 0.5638939995189995
tasty Hit@1: 0.5
tasty Hit@5: 0.85
tasty Hit@10: 0.95


In [120]:
evals_00_bovw_align = evaluate(results_00_cache_bovw_align) # ftk = 1, threshold = 0.9

car_report σ Rank 0.7399324293474372
car_report μ Rank 1.45
car_report Median Rank 1.0
car_report mRR: 0.825
car_report mAP: 0.6048242933537049
car_report Hit@1: 0.7
car_report Hit@5: 1.0
car_report Hit@10: 1.0
king σ Rank 1.5459624833740306
king μ Rank 1.9
king Median Rank 1.0
king mRR: 0.7699999999999999
king mAP: 0.5763462292351609
king Hit@1: 0.65
king Hit@5: 0.95
king Hit@10: 1.0
tasty σ Rank 3.0594117081556713
tasty μ Rank 2.8
tasty Median Rank 1.5
tasty mRR: 0.6594047619047618
tasty mAP: 0.575558420337832
tasty Hit@1: 0.5
tasty Hit@5: 0.85
tasty Hit@10: 0.95


In [104]:
evals_00_bovw_align = evaluate(results_00_cache_bovw_align) # ftk = 1, threshold = 0.9

car_report σ Rank 2.868361901852693
car_report μ Rank 4.85
car_report Median Rank 4.0
car_report mRR: 0.3191040903540904
car_report mAP: 0.26913587909911435
car_report Hit@1: 0.1
car_report Hit@5: 0.65
car_report Hit@10: 0.95
king σ Rank 3.5982634700644147
king μ Rank 5.05
king Median Rank 4.0
king mRR: 0.40775793650793657
king mAP: 0.3212196782262572
king Hit@1: 0.25
king Hit@5: 0.6
king Hit@10: 0.9
tasty σ Rank 1.7578395831246945
tasty μ Rank 2.9
tasty Median Rank 2.0
tasty mRR: 0.5449999999999999
tasty mAP: 0.4172521922521922
tasty Hit@1: 0.35
tasty Hit@5: 1.0
tasty Hit@10: 1.0


In [99]:
evaluate(results_00_cache_bovw_align) # ftk = 1, threshold = 0.0

car_report σ Rank 2.868361901852693
car_report μ Rank 4.85
car_report Median Rank 4.0
car_report mRR: 0.3191040903540904
car_report mAP: 0.26913587909911435
car_report Hit@1: 0.1
car_report Hit@5: 0.65
car_report Hit@10: 0.95
king σ Rank 3.5982634700644147
king μ Rank 5.05
king Median Rank 4.0
king mRR: 0.40775793650793657
king mAP: 0.3214280115595905
king Hit@1: 0.25
king Hit@5: 0.6
king Hit@10: 0.9
tasty σ Rank 1.7578395831246945
tasty μ Rank 2.9
tasty Median Rank 2.0
tasty mRR: 0.5449999999999999
tasty mAP: 0.4172521922521922
tasty Hit@1: 0.35
tasty Hit@5: 1.0
tasty Hit@10: 1.0


{'car_report': {'S0': {'vid_0': {'ranks': [(('S2', 'vid_0'),
      0.808728590372351),
     (('S1', 'vid_0'), 0.8057658754926667),
     (('S4', 'vid_0'), 0.7476912972475713),
     (('S4', 'vid_3'), 0.7464049639609809),
     (('S3', 'vid_0'), 0.6844920842582813),
     (('S3', 'vid_1'), 0.6829318711509527),
     (('S0', 'vid_2'), 0.6631147854954358),
     (('S4', 'vid_2'), 0.4972982385246937),
     (('S3', 'vid_2'), 0.4965941579057019),
     (('S0', 'vid_3'), 0.49582038098202913),
     (('S3', 'vid_3'), 0.49581408071873145),
     (('S0', 'vid_1'), 0.4952371312203373),
     (('S4', 'vid_1'), 0.4948146285343488),
     (('S1', 'vid_2'), 0.49349272585588033),
     (('S1', 'vid_3'), 0.4930592236707055),
     (('S2', 'vid_1'), 0.4925275423108218),
     (('S2', 'vid_3'), 0.4916492342507575),
     (('S1', 'vid_1'), 0.4896655467652016),
     (('S2', 'vid_2'), 0.48856594287324345)],
    'rank': 7,
    'average_precision': 0.1976190476190476},
   'vid_1': {'ranks': [(('S1', 'vid_3'), 0.898623735709

In [86]:
get_eval_results(evals_00_bovw_align, 'tasty', 'average_precision')

0.21666666666666667
0.16402116402116404
0.6333333333333333
0.29870129870129875
0.2853535353535353
0.6388888888888888
0.6388888888888888
0.7555555555555555
0.2888888888888889
0.2686868686868687
0.425
0.3833333333333333
0.4444444444444444
0.35555555555555557
0.12103174603174603
0.27777777777777773
1.0
1.0
1.0
0.9166666666666666


TypeError: 'numpy.float64' object is not iterable

In [111]:
import pickle
results_path = Path('/tf/data/results/M00')

with open(results_path/'results_00.pkl', 'wb') as f:
    pickle.dump(results_00, f, protocol=pickle.HIGHEST_PROTOCOL)

with open(results_path/'evals_00.pkl', 'wb') as f:
    pickle.dump(evals_00, f, protocol=pickle.HIGHEST_PROTOCOL)

with open(results_path/'results_00_bovw_align.pkl', 'wb') as f:
    pickle.dump(results_00_bovw_align, f, protocol=pickle.HIGHEST_PROTOCOL)

with open(results_path/'evals_00_bovw_align.pkl', 'wb') as f:
    pickle.dump(evals_00_bovw_align, f, protocol=pickle.HIGHEST_PROTOCOL)

In [44]:
import pickle

results_path = Path('/tf/data/results/M00')
results_00 = pickle.load(open(results_path/'results_00.pkl', 'rb'))

In [91]:
results_path = Path('/tf/data/results/M00')

eval_sift_bovw_align = pickle.load( open( results_path/'evals_00_bovw_align.pkl', "rb" ) )

In [95]:
eval_sift_bovw_align['car_report']

{'S0': {'vid_0': {'ranks': [(('S0', 'vid_2'), 0.9841517610647245),
    (('S0', 'vid_1'), 0.8032278577774794),
    (('S4', 'vid_0'), 0.7394318735477277),
    (('S0', 'vid_3'), 0.7377530739649625),
    (('S4', 'vid_2'), 0.7364081734894096),
    (('S1', 'vid_2'), 0.7347430196774605),
    (('S2', 'vid_0'), 0.7344045295622541),
    (('S1', 'vid_3'), 0.732794667168452),
    (('S4', 'vid_3'), 0.731848462172171),
    (('S1', 'vid_0'), 0.7287487702039421),
    (('S2', 'vid_3'), 0.7224495133817408),
    (('S3', 'vid_2'), 0.6828160863506159),
    (('S3', 'vid_0'), 0.6828127740633692),
    (('S3', 'vid_3'), 0.6816461100753022),
    (('S3', 'vid_1'), 0.6812751300108997),
    (('S4', 'vid_1'), 0.6808115291281194),
    (('S2', 'vid_1'), 0.6781688814778082),
    (('S1', 'vid_1'), 0.6758977344828558),
    (('S2', 'vid_2'), 0.6731585643976764)],
   'rank': 1,
   'average_precision': 0.9166666666666666},
  'vid_1': {'ranks': [(('S0', 'vid_2'), 0.895140096952731),
    (('S0', 'vid_0'), 0.8032278577774794)

In [100]:
# import json
# class NpEncoder(json.JSONEncoder):
#     def default(self, obj):
#         if isinstance(obj, np.integer):
#             return int(obj)
#         elif isinstance(obj, np.floating):
#             return float(obj)
#         elif isinstance(obj, np.ndarray):
#             return obj.tolist()
#         elif isinstance(obj, tuple):
#             return str(obj)
#         else:
#             return super(NpEncoder, self).default(obj)
# results_path = Path('/tf/data/results/M00')
# with open(results_path/'results_00.json', 'w') as fp:
#     json.dump(results_00, fp, cls = NpEncoder)

# with open(results_path/'evals_00.json', 'w') as fp:
#     json.dump(evals_00, fp, cls = NpEncoder)

# # with open(results_path/'results_00_bovw_align.json', 'w') as fp:
# #     json.dump(results_00_bovw_align, fp)

# with open(results_path/'evals_00_bovw_align.json', 'w') as fp:
#     json.dump(evals_00_bovw_align, fp, cls = NpEncoder)

TypeError: key ('S4', 'vid_0') is not a string

In [31]:
mdl_01_vid_threshold = 0.0
mdl_01_frame_threshold = 0.0
model = SimCLRModel.load_from_checkpoint(checkpoint_path='/tf/data/models/simclr/checkpointepoch=98.ckpt').eval()
M01 = SimCLRExtractor(model)

fname = f'/tf/data/models/cookbook_M01_1000n_100vw.model'
codebook_01 = pickle.load(open(fname, 'rb'))

In [48]:
results_01_cache = cached_approach(
    vid_ds, M01, mdl_01_vid_threshold, simclr_frame_sim,
    mdl_01_frame_threshold, codebook_01, vw, 19, fps = 30, ftk = 1
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




In [112]:
evaluate(results_01_cache) # ftk = 1, no threshold

car_report σ Rank 3.6318039594669758
car_report μ Rank 2.9
car_report Median Rank 1.0
car_report mRR: 0.7559294871794873
car_report mAP: 0.6176476198380222
car_report Hit@1: 0.7
car_report Hit@5: 0.85
car_report Hit@10: 0.9
king σ Rank 0.6538348415311012
king μ Rank 1.15
king Median Rank 1.0
king mRR: 0.9625
king mAP: 0.8069936058093953
king Hit@1: 0.95
king Hit@5: 1.0
king Hit@10: 1.0
tasty σ Rank 0.3
tasty μ Rank 1.1
tasty Median Rank 1.0
tasty mRR: 0.95
tasty mAP: 0.8740277777777777
tasty Hit@1: 0.9
tasty Hit@5: 1.0
tasty Hit@10: 1.0


{'car_report': {'S0': {'vid_0': {'ranks': [(('S4', 'vid_3'),
      0.9179039973367417),
     (('S1', 'vid_2'), 0.916244867784407),
     (('S2', 'vid_0'), 0.8988486868327271),
     (('S1', 'vid_0'), 0.8926742868310932),
     (('S2', 'vid_1'), 0.886525676974221),
     (('S4', 'vid_0'), 0.886018344735942),
     (('S4', 'vid_2'), 0.886018344735942),
     (('S2', 'vid_3'), 0.8846630799210273),
     (('S4', 'vid_1'), 0.8826717449140168),
     (('S2', 'vid_2'), 0.8704217558863261),
     (('S1', 'vid_1'), 0.8674557230011675),
     (('S0', 'vid_2'), 0.8669034323511741),
     (('S1', 'vid_3'), 0.8322814715939507),
     (('S0', 'vid_1'), 0.8062496404103776),
     (('S3', 'vid_3'), 0.7864373613335622),
     (('S3', 'vid_0'), 0.7545381883890863),
     (('S3', 'vid_1'), 0.7221121934825507),
     (('S3', 'vid_2'), 0.650566822070925),
     (('S0', 'vid_3'), 0.6222576366794502)],
    'rank': 12,
    'average_precision': 0.1280284043441938},
   'vid_1': {'ranks': [(('S2', 'vid_1'), 0.9127318198237152),


In [49]:
evaluate(results_01_cache) # ftk = 1, no threshold

car_report σ Rank 3.6318039594669758
car_report μ Rank 2.9
car_report Median Rank 1.0
car_report mRR: 0.7559294871794873
car_report mAP: 0.6176476198380222
king σ Rank 0.6538348415311012
king μ Rank 1.15
king Median Rank 1.0
king mRR: 0.9625
king mAP: 0.8069936058093953
tasty σ Rank 0.3
tasty μ Rank 1.1
tasty Median Rank 1.0
tasty mRR: 0.95
tasty mAP: 0.8740277777777777


{'car_report': {'S0': {'vid_0': {'ranks': [(('S4', 'vid_3'),
      0.9179039973367417),
     (('S1', 'vid_2'), 0.916244867784407),
     (('S2', 'vid_0'), 0.8988486868327271),
     (('S1', 'vid_0'), 0.8926742868310932),
     (('S2', 'vid_1'), 0.886525676974221),
     (('S4', 'vid_0'), 0.886018344735942),
     (('S4', 'vid_2'), 0.886018344735942),
     (('S2', 'vid_3'), 0.8846630799210273),
     (('S4', 'vid_1'), 0.8826717449140168),
     (('S2', 'vid_2'), 0.8704217558863261),
     (('S1', 'vid_1'), 0.8674557230011675),
     (('S0', 'vid_2'), 0.8669034323511741),
     (('S1', 'vid_3'), 0.8322814715939507),
     (('S0', 'vid_1'), 0.8062496404103776),
     (('S3', 'vid_3'), 0.7864373613335622),
     (('S3', 'vid_0'), 0.7545381883890863),
     (('S3', 'vid_1'), 0.7221121934825507),
     (('S3', 'vid_2'), 0.650566822070925),
     (('S0', 'vid_3'), 0.6222576366794502)],
    'rank': 12,
    'average_precision': 0.1280284043441938},
   'vid_1': {'ranks': [(('S2', 'vid_1'), 0.9127318198237152),


In [61]:
evaluate(results_01_bovw_align_cache) # ftk = 1, no threshold

car_report σ Rank 2.2555487137279924
car_report μ Rank 2.25
car_report Median Rank 1.0
car_report mRR: 0.75
car_report mAP: 0.6847485567551357
king σ Rank 0.6403124237432849
king μ Rank 1.3
king Median Rank 1.0
king mRR: 0.8833333333333332
king mAP: 0.7433590921748816
tasty σ Rank 0.47696960070847283
tasty μ Rank 1.15
tasty Median Rank 1.0
tasty mRR: 0.9416666666666667
tasty mAP: 0.8956150793650794


{'car_report': {'S0': {'vid_0': {'ranks': [(('S4', 'vid_3'),
      0.9255708677081183),
     (('S4', 'vid_0'), 0.9163295903019648),
     (('S4', 'vid_2'), 0.9046270163402831),
     (('S2', 'vid_0'), 0.9026971596240374),
     (('S0', 'vid_2'), 0.9024383721601931),
     (('S1', 'vid_0'), 0.9011721377581385),
     (('S1', 'vid_2'), 0.8960606406178538),
     (('S2', 'vid_1'), 0.8905258971760159),
     (('S4', 'vid_1'), 0.8861024866292022),
     (('S1', 'vid_1'), 0.8853016075725052),
     (('S2', 'vid_3'), 0.8834868577173784),
     (('S0', 'vid_1'), 0.8740850928882264),
     (('S2', 'vid_2'), 0.8740055669891751),
     (('S3', 'vid_3'), 0.8619039831362466),
     (('S1', 'vid_3'), 0.8559371802395124),
     (('S3', 'vid_0'), 0.8317610108245463),
     (('S3', 'vid_1'), 0.8125996927970479),
     (('S3', 'vid_2'), 0.773305005056518),
     (('S0', 'vid_3'), 0.7632954981723956)],
    'rank': 5,
    'average_precision': 0.17485380116959068},
   'vid_1': {'ranks': [(('S2', 'vid_1'), 0.907841045893894

In [60]:
results_01_bovw_align_cache = cached_approach(
    vid_ds, M01, mdl_01_vid_threshold, simclr_frame_sim,
    mdl_01_frame_threshold, codebook_01, vw, 19, fps = 30, ftk = 1,
    align_sim = True
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




In [63]:
results_01_bovw_align_cache = cached_approach(
    vid_ds, M01, mdl_01_vid_threshold, simclr_frame_sim,
    0.9, codebook_01, vw, 19, fps = 30, ftk = 1,
    align_sim = True
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))




In [65]:
evaluate(results_01_bovw_align_cache) # ftk = 1, threshold = 0.9

car_report σ Rank 1.9358460682605938
car_report μ Rank 2.55
car_report Median Rank 2.0
car_report mRR: 0.6245833333333334
car_report mAP: 0.5270158033973823
car_report Hit@5: 0.9
king σ Rank 1.5644487847162016
king μ Rank 1.55
king Median Rank 1.0
king mRR: 0.8729166666666666
king mAP: 0.781579439079439
king Hit@5: 0.95
tasty σ Rank 0.3570714214271425
tasty μ Rank 1.15
tasty Median Rank 1.0
tasty mRR: 0.925
tasty mAP: 0.9129761904761905
tasty Hit@5: 1.0


{'car_report': {'S0': {'vid_0': {'ranks': [(('S0', 'vid_2'),
      0.7105213571866889),
     (('S4', 'vid_3'), 0.6802873402013959),
     (('S0', 'vid_1'), 0.67666704225469),
     (('S2', 'vid_0'), 0.6708428996562334),
     (('S1', 'vid_0'), 0.6681127248548827),
     (('S4', 'vid_0'), 0.6646736573721841),
     (('S4', 'vid_2'), 0.6646495307365267),
     (('S2', 'vid_1'), 0.6642186772468507),
     (('S2', 'vid_3'), 0.6637950910844507),
     (('S1', 'vid_2'), 0.663617529807468),
     (('S4', 'vid_1'), 0.6620987842270872),
     (('S1', 'vid_1'), 0.6550957173673339),
     (('S1', 'vid_3'), 0.6339922653157386),
     (('S3', 'vid_3'), 0.6091770242786043),
     (('S2', 'vid_2'), 0.6014883792613529),
     (('S3', 'vid_0'), 0.5984580235404158),
     (('S3', 'vid_1'), 0.5818367243363292),
     (('S3', 'vid_2'), 0.5464383524883365),
     (('S0', 'vid_3'), 0.47716250354709205)],
    'rank': 1,
    'average_precision': 0.6081871345029239},
   'vid_1': {'ranks': [(('S4', 'vid_0'), 0.6899753222571436)

In [64]:
evaluate(results_01_bovw_align_cache) # ftk = 1, threshold = 0.9

car_report σ Rank 1.9358460682605938
car_report μ Rank 2.55
car_report Median Rank 2.0
car_report mRR: 0.6245833333333334
car_report mAP: 0.5270158033973823
king σ Rank 1.5644487847162016
king μ Rank 1.55
king Median Rank 1.0
king mRR: 0.8729166666666666
king mAP: 0.781579439079439
tasty σ Rank 0.3570714214271425
tasty μ Rank 1.15
tasty Median Rank 1.0
tasty mRR: 0.925
tasty mAP: 0.9129761904761905


{'car_report': {'S0': {'vid_0': {'ranks': [(('S0', 'vid_2'),
      0.7105213571866889),
     (('S4', 'vid_3'), 0.6802873402013959),
     (('S0', 'vid_1'), 0.67666704225469),
     (('S2', 'vid_0'), 0.6708428996562334),
     (('S1', 'vid_0'), 0.6681127248548827),
     (('S4', 'vid_0'), 0.6646736573721841),
     (('S4', 'vid_2'), 0.6646495307365267),
     (('S2', 'vid_1'), 0.6642186772468507),
     (('S2', 'vid_3'), 0.6637950910844507),
     (('S1', 'vid_2'), 0.663617529807468),
     (('S4', 'vid_1'), 0.6620987842270872),
     (('S1', 'vid_1'), 0.6550957173673339),
     (('S1', 'vid_3'), 0.6339922653157386),
     (('S3', 'vid_3'), 0.6091770242786043),
     (('S2', 'vid_2'), 0.6014883792613529),
     (('S3', 'vid_0'), 0.5984580235404158),
     (('S3', 'vid_1'), 0.5818367243363292),
     (('S3', 'vid_2'), 0.5464383524883365),
     (('S0', 'vid_3'), 0.47716250354709205)],
    'rank': 1,
    'average_precision': 0.6081871345029239},
   'vid_1': {'ranks': [(('S4', 'vid_0'), 0.6899753222571436)

In [75]:
evals_01_bovw_align = evaluate(results_01_bovw_align) # ftk = 1, threshold = 0.9

car_report σ Rank 3.528101472463625
car_report μ Rank 3.05
car_report Median Rank 2.0
car_report mRR: 0.6248511904761905
car_report mAP: 0.5223357816217723
king σ Rank 1.5652475842498528
king μ Rank 1.5
king Median Rank 1.0
king mRR: 0.8979166666666666
king mAP: 0.7741214779372674
tasty σ Rank 0.3
tasty μ Rank 1.1
tasty Median Rank 1.0
tasty mRR: 0.95
tasty mAP: 0.9030555555555555


In [28]:
results_01 = approach(
    vid_ds, M01, mdl_01_vid_threshold, simclr_frame_sim,
    mdl_01_frame_threshold, codebook_01, vw, 19, fps = 30, ftk = 1
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

car_report
king
tasty



In [None]:
results_01_bovw_align = approach(
    vid_ds, M01, mdl_01_vid_threshold, sift_frame_sim,
    mdl_01_frame_threshold, codebook_01, vw, 19, fps = 30, ftk = 1,
    align_sim = True
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

car_report
king
tasty



HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))

Fuzzy: 3.0 8
Fuzzy: 3.0 6
Fuzzy: 2.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 4.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8


In [40]:
evals_01_bovw = evaluate(results_01) # ftk = 1

car_report σ Rank 3.6318039594669758
car_report μ Rank 2.9
car_report Median Rank 1.0
car_report mRR: 0.7559294871794873
car_report mAP: 0.6176476198380222
king σ Rank 0.6538348415311012
king μ Rank 1.15
king Median Rank 1.0
king mRR: 0.9625
king mAP: 0.8069936058093953
tasty σ Rank 0.3
tasty μ Rank 1.1
tasty Median Rank 1.0
tasty mRR: 0.95
tasty mAP: 0.8740277777777777


In [87]:
get_eval_results(evals_01, 'car_report', 'average_precision')

0.4666666666666666
0.7222222222222222
0.8055555555555555
1.0
1.0
1.0
1.0
1.0
1.0
0.9166666666666666
1.0
1.0
0.40277777777777773
0.9166666666666666
0.8666666666666667
1.0
0.5777777777777777
1.0
1.0
0.8055555555555555


TypeError: 'numpy.float64' object is not iterable

In [91]:
get_eval_results(evals_01_bovw_align, 'car_report', 'rank')

2
16
1
1
3
3
3
2
7
1
2
8
1
1
1
1
3
1
3
1


TypeError: 'numpy.float64' object is not iterable

In [112]:
import pickle
results_path = Path('/tf/data/results/M01')

with open(results_path/'results_01.pkl', 'wb') as f:
    pickle.dump(results_01, f, protocol=pickle.HIGHEST_PROTOCOL)

with open(results_path/'evals_01.pkl', 'wb') as f:
    pickle.dump(evals_01, f, protocol=pickle.HIGHEST_PROTOCOL)

with open(results_path/'results_01_bovw_align.pkl', 'wb') as f:
    pickle.dump(results_01_bovw_align, f, protocol=pickle.HIGHEST_PROTOCOL)

with open(results_path/'evals_01_bovw_align.pkl', 'wb') as f:
    pickle.dump(evals_01_bovw_align, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
vw = 100

M00 = SIFTExtractor(cv2.xfeatures2d.SIFT_create())
fname = f'/tf/data/models/cookbook_M00_1000n_100vw.model'
codebook_00 = pickle.load(open(fname, 'rb'))

In [None]:
m = len(vid_0)
n = len(vid_1)

fuzzy_LCS(vid_0, vid_1, 10, 10, sift_frame_sim, M00, codebook_00, vw, 0.955, fps = 30, frames_to_keep = 30)

In [None]:
model = SimCLRModel.load_from_checkpoint(checkpoint_path='/tf/data/models/simclr/checkpointepoch=98.ckpt').eval()
M01 = SimCLRExtractor(model)

fname = f'/tf/data/models/cookbook_M01_1000n_100vw.model'
codebook_01 = pickle.load(open(fname, 'rb'))

In [None]:
m = len(vid_0)
n = len(vid_1)

fuzzy_LCS(vid_0, vid_1, 10, 10, simclr_frame_sim, M01, codebook_01, vw, 0.76, frames_to_keep = 30)

In [None]:
# From geeksforgeeks: https://www.geeksforgeeks.org/longest-common-substring-dp-29/
def fuzzy_LCS(X, Y, m, n):
    # Create a table to store lengths of 
    # longest common suffixes of substrings.  
    # Note that LCSuff[i][j] contains the  
    # length of longest common suffix of  
    # X[0...i-1] and Y[0...j-1]. The first 
    # row and first column entries have no 
    # logical meaning, they are used only 
    # for simplicity of the program. 
      
    # LCSuff is the table with zero  
    # value initially in each cell 
    LCSuff = [[0 for k in range(n+1)] for l in range(m+1)] 
      
    # To store the length of  
    # longest common substring 
    result = 0 
  
    # Following steps to build 
    # LCSuff[m+1][n+1] in bottom up fashion 
    for i in range(m + 1): 
        for j in range(n + 1): 
            if (i == 0 or j == 0): 
                LCSuff[i][j] = 0
            elif (X[i-1] == Y[j-1]): 
                LCSuff[i][j] = LCSuff[i-1][j-1] + 1
                result = max(result, LCSuff[i][j]) 
            else: 
                LCSuff[i][j] = 0
    return result 

In [None]:
X = 'aaaaaaaaaaaaaa'
Y = 'aaaaaaaaaaaaaa'
  
m = len(X) 
n = len(Y) 
  
print('Length of Longest Common Substring is', 
                      fuzzy_LCS(X, Y, m, n)) 

In [209]:
#hide
from nbdev.export import notebook2script
notebook2script()

[autoreload of tango.model failed: Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/usr/local/lib/python3.6/dist-packages/IPython/extensions/autoreload.py", line 410, in superreload
    update_generic(old_obj, new_obj)
  File "/usr/local/lib/python3.6/dist-packages/IPython/extensions/autoreload.py", line 347, in update_generic
    update(a, b)
  File "/usr/local/lib/python3.6/dist-packages/IPython/extensions/autoreload.py", line 302, in update_class
    if update_generic(old_obj, new_obj): continue
  File "/usr/local/lib/python3.6/dist-packages/IPython/extensions/autoreload.py", line 347, in update_generic
    update(a, b)
  File "/usr/local/lib/python3.6/dist-packages/IPython/extensions/autoreload.py", line 266, in update_function
    setattr(old, name, getattr(new, name))
ValueError: __init__() requires a code object with 0 free vars, not 1
]


Converted 00_prep.ipynb.
Converted 01_features.ipynb.
Converted 02_eval.ipynb.
Converted 03_cnn.ipynb.
This cell doesn't have an export destination and was ignored:
 
Converted 04.0_experiments.ipynb.
Converted 04.1_experiments.codebooks.ipynb.
Converted 05_model.ipynb.
Converted 06_approach.ipynb.
This cell doesn't have an export destination and was ignored:
 
Converted 07_results.ipynb.
Converted 08_interp_results.ipynb.
Converted SimCLR.old.ipynb.
Converted [Scratch 1] Tango SimCLR.ipynb.
Converted [Scratch 2] Tango SimCLR.ipynb.
Converted index.ipynb.
Converted lesson1-pets.ipynb.


In [254]:
def LCSubStr(X, Y, m, n): 
      
    # Create a table to store lengths of 
    # longest common suffixes of substrings.  
    # Note that LCSuff[i][j] contains the  
    # length of longest common suffix of  
    # X[0...i-1] and Y[0...j-1]. The first 
    # row and first column entries have no 
    # logical meaning, they are used only 
    # for simplicity of the program. 
      
    # LCSuff is the table with zero  
    # value initially in each cell 
    LCSuff = [[0 for k in range(n+1)] for l in range(m+1)] 
      
    # To store the length of  
    # longest common substring 
    result = 0 
  
    # Following steps to build 
    # LCSuff[m+1][n+1] in bottom up fashion 
    for i in range(m + 1): 
        for j in range(n + 1): 
            if (i == 0 or j == 0): 
                LCSuff[i][j] = 0
            elif (X[i-1] == Y[j-1]): 
                LCSuff[i][j] = LCSuff[i-1][j-1] + 1 * (i / m) * (j / n)
                result = max(result, LCSuff[i][j]) 
            else: 
                LCSuff[i][j] = 0
    print(LCSuff)
    return result 
  
# Driver Program to test above function 
X = '17980'
Y = '123456'
  
m = len(X) 
n = len(Y) 
  
print('Length of Longest Common Substring is', 
                      LCSubStr(X, Y, m, n)) 

[[0, 0, 0, 0, 0, 0, 0], [0, 0.03333333333333333, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0]]
Length of Longest Common Substring is 0.03333333333333333
