In [None]:
#default_exp approach

In [1]:
#hide
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# Approach


> This module contains all the code for defining the various approaches

In [2]:
#export
import copy
import cv2
import multiprocessing
import pickle

import numpy as np

from collections import defaultdict, OrderedDict
from itertools import combinations_with_replacement
from joblib import Parallel, delayed
from pathlib import Path

# tango
from tango.eval import *
from tango.features import *
from tango.model import *
from tango.prep import *

from tqdm.auto import tqdm

from sklearn.metrics.pairwise import cosine_similarity

In [3]:
def gen_all_codebooks(imgs, models, vw):
    for i, model in enumerate(models):
        codebook = gen_vcodebook(imgs, model, vw)
        fname = f'/tf/data/models/cookbook_M{i:02}_{len(imgs)}n_{vw}vw.model'
        pickle.dump(codebook, open(fname, 'wb'))

In [4]:
def gen_tfidfs(vid_ds, mdl, vw, codebook, df, ftk):
    vid_tfids = defaultdict(
        lambda: defaultdict(list)
    )
    for app, reports in tqdm(vid_ds.labels.items()):
#         if app != 'car_report': continue
        print(app)
        for i, (report, vids) in enumerate(reports.items()):
            for vid in vids:
                bovw = new_get_bovw(vid, mdl, codebook, vw, frames_to_keep = ftk)
                vid_tfids[app][report].append(calc_tf_idf(bovw, df))
    
    return vid_tfids

In [5]:
# def gen_similarity(vid_ds, mdl, codebook, vw, ftk):
#     results = {}
    
#     df = np.histogram(codebook.labels_, bins = range(vw + 1))[0]
#     vid_tfids = gen_tfidfs(vid_ds, mdl, vw, codebook, df, ftk)
#     for app, reports in vid_ds.labels.items():
#         if app != 'car_report': continue
#         results[app] = {}
#         for report_i in reports:
#             results[app][report_i] = {}
#             for report_j in reports:
#                 results[app][report_i][report_j] = {}
#                 for k in range(len(vid_tfids[app][report_i])):
#                     results[app][report_i][report_j][f'vid_{k}'] = {}
#                     for l in range(len(vid_tfids[app][report_j])):
#                         results[app][report_i][report_j][f'vid_{k}'][f'vid_{l}'] = np.dot(vid_tfids[app][report_i][k], vid_tfids[app][report_j][l]) / (np.linalg.norm(vid_tfids[app][report_i][k]) * np.linalg.norm(vid_tfids[app][report_j][l]))
    
#     return results

In [6]:
def gen_similarity(vid_ds, mdl, codebook, vw, ftk):
    results = defaultdict(
        lambda: defaultdict(
            lambda: defaultdict(
                lambda: defaultdict(
                    lambda: defaultdict(int)
                )
            )
        )
    )
    
    df = np.histogram(codebook.labels_, bins = range(vw + 1))[0]
    vid_tfids = gen_tfidfs(vid_ds, mdl, vw, codebook, df, ftk)
    for app, reports in vid_ds.labels.items():
#         if app != 'car_report': continue
        l = [(report, i) for report in reports for i in range(len(reports[report]))]
        pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])
        for (report_i, i), (report_j, j) in pairs:
            results[app][report_i][f'vid_{i}'][report_j][f'vid_{j}'] = np.dot(vid_tfids[app][report_i][i], vid_tfids[app][report_j][j]) / (np.linalg.norm(vid_tfids[app][report_i][i]) * np.linalg.norm(vid_tfids[app][report_j][j]))

    return results

In [7]:
def filter_vids(vid_similarities, mdl_vid_threshold = 0.8):
    vid_dict = copy.deepcopy(vid_similarities)
    for app in vid_dict:
        for report_i in vid_dict[app]:
            for report_j in vid_dict[app][report_i]:
                for vid_i in vid_dict[app][report_i][report_j]:
                    for vid_j in vid_dict[app][report_i][report_j][vid_i]:
                        if vid_dict[app][report_i][report_j][vid_i] < mdl_threshold:
                            del vid_dict[app][report_i][report_j][vid_i]
    
    return vid_dict

In [8]:
def sift_frame_sim(mdl, codebook, frame_i, frame_j, vw, mdl_frame_threshold):
    if frame_i is None or frame_j is None: return 0
    features_i = mdl.extract(frame_i)
    vws_i = np.expand_dims(codebook.predict(features_i), axis=0)
    bowv_i = np.expand_dims(np.histogram(vws_i, bins = range(vw + 1))[0], axis=0)
    
    features_j = mdl.extract(frame_j)
    vws_j = np.expand_dims(codebook.predict(features_j), axis=0)
    bowv_j = np.expand_dims(np.histogram(vws_j, bins = range(vw + 1))[0], axis=0)
    
    sim = cosine_similarity(bowv_i, bowv_j)[0][0]
    return sim

In [9]:
def simclr_frame_sim(mdl, codebook, frame_i, frame_j, vw, mdl_frame_threshold):
    if frame_i is None or frame_j is None: return 0
    features_i = mdl.extract(frame_i)
    features_j = mdl.extract(frame_j)
    
    sim = cosine_similarity(features_i, features_j)[0][0]
    return sim

In [10]:
# Modified from geeksforgeeks: https://www.geeksforgeeks.org/longest-common-substring-dp-29/
def fuzzy_LCS(X, Y, m, n, sim_func, mdl, codebook, vw, mdl_frame_threshold, fps = 30, frames_to_keep = 1):
    LCSuff = [[0 for k in range(n + 1)] for l in range(m + 1)] 
      
    # To store the length of  
    # longest common substring 
    result = 0 
  
    # Following steps to build 
    # LCSuff[m+1][n+1] in bottom up fashion 
    for i in range(0, m + 1):
        for j in range(0, n + 1):
            if (i == 0 or j == 0): 
                LCSuff[i][j] = 0
                continue
            sim = sim_func(mdl, codebook, X[i * int(fps / frames_to_keep) -1], Y[j * int(fps / frames_to_keep) -1], vw, mdl_frame_threshold)
#             print('SIM:', sim)
            if sim > mdl_frame_threshold: 
                LCSuff[i][j] = LCSuff[i-1][j-1] + sim
                result = max(result, LCSuff[i][j]) 
            else: 
                LCSuff[i][j] = 0
    print('Fuzzy:', result, min(m, n))
    return result / min(m, n)

In [11]:
def flatten_dict(d_in, d_out, parent_key):
    for k, v in d_in.items():
        if isinstance(v, dict):
            flatten_dict(v, d_out, parent_key + (k,))
        else:
            d_out[parent_key + (k,)] = v

In [12]:
def fix_corpus(results, corpus_size):
    
    for app in results:
#         corpus_size = len(results[app])
        s0 = 0
        v0 = 1
        for i in range(corpus_size):
            if v0 % 4 == 0: s0 += 1
            s1 = 0
            for v1 in range(i + 1):
                if v1 % 4 == 0: s1 += 1
                results[app][f'S{s0}'][f'vid_{v0 % 4}'][f'S{s1 - 1}'][f'vid_{v1 % 4}'] = results[app][f'S{s1 - 1}'][f'vid_{v1 % 4}'][f'S{s0}'][f'vid_{v0 % 4}']
#                 results[app][f'S{s0}'][f'vid_{v0 % 4}'][(f'S{s1 - 1}', f'vid_{v1 % 4}')] = results[app][f'S{s1 - 1}'][f'vid_{v1 % 4}'][(f'S{s0}', f'vid_{v0 % 4}')]
                
#             print('-' * 5)
            v0 += 1
    return results
#     print(i + 1)

In [13]:
def sort_results(results):
    sorted_results = {}
    for app in results:
        sorted_results[app] = {}
        for report in results[app]:
            sorted_results[app][report] = {}
            for vid in results[app][report]:
                sorted_results[app][report][vid] = []
                d_out = {}
                flatten_dict(results[app][report][vid], d_out, tuple())
                sorted_results[app][report][vid] = OrderedDict(
                    sorted(d_out.items(), key=lambda x: x[1], reverse = True)
                )
    
    return sorted_results

In [67]:
def approach(
    vid_ds, mdl, mdl_vid_threshold, sim_func,
    mdl_frame_threshold, codebook, vw, corpus_size,
    fps = 30, ftk = 1, align_sim = False
):
    vid_ds_sims = gen_similarity(vid_ds, mdl, codebook, vw, ftk)
    if align_sim:
        for app, reports in vid_ds.labels.items():
    #         if app != 'car_report': continue
            l = [(report, i) for report in reports for i in range(len(reports[report]))]
            pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])
            for (report_i, i), (report_j, j) in tqdm(pairs):
                tfidf_sim = vid_ds_sims[app][report_i][f'vid_{i}'][report_j][f'vid_{j}']
                lcs_sim = fuzzy_LCS(
                    vid_ds[app][report_i][i], vid_ds[app][report_j][j],
                    int(len(vid_ds[app][report_i][i]) / int(fps / ftk)),
                    int(len(vid_ds[app][report_j][j]) / int(fps / ftk)),
                    sim_func, mdl, codebook, vw, mdl_frame_threshold, fps, ftk
                )
                vid_ds_sims[app][report_i][f'vid_{i}'][report_j][f'vid_{j}'] = (tfidf_sim + lcs_sim) / 2
#     for app, reports in vid_ds.labels.items():
#         if app != 'car_report': continue
#         for i, report_i in tqdm(enumerate(reports), total = len(reports)):
# #             if i > 1: break
#             for j, report_j in tqdm(enumerate(reports), total = len(reports)):
# #                 if j > 1: break
#                 for k in range(len(vid_ds[app][report_i])):
# #                     if k > 1: break
#                     for l in range(len(vid_ds[app][report_j])):
# #                         if l > 1: break
#                         tfidf_sim = vid_ds_sims[app][report_i][report_j][f'vid_{k}'][f'vid_{l}']
#                         lcs_sim = fuzzy_LCS(
#                             vid_ds[app][report_i][k], vid_ds[app][report_j][l],
#                             int(len(vid_ds[app][report_i][k]) / int(fps / ftk)), int(len(vid_ds[app][report_i][l]) / int(fps / ftk)),
#                             sim_func, mdl, codebook, vw, mdl_frame_threshold, fps, ftk
#                         )
#                         print(tfidf_sim, lcs_sim)
#                         vid_ds_sims[app][report_i][report_j][f'vid_{k}'][f'vid_{l}'] = (tfidf_sim + lcs_sim) / 2
#     print(vid_ds_sims)

    vid_ds_sims = fix_corpus(vid_ds_sims, corpus_size)
    results = sort_results(vid_ds_sims)
    return results

In [54]:
def get_eval_results(evals, app, item):
    for s in evals[app]:
        for vid in evals[app][s]:
            try:
                print(evals[app][s][vid][item])
            except: continue

In [187]:
l = [(f'S{i}', f'vid{j}') for i in range(5) for j in range(4)]
pairs = list(x for x in combinations_with_replacement(l, 2) if x[0] != x[1])

In [17]:
path = Path("/tf/data/datasets/validation_set")
vid_ds = VideoDataset.from_path(path).label_from_paths()
vid_ds.get_labels()

['car_report', 'king', 'tasty']

In [15]:
mdl_00_vid_threshold = 0.9
mdl_00_frame_threshold = 0.9
vw = 100

M00 = SIFTExtractor(cv2.xfeatures2d.SIFT_create())
fname = f'/tf/data/models/cookbook_M00_1000n_100vw.model'
codebook_00 = pickle.load(open(fname, 'rb'))

In [None]:
results_00 = approach(
    vid_ds, M00, mdl_00_vid_threshold, sift_frame_sim,
    mdl_00_frame_threshold, codebook_00, vw, 19, fps = 30, ftk = 1
)

In [None]:
results_00_bovw_align = approach(
    vid_ds, M00, mdl_00_vid_threshold, sift_frame_sim,
    mdl_00_frame_threshold, codebook_00, vw, 19, fps = 30, ftk = 1,
    align_sim = True
)

In [35]:
evals_00 = evaluate(results_00) # ftk = 1

car_report σ Rank 2.277608394786075
car_report μ Rank 2.25
car_report Median Rank 1.5
car_report mRR: 0.6937121212121212
car_report mAP: 0.5326424678262913
king σ Rank 4.363484845854286
king μ Rank 3.4
king Median Rank 1.0
king mRR: 0.6980373303167421
king mAP: 0.4601591918716531
tasty σ Rank 0.7810249675906654
tasty μ Rank 1.3
tasty Median Rank 1.0
tasty mRR: 0.9041666666666666
tasty mAP: 0.8303373015873016


In [60]:
get_eval_results(evals_00, 'tasty', 'average_precision')

0.3222222222222222
0.29444444444444445
0.7916666666666666
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.7916666666666666
0.5555555555555555
0.8666666666666667
0.625
0.7666666666666666
0.8095238095238094
0.9166666666666666
0.8666666666666667


TypeError: 'numpy.float64' object is not iterable

In [27]:
mdl_01_vid_threshold = 0.9
mdl_01_frame_threshold = 0.9
model = SimCLRModel.load_from_checkpoint(checkpoint_path='/tf/data/models/simclr/checkpointepoch=98.ckpt').eval()
M01 = SimCLRExtractor(model)

fname = f'/tf/data/models/cookbook_M01_1000n_100vw.model'
codebook_01 = pickle.load(open(fname, 'rb'))

In [28]:
results_01 = approach(
    vid_ds, M01, mdl_01_vid_threshold, simclr_frame_sim,
    mdl_01_frame_threshold, codebook_01, vw, 19, fps = 30, ftk = 1
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

car_report
king
tasty



In [None]:
results_01_bovw_align = approach(
    vid_ds, M01, mdl_01_vid_threshold, sift_frame_sim,
    mdl_01_frame_threshold, codebook_01, vw, 19, fps = 30, ftk = 1,
    align_sim = True
)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

car_report
king
tasty



HBox(children=(FloatProgress(value=0.0, max=190.0), HTML(value='')))

Fuzzy: 3.0 8
Fuzzy: 3.0 6
Fuzzy: 2.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 4.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8
Fuzzy: 3.0 8


In [40]:
evals_01 = evaluate(results_01) # ftk = 1

car_report σ Rank 3.6318039594669758
car_report μ Rank 2.9
car_report Median Rank 1.0
car_report mRR: 0.7559294871794873
car_report mAP: 0.6176476198380222
king σ Rank 0.6538348415311012
king μ Rank 1.15
king Median Rank 1.0
king mRR: 0.9625
king mAP: 0.8069936058093953
tasty σ Rank 0.3
tasty μ Rank 1.1
tasty Median Rank 1.0
tasty mRR: 0.95
tasty mAP: 0.8740277777777777


In [66]:
get_eval_results(evals_01, 'car_report', 'average_precision')

0.1280284043441938
0.1153096416254311
0.43977591036414565
0.5254901960784314
0.31746031746031744
0.27777777777777773
0.2896825396825397
0.5166666666666667
0.20664983164983164
1.0
0.7777777777777778
0.5916666666666667
1.0
1.0
1.0
1.0
0.7222222222222222
0.9166666666666666
0.7222222222222222
0.8055555555555555


TypeError: 'numpy.float64' object is not iterable

In [None]:
vw = 100

M00 = SIFTExtractor(cv2.xfeatures2d.SIFT_create())
fname = f'/tf/data/models/cookbook_M00_1000n_100vw.model'
codebook_00 = pickle.load(open(fname, 'rb'))

In [None]:
m = len(vid_0)
n = len(vid_1)

fuzzy_LCS(vid_0, vid_1, 10, 10, sift_frame_sim, M00, codebook_00, vw, 0.955, fps = 30, frames_to_keep = 30)

In [None]:
model = SimCLRModel.load_from_checkpoint(checkpoint_path='/tf/data/models/simclr/checkpointepoch=98.ckpt').eval()
M01 = SimCLRExtractor(model)

fname = f'/tf/data/models/cookbook_M01_1000n_100vw.model'
codebook_01 = pickle.load(open(fname, 'rb'))

In [None]:
m = len(vid_0)
n = len(vid_1)

fuzzy_LCS(vid_0, vid_1, 10, 10, simclr_frame_sim, M01, codebook_01, vw, 0.76, frames_to_keep = 30)

In [None]:
# From geeksforgeeks: https://www.geeksforgeeks.org/longest-common-substring-dp-29/
def fuzzy_LCS(X, Y, m, n):
    # Create a table to store lengths of 
    # longest common suffixes of substrings.  
    # Note that LCSuff[i][j] contains the  
    # length of longest common suffix of  
    # X[0...i-1] and Y[0...j-1]. The first 
    # row and first column entries have no 
    # logical meaning, they are used only 
    # for simplicity of the program. 
      
    # LCSuff is the table with zero  
    # value initially in each cell 
    LCSuff = [[0 for k in range(n+1)] for l in range(m+1)] 
      
    # To store the length of  
    # longest common substring 
    result = 0 
  
    # Following steps to build 
    # LCSuff[m+1][n+1] in bottom up fashion 
    for i in range(m + 1): 
        for j in range(n + 1): 
            if (i == 0 or j == 0): 
                LCSuff[i][j] = 0
            elif (X[i-1] == Y[j-1]): 
                LCSuff[i][j] = LCSuff[i-1][j-1] + 1
                result = max(result, LCSuff[i][j]) 
            else: 
                LCSuff[i][j] = 0
    return result 

In [None]:
X = 'aaaaaaaaaaaaaa'
Y = 'aaaaaaaaaaaaaa'
  
m = len(X) 
n = len(Y) 
  
print('Length of Longest Common Substring is', 
                      fuzzy_LCS(X, Y, m, n)) 

In [None]:
#hide
from nbdev.export import notebook2script
notebook2script()