In [None]:
# default_exp eval

In [None]:
#hide
%load_ext autoreload
%autoreload 2
%matplotlib inline

# Eval

> This module contains all the necessary functions for evaluating different video duplication detection techniques.

In [None]:
#export
import cv2
import ffmpeg
import pickle

import numpy as np

from fastprogress.fastprogress import progress_bar


# tango
from tango.prep import *
from tango.features import *

from pathlib import Path

from matplotlib import pyplot as plt

from nbdev.showdoc import *

from sklearn.cluster import KMeans

In [None]:
#hide
# Root directory of the video dataset.
# NOTE(review): absolute, machine-specific path — confirm it exists in your environment.
path = Path("/tf/data/datasets/videos")
# Recursively collect every file named `video.mp4` under `path`, sort the paths,
# and display the first six as a quick sanity check.
video_paths = sorted(path.glob("**/video.mp4")); video_paths[:6]

[PosixPath('/tf/data/datasets/videos/art_and_design/ibis_paint/scenario1/video.mp4'),
 PosixPath('/tf/data/datasets/videos/art_and_design/ibis_paint/scenario2/video.mp4'),
 PosixPath('/tf/data/datasets/videos/art_and_design/pixel_art_paint/scenario1/video.mp4'),
 PosixPath('/tf/data/datasets/videos/art_and_design/pixel_art_paint/scenario2/video.mp4'),
 PosixPath('/tf/data/datasets/videos/auto_and_vehicles/car_part/scenario1/video.mp4'),
 PosixPath('/tf/data/datasets/videos/auto_and_vehicles/car_part/scenario2/video.mp4')]

In [None]:
#export
def calc_tf_idf(tfs, dfs):
    """Compute a TF-IDF weight for every (term frequency, document frequency) pair.

    For each pair ``(tf, df)`` taken positionally from ``tfs`` and ``dfs``::

        weight = (tf / sum(tfs)) * log(len(tfs) / (df + 1))

    Args:
        tfs: Sequence of term frequencies (must support ``len``).
        dfs: Sequence of document frequencies, paired positionally with ``tfs``.
            If the lengths differ, the extra entries of the longer one are ignored.

    Returns:
        1-D float ``numpy.ndarray`` with one weight per pair (empty if there are
        no pairs).

    NOTE(review): the IDF numerator is ``len(tfs)`` (the number of entries in
    ``tfs``), not an explicit document count — confirm this is what callers intend.
    """
    # Both quantities are loop-invariant, so compute them once instead of on
    # every iteration.
    total_tf = np.sum(tfs)
    n_entries = len(tfs)

    # Collect the weights in a list and build the array once; growing an array
    # with np.append inside the loop reallocates on every step (quadratic).
    weights = [
        (tf / total_tf) * np.log(n_entries / (df + 1))
        for tf, df in zip(tfs, dfs)
    ]
    # ravel() keeps the result flat, matching what np.append produced.
    return np.asarray(weights, dtype=float).ravel()

In [None]:
#export
def cosine_similarity(a, b):
    """Cosine of the angle between vectors ``a`` and ``b``.

    Args:
        a: First vector (list or numpy array).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``. If either vector has zero norm
        (including empty vectors) the similarity is undefined; ``0.0`` is
        returned instead of dividing by zero and producing NaN.
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    # A zero norm would make the division 0/0 -> NaN (plus a RuntimeWarning),
    # and NaN scores break any later sorting/ranking by similarity.
    if norm_product == 0:
        return 0.0
    return np.dot(a, b) / norm_product

In [None]:
# export
def hit_rate_at_k(rs, k):
    """Fraction of rankings that have at least one hit within their top ``k``.

    A ranking counts as a hit when the sum of its first ``k`` relevance scores
    is greater than zero.

    Args:
        rs: Iterable of relevance score sequences (list or numpy), each in rank
            order (first element is the first item).
        k: Number of top positions to inspect in each ranking.

    Returns:
        Hit rate in ``[0, 1]``; ``0.0`` when ``rs`` is empty.
    """
    # Materialize so iterators/generators work and can be counted afterwards.
    rs = list(rs)
    if not rs:
        # No rankings at all: report 0.0 rather than raising ZeroDivisionError.
        return 0.0
    hits = sum(1 for r in rs if np.sum(r[:k]) > 0)
    return hits / len(rs)

## The following methods are from: https://gist.github.com/bwhite/3726239

In [None]:
# export
def mean_reciprocal_rank(rs):
    """Mean reciprocal rank (MRR) over a collection of ranked result lists.

    Each list contributes ``1 / rank`` of its first relevant entry, where the
    first position is rank 1 and any nonzero score counts as relevant.  A list
    without any relevant entry contributes 0.

    Examples (from http://en.wikipedia.org/wiki/Mean_reciprocal_rank):
        ``[[0, 0, 1], [0, 1, 0], [1, 0, 0]]``       -> ``(1/3 + 1/2 + 1) / 3``
        ``[[0, 0, 0], [0, 1, 0], [1, 0, 0]]``       -> ``0.5``
        ``[[0, 0, 0, 1], [1, 0, 0], [1, 0, 0]]``    -> ``0.75``

    Args:
        rs: Iterable of relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Mean reciprocal rank
    """
    reciprocal_ranks = []
    for scores in rs:
        # Indices (0-based) of every relevant entry in this ranking.
        hit_positions = np.asarray(scores).nonzero()[0]
        if hit_positions.size:
            reciprocal_ranks.append(1. / (hit_positions[0] + 1))
        else:
            reciprocal_ranks.append(0.)
    return np.mean(reciprocal_ranks)

def r_precision(r):
    """Precision measured at the position of the last relevant item.

    Relevance is binary (nonzero is relevant).  The ranking is cut right after
    its last relevant entry and the fraction of relevant entries inside that
    prefix is returned.

    Examples:
        ``[0, 0, 1]`` -> ``1/3``
        ``[0, 1, 0]`` -> ``0.5``
        ``[1, 0, 0]`` -> ``1.0``

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        R Precision (``0.`` when nothing is relevant)
    """
    is_relevant = np.asarray(r) != 0
    hit_positions = is_relevant.nonzero()[0]
    if hit_positions.size == 0:
        return 0.
    last_hit = hit_positions[-1]
    # Mean of a boolean prefix == (number of hits) / (prefix length).
    prefix = is_relevant[:last_hit + 1]
    return np.mean(prefix)


def precision_at_k(r, k):
    """Precision @ k: fraction of the first ``k`` ranked items that are relevant.

    Relevance is binary (nonzero is relevant).

    Examples for ``r = [0, 0, 1]``:
        ``k = 1`` -> ``0.0``
        ``k = 2`` -> ``0.0``
        ``k = 3`` -> ``1/3``
        ``k = 4`` -> raises ``ValueError`` (fewer than 4 scores available)

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Cut-off rank, must be >= 1

    Returns:
        Precision @ k

    Raises:
        ValueError: len(r) must be >= k
    """
    assert k >= 1
    top_scores = np.asarray(r)[:k]
    is_relevant = top_scores != 0
    # Slicing silently truncates, so a short input shows up as a size mismatch.
    if k != is_relevant.size:
        raise ValueError('Relevance score length < k')
    return np.mean(is_relevant)


def average_precision(r):
    """Average precision (area under the precision-recall curve) of one ranking.

    Relevance is binary (nonzero is relevant).  The score is the mean of the
    precision values taken at the rank of every relevant item.

    Example:
        ``r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]`` has hits at ranks 1, 2, 4, 6, 10,
        so the score is ``(1/1 + 2/2 + 3/4 + 4/6 + 5/10) / 5 = 0.78333...``

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Average precision (``0.`` when no item is relevant)
    """
    is_relevant = np.asarray(r) != 0
    # 1-based ranks at which a relevant item occurs.
    hit_ranks = np.flatnonzero(is_relevant) + 1
    if hit_ranks.size == 0:
        return 0.
    # Exactly i relevant items sit in the prefix ending at the i-th hit, so
    # precision there is i / rank. This avoids re-slicing and re-averaging the
    # prefix for every hit (quadratic in the ranking length).
    hit_counts = np.arange(1, hit_ranks.size + 1)
    return np.mean(hit_counts / hit_ranks)


def mean_average_precision(rs):
    """Mean average precision (mAP) over a collection of rankings.

    Relevance is binary (nonzero is relevant).  The average precision of each
    ranking is computed with ``average_precision`` and the results are averaged.

    Examples:
        ``[[1, 1, 0, 1, 0, 1, 0, 0, 0, 1]]``        -> ``0.78333...``
        ``[[1, 1, 0, 1, 0, 1, 0, 0, 0, 1], [0]]``   -> ``0.39166...``

    Args:
        rs: Iterable of relevance scores (list or numpy) in rank order
            (first element is the first item)

    Returns:
        Mean average precision
    """
    per_ranking_ap = []
    for ranking in rs:
        per_ranking_ap.append(average_precision(ranking))
    return np.mean(per_ranking_ap)

In [None]:
# Three toy rankings: first hit at rank 1, first hit at rank 2, and no hit at all.
# Reciprocal ranks are 1, 1/2 and 0, so the mean is 0.5.
rs = [[1, 0, 0], [0, 1, 0], [0, 0, 0]]
mean_reciprocal_rank(rs)

0.5

In [None]:
# Single ranking with relevant items at ranks 1, 2, 4, 6 and 10.
r = [1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
average_precision(r)

0.7833333333333333

In [None]:
# Uses the `rs` list defined in the mean_reciprocal_rank demo cell above,
# so that cell must be run first.
mean_average_precision(rs)

0.5

In [None]:
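# NOTE: dead code — an early draft of `evaluate`, superseded by the live `evaluate` defined later in this notebook.
# # export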
# def evaluate(results, top_n = 5):
#     # calc Precision/Recall (with 5 top docs)?, MRR, and mAP
#     for model in results:
#         rs = []
#         for app in results[model]:
#             r = []
#             for labels, score in results[model][app].items():
#                 if labels[0] == 'S01':
#                     if labels[2] != labels[3]:
#                         if labels[0] == labels[1]: r.append(1)
#                         else: r.append(0)
#             rs.append(r)
    
#         print(f'{model} mAP:', mean_average_precision(rs))
#         print(f'{model} mRR:', mean_reciprocal_rank(rs))

In [None]:
# export
def rank_stats(rs):
    """Summary statistics of the rank of the first relevant item in each ranking.

    Ranks are 1-based: the first element of a ranking is rank 1.  Relevance is
    binary (nonzero is relevant).

    Args:
        rs: Iterable of relevance scores (list or numpy) in rank order.  Every
            ranking must contain at least one relevant item.

    Returns:
        Tuple ``(std_rank, mean_rank, median_rank, mean_reciprocal_rank)``.

    Raises:
        IndexError: if a ranking contains no relevant (nonzero) item, because
            its rank is then undefined.
    """
    ranks = []
    for r in rs:
        # np.asarray lets plain lists through as well as numpy arrays.
        hit_positions = np.asarray(r).nonzero()[0]
        if not hit_positions.size:
            raise IndexError('ranking contains no relevant (nonzero) item')
        ranks.append(hit_positions[0] + 1)
    ranks = np.asarray(ranks)
    reciprocal_ranks = 1 / ranks
    return np.std(ranks), np.mean(ranks), np.median(ranks), np.mean(reciprocal_ranks)

In [46]:
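# NOTE: dead code — previous version of `evaluate` (keyed by report/video), kept for reference only; the live implementation is in the next code cell.
# def evaluate(results, top_k = [1, 5, 10]):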
#     output = {}
#     for app in results:
#         output[app] = {}
#         app_rs = []
#         for report in results[app]:
#             output[app][report] = {}
#             report_rs = []
#             for vid in results[app][report]:
#                 output[app][report][vid] = {'ranks': []}
#                 r = []
#                 for labels, score in results[app][report][vid].items():
#                     output[app][report][vid]['ranks'].append((labels, score))
#                     if labels[0] == report: r.append(1)
#                     else: r.append(0)
#                 r = np.asarray(r)
#                 output[app][report][vid]['rank'] = r.nonzero()[0][0] + 1
#                 output[app][report][vid]['average_precision'] = average_precision(r)
#                 report_rs.append(r)
            
#             report_rs_std, report_rs_mean, report_rs_med, report_mRR = rank_stats(report_rs)
#             report_mAP = mean_average_precision(report_rs)
            
#             output[app][report]['Report std rank'] = report_rs_std
#             output[app][report]['Report mean rank'] = report_rs_mean
#             output[app][report]['Report median rank'] = report_rs_med
#             output[app][report]['Report mRR'] = report_mRR
#             output[app][report]['Report mAP'] = report_mAP
#             for k in top_k:
#                 report_hit_rate = hit_rate_at_k(report_rs, k)
#                 output[app][f'Report Hit@{k}'] = report_hit_rate
#             app_rs.extend(report_rs)
        
#         app_rs_std, app_rs_mean, app_rs_med, app_mRR = rank_stats(app_rs)
#         app_mAP = mean_average_precision(app_rs)
        
#         output[app]['App std rank'] = app_rs_std
#         output[app]['App mean rank'] = app_rs_mean
#         output[app]['App median rank'] = app_rs_med
#         output[app]['App mRR'] = app_mRR
#         output[app]['App mAP'] = app_mAP
#         print(f'{app} σ Rank', app_rs_std)
#         print(f'{app} μ Rank', app_rs_mean)
#         print(f'{app} Median Rank', app_rs_med)
#         print(f'{app} mRR:', app_mRR)
#         print(f'{app} mAP:', app_mAP)
#         for k in top_k:
#             app_hit_rate = hit_rate_at_k(app_rs, k)
#             output[app][f'App Hit@{k}'] = app_hit_rate
#             print(f'{app} Hit@{k}:', app_hit_rate)
            
#     return output

In [None]:
# export
def evaluate(rankings, top_k = (1, 5, 10)):
    """Score every ranking and aggregate rank metrics per bug and per app.

    For each ``rankings[app][bug][report]`` mapping, an entry is considered
    relevant when the first element of its key equals ``bug``.  The iteration
    order of that mapping is taken as the rank order (first entry is rank 1).
    Per-app summary statistics are printed as a side effect.

    Args:
        rankings: Nested mapping ``app -> bug -> report -> {labels: score}``.
            A key named ``'elapsed_time'`` next to the bug keys is not scored;
            when present its value is printed with the app summary.
        top_k: Cut-offs ``k`` for which Hit@k is computed.

    Returns:
        Nested dict mirroring ``rankings``:

        * ``output[app][bug][report]`` has ``'ranks'`` (list of
          ``(labels, score)`` pairs in rank order), ``'rank'`` (1-based rank of
          the first relevant entry) and ``'average_precision'``.
        * ``output[app][bug]`` additionally has ``'Bug std rank'``,
          ``'Bug mean rank'``, ``'Bug median rank'``, ``'Bug mRR'``,
          ``'Bug mAP'`` and ``'Bug Hit@{k}'`` for each ``k``.
        * ``output[app]`` additionally has ``'App std rank'``,
          ``'App mean rank'``, ``'App median rank'``, ``'App mRR'``,
          ``'App mAP'`` and ``'App Hit@{k}'`` for each ``k``.

    Raises:
        IndexError: if a ranking contains no entry whose first label equals
            its bug, since its rank is then undefined.
    """
    # Materialize once: top_k is iterated for every bug and again for the app,
    # which would exhaust a one-shot iterator after the first pass.
    top_k = list(top_k)

    output = {}
    for app, app_rankings in rankings.items():
        app_out = output[app] = {}
        app_rs = []
        for bug, bug_rankings in app_rankings.items():
            # Timing metadata lives next to the bug keys; it is not a bug.
            if bug == 'elapsed_time': continue
            bug_out = app_out[bug] = {}
            bug_rs = []
            for report, ranking in bug_rankings.items():
                ranked = list(ranking.items())
                # 1 where the ranked item belongs to the same bug, else 0.
                r = np.asarray([1 if labels[0] == bug else 0 for labels, _ in ranked])
                hit_positions = r.nonzero()[0]
                if not hit_positions.size:
                    raise IndexError(
                        f'no relevant item for app={app!r}, bug={bug!r}, report={report!r}'
                    )
                bug_out[report] = {
                    'ranks': ranked,
                    'rank': hit_positions[0] + 1,
                    'average_precision': average_precision(r),
                }
                bug_rs.append(r)

            bug_rs_std, bug_rs_mean, bug_rs_med, bug_mRR = rank_stats(bug_rs)
            bug_mAP = mean_average_precision(bug_rs)

            bug_out['Bug std rank'] = bug_rs_std
            bug_out['Bug mean rank'] = bug_rs_mean
            bug_out['Bug median rank'] = bug_rs_med
            bug_out['Bug mRR'] = bug_mRR
            bug_out['Bug mAP'] = bug_mAP
            for k in top_k:
                # Stored on the bug's own dict like the other bug metrics;
                # on the app dict each bug would overwrite the previous value.
                bug_out[f'Bug Hit@{k}'] = hit_rate_at_k(bug_rs, k)
            app_rs.extend(bug_rs)

        app_rs_std, app_rs_mean, app_rs_med, app_mRR = rank_stats(app_rs)
        app_mAP = mean_average_precision(app_rs)

        app_out['App std rank'] = app_rs_std
        app_out['App mean rank'] = app_rs_mean
        app_out['App median rank'] = app_rs_med
        app_out['App mRR'] = app_mRR
        app_out['App mAP'] = app_mAP
        # The scoring loop treats 'elapsed_time' as optional, so do the same here.
        if 'elapsed_time' in app_rankings:
            print(f'{app} Elapsed Time in Seconds', app_rankings['elapsed_time'])
        print(f'{app} σ Rank', app_rs_std)
        print(f'{app} μ Rank', app_rs_mean)
        print(f'{app} Median Rank', app_rs_med)
        print(f'{app} mRR:', app_mRR)
        print(f'{app} mAP:', app_mAP)
        for k in top_k:
            app_hit_rate = hit_rate_at_k(app_rs, k)
            app_out[f'App Hit@{k}'] = app_hit_rate
            print(f'{app} Hit@{k}:', app_hit_rate)

    return output

In [61]:
# export
def get_eval_results(evals, app, item):
    """Print ``item`` for every per-report entry of ``app`` that has it.

    Walks ``evals[app][bug][vid]`` and prints ``evals[app][bug][vid][item]``.
    Entries that are not indexable by ``item`` (e.g. scalar summary metrics
    stored next to the per-report dicts) are skipped.

    Args:
        evals: Nested mapping ``app -> bug -> vid -> {item: value}``.
        app: Key of the app to inspect.
        item: Key to look up in every per-report entry (e.g. ``'rank'``).

    Returns:
        None. Values are written to stdout, one per line.
    """
    from collections.abc import Mapping

    for bug, bug_entry in evals[app].items():
        if bug == 'elapsed_time': continue
        # App-level summary metrics are scalars, not per-bug mappings; trying
        # to iterate them would raise TypeError.
        if not isinstance(bug_entry, Mapping): continue
        for vid_entry in bug_entry.values():
            try:
                print(vid_entry[item])
            # Only lookup failures are skipped (missing key, or a scalar that
            # cannot be indexed); KeyboardInterrupt and real errors propagate.
            except (LookupError, TypeError):
                continue

In [65]:
# Run nbdev's notebook-to-module export; the output below lists the notebooks it converted.
from nbdev.export import notebook2script
notebook2script()

Converted 00_prep.ipynb.
Converted 01_features.ipynb.
Converted 02_eval.ipynb.
Converted 03_cnn.ipynb.
This cell doesn't have an export destination and was ignored:
 
Converted 04.0_experiments.ipynb.
Converted 04.1_experiments.codebooks.ipynb.
Converted 05_model.ipynb.
Converted 06_approach.ipynb.
This cell doesn't have an export destination and was ignored:
 
Converted 07_results.ipynb.
Converted 08_interp_results.ipynb.
Converted SimCLR.old.ipynb.
Converted [Scratch 1] Tango SimCLR.ipynb.
Converted [Scratch 2] Tango SimCLR.ipynb.
Converted index.ipynb.
Converted lesson1-pets.ipynb.


In [56]:
# NOTE(review): `results_00` is not defined anywhere in the visible part of this
# notebook — presumably left over from an earlier kernel session, so this cell
# will fail on a fresh Restart & Run All; confirm where it should be loaded from.
results_00

{'car_report': {'S0': {'vid_0': OrderedDict([(('S4', 'vid_0'),
                 0.9954718971541773),
                (('S4', 'vid_2'), 0.9945964770493874),
                (('S3', 'vid_0'), 0.9939976838697654),
                (('S3', 'vid_2'), 0.9931883158114038),
                (('S0', 'vid_2'), 0.9929479446868809),
                (('S4', 'vid_3'), 0.9928277720624915),
                (('S2', 'vid_0'), 0.9924880112472121),
                (('S0', 'vid_3'), 0.9916407619640583),
                (('S3', 'vid_3'), 0.9916281614374629),
                (('S3', 'vid_1'), 0.9908729735712648),
                (('S0', 'vid_1'), 0.9904742624406746),
                (('S4', 'vid_1'), 0.9896292570686976),
                (('S1', 'vid_2'), 0.9869854517117607),
                (('S1', 'vid_0'), 0.9865547211253137),
                (('S1', 'vid_3'), 0.986118447341411),
                (('S2', 'vid_1'), 0.9850550846216436),
                (('S2', 'vid_3'), 0.983298468501515),
                (('S1

In [57]:
# Inspect one ranking; per the output below, keys are 2-tuples and values are
# scores listed in descending order.
# NOTE(review): presumably the app -> bug -> report nesting that `evaluate` consumes — confirm.
results_00['car_report']['S0']['vid_0']

OrderedDict([(('S4', 'vid_0'), 0.9954718971541773),
             (('S4', 'vid_2'), 0.9945964770493874),
             (('S3', 'vid_0'), 0.9939976838697654),
             (('S3', 'vid_2'), 0.9931883158114038),
             (('S0', 'vid_2'), 0.9929479446868809),
             (('S4', 'vid_3'), 0.9928277720624915),
             (('S2', 'vid_0'), 0.9924880112472121),
             (('S0', 'vid_3'), 0.9916407619640583),
             (('S3', 'vid_3'), 0.9916281614374629),
             (('S3', 'vid_1'), 0.9908729735712648),
             (('S0', 'vid_1'), 0.9904742624406746),
             (('S4', 'vid_1'), 0.9896292570686976),
             (('S1', 'vid_2'), 0.9869854517117607),
             (('S1', 'vid_0'), 0.9865547211253137),
             (('S1', 'vid_3'), 0.986118447341411),
             (('S2', 'vid_1'), 0.9850550846216436),
             (('S2', 'vid_3'), 0.983298468501515),
             (('S1', 'vid_1'), 0.9793310935304032),
             (('S2', 'vid_2'), 0.9771318857464869)])

In [58]:
# Second sample ranking, taken from a different second-level / third-level key
# ('S3' / 'vid_2') of the same `results_00` structure.
results_00['car_report']['S3']['vid_2']

OrderedDict([(('S3', 'vid_3'), 0.9974038726941241),
             (('S0', 'vid_3'), 0.9959295871275722),
             (('S4', 'vid_2'), 0.9936751823924261),
             (('S4', 'vid_0'), 0.9935190270286357),
             (('S0', 'vid_0'), 0.9931883158114038),
             (('S2', 'vid_1'), 0.9918360169685073),
             (('S3', 'vid_0'), 0.991638748000079),
             (('S2', 'vid_3'), 0.9911052991068987),
             (('S4', 'vid_3'), 0.9908727497029006),
             (('S2', 'vid_0'), 0.9905291542036742),
             (('S0', 'vid_2'), 0.9903796871998533),
             (('S3', 'vid_1'), 0.9876266003899643),
             (('S2', 'vid_2'), 0.9865122651048643),
             (('S1', 'vid_0'), 0.9845948113235389),
             (('S1', 'vid_3'), 0.984062658344715),
             (('S1', 'vid_2'), 0.9839271505708713),
             (('S4', 'vid_1'), 0.9836702232253675),
             (('S0', 'vid_1'), 0.9822985850347496),
             (('S1', 'vid_1'), 0.9713923283154741)])