In [None]:
! pip install tqdm
! pip install opencv-python
! pip install boto3
! pip install scipy

In [2]:
import boto3
from boto3.s3.transfer import S3Transfer
from tqdm import tqdm_notebook as tqdm
import json
import os
from os.path import isfile, join
from os import listdir
from os.path import isfile, join
import numpy as np
import pickle
import faiss
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (20,10)
import pandas as pd
import traceback
import scipy.ndimage
import itertools, operator
import datetime
import math

In [3]:
# Index every video file as shows[name][season][episode] -> filename.
# Filenames look like "<id>_<Show_Name>_<Sx>_<Ey>[_VOD|_high].mxf.mp4".
files = [entry for entry in os.listdir('videos') if isfile(join('videos', entry))]

shows = {}
for file in files:
    fullname = file.split('/')[-1]
    # strip the optional quality/VOD markers so season and episode are always the last two parts
    parts = fullname.replace('_VOD','').replace('_high','').split('_')

    name = "_".join(parts[1:-2])
    season = parts[-2]
    episode = parts[-1].split('.')[0]

    # setdefault keeps the first filename seen for a given (name, season, episode)
    shows.setdefault(name, {}).setdefault(season, {}).setdefault(episode, fullname)

print(json.dumps(shows, indent=2))
print("\n Total of {} files".format(len(files)))

{
  "Divorce": {
    "S3": {
      "E1": "217394H1_Divorce_S3_E1.mxf.mp4",
      "E3": "217396H1_Divorce_S3_E3.mxf.mp4",
      "E2": "217395H1_Divorce_S3_E2.mxf.mp4"
    },
    "S2": {
      "E2": "208888H1_Divorce_S2_E2.mxf.mp4",
      "E3": "208889H1_Divorce_S2_E3.mxf.mp4",
      "E1": "208887H1_Divorce_S2_E1.mxf.mp4"
    },
    "S1": {
      "E1": "193757H1_Divorce_S1_E1.mxf.mp4",
      "E3": "193917H1_Divorce_S1_E3.mxf.mp4",
      "E2": "193916H1_Divorce_S1_E2.mxf.mp4"
    }
  },
  "Expeditie_Robinson": {
    "S11": {
      "E2": "226558H1_Expeditie_Robinson_S11_E2.mxf.mp4",
      "E3": "226559H1_Expeditie_Robinson_S11_E3.mxf.mp4",
      "E1": "226557H1_Expeditie_Robinson_S11_E1.mxf.mp4"
    },
    "S13": {
      "E1": "251218H1_Expeditie_Robinson_S13_E1.mxf.mp4",
      "E2": "250463H1_Expeditie_Robinson_S13_E2.mxf.mp4"
    },
    "S14": {
      "E3": "260888H1_Expeditie_Robinson_S14_E3.mxf.mp4",
      "E1": "262135H1_Expeditie_Robinson_S14_E1.mxf.mp4",
      "E2": "262136H1_Expedi

In [16]:
def merge_consecutive_timestamps(timestamps):
    """
    Merges consecutive timestamps in a list if they're less than 2 seconds apart.
    Example: [(0,5), (5,10), (20,30)] gets combined into [(0,10), (20,30)]

    Chains are merged completely, so [(0,5), (5,10), (10,15)] becomes [(0,15)].

    Parameters:
        timestamps: list of (start, end) pairs, in the order they should be merged.

    Returns:
        A new list of (start, end) tuples; the input list is not modified.
    """
    result = []
    for start, end in timestamps:
        # compare against the end of the interval merged so far, so that a third
        # (fourth, ...) adjacent interval keeps extending the same merged interval
        if result and abs(result[-1][1] - start) < 2:
            result[-1] = (result[-1][0], end)
        else:
            result.append((start, end))

    return result

def to_seconds(time):
    """
    Converts string of format hh:mm:ss to total number of seconds.

    Parameters:
        time: a string "hh:mm:ss" (the seconds part may have decimals, which are
              truncated), the string 'None', None, NaN, or a number.

    Returns:
        int: the total number of seconds, or -1 when the value is missing
             ('None', None or NaN). A number that is not NaN is treated as
             already being in seconds and returned as an int, so applying this
             function twice to the same column does not destroy the data.

    Raises:
        ValueError: if a string is not of the form hh:mm:ss.
    """
    if isinstance(time, str):
        if time == 'None':
            return -1
        parts = time.split(":")
        if len(parts) != 3:
            raise ValueError("expected hh:mm:ss, got {!r}".format(time))
        try:
            hours = int(parts[0])
            minutes = int(parts[1])
            seconds = int(float(parts[2]))
        except ValueError:
            raise ValueError("expected hh:mm:ss, got {!r}".format(time))
        return hours*60*60 + minutes * 60 + seconds

    # empty CSV cells are read by pandas as NaN
    if time is None or math.isnan(time):
        return -1

    # already numeric: interpret as seconds
    return int(time)

def get_skippable_timestamps_by_filename(filename, df):
    """
    Collects the annotated skippable ranges of one video.

    Takes the first row of `df` whose 'filename' column equals `filename` and
    gathers the (start, end) pair of each segment type (recap, opening credits,
    preview, closing credits) whose start is not -1. The pairs are passed
    through merge_consecutive_timestamps before being returned.

    Raises IndexError when no row matches `filename`.
    """
    row = df.loc[df['filename'] == filename].to_dict(orient='records')[0]

    segments = [
        (row[kind + "_start"], row[kind + "_end"])
        for kind in ("recap", "openingcredits", "preview", "closingcredits")
        if row[kind + "_start"] != -1  # -1 marks a segment that is not present
    ]

    return merge_consecutive_timestamps(segments)

# Load the annotations and convert every *_start / *_end column with to_seconds.
annotations = pd.read_csv("annotations_legal_new_final.csv").dropna(how='all')

for segment in ("recap", "openingcredits", "preview", "closingcredits"):
    for edge in ("start", "end"):
        column = "{}_{}".format(segment, edge)
        annotations[column] = annotations[column].apply(to_seconds)

annotations.head()

Unnamed: 0,filename,recap_start,recap_end,openingcredits_start,openingcredits_end,preview_start,preview_end,closingcredits_start,closingcredits_end,Unnamed: 9
0,193757H1_Divorce_S1_E1.mxf.mp4,-1,-1,82,141,2473,2503,-1,-1,
1,193916H1_Divorce_S1_E2.mxf.mp4,0,75,98,158,2561,2590,-1,-1,
2,193917H1_Divorce_S1_E3.mxf.mp4,0,88,111,171,-1,-1,-1,-1,
5,208887H1_Divorce_S2_E1.mxf.mp4,0,100,115,175,2508,2538,-1,-1,
6,208888H1_Divorce_S2_E2.mxf.mp4,0,71,84,143,2737,2767,-1,-1,


In [17]:
# Spot check: the merged skippable (start, end) ranges annotated for a single episode.
get_skippable_timestamps_by_filename("217394H1_Divorce_S3_E1.mxf.mp4", annotations)

[(0, 70), (99, 159), (2494, 2525)]

In [33]:
# Directory with one pickled feature-vector file per video ("<video filename>.p"), read by do_faiss.
# Exactly one of the lines below should be active; the others are alternative feature types.
# vectors_dir = "./videos/full_histograms_binsize30_framejump3/"
# vectors_dir = "./videos_resized_w320/colortexturemoments_framejump3/"
vectors_dir = "./videos_resized_w320/color_histogram_binsize180_framejump3/"
# vectors_dir = "./videos_resized_w320/color_histogram_binsize300_framejump3/"
# vectors_dir = "./videos_resized_w224/cnn_vectors_framejump3/" # TODO: change dir name

In [34]:
def do_faiss(episodes):
    """
    For every episode, finds for each of its feature vectors the distance to the
    nearest feature vector in all *other* episodes of the given list.

    The vectors of episode `e` are loaded from the pickle file
    vectors_dir + e + '.p'. For each episode a flat L2 faiss index is built from
    the vectors of the remaining episodes and queried with k=1. The resulting
    distance curve is smoothed with a median filter of size 10.

    Parameters:
        episodes: list of video filenames (without the '.p' suffix).

    Returns:
        list of (episode filename, 1-D array of smoothed nearest-neighbour
        distances), in the same order as `episodes`. Empty list for no episodes.
    """
    print(episodes)
    if not episodes:
        return []

    histograms = []
    lengths = []

    for episode in episodes:
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # vector files that were generated locally by a trusted process.
        with open(vectors_dir + episode + '.p', "rb") as vector_file:
            h = np.array(pickle.load(vector_file), np.float32)
        lengths.append(h.shape[0])
        histograms.append(h)

    print(histograms[0].shape)
    histograms = np.vstack(histograms)
    print(lengths)

    results = []
    s = 0  # row offset of the current episode inside the stacked matrix
    for episode, length in zip(episodes, lengths):
        e = s + length
        print("q - {} : {}".format(s,e))
        print("rest - < {}".format(s))
        q = histograms[s:e]
        # everything except the query episode itself
        rest = np.concatenate((histograms[:s], histograms[e:]), axis=0)

        vector_size = q.shape[1]
        # build the index, set vector size
        print("Building index with size: {}".format(rest.shape[0]))
        index = faiss.IndexFlatL2(vector_size)
        # add vectors to the index
        index.add(rest)

        # we only want the single nearest neighbour
        k = 1
        # search for matches with q
        scores, indexes = index.search(q, k)

        # distance to the nearest neighbour, smoothed to suppress single-frame spikes
        result = scores[:,0]
        result = scipy.ndimage.median_filter(result, 10)

        results.append((episode, result))
        s = e

        print()
    print()
    print()
    print("=========================================================================================")
    return results

In [35]:
# wrapped in a function so that an interrupted run can stop early with `return`
def run():
    """
    Calls do_faiss for every season of every show in `shows` and returns the
    concatenation of all the lists it produced.

    A KeyboardInterrupt stops the loop and returns what was collected so far.
    """
    collected = []
    for show, seasons in shows.items():
        for season, episode_map in seasons.items():
            # sort numerically so the order is E1, E2, .., E10 instead of E1, E10, E2
            numbers = sorted(int(key.replace("E","")) for key in episode_map.keys())
            ordered = [episode_map["E{}".format(number)] for number in numbers]
            print(ordered)
            try:
                collected.extend(do_faiss(ordered))

            except KeyboardInterrupt:
                print("STOPPPPP!!!!!!!!")
                return collected

    return collected
    
# `results` is the list of (filename, distance curve) pairs that experiment() iterates over.
results = run()

['217394H1_Divorce_S3_E1.mxf.mp4', '217395H1_Divorce_S3_E2.mxf.mp4', '217396H1_Divorce_S3_E3.mxf.mp4']
['217394H1_Divorce_S3_E1.mxf.mp4', '217395H1_Divorce_S3_E2.mxf.mp4', '217396H1_Divorce_S3_E3.mxf.mp4']
(21041, 180)
[21041, 21873, 21891]
q - 0 : 21041
rest - < 0
Building index with size: 43764

q - 21041 : 42914
rest - < 21041
Building index with size: 42932

q - 42914 : 64805
rest - < 42914
Building index with size: 42914



['208887H1_Divorce_S2_E1.mxf.mp4', '208888H1_Divorce_S2_E2.mxf.mp4', '208889H1_Divorce_S2_E3.mxf.mp4']
['208887H1_Divorce_S2_E1.mxf.mp4', '208888H1_Divorce_S2_E2.mxf.mp4', '208889H1_Divorce_S2_E3.mxf.mp4']
(21153, 180)
[21153, 23059, 22895]
q - 0 : 21153
rest - < 0
Building index with size: 45954

q - 21153 : 44212
rest - < 21153
Building index with size: 44048

q - 44212 : 67107
rest - < 44212
Building index with size: 44212



['193757H1_Divorce_S1_E1.mxf.mp4', '193916H1_Divorce_S1_E2.mxf.mp4', '193917H1_Divorce_S1_E3.mxf.mp4']
['193757H1_Divorce_S1_E1.mxf.mp

(24409, 180)
[24409, 23572, 21110, 21990, 21177, 22691]
q - 0 : 24409
rest - < 0
Building index with size: 110540

q - 24409 : 47981
rest - < 24409
Building index with size: 111377

q - 47981 : 69091
rest - < 47981
Building index with size: 113839

q - 69091 : 91081
rest - < 69091
Building index with size: 112959

q - 91081 : 112258
rest - < 91081
Building index with size: 113772

q - 112258 : 134949
rest - < 112258
Building index with size: 112258



['219119H1_Familie_Kruys_S1_E1_VOD.mxf.mp4', '219120H1_Familie_Kruys_S1_E2_VOD.mxf.mp4', '219121H1_Familie_Kruys_S1_E3_VOD.mxf.mp4']
['219119H1_Familie_Kruys_S1_E1_VOD.mxf.mp4', '219120H1_Familie_Kruys_S1_E2_VOD.mxf.mp4', '219121H1_Familie_Kruys_S1_E3_VOD.mxf.mp4']
(19868, 180)
[19868, 20406, 20828]
q - 0 : 19868
rest - < 0
Building index with size: 41234

q - 19868 : 40274
rest - < 19868
Building index with size: 40696

q - 40274 : 61102
rest - < 40274
Building index with size: 40274



['261245H1_Familie_Kruys_S4_E1.mxf.mp4', '261246H1_

In [36]:
def fill_gaps(sequence, lookahead):
    """
    Closes short gaps of False values between True values, in place.

    A run of False values is set to True when the True value directly before it
    and the True value directly after it are fewer than `lookahead` positions
    apart. Gaps are filled over the whole sequence, including the last
    `lookahead` positions.

    Parameters:
        sequence:  mutable sequence of booleans (e.g. a numpy bool array).
        lookahead: maximum distance (exclusive) between the two True values
                   surrounding a gap for the gap to be filled.

    Returns:
        The same `sequence` object, with the gaps filled.
    """
    last_true = None
    for index in range(len(sequence)):
        if not sequence[index]:
            continue

        # a gap exists when the previous True is not the direct neighbour
        if last_true is not None and 1 < index - last_true < lookahead:
            for gap_index in range(last_true + 1, index):
                sequence[gap_index] = True

        last_true = index

    return sequence

def to_time_string(seconds):
    """
    Given a number of seconds, returns the string form of the matching
    datetime.timedelta, i.e. h:mm:ss with the hours not zero-padded
    (example: 5445 -> "1:30:45").
    """
    duration = datetime.timedelta(seconds=seconds)
    return str(duration)

def sum_timestamps(timestamps):
    """
    Get the total number of seconds covered by a list of timestamps formatted like: (start,end)
    """
    return sum(end - start for start, end in timestamps)


def overlap(interval1, interval2):
    """
    Returns the length of the intersection of two (start, end) intervals,
    or 0 when they do not intersect.
    """
    latest_start = max(interval1[0], interval2[0])
    earliest_end = min(interval1[1], interval2[1])
    return max(0, earliest_end - latest_start)

def match_detections_precision_recall(detected, ground_truth, verbose=False):
    """
    Matches two lists of (starttime, endtime) ranges and counts overlapping seconds.

    Every detected range is compared with every ground-truth range. A detected
    start or end that lies less than 2 seconds from the ground-truth start or
    end is snapped onto it before the overlap is measured.

    Returns a tuple of
        (total seconds in ground_truth,
         total seconds in detected,
         summed overlap between detected and ground_truth).
    With verbose=True the inputs, the three totals and, where the denominator
    is positive, precision and recall are printed.
    """
    if verbose:
        print("Comparing detections")
        print("detected: \t \t {}".format(detected))
        print("ground truth: \t \t {}".format(ground_truth))

    total_relevant_seconds = sum_timestamps(ground_truth)
    total_detected_seconds = sum_timestamps(detected)

    relevant_detected_seconds = 0
    for gt_start, gt_end in ground_truth:
        for det_start, det_end in detected:
            # tolerate small boundary errors: snap near-misses onto the ground truth
            snapped_start = gt_start if abs(gt_start - det_start) < 2 else det_start
            snapped_end = gt_end if abs(gt_end - det_end) < 2 else det_end

            relevant_detected_seconds += overlap((gt_start, gt_end), (snapped_start, snapped_end))

    if verbose:
        print("total relevant seconds: {}".format(total_relevant_seconds)) #relevant documents
        print("total detected seconds: {}".format(total_detected_seconds)) #retrieved documents
        print("relevant detected seconds: {}".format(relevant_detected_seconds)) #relevant documents AND retrieved documents

        if total_detected_seconds > 0:
            print("Precision = {}".format(relevant_detected_seconds / total_detected_seconds))

        if total_relevant_seconds > 0:
            print("Recall = {}".format(relevant_detected_seconds / total_relevant_seconds))

    return total_relevant_seconds, total_detected_seconds, relevant_detected_seconds

# TESTS
# print("overlap: {}".format(overlap((0,20),(5,15))))

# # case 1
# d = [(92.75999999999999, 120.47999999999999), (154.67999999999998, 217.92)]
# gt =  [(154, 218)]
# match_detections_precision_recall(d, gt)

# print()

# # case 2
# d =  [(99.6, 159.0)]
# gt = [(0, 70), (99, 159)]
# match_detections_precision_recall(d, gt)

# print()

# # case 3
# d = [(92.75999999999999, 120.47999999999999), (154.67999999999998, 217.92), (300,400)]
# gt =  [(154, 218)]
# match_detections_precision_recall(d, gt)

# #case 4
# d = [(7.319999999999999, 134.88), (406.32, 423.47999999999996)]
# gt = [(0, 63), (75, 135)]
# match_detections_precision_recall(d, gt)

# Worked example with verbose output: three detected ranges against two ground-truth ranges.
d= [(0.0, 48.79835917394137), (62.43687494306858, 203.57674455385285), (2942.791306184429, 3001.47446119104)]
gt = [(0, 198), (2945, 3006)]
match_detections_precision_recall(d, gt,True)


def max_two_values(d):
    """
    Returns the keys of the two largest values in the dict, largest first.

    On ties the key that was inserted first wins. The dict is not modified.
    When the dict has fewer than two entries, all of its keys are returned.

    Parameters:
        d: dict mapping keys to comparable values.

    Returns:
        list with at most two keys of `d`.
    """
    # sorted() is stable, also with reverse=True, so equal values keep insertion order
    return sorted(d, key=d.get, reverse=True)[:2]

def get_two_longest_timestamps(timestamps):
    """
    Returns the two longest (start, end) ranges of the list, as selected by
    max_two_values on a dict mapping each range to its duration.

    A list with two or fewer ranges is returned unchanged.
    """
    if len(timestamps) > 2:
        durations = {(start, end): end - start for start, end in timestamps}
        return max_two_values(durations)

    # nothing to select from
    return timestamps

# TESTS
# timestamps = [(0.0, 63.12), (0,30), (84.36, 143.76), (476.28, 480), (490,510)]
# print(get_two_longest_timestamps(timestamps))

Comparing detections
detected: 	 	 [(0.0, 48.79835917394137), (62.43687494306858, 203.57674455385285), (2942.791306184429, 3001.47446119104)]
ground truth: 	 	 [(0, 198), (2945, 3006)]
total relevant seconds: 259
total detected seconds: 248.62138379133694
relevant detected seconds: 240.83594542191287
Precision = 0.9686855641670862
Recall = 0.9298685151425208


In [40]:
def experiment(percentile, plot = False, verbose=False):
    """
    Runs the segment detection on every entry of the global `results` and prints
    the overall precision and recall against the global `annotations`.

    For each (filename, distance curve) pair:
      1. the threshold is the given percentile of the curve;
      2. samples below the threshold are marked and gaps are closed with
         fill_gaps, using a lookahead of 10 seconds worth of vectors;
      3. marked runs longer than 15 seconds are kept when they end in the first
         fifth of the video or within the last 15 seconds of it;
      4. the two longest kept runs from the beginning plus all kept runs from
         the end are compared with the annotated ranges.

    Parameters:
        percentile: percentile (0-100) of each curve that is used as threshold.
        plot:       when True, plot the curve with its threshold and the mask.
        verbose:    when True, print per-video details.

    Returns:
        None. Precision and recall are printed; a value is printed as nan when
        its denominator is zero (nothing detected / nothing annotated).
    """
    # every third frame was turned into a feature vector
    framejump = 3

    total_relevant_seconds = 0
    total_detected_seconds = 0
    total_relevant_detected_seconds = 0

    for fn, result in results:
        if verbose:
            print(fn)

        framerate = 25
        if "Dexter" in fn:
            framerate = 23.976216

        # number of feature vectors per second of video
        vectors_per_second = framerate / framejump

        threshold = np.percentile(result, percentile)

        if plot:
            plt.figure()
            plt.plot(result)
            plt.plot([threshold]*len(result))
            plt.show()

        below_threshold = fill_gaps(result < threshold, int(vectors_per_second * 10)) #fill gaps over 10 seconds

        if plot:
            plt.plot(below_threshold)
            plt.show()

        # index lists of every consecutive run of True values in the mask
        nonzeros = [[i for i,value in it] for key, it in itertools.groupby(enumerate(below_threshold), key=operator.itemgetter(1)) if key != 0]

        detected_beginning = []
        detected_end = []

        for nonzero in nonzeros:
            start = nonzero[0]
            end = nonzero[-1]

            occurs_at_beginning = end < len(result) / 5              #run ends in first 20% of the video
            ends_at_the_end = end > len(result) - 15 * vectors_per_second #run ends in the last 15 seconds

            if (end - start > (15 * vectors_per_second) #only count when longer than 15 seconds
                and (occurs_at_beginning or ends_at_the_end)):

                # convert vector indexes to seconds
                start = start / vectors_per_second
                end = end / vectors_per_second

                if occurs_at_beginning:
                    detected_beginning.append((start,end))
                elif ends_at_the_end:
                    detected_end.append((start,end))

        detected = get_two_longest_timestamps(detected_beginning) + detected_end

        try:
            # get list of ground truth timestamps [(start,end), (start,end)]
            ground_truths = get_skippable_timestamps_by_filename(fn, annotations)
            relevant_seconds, detected_seconds, relevant_detected_seconds = match_detections_precision_recall(detected, ground_truths, verbose)

            total_relevant_seconds += relevant_seconds
            total_detected_seconds += detected_seconds
            total_relevant_detected_seconds += relevant_detected_seconds
        except Exception:
            # best effort: report the failing video (e.g. no annotation row) and
            # continue, but let KeyboardInterrupt stop the run
            traceback.print_exc()
            print("Error!!!")

        if verbose:
            print("=========================================================================================")

    # guard against division by zero when nothing was detected or annotated
    if total_detected_seconds > 0:
        precision = total_relevant_detected_seconds / total_detected_seconds
    else:
        precision = float("nan")

    if total_relevant_seconds > 0:
        recall = total_relevant_detected_seconds / total_relevant_seconds
    else:
        recall = float("nan")

    print("Total precision = {0:.3f}".format(precision))
    print("Total recall = {0:.3f}".format(recall))

# Sweep the threshold percentile for the currently selected feature vectors.
print("Computing with feature vectors: {}".format(vectors_dir))
for p in (5, 7.5, 10):
    print("Results percentile={}".format(p))
    # pass plot=True, verbose=True to inspect every video individually
    experiment(p, plot=False, verbose=False)
    print()
    print("----------------------------------------------------------------------------------------------------")

Computing with feature vectors: ./videos_resized_w320/color_histogram_binsize180_framejump3/
Results percentile=5
Total precision = 0.880
Total recall = 0.704

----------------------------------------------------------------------------------------------------
Results percentile=7.5
Total precision = 0.828
Total recall = 0.820

----------------------------------------------------------------------------------------------------
Results percentile=10
Total precision = 0.782
Total recall = 0.862

----------------------------------------------------------------------------------------------------
