In [None]:
import cv2
import os
import pandas as pd
import numpy as np

In [None]:
def _mean_inverse_intersection(feature_a, feature_b):
    """Average reciprocal histogram intersection over the three histogram slots."""
    total = 0.0
    for channel in range(3):
        overlap = hist_intersection(feature_a[channel], feature_b[channel])
        # Histograms that do not overlap at all are "infinitely far" apart;
        # dividing by the zero overlap would fail instead.
        total += 1 / overlap if overlap > 0 else np.inf
    return total / 3


def _scale_by_max(distances):
    """Divide by the largest finite entry so the column's finite values lie in [0, 1]."""
    finite = distances[np.isfinite(distances)]
    largest = finite.max() if finite.size else 0.0
    # Nothing to scale when every distance is zero or infinite.
    return distances / largest if largest > 0 else distances


def compare_feature_vectors(vec_test, vec_dev, use_edge=False):
    """Find, for every test feature vector, the index of the closest dev vector.

    Each feature vector is indexed as ``[hist_0, hist_1, hist_2, edge]``: the
    first three entries are handed to ``hist_intersection`` and the fourth is
    used as a plain number.
    NOTE(review): ``hist_intersection`` is not defined in this notebook;
    presumably it comes from the Task1 notebook executed in
    ``find_closest_in_devset`` and returns a scalar - confirm.

    The histogram distance is the mean of the reciprocal intersections
    (histogram intersection version, see lecture 12.3.1). Every distance
    column is normalised by its maximum so that each feature has the same
    weight, and the columns are combined with the Euclidean norm.

    Parameters
    ----------
    vec_test : sequence of feature vectors to look up.
    vec_dev : sequence of candidate feature vectors.
    use_edge : bool, optional
        When True, the absolute difference of the edge values is added as a
        second distance column. The default (False) ranks by the histogram
        distance only, which is what this function has always done.

    Returns
    -------
    list of int
        One index into ``vec_dev`` per entry of ``vec_test``; ties resolve to
        the lowest index.

    Raises
    ------
    ValueError
        If there is a test vector to match but ``vec_dev`` is empty.
    """
    most_similar_list = []
    for feature_test in vec_test:
        if len(vec_dev) == 0:
            raise ValueError("vec_dev is empty: there is nothing to compare against")

        hist_dists = np.array(
            [_mean_inverse_intersection(feature_test, feature_dev) for feature_dev in vec_dev],
            dtype=float,
        )
        columns = [_scale_by_max(hist_dists)]

        if use_edge:
            edge_dists = np.array(
                [np.abs(feature_test[3] - feature_dev[3]) for feature_dev in vec_dev],
                dtype=float,
            )
            columns.append(_scale_by_max(edge_dists))

        # Euclidean norm across the (normalised) distance columns.
        combined = np.sqrt(np.sum(np.square(columns), axis=0))
        most_similar_list.append(int(np.argmin(combined)))

    return most_similar_list

In [None]:
def find_closest_in_devset(show_keyframes = False):
    %run ../Task1/Implementation.ipynb

    
    key_frames_test = extract_key_frames("../videos/TestSet", 1000)
    vid_features_test = extract_features_from_key_frames(key_frames_test, show_keyframes)

    key_frames_dev = extract_key_frames("../videos/DevSet", 1000)
    vid_features_dev = extract_features_from_key_frames(key_frames_dev, show_keyframes)

    most_similar_list = compare_feature_vectors(vid_features_test, vid_features_dev)

    
    film = 0
    hits = 0
    misses = 0
    for entry in most_similar_list:
        if film <= entry < film+4:
            hits += 1
        else:
            misses += 1

        film += 4

    recall = hits / len(key_frames_test)
    precision = hits / len(most_similar_list)
    if precision + recall != 0:
        f1 = (2 * precision * recall) / (precision + recall)
    else:
        f1 = 0

    print("files deemed most similar to the test set: ")
    print(most_similar_list)
    print("PRECISION: " + str(precision))
    print("RECALL: " + str(recall))
    print("F1 MEASURE: " + str(f1))

In [None]:
def extract_features_from_key_frames(key_frames, plot=False, bins=128):
    """Build one averaged colour/edge feature vector per video.

    For every key frame a histogram is computed for each of the three colour
    channels, and the share of Canny edge pixels in the grayscale image is
    measured. Both are averaged over the video's frames.

    Parameters
    ----------
    key_frames : sequence of sequences of images
        One inner sequence of frames per video. Entries that are None are
        ignored.
    plot : bool, optional
        When True, every edge map is shown with ``imshow``.
    bins : int, optional
        Number of histogram bins per channel (default 128).

    Returns
    -------
    list
        One ``[hist_ch0, hist_ch1, hist_ch2, mean_edge]`` entry per video.
        Each histogram is an array of shape ``(1, bins)`` and ``mean_edge`` is
        the average fraction of edge pixels. Histograms are stored in channel
        order 0, 1, 2; since the frames are converted with COLOR_BGR2GRAY they
        are treated as BGR, which makes index 0 the blue channel. A video
        without usable frames yields all-zero histograms and ``0.0``.
    """
    vid_features = []

    for vid in key_frames:
        # Skip frames that carry no pixel data (None entries).
        frames = [frame for frame in vid if frame is not None]

        channel_sums = [np.zeros((1, bins)) for _ in range(3)]
        sum_edge = 0.0

        for frame in frames:
            for channel in range(3):
                # The value range must span all 8-bit intensities: an upper
                # bound of `bins` would drop every pixel value >= bins.
                hist = cv2.calcHist([frame], [channel], None, [bins], [0, 256])
                channel_sums[channel] = channel_sums[channel] + np.transpose(hist)

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            edges = cv2.Canny(gray, 100, 200)
            if plot:
                imshow(edges)
            # Fraction of pixels marked as edge in this frame.
            sum_edge += np.count_nonzero(edges) / gray.size

        # Avoid dividing by zero for videos without frames; the sums are all
        # zero in that case, so the means stay zero.
        nr_of_frames = max(len(frames), 1)

        mean_hists = [np.divide(total, nr_of_frames) for total in channel_sums]
        mean_edge = sum_edge / nr_of_frames
        vid_features.append(mean_hists + [mean_edge])

    return vid_features

In [1]:
def imshow(img):
    """Show an image inline in the notebook.

    The image is JPEG-encoded with OpenCV and handed to IPython's display
    machinery.

    Parameters
    ----------
    img : image array accepted by ``cv2.imencode``.

    Raises
    ------
    ValueError
        If OpenCV reports that the image could not be encoded.
    """
    # Imported locally so the cell works on its own.
    import cv2
    from IPython.display import Image, display

    ok, encoded = cv2.imencode('.jpg', img)
    if not ok:
        raise ValueError("could not encode image as JPEG")

    # Pass real bytes: Image only sniffs the JPEG header from a bytes object,
    # otherwise the data would be labelled with the default PNG format.
    display(Image(data=encoded.tobytes()))

In [None]:
def extract_key_frames(directory, delimiter=1000):
    """Collect the key frames of every video found in ``directory``.

    For each video the cut positions are obtained from ``find_shots_hist``
    and the frame halfway between every pair of consecutive cuts is read as
    a key frame.
    NOTE(review): ``find_shots_hist`` is not defined in this notebook; it is
    assumed to return a 1-D NumPy array of frame indices - confirm.

    Parameters
    ----------
    directory : str
        Folder containing the video files.
    delimiter : int, optional
        Maximum number of videos to process (default 1000).

    Returns
    -------
    list of lists
        One list of frames per processed video, in sorted file-name order.
        Frames that could not be read are left out. Files whose path matches
        none of the known genre markers are skipped.
    """
    # Substrings of the file path that identify the genre of a video.
    genre_markers = (
        ("thriller", ("Devils", "Pulp")),
        ("dance", ("Move", "Dance")),
        ("documentary", ("Arctic",)),
    )

    all_key_frames = []
    # os.listdir() returns entries in arbitrary order; sort them so the
    # positions in the result are reproducible between runs and machines.
    for basename in sorted(os.listdir(directory)):
        if len(all_key_frames) >= delimiter:
            break

        filename = os.path.join(directory, basename)

        # Determine the genre afresh for every file so that an unknown file
        # never inherits the genre of the previous one.
        genre = None
        for candidate, markers in genre_markers:
            if any(marker in filename for marker in markers):
                genre = candidate
                break
        if genre is None:
            print("skipping " + basename + " (unknown genre)")
            continue

        print("processing " + basename)

        #get cuts
        idx_hist = find_shots_hist(filename, genre)

        #get key frame numbers in between cuts
        idx_key_frames = [
            int(round((start + end) / 2))
            for start, end in zip(idx_hist[:-1], idx_hist[1:])
        ]

        #capture key frames from video
        cap = cv2.VideoCapture(filename)
        try:
            key_frames = []
            for idx in idx_key_frames:
                # Seek to the frame index before reading.
                cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
                res, frame = cap.read()
                # Only keep frames that were actually decoded.
                if res:
                    key_frames.append(frame)
        finally:
            # Always give the file handle / decoder back.
            cap.release()

        all_key_frames.append(key_frames)

    return all_key_frames