In [17]:
import cv2
import numpy as np
import time
import os
from image_match.goldberg import ImageSignature
import pandas as pd
import pickle

In [11]:
def framing(video_path):
    """Read every frame of a video and return grayscale and original copies.

    Parameters
    ----------
    video_path : str
        Path passed to ``cv2.VideoCapture``.

    Returns
    -------
    tuple[list, list]
        ``(gray_frames, original_frames)``: two lists of equal length holding,
        for each decoded frame, its ``COLOR_BGR2GRAY`` conversion and the
        untouched frame as returned by ``cap.read()``. Both lists are empty
        when no frame could be read.

    Notes
    -----
    All frames are kept in memory at full resolution. The reported frame rate
    is printed for information only; it does not influence which frames are
    returned.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        if fps == 0:
            print("Not available")
        print(fps)

        fr = []
        res = []
        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream (or the source could not be opened/decoded).
                break
            fr.append(frame)
            res.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    finally:
        # Always free the decoder/file handle, even if a conversion fails.
        cap.release()

    return res, fr

In [12]:
def keyframeDetection(video_path, global_factor=7.8, frame_factor=5):
    """Select keyframes of a video by thresholding consecutive-frame differences.

    For every pair of consecutive grayscale frames the absolute difference
    image is computed. A global threshold ``mean + global_factor * std`` is
    taken over all difference images together; each individual difference
    image gets its own score ``mean + frame_factor * std``. Frame ``i`` is a
    keyframe when the score of the difference between frame ``i`` and frame
    ``i + 1`` reaches the global threshold.

    Each selected frame is also written, in its grayscale form, to
    ``keyframes/<video name>/<i>.jpg`` (the directory is created if needed).

    Parameters
    ----------
    video_path : str
        Path of the video; the part of its base name before the first ``.``
        names the output directory.
    global_factor : float, optional
        Multiplier of the standard deviation in the global threshold
        (default 7.8, the value previously hard-coded).
    frame_factor : float, optional
        Multiplier of the standard deviation in the per-frame score
        (default 5, the value previously hard-coded).

    Returns
    -------
    tuple[list, list]
        ``(key_fr, imp_fr)``: the original (non-grayscale) keyframes and the
        matching ``[frame_index, score]`` pairs. Both lists are empty when the
        video yields no frames or only a single frame, so callers can always
        unpack two values.
    """
    frames, ogframes = framing(video_path)
    if len(frames) == 0:
        print("Cannot compare video because of zero fps.")
        # Return the same shape as the normal path; a scalar here would make
        # ``a, b = keyframeDetection(...)`` raise TypeError.
        return [], []

    video_name = os.path.basename(video_path).split('.')[0]
    keyframePath = os.path.join('keyframes', video_name)
    os.makedirs(keyframePath, exist_ok=True)

    # Stack the difference images once; mean and std both reuse this array.
    diff = np.array(
        [cv2.absdiff(frames[i], frames[i + 1]) for i in range(len(frames) - 1)]
    )
    if len(diff) == 0:
        # A single frame has nothing to be compared with; avoid NaN statistics.
        print("Total Number of keyframes saved: {}".format(0))
        return [], []

    ts = diff.mean() + (global_factor * diff.std())    # global threshold

    imp_fr = []
    key_fr = []
    for i in range(len(diff)):
        fr_ts = diff[i].mean() + (frame_factor * diff[i].std())   # per-frame score
        if fr_ts >= ts:
            imp_fr.append([i, fr_ts])
            key_fr.append(ogframes[i])
            # The grayscale frame is what gets written to disk; the returned
            # keyframe is the original frame.
            cv2.imwrite(os.path.join(keyframePath, str(i) + '.jpg'), frames[i])

    print("Total Number of keyframes saved: {}".format(len(key_fr)))
    return key_fr, imp_fr

In [58]:
# Extract keyframes from test1.mp4. keyframeDetection returns (key_fr, imp_fr):
# the original frames picked as keyframes and their [frame_index, score] pairs,
# so `keys_canned` holds index/score pairs rather than images.
# NOTE(review): the recorded output of this cell ("Saving Frame number: ...") is
# not something the keyframeDetection defined in this notebook prints — the cell
# looks stale and should be re-run.
keys_og, keys_canned  = keyframeDetection("index-videos/test1.mp4")

25
Saving Frame number: 493

In [5]:
# Extract keyframes from test3.mp4; this rebinds keys_og / keys_canned.
# The second value is the list of [frame_index, score] pairs, not images.
keys_og, keys_canned  = keyframeDetection("index-videos/test3.mp4")

29
Saving Frame number: 251

In [4]:
# Extract keyframes from test4.mp4; this rebinds keys_og / keys_canned again, so
# only the result of whichever of these cells ran last is kept in the kernel.
keys_og, keys_canned  = keyframeDetection("index-videos/test4.mp4")

30
Saving Frame number: 269

In [13]:
def imageDist(img1, img2):
    """Return the distance between two images as measured by ``ImageSignature``.

    A fresh ``ImageSignature`` is created, a signature is generated for
    ``img1`` and then for ``img2``, and the two signatures are handed to
    ``normalized_distance``, whose result is returned unchanged.
    """
    signer = ImageSignature()
    first_sig, second_sig = [signer.generate_signature(image) for image in (img1, img2)]
    return signer.normalized_distance(first_sig, second_sig)

In [14]:
def videoSimilarity(video1_path, video2_path):
    """Return a similarity percentage between the keyframes of two videos.

    Keyframes are extracted from both videos with ``keyframeDetection``. Every
    keyframe of video 1 is compared with every keyframe of video 2 through
    ``ImageSignature.normalized_distance``; the similarity of a pair is
    ``1 - distance``. For each keyframe of video 1 the best similarity over
    video 2 is kept, and the mean of these best values times 100 is returned.
    The measure is therefore not symmetric in its two arguments.

    Parameters
    ----------
    video1_path, video2_path : str
        Paths forwarded to ``keyframeDetection``.

    Returns
    -------
    float
        Similarity in percent; ``0.0`` when either video yields no keyframes.
    """
    def _original_keyframes(path):
        # keyframeDetection may signal "no frames" with a non-sequence value;
        # treat that the same as "no keyframes" instead of failing on unpack.
        detected = keyframeDetection(path)
        return detected[0] if isinstance(detected, (tuple, list)) else []

    keys_og1 = _original_keyframes(video1_path)
    keys_og2 = _original_keyframes(video2_path)
    if len(keys_og1) == 0 or len(keys_og2) == 0:
        # Nothing to compare; avoids a division by zero further down.
        return 0.0

    # Generate each signature once (n + m calls) rather than once per pair.
    gis = ImageSignature()
    signatures1 = [gis.generate_signature(img) for img in keys_og1]
    signatures2 = [gis.generate_signature(img) for img in keys_og2]

    dist_mat = []
    for count, sig1 in enumerate(signatures1, start=1):
        dist_mat.append([gis.normalized_distance(sig1, sig2) for sig2 in signatures2])
        print("row", count, end='\r')

    sim_df = pd.DataFrame(1 - np.array(dist_mat))
    sim_percent = ((sim_df.max(axis=1).sum()) / len(sim_df)) * 100
    return sim_percent

In [6]:
# Similarity (percent) of test3 against test4: for every keyframe of the first
# video the best match among the second video's keyframes is taken, and those
# best values are averaged. Swapping the arguments can change the result.
videoSimilarity("index-videos/test3.mp4", "index-videos/test4.mp4")

29
Total Number of keyframes saved: 12
30
Total Number of keyframes saved: 9
row 12

54.37048042660133

In [7]:
# Same comparison for test5 (rows of the similarity matrix) against test6 (columns).
videoSimilarity("index-videos/test5.mp4", "index-videos/test6.mp4")

30
Total Number of keyframes saved: 36
30
Total Number of keyframes saved: 16
row 36

43.63070121747681

In [8]:
# Same comparison for test7 (rows of the similarity matrix) against test8 (columns).
videoSimilarity("index-videos/test7.mp4", "index-videos/test8.mp4")

30
Total Number of keyframes saved: 16
25
Total Number of keyframes saved: 16
row 16

83.99155870885365

In [16]:
# NOTE(review): `dist` is not assigned by any cell visible in this notebook (inside
# videoSimilarity it is only a local variable), so this cell fails on a fresh
# kernel with NameError. Presumably it was a pairwise distance matrix left over
# from an earlier session — confirm and add the cell that builds it.
df = pd.DataFrame(dist)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36
0,0.83659,0.782255,0.806073,0.758445,0.857035,0.808961,0.812963,0.801227,0.799698,0.802003,...,0.75152,0.758191,0.75746,0.752741,0.754461,0.758401,0.760234,0.750481,0.746232,0.750541
1,0.778839,0.722474,0.752362,0.774479,0.79085,0.747216,0.754323,0.748875,0.747802,0.745278,...,0.706031,0.713024,0.711479,0.7083,0.711459,0.709209,0.713207,0.702863,0.700546,0.700533
2,0.829872,0.752116,0.783254,0.753238,0.856678,0.799426,0.804904,0.79724,0.795716,0.793628,...,0.750277,0.756677,0.756677,0.750714,0.753798,0.756198,0.758696,0.747131,0.743066,0.745782
3,0.804988,0.77623,0.790986,0.818622,0.805711,0.757216,0.765909,0.761381,0.761116,0.756842,...,0.72218,0.725229,0.719944,0.716772,0.721425,0.721557,0.720965,0.710672,0.709199,0.716015
4,0.627592,0.711427,0.623739,0.64498,0.0,0.523604,0.521595,0.504862,0.510185,0.508433,...,0.649893,0.649585,0.649585,0.651113,0.649893,0.648658,0.647093,0.651415,0.650504,0.651113
5,0.627592,0.711427,0.623739,0.64498,0.0,0.523604,0.521595,0.504862,0.510185,0.508433,...,0.649893,0.649585,0.649585,0.651113,0.649893,0.648658,0.647093,0.651415,0.650504,0.651113
6,0.627592,0.711427,0.623739,0.64498,0.0,0.523604,0.521595,0.504862,0.510185,0.508433,...,0.649893,0.649585,0.649585,0.651113,0.649893,0.648658,0.647093,0.651415,0.650504,0.651113
7,0.627592,0.711427,0.623739,0.64498,0.0,0.523604,0.521595,0.504862,0.510185,0.508433,...,0.649893,0.649585,0.649585,0.651113,0.649893,0.648658,0.647093,0.651415,0.650504,0.651113
8,0.627592,0.711427,0.623739,0.64498,0.0,0.523604,0.521595,0.504862,0.510185,0.508433,...,0.649893,0.649585,0.649585,0.651113,0.649893,0.648658,0.647093,0.651415,0.650504,0.651113
9,0.647477,0.687709,0.666314,0.679915,0.503955,0.191779,0.171957,0.033077,0.086947,0.087134,...,0.447148,0.448333,0.446572,0.452901,0.447148,0.451892,0.452577,0.460302,0.456965,0.458057


In [18]:
# Turn distances into similarities (1 - distance), the same step videoSimilarity
# performs internally.
# NOTE(review): relies on the global `dist`, which no visible cell defines.
sim_mat = 1-np.array(dist)
sim_df = pd.DataFrame(sim_mat)
sim_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27,28,29,30,31,32,33,34,35,36
0,0.16341,0.217745,0.193927,0.241555,0.142965,0.191039,0.187037,0.198773,0.200302,0.197997,...,0.24848,0.241809,0.24254,0.247259,0.245539,0.241599,0.239766,0.249519,0.253768,0.249459
1,0.221161,0.277526,0.247638,0.225521,0.20915,0.252784,0.245677,0.251125,0.252198,0.254722,...,0.293969,0.286976,0.288521,0.2917,0.288541,0.290791,0.286793,0.297137,0.299454,0.299467
2,0.170128,0.247884,0.216746,0.246762,0.143322,0.200574,0.195096,0.20276,0.204284,0.206372,...,0.249723,0.243323,0.243323,0.249286,0.246202,0.243802,0.241304,0.252869,0.256934,0.254218
3,0.195012,0.22377,0.209014,0.181378,0.194289,0.242784,0.234091,0.238619,0.238884,0.243158,...,0.27782,0.274771,0.280056,0.283228,0.278575,0.278443,0.279035,0.289328,0.290801,0.283985
4,0.372408,0.288573,0.376261,0.35502,1.0,0.476396,0.478405,0.495138,0.489815,0.491567,...,0.350107,0.350415,0.350415,0.348887,0.350107,0.351342,0.352907,0.348585,0.349496,0.348887
5,0.372408,0.288573,0.376261,0.35502,1.0,0.476396,0.478405,0.495138,0.489815,0.491567,...,0.350107,0.350415,0.350415,0.348887,0.350107,0.351342,0.352907,0.348585,0.349496,0.348887
6,0.372408,0.288573,0.376261,0.35502,1.0,0.476396,0.478405,0.495138,0.489815,0.491567,...,0.350107,0.350415,0.350415,0.348887,0.350107,0.351342,0.352907,0.348585,0.349496,0.348887
7,0.372408,0.288573,0.376261,0.35502,1.0,0.476396,0.478405,0.495138,0.489815,0.491567,...,0.350107,0.350415,0.350415,0.348887,0.350107,0.351342,0.352907,0.348585,0.349496,0.348887
8,0.372408,0.288573,0.376261,0.35502,1.0,0.476396,0.478405,0.495138,0.489815,0.491567,...,0.350107,0.350415,0.350415,0.348887,0.350107,0.351342,0.352907,0.348585,0.349496,0.348887
9,0.352523,0.312291,0.333686,0.320085,0.496045,0.808221,0.828043,0.966923,0.913053,0.912866,...,0.552852,0.551667,0.553428,0.547099,0.552852,0.548108,0.547423,0.539698,0.543035,0.541943


In [19]:
# Best similarity per row, averaged over all rows and scaled to percent — the
# same formula videoSimilarity returns. Depends on `sim_df` from the cell above.
sim_percent = ((sim_df.max(axis=1).sum())/len(sim_df)) * 100
sim_percent

80.8079562945676