In [1]:
import cv2
import numpy as np
import time
import os
from image_match.goldberg import ImageSignature
import pandas as pd
from PIL import Image
import imagehash

In [2]:
def framing(video_path):
    """Decode every frame of a video.

    Parameters
    ----------
    video_path : str
        Path handed to ``cv2.VideoCapture``.

    Returns
    -------
    (res, fr) : tuple of lists
        ``res`` holds the grayscale version of each decoded frame and ``fr`` the
        frames exactly as returned by ``cap.read()``, in decoding order. Both
        lists are empty when no frame could be read.

    The reported FPS (truncated to int) is printed; "Not available" is printed
    first when the backend reports 0.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        if fps == 0:
            print("Not available")
        print(fps)

        fr = []
        res = []
        while True:
            ret, frame = cap.read()
            if not ret:
                # read() reports failure once the stream is exhausted (or unreadable).
                break
            fr.append(frame)
            res.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    finally:
        # Always give the decoder / file handle back, even if conversion raises.
        cap.release()

    return res, fr

In [3]:
def keyframeDetection(video_path, global_sigma=8, frame_sigma=5):
    """Pick keyframes from a video and save them as JPEGs.

    Consecutive grayscale frames are differenced with ``cv2.absdiff``. A global
    threshold ``mean + global_sigma * std`` is computed over all difference
    images; difference ``i`` (frame ``i`` vs. frame ``i + 1``) marks frame ``i``
    as a keyframe when its own ``mean + frame_sigma * std`` reaches that
    threshold. Each keyframe is written (grayscale) to
    ``keyframes/<video name>/<i>.jpg``.

    Parameters
    ----------
    video_path : str
        Path of the video; the text between the last '/' and the first '.' of
        the file name is used as the output folder name.
    global_sigma : number, optional
        Multiplier of the global standard deviation (default 8).
    frame_sigma : number, optional
        Multiplier of the per-difference standard deviation (default 5).

    Returns
    -------
    (frame_num, keyframePath) : (list of int, str)
        Indices of the saved keyframes and the folder they were written to.
        ``frame_num`` is empty when the video yields no frames or only one.

    Raises
    ------
    OSError
        If a keyframe image cannot be written.
    """
    frames, _ = framing(video_path)  # the colour frames are not used here

    video_name = (video_path.split('/')[-1]).split('.')[0]
    keyframePath = 'keyframes/' + video_name

    if len(frames) == 0:
        print("Cannot compare video because of zero fps.")
        # Same (list, path) shape as the success path so callers can always unpack.
        return [], keyframePath

    os.makedirs(keyframePath, exist_ok=True)

    diff = [cv2.absdiff(frames[i], frames[i + 1]) for i in range(len(frames) - 1)]
    if not diff:
        # A single frame has nothing to be compared against; avoid mean/std of
        # an empty sequence (NaN + RuntimeWarning).
        print("Total Number of keyframes saved: {}".format(0))
        return [], keyframePath

    # Global threshold over every pixel of every difference image.
    ts = np.mean(diff) + (global_sigma * np.std(diff))

    frame_num = []
    for i, frame_diff in enumerate(diff):
        fr_ts = np.mean(frame_diff) + (frame_sigma * np.std(frame_diff))
        if fr_ts >= ts:
            target = keyframePath + '/' + str(i) + '.jpg'
            if not cv2.imwrite(target, frames[i]):
                raise OSError("Could not write keyframe to " + target)
            frame_num.append(i)

    print("Total Number of keyframes saved: {}".format(len(frame_num)))
    return frame_num, keyframePath

In [23]:
def imageDist(img1, img2):
    """Return the perceptual-hash distance between two image files.

    Parameters
    ----------
    img1, img2 : str or file-like
        Anything accepted by ``PIL.Image.open``.

    Returns
    -------
    int
        ``phash(img1) - phash(img2)`` converted to ``int``; 0 means the two
        hashes are identical.
    """
    # Context managers close each file as soon as its hash is computed; this
    # function is called once per keyframe pair, so leaked handles add up fast.
    with Image.open(img1) as first:
        img1_hash = imagehash.phash(first)
    with Image.open(img2) as second:
        img2_hash = imagehash.phash(second)
    return int(img1_hash - img2_hash)

In [24]:
def videoSimilarity(video1_path, video2_path, max_distance=100):
    """Score how similar two videos are from their keyframes.

    Keyframes of both videos are extracted with ``keyframeDetection``. Every
    keyframe of video 1 is compared with every keyframe of video 2 through the
    perceptual-hash distance; each video-1 keyframe keeps its best (highest)
    similarity and the result is the average of those best scores.

    Parameters
    ----------
    video1_path, video2_path : str
        Paths of the videos to compare.
    max_distance : number, optional
        Distance that maps to a similarity of 0. A distance ``d`` scores
        ``(max_distance - d) * 100 / max_distance``. The default of 100
        reproduces the historical ``100 - d`` score. The hashes used here print
        as 16 hex digits (64 bits), so pass ``max_distance=64`` to get a score
        that spans the full 0-100 range.

    Returns
    -------
    float
        Average best-match similarity, or ``0.0`` when either video could not
        be read or produced no keyframes.
    """
    detected1 = keyframeDetection(video1_path)
    detected2 = keyframeDetection(video2_path)
    # keyframeDetection may signal an unreadable video with a non-tuple value;
    # treat that as "nothing to compare" instead of crashing on the unpack.
    if not isinstance(detected1, tuple) or not isinstance(detected2, tuple):
        return 0.0

    v1_keyframe_num, keyframe1_path = detected1
    v2_keyframe_num, keyframe2_path = detected2
    if len(v1_keyframe_num) == 0 or len(v2_keyframe_num) == 0:
        return 0.0

    # Hash every keyframe once instead of once per pair.
    v1_hashes = _hashKeyframes(keyframe1_path, v1_keyframe_num)
    v2_hashes = _hashKeyframes(keyframe2_path, v2_keyframe_num)

    dist_mat = []
    for count, hash1 in enumerate(v1_hashes, start=1):
        dist_mat.append([int(hash1 - hash2) for hash2 in v2_hashes])
        print("row", count, end='\r')

    dist = np.array(dist_mat)
    sim_mat = (max_distance - dist) * 100.0 / max_distance
    # Best match per video-1 keyframe, averaged over all video-1 keyframes.
    return float(sim_mat.max(axis=1).sum() / len(sim_mat))


def _hashKeyframes(folder, frame_nums):
    """Return the perceptual hash of ``<folder>/<n>.jpg`` for each ``n`` in ``frame_nums``."""
    hashes = []
    for num in frame_nums:
        with Image.open(folder + '/' + str(num) + '.jpg') as img:
            hashes.append(imagehash.phash(img))
    return hashes

In [25]:
# Compare test3 vs. test4. Prints each video's FPS and saved-keyframe count, then
# displays the similarity score; rows = keyframes of the first video.
videoSimilarity("index-videos/test3.mp4", "index-videos/test4.mp4")

29
Total Number of keyframes saved: 11
30
Total Number of keyframes saved: 9
row 11

85.27272727272727

In [26]:
# Compare test5 vs. test6. The score is driven by the first video's keyframes
# (best match per row), so swapping the arguments can give a different value.
videoSimilarity("index-videos/test5.mp4", "index-videos/test6.mp4")

30
Total Number of keyframes saved: 34
30
Total Number of keyframes saved: 14
row 34

78.47058823529412

In [27]:
# Compare test7 vs. test8 (the two videos report different FPS values in the output below).
videoSimilarity("index-videos/test7.mp4", "index-videos/test8.mp4")

30
Total Number of keyframes saved: 16
25
Total Number of keyframes saved: 16
row 160

98.125

In [15]:
# NOTE(review): `x` is not assigned in any cell above this one, so this cell raises
# NameError on a fresh kernel (Restart & Run All). Judging by the In [18] output it
# was a list-of-lists distance matrix from an earlier session — confirm, then either
# rebuild it explicitly or delete this scratch cell.
# Scoring mirrors videoSimilarity: similarity = 100 - distance, best value per row,
# averaged over the rows.
y = 100-pd.DataFrame(x)
sim_percent = ((y.max(axis=1).sum())/len(y))
sim_percent

96.36363636363636

In [18]:
# Display `x`. NOTE(review): `x` is not assigned in any cell above this one, so this
# only works with leftover kernel state — presumably a distance matrix; confirm its origin.
x

[[4, 6, 8, 8, 14, 14, 14, 14, 10],
 [4, 4, 6, 6, 10, 10, 10, 12, 10],
 [4, 2, 0, 0, 10, 10, 10, 10, 6],
 [10, 10, 10, 10, 4, 4, 4, 6, 10],
 [10, 10, 10, 10, 4, 4, 4, 6, 10],
 [8, 8, 8, 8, 4, 4, 4, 6, 10],
 [8, 8, 8, 8, 4, 4, 4, 6, 10],
 [10, 10, 8, 8, 4, 4, 4, 4, 12],
 [10, 10, 8, 8, 4, 4, 4, 4, 12],
 [10, 10, 8, 8, 4, 4, 4, 4, 12],
 [10, 10, 8, 8, 4, 4, 4, 4, 12]]

In [21]:
# Print the perceptual hash of one saved keyframe as a sanity check.
# Uses dedicated names (instead of `i` / `x`) so that no other notebook variable is
# overwritten, and closes the image file once the hash has been computed.
with Image.open("keyframes/test4/43.jpg") as sample_keyframe:
    sample_hash = imagehash.phash(sample_keyframe)
print(sample_hash)

d53adc2771959621
