In [None]:

# Path of the query clip; the cells below pass this variable to their
# detection / hashing / audio / optical-flow steps.
query_file_path = '../../dataset/Tests/video2_1_modified.mp4'

In [1]:
# SHOT BOUNDARY DETECTION

import cv2
from scenedetect import VideoManager
from scenedetect import SceneManager
from scenedetect.detectors import ContentDetector
from datetime import datetime, timedelta
import numpy as np
from moviepy.editor import VideoFileClip
import json

def get_video_duration(video_file_path):
    """Return the duration, in seconds, of the video at ``video_file_path``.

    The clip is opened through moviepy's ``VideoFileClip`` context manager so
    it is closed again before the function returns.
    """
    with VideoFileClip(video_file_path) as clip:
        duration_seconds = clip.duration
    return duration_seconds

def find_subarray_np(main_array, sub_array):
    """Find every position where ``sub_array`` occurs contiguously in ``main_array``.

    Args:
        main_array: 1-D sequence to search in.
        sub_array: 1-D sequence to look for. Elements are compared with
            exact ``==`` equality.

    Returns:
        A NumPy array holding the start index of each match when at least one
        exists, otherwise the list ``[-1]``.
    """
    main_array = np.asarray(main_array)
    sub_array = np.asarray(sub_array)
    sub_len = len(sub_array)

    # sliding_window_view raises ValueError when the window is longer than the
    # array being searched; a pattern that cannot fit simply has no match.
    if sub_len > len(main_array):
        return [-1]

    # One row per candidate start position, each row sub_len elements long.
    windows = np.lib.stride_tricks.sliding_window_view(main_array, window_shape=sub_len)
    matches = np.all(windows == sub_array, axis=1)
    indices = np.where(matches)[0]

    if indices.size > 0:
        return indices
    return [-1]

# Function to parse 'HH:MM:SS.sss' timecode strings into datetime objects
def parse_timecode(time_str):
    """Parse ``time_str`` with the format ``%H:%M:%S.%f`` and return a datetime.

    The input carries no date, so the date part of the result is whatever
    ``datetime.strptime`` fills in by default; only the time fields are
    meaningful.
    """
    timecode_format = "%H:%M:%S.%f"
    return datetime.strptime(time_str, timecode_format)

# Function to convert seconds into hh:mm:ss.sss format
def seconds_to_timestamp(seconds):
    """Format a non-negative number of seconds as ``HH:MM:SS.mmm``.

    The fields are computed arithmetically instead of by splitting
    ``str(timedelta)``. That string has no fractional part for whole seconds
    (which made the split fail), gains a ``"N day, "`` prefix from 24 hours
    onward, and its fraction lost leading zeros when passed through ``int``
    (1.05 s was rendered as ``.500``).

    Args:
        seconds: Duration in seconds (int or float), must be >= 0.

    Returns:
        A string with zero-padded hours, minutes and seconds and three
        millisecond digits. Sub-millisecond precision is truncated. Hours are
        not wrapped at 24.

    Raises:
        ValueError: If ``seconds`` is negative.
    """
    if seconds < 0:
        raise ValueError(f"seconds must be non-negative, got {seconds!r}")

    td = timedelta(seconds=seconds)
    whole_seconds = td.days * 86400 + td.seconds
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    milliseconds = td.microseconds // 1000
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{milliseconds:03d}"

# Function to compute differences in timestamps
def compute_differences(timestamps):
    """Return the gaps, in seconds, between consecutive timecode strings.

    Each timestamp is parsed with ``parse_timecode``; every gap is rounded to
    two decimals. The result has one element fewer than ``timestamps`` (and is
    empty for zero or one timestamp).
    """
    parsed = [parse_timecode(stamp) for stamp in timestamps]
    return [
        round((later - earlier).total_seconds(), 2)
        for earlier, later in zip(parsed, parsed[1:])
    ]

def find_scenes(video_path, threshold=30.0):
    """Run PySceneDetect's content detector over a video.

    Args:
        video_path: Path of the video to analyse.
        threshold: Threshold passed to ``ContentDetector``.

    Returns:
        A two-element list ``[shot_boundaries, scene_list]`` where
        ``scene_list`` is what ``SceneManager.get_scene_list`` returned
        (tuples of start/end timecodes) and ``shot_boundaries`` is the
        timecode string of each scene's end.
    """
    video_manager = VideoManager([video_path])
    scene_manager = SceneManager()
    scene_manager.add_detector(ContentDetector(threshold=threshold))

    try:
        # Called without arguments, so the library's default downscale
        # behaviour is used.
        video_manager.set_downscale_factor()
        video_manager.start()

        scene_manager.detect_scenes(frame_source=video_manager)
        scene_list = scene_manager.get_scene_list(video_manager.get_base_timecode())

        # Each scene is a (start, end) pair; the end timecode is the boundary.
        shot_boundaries = [scene[1].get_timecode() for scene in scene_list]
    finally:
        # Release the underlying video even when detection fails part-way.
        video_manager.release()

    return [shot_boundaries, scene_list]

def compute_time_difference(time1, time2):
    """Return the absolute gap, in seconds, between two timecode strings.

    Both arguments are parsed with ``parse_timecode``; the order in which they
    are given does not affect the result.
    """
    first = parse_timecode(time1)
    second = parse_timecode(time2)
    gap = abs(first - second)
    return gap.total_seconds()

def get_fps(video_path):
    """Return the value OpenCV reports for ``CAP_PROP_FPS`` on ``video_path``.

    The capture is opened only to read that property and is released before
    returning.
    """
    capture = cv2.VideoCapture(video_path)
    frames_per_second = capture.get(cv2.CAP_PROP_FPS)
    capture.release()
    return frames_per_second

def timecode_to_frames(timecode, fps):
    """Convert a timecode string to the nearest frame number at ``fps``.

    The timecode is parsed with ``parse_timecode``, turned into seconds since
    00:00:00, multiplied by ``fps`` and rounded to an int.
    """
    parsed = parse_timecode(timecode)
    whole_seconds = parsed.hour * 3600 + parsed.minute * 60 + parsed.second
    total_seconds = whole_seconds + parsed.microsecond / 1e6
    return int(round(total_seconds * fps))


# Detect the query clip's shots. find_scenes returns
# [end-of-scene timecode strings, raw scene list].
query_video_boundaries, query_scenes = find_scenes(query_file_path)
# The signature is the list of gaps (seconds, 2 decimals) between consecutive
# boundaries. The last gap is dropped before matching -- presumably because
# the final boundary is just the end of the query clip; TODO confirm.
query_video_differences = compute_differences(query_video_boundaries)[:-1]

if(query_video_differences == []):
    print("No Shot Boundaries present in the query video")
else:
    print("Query Video contains Shot Boundaries.\nStarting shot boundary match now:")
    # Load the precomputed shot-boundary signatures.
    with open('signatures/shotBoundSignature.json', 'r') as file:
        data = json.load(file)

    # 'arrays' maps a key to a list of boundary timecode strings
    # (they are fed to compute_differences below).
    arrays = data['arrays']

    # Try to locate the query's gap sequence inside each stored sequence.
    for key, array in arrays.items():
        print("Array", key + ":")
        original_video_boundaries = array
        original_video_differences = compute_differences(original_video_boundaries)

        # A stored sequence shorter than the query cannot contain it.
        if(len(original_video_differences) < len(query_video_differences)):
            print("No shot Boundary match found\n")
            continue

        # Only the first match position is used; find_subarray_np yields -1
        # as its first element when nothing matches.
        start_index = find_subarray_np(original_video_differences, query_video_differences)[0]

        if(start_index == -1):
            print("No shot Boundary match found\n")
        else:
            print("Shot Boundary match found at index", start_index)
            print(key + " is under consideration\n")

In [None]:
# PERCEPTUAL IMAGE HASHING

import imagehash
from PIL import Image
import numpy as np
import cv2
import json
import os

def extract_frames(video_path):
    """Decode every frame of a video and return them as RGB arrays.

    Args:
        video_path: Path of the video to read with OpenCV.

    Returns:
        A list with one array per decoded frame, converted from OpenCV's BGR
        channel order to RGB. All frames are kept in memory at once. The list
        is empty when no frame could be read.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # no more frames (or the video could not be read)
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture even if colour conversion raises.
        cap.release()
    return frames

def calculate_hashes(frames):
    """Return the ``imagehash.average_hash`` of every frame, in input order.

    Each frame is wrapped with ``Image.fromarray`` before hashing.
    """
    hashes = []
    for frame in frames:
        pil_image = Image.fromarray(frame)
        hashes.append(imagehash.average_hash(pil_image))
    return hashes

def compare_hashes(query_hashes, candidate_hashes):
    """ Compare hashes from the query video to hashes from one candidate video using optimized numpy operations. """
    n = len(query_hashes)
    min_diff = float('inf')

    # Convert imagehash objects to NumPy arrays of the entire list
    query_hashes_np = np.stack([np.array(h.hash, dtype=int) for h in query_hashes])
    candidate_hashes_np = np.stack([np.array(h.hash, dtype=int) for h in candidate_hashes])

    # Calculate the windowed sum of differences over all possible subarrays of length n
    for offset in range(len(candidate_hashes) - n + 1):
        # Select the window segment of candidate hashes
        window = candidate_hashes_np[offset:offset + n]
        # Calculate the number of different bits (hamming distance) using XOR and sum
        current_diff = np.sum(query_hashes_np != window)
        if current_diff < min_diff:
            min_diff = current_diff

    return min_diff


def find_source_video(query_video_path):
    """Return the key in ``signatures/perceptualHash.json`` that best matches the query.

    The query video's frames are hashed, then compared against each stored
    list of hex hash strings with ``compare_hashes``. The key with the
    smallest difference wins (the first one on ties). ``None`` is returned
    when no candidate yields a finite difference.
    """
    query_hashes = calculate_hashes(extract_frames(query_video_path))

    json_file_path = 'signatures/perceptualHash.json'
    with open(json_file_path, 'r') as file:
        precomputed_hashes = json.load(file)

    best_match, smallest_diff = None, float('inf')

    for video_file in precomputed_hashes:
        print("Checking " + video_file)
        # Stored hashes are hex strings; rebuild hash objects before comparing.
        candidate_hashes = []
        for h_str in precomputed_hashes[video_file]:
            candidate_hashes.append(imagehash.hex_to_hash(h_str))

        diff = compare_hashes(query_hashes, candidate_hashes)
        if not diff < smallest_diff:
            continue
        smallest_diff = diff
        best_match = video_file

    return best_match


# Pick the stored video whose hashes are closest to the query's frame hashes.
source_video = find_source_video(query_file_path)
# Drop the file extension; later cells use source_name as the lookup key into
# their own signature files.
# NOTE(review): find_source_video can return None, which splitext rejects.
source_name = os.path.splitext(source_video)[0]
print(f"The query video is most likely from: {source_name}")

In [None]:
# AUDIO - MFCC

from moviepy.editor import VideoFileClip
import librosa
import numpy as np
from datetime import timedelta, datetime
import cv2
import json
import time

def extract_audio(video_file_path, output_audio_path):
    """Write the audio track of a video to ``output_audio_path``.

    The audio is written with the ``pcm_s16le`` codec.

    Args:
        video_file_path: Path of the source video.
        output_audio_path: Destination path of the audio file.

    Raises:
        ValueError: If the video has no audio track.
    """
    # The context manager closes the clip even if writing fails.
    with VideoFileClip(video_file_path) as video:
        audio = video.audio
        if audio is None:
            raise ValueError(f"No audio track found in {video_file_path!r}")
        audio.write_audiofile(output_audio_path, codec='pcm_s16le')

def find_best_match(input_features, query_features):
    """Slide ``query_features`` along the columns of ``input_features``.

    For every column offset at which the query fits, the norm of the
    difference between the query and the equally wide slice of the input is
    computed with ``np.linalg.norm``.

    Returns:
        ``(index, score)``: the column offset with the smallest norm (the
        first one on ties) and that norm. When the query is wider than the
        input no offset is evaluated and ``(0, np.inf)`` is returned.
    """
    query_width = query_features.shape[1]
    offsets = input_features.shape[1] - query_width + 1

    best_index = 0
    best_score = np.inf
    for offset in range(offsets):
        candidate = input_features[:, offset:offset + query_width]
        score = np.linalg.norm(query_features - candidate)
        if score < best_score:
            best_index, best_score = offset, score

    return best_index, best_score

# Function to convert seconds into hh:mm:ss.sss format
def seconds_to_timestamp(seconds):
    """Format a non-negative number of seconds as ``HH:MM:SS.mmm``.

    The fields are computed arithmetically instead of by splitting
    ``str(timedelta)``. That string has no fractional part for any whole
    number of seconds (not only zero), gains a ``"N day, "`` prefix from
    24 hours onward, and its fraction lost leading zeros when passed through
    ``int`` (1.05 s was rendered as ``.500``).

    Args:
        seconds: Duration in seconds (int or float), must be >= 0.

    Returns:
        A string with zero-padded hours, minutes and seconds and three
        millisecond digits. Sub-millisecond precision is truncated. Hours are
        not wrapped at 24.

    Raises:
        ValueError: If ``seconds`` is negative.
    """
    if seconds < 0:
        raise ValueError(f"seconds must be non-negative, got {seconds!r}")

    td = timedelta(seconds=seconds)
    whole_seconds = td.days * 86400 + td.seconds
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    milliseconds = td.microseconds // 1000
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{milliseconds:03d}"

# functions to convert the timestamp into frame number in the original video
def get_fps(video_path):
    """Return the value OpenCV reports for ``CAP_PROP_FPS`` on ``video_path``.

    The capture is opened only to read that property and is released before
    returning.
    """
    capture = cv2.VideoCapture(video_path)
    frames_per_second = capture.get(cv2.CAP_PROP_FPS)
    capture.release()
    return frames_per_second

def parse_timecode(time_str):
    """Parse an ``HH:MM:SS.sss`` string into a datetime via ``%H:%M:%S.%f``.

    Only the time fields of the result are meaningful; the date part is
    whatever ``datetime.strptime`` fills in by default.
    """
    timecode_format = "%H:%M:%S.%f"
    return datetime.strptime(time_str, timecode_format)

def timecode_to_frames(timecode, fps):
    """Convert a timecode string to the nearest frame number at ``fps``.

    The timecode is parsed with ``parse_timecode``, turned into seconds since
    00:00:00, multiplied by ``fps`` and rounded to an int.
    """
    parsed = parse_timecode(timecode)
    whole_seconds = parsed.hour * 3600 + parsed.minute * 60 + parsed.second
    total_seconds = whole_seconds + parsed.microsecond / 1e6
    return int(round(total_seconds * fps))

def load_array_from_json(filename):
    """Return the parsed contents of the JSON file at ``filename``."""
    with open(filename, 'r') as json_file:
        return json.load(json_file)
    

# Dump the query clip's audio track to a WAV file so librosa can read it.
extract_audio(query_file_path, 'output_query_audio.wav')


# No sample rate is passed to librosa.load, so the library's default applies;
# the rate actually used is returned alongside the samples.
query_audio, query_sampling_rate = librosa.load('output_query_audio.wav')
query_mfcc = librosa.feature.mfcc(y=query_audio, sr=query_sampling_rate, n_mfcc=13)

# Load the precomputed MFCC arrays (a NumPy .npz archive, not JSON).
loaded_data = np.load('signatures/mfcc_arrays.npz')
# Only the video selected by the perceptual-hash cell is searched.

key = source_name

input_mfcc = loaded_data[key]

# Wall-clock timing of the sliding-window search.
start_time = time.time()

start_index, similarity_score = find_best_match(input_mfcc, query_mfcc)

end_time = time.time()

print("Time taken: ", seconds_to_timestamp(end_time - start_time))

# Samples per MFCC frame step. No hop_length is passed to librosa above, so
# this presumably mirrors librosa's default -- TODO confirm. It also assumes
# the stored MFCCs were computed at the same sample rate as the query.
HOP_LENGTH = 512
# NOTE(review): fps is read from the query clip and used to express positions
# in the source video; this assumes both share a frame rate.
fps = get_fps(query_file_path)

# Convert the matched MFCC column index to seconds, then to a time range.
start_time_seconds = start_index * HOP_LENGTH / query_sampling_rate
query_duration_seconds = len(query_audio) / query_sampling_rate
end_time_seconds = start_time_seconds + query_duration_seconds

# start_time / end_time are rebound here from floats to timestamp strings.
start_time = seconds_to_timestamp(start_time_seconds)
end_time = seconds_to_timestamp(end_time_seconds)
start_frame_audio = timecode_to_frames(start_time, fps)
end_frame_audio = timecode_to_frames(end_time, fps)

# The printed times are timestamp strings even though the label says "seconds".
print(f"For key: {key}")
print(f"Start Time: {start_time} seconds")
print(f"End Time: {end_time} seconds")
print(f"Start Frame: {start_frame_audio}")
print(f"End Frame: {end_frame_audio}\n")

In [None]:
# MOTION - OPTICAL FLOW

import cv2
import numpy as np
from tqdm import tqdm
import json
import os

def resize_frame(frame, target_size=(640, 480)):
    """Resize ``frame`` to ``target_size`` with bilinear interpolation.

    ``target_size`` is read as ``(height, width)``: its two entries are
    swapped before being handed to ``cv2.resize``, whose size argument is
    width-first. The frame is stretched to exactly that size; the aspect
    ratio is not preserved.
    """
    out_width = target_size[1]
    out_height = target_size[0]
    return cv2.resize(frame, (out_width, out_height), interpolation=cv2.INTER_LINEAR)

def extract_optical_flow_features(video_path, target_size):
    """Compute a per-frame-pair optical-flow histogram signature for a video.

    For every pair of consecutive frames, dense Farneback optical flow is
    computed on the resized grayscale images. The flow magnitude and angle
    are each binned into a 30-bin histogram, and the two histograms are
    concatenated into one 60-element feature vector.

    Args:
        video_path: Path of the video to read with OpenCV.
        target_size: Size forwarded to ``resize_frame`` for every frame.

    Returns:
        A NumPy array with one row per consecutive frame pair, or an empty
        list when the first frame cannot be read.
    """
    cap = cv2.VideoCapture(video_path)
    pbar = None
    try:
        ret, prev_frame = cap.read()
        if not ret:
            print("Failed to read the first frame.")
            return []

        prev_frame = resize_frame(prev_frame, target_size)
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        features = []

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # One feature vector per frame *pair*, so the first frame adds no step.
        pbar = tqdm(total=max(frame_count - 1, 0), desc=f'Extracting features- {video_path}')

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = resize_frame(frame, target_size)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
            flow_mag, flow_ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])

            # Magnitudes outside [0, 30) fall outside the histogram range.
            mag_hist = np.histogram(flow_mag, bins=30, range=(0, 30))[0]
            ang_hist = np.histogram(flow_ang, bins=30, range=(0, 2 * np.pi))[0]
            features.append(np.concatenate((mag_hist, ang_hist)))

            prev_gray = gray
            pbar.update(1)
    finally:
        # Always free the capture (the early return used to leak it) and
        # close the progress bar if it was created.
        cap.release()
        if pbar is not None:
            pbar.close()

    return np.array(features)

def find_start_frame(input_features, query_features):
    """Return the offset where ``query_features`` best aligns with ``input_features``.

    A window as long as the query is slid along the first axis of the input.
    The offset with the smallest sum of squared differences is returned (the
    first one on ties). ``-1`` is returned when the query is longer than the
    input.
    """
    window_len = len(query_features)
    best_offset = -1
    lowest_cost = float('inf')

    for offset in range(len(input_features) - window_len + 1):
        residual = input_features[offset:offset + window_len] - query_features
        cost = np.sum(residual ** 2)
        if cost < lowest_cost:
            lowest_cost, best_offset = cost, offset

    return best_offset

def get_video_dimensions(video_path):
    """Return ``(width, height)`` of the video's first frame.

    Prints a message and returns ``None`` when no frame can be read.
    """
    cap = cv2.VideoCapture(video_path)
    ok, first_frame = cap.read()
    cap.release()

    if not ok:
        print("Could not read frame from video.")
        return None

    frame_height, frame_width = first_frame.shape[:2]
    return (frame_width, frame_height)


# (width, height) of the query clip's first frame, or None if unreadable.
query_dimensions = get_video_dimensions(query_file_path)
print(query_dimensions)
# Extract the query's optical-flow histograms.
# NOTE(review): get_video_dimensions returns (width, height) but resize_frame
# reads its size argument as (height, width), so frames are resized to the
# transposed size. Confirm the stored signatures were generated the same way
# before changing this.
query_features = extract_optical_flow_features(query_file_path, query_dimensions)

# Load the precomputed optical-flow signatures.
filename = 'signatures/opticalFlow.json'
with open(filename, 'r') as file:
    existing_data = json.load(file)

# Convert each stored value to a NumPy array.
features = {}
for key, value in existing_data.items():
    features[key] = np.array(value)



# Only the video selected by the perceptual-hash cell is searched.
input_features = features[source_name]


# Find the starting frame
start_frame_motion = find_start_frame(input_features, query_features)
print(f"The query video starts at frame {start_frame_motion} of the input video.")

In [None]:
# Video Player

import sys
import vlc
from PyQt5 import QtWidgets, QtGui
from PyQt5.QtGui import QIcon, QPixmap
from PyQt5.QtCore import *

class VideoPlayer(QtWidgets.QMainWindow):
    """Qt main window that plays one video through VLC.

    The window holds a frame for the video output and a row of three buttons:
    play/pause, stop, and "skip to frame".
    """

    def __init__(self, master, video_path):
        """Build the window, wire up the buttons and load ``video_path``.

        Args:
            master: Parent widget passed to ``QMainWindow`` (may be ``None``).
            video_path: Location handed to ``vlc.Instance.media_new``.
        """
        super().__init__(master)
        self.setWindowTitle("PyQt VLC Video Player")

        self.instance = vlc.Instance()
        self.player = self.instance.media_player_new()

        self.central_widget = QtWidgets.QWidget(self)
        self.setCentralWidget(self.central_widget)

        self.layout = QtWidgets.QVBoxLayout()
        self.layout_button = QtWidgets.QHBoxLayout()
        self.central_widget.setLayout(self.layout)

        self.frame = QtWidgets.QFrame()
        self.layout.addWidget(self.frame)
        # Only Windows gets the player bound to this frame's native handle;
        # no embedding call is made on other platforms.
        if sys.platform == "win32":
            self.player.set_hwnd(self.frame.winId())

        # Size shared by every button and its icon.
        self.btn_size = QSize(150, 50)

        self.play_button = self._make_button("icons/play.png", self.toggle_play_pause)
        self.stop_button = self._make_button("icons/stop.png", self.stop_player)
        self.skip_button = self._make_button("icons/fastfwd.png", self.skip_to)

        self.layout.addLayout(self.layout_button)

        self.media = self.instance.media_new(video_path)
        self.player.set_media(self.media)

        self.setGeometry(100, 100, 1400, 900)
        self.show()

    def _make_button(self, icon_path, on_click):
        """Create a fixed-size icon button, connect it and add it to the button row."""
        button = QtWidgets.QPushButton()
        button.clicked.connect(on_click)
        button.setFixedSize(self.btn_size)
        button.setIcon(QIcon(QPixmap(icon_path)))
        button.setIconSize(self.btn_size)
        self.layout_button.addWidget(button)
        return button

    def _set_play_button_icon(self, icon_path):
        """Show the icon at ``icon_path`` on the play/pause button."""
        self.play_button.setIcon(QIcon(QPixmap(icon_path)))
        self.play_button.setIconSize(self.btn_size)

    def toggle_play_pause(self):
        """Pause when playing, otherwise play, and swap the button icon to match."""
        if self.player.is_playing():
            self.player.pause()
            self._set_play_button_icon("icons/play.png")
        else:
            self.player.play()
            self._set_play_button_icon("icons/pause.png")

    def stop_player(self):
        """Stop playback and show the play icon again."""
        self.player.stop()
        # Without this the button kept the pause icon while nothing was playing.
        self._set_play_button_icon("icons/play.png")

    def skip_to(self):
        """Ask for a frame number and seek the player to the matching time."""
        fps = self.player.get_fps()
        # The frame number is converted to a time by dividing by fps; a
        # non-positive frame rate would raise ZeroDivisionError or seek to a
        # nonsensical position.
        if not fps or fps <= 0:
            QtWidgets.QMessageBox.warning(
                self,
                "Skip to frame",
                "The frame rate is not available yet. Start playback and try again.",
            )
            return

        frame_number, ok_pressed = QtWidgets.QInputDialog.getInt(
            self, "Skip to frame", "Frame number:", 0, 0, 100000, 1
        )

        if ok_pressed:
            ms_time = (frame_number / fps) * 1000
            self.player.set_time(int(ms_time))

def main():
    """Start the Qt application, open the player window and exit with Qt's return code."""
    video_path = 'dataset/originals/video1.mp4'
    qt_app = QtWidgets.QApplication(sys.argv)
    # Keep a reference to the window for as long as the event loop runs.
    window = VideoPlayer(None, video_path)
    exit_code = qt_app.exec_()
    sys.exit(exit_code)

if __name__ == "__main__":
    main()