In [None]:
# One-time setup: uncomment and run these lines to install the third-party
# packages imported below (`cv2` comes from opencv-python).
# %pip install opencv-python
# %pip install imagehash

Collecting imagehash
  Downloading ImageHash-4.3.1-py2.py3-none-any.whl (296 kB)
     ---------------------------------------- 0.0/296.5 kB ? eta -:--:--
     ---- ---------------------------------- 30.7/296.5 kB 1.3 MB/s eta 0:00:01
     ------------- ------------------------ 102.4/296.5 kB 1.5 MB/s eta 0:00:01
     ------------------------------ ------- 235.5/296.5 kB 2.0 MB/s eta 0:00:01
     -------------------------------------- 296.5/296.5 kB 1.8 MB/s eta 0:00:00
Collecting PyWavelets
  Downloading pywavelets-1.7.0-cp310-cp310-win_amd64.whl (4.3 MB)
     ---------------------------------------- 0.0/4.3 MB ? eta -:--:--
      --------------------------------------- 0.1/4.3 MB ? eta -:--:--
     ----- ---------------------------------- 0.6/4.3 MB 9.3 MB/s eta 0:00:01
     ------- -------------------------------- 0.8/4.3 MB 5.7 MB/s eta 0:00:01
     -------- ------------------------------- 0.9/4.3 MB 5.8 MB/s eta 0:00:01
     ---------- ----------------------------- 1.2/4.3 MB 5.2 M


[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
import os
import cv2
import imagehash
from PIL import Image
import sys

# Largest difference between two perceptual hashes (`phash_a - phash_b`) at which
# a frame is still treated as a duplicate of an already-saved frame.
# Lower values are stricter (more frames kept); higher values discard more.
# NOTE(review): imagehash defines hash subtraction as a Hamming distance in bits — confirm.
SIMILARITY_TOLERANCE = 8

In [9]:
def print_progress(percentage):
    """Redraw a 100-character text progress bar on the current output line.

    Args:
        percentage: Progress value, expected in the range 0-100. The numeric
            label shows the value as passed; the number of filled cells is
            clamped to the bar width so out-of-range values cannot distort it.

    The bar starts with a carriage return so that successive calls overwrite
    each other instead of appending.
    """
    bar_width = 100
    # Exactly `percentage` cells are filled (0% -> empty bar, 100% -> full bar).
    filled = max(0, min(bar_width, int(percentage)))
    bar = "#" * filled + " " * (bar_width - filled)
    # Emit the whole line in a single write so the carriage return and the bar
    # arrive together; many tiny writes can be split across output chunks.
    sys.stdout.write(f"\r[{bar}] {percentage}%")
    sys.stdout.flush()


class FrameProcessor:
    """Extract visually distinct frames from a video file.

    Every decoded frame is reduced to a perceptual hash and compared with the
    hashes of the frames saved so far; only frames that differ from all of
    them by more than ``SIMILARITY_TOLERANCE`` are written to disk.

    Attributes:
        video_path: Path of the video handed to ``cv2.VideoCapture``.
        output_dir: Directory that receives the ``frame_NNNN.jpg`` files.
        phash_dict: Maps the perceptual hash of each saved frame to the path
            it was written to. It persists across calls on the same instance.
    """

    def __init__(self, video_path, output_dir):
        """Store the input/output locations and start with no known frames."""
        self.video_path = video_path
        self.output_dir = output_dir
        self.phash_dict = {}

    def get_frame_phash(self, frame):
        """Return the perceptual hash of ``frame``.

        Args:
            frame: Image array in BGR channel order (it is converted with
                ``cv2.COLOR_BGR2GRAY`` before hashing).

        Returns:
            The value produced by ``imagehash.phash`` for the grayscale frame.
        """
        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
        return imagehash.phash(frame_pil)

    def save_if_unique(self, frame, frame_count):
        """Write ``frame`` to disk unless it matches an already-saved frame.

        Args:
            frame: Image array to test and possibly save.
            frame_count: Index used in the output file name if the frame is
                saved (i.e. the number of frames saved so far).

        Returns:
            ``frame_count`` unchanged when the frame is a duplicate, otherwise
            ``frame_count + 1``.

        Raises:
            OSError: If ``cv2.imwrite`` reports that the file was not written.
        """
        phash = self.get_frame_phash(frame)
        # Duplicate if it is within tolerance of ANY previously saved frame.
        if any(phash - known <= SIMILARITY_TOLERANCE for known in self.phash_dict):
            return frame_count

        frame_path = os.path.join(self.output_dir, f"frame_{frame_count:04d}.jpg")
        # imwrite signals failure through its return value rather than raising;
        # ignoring it would count frames that never reached the disk.
        if not cv2.imwrite(frame_path, frame):
            raise OSError(f"Failed to write frame to '{frame_path}'.")
        self.phash_dict[phash] = frame_path
        return frame_count + 1

    def extract_frames(self):
        """Read the whole video and save every unique frame.

        Prints a progress bar while decoding (when the total frame count is
        known) and a completion message at the end.

        Returns:
            The number of frames saved, or 0 if the video could not be opened.

        Raises:
            OSError: Propagated from ``save_if_unique`` when a frame cannot be
                written.
        """
        cap = cv2.VideoCapture(self.video_path)
        try:
            if not cap.isOpened():
                print("Error: Unable to open video.")
                return 0

            # Make sure the destination exists so frame writes cannot fail
            # merely because the directory is missing.
            if self.output_dir:
                os.makedirs(self.output_dir, exist_ok=True)

            # May be 0 (or negative) when the frame count is unavailable.
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_count, saved_count, percentage = 0, 0, 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                saved_count = self.save_if_unique(frame, saved_count)
                frame_count += 1

                # Skip progress reporting when the total is unknown (avoids a
                # division by zero) and cap at 100% in case the reported
                # total is smaller than the number of frames actually decoded.
                if total_frames > 0:
                    new_percentage = min(100, frame_count * 100 // total_frames)
                    if new_percentage != percentage:
                        print_progress(new_percentage)
                        percentage = new_percentage
        finally:
            # Always free the capture handle, even if hashing or writing fails.
            cap.release()

        print("\nExtraction complete.")
        return saved_count

In [10]:
def main(video_path="./Solution.mp4", output_dir="./output"):
    """Extract the unique frames of a video into a folder and report the result.

    Args:
        video_path: Path to the input video. Defaults to ``./Solution.mp4``.
        output_dir: Folder that receives the extracted frames; it is created
            if it does not exist. Defaults to ``./output``.

    Prints a summary with the number of unique frames saved, or a notice when
    nothing was extracted.
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    print("\nProcessing video...\n")
    processor = FrameProcessor(video_path, output_dir)
    extracted_count = processor.extract_frames()

    if extracted_count > 0:
        print(f"\nExtraction complete. {extracted_count} unique frames saved to '{output_dir}'.")
    else:
        print("No unique frames were extracted.")


# Entry point: run the extraction when this code is executed directly
# (rather than imported as a module).
if __name__ == "__main__":
    main()



Processing video...

[####################################################################################################] 100%####################                                                                               ] 20%############################################                                                        ] 43%################################################                                                    ] 47%##################################################                                                  ] 49%#############################################################                                       ] 60%##############################################################                                      ] 61%#####################################################################                               ] 68%#########################################################################                           ] 72%################################