In [None]:
import os
import re
import shutil
import uuid

import imagehash
import numpy as np
from PIL import Image
def create_or_empty_directory(directory):
    """Guarantee that `directory` exists and contains nothing.

    If the path is absent it is created (including missing parents). If it is
    already present, the whole tree is removed with `shutil.rmtree` and an
    empty directory is created in its place.

    Args:
        directory: Path of the directory to create or reset.
    """
    if not os.path.exists(directory):
        # Nothing to clear out: just build the path.
        os.makedirs(directory)
        return

    # Wipe the existing tree, then recreate it empty.
    shutil.rmtree(directory)
    os.makedirs(directory)

In [None]:
def find_and_sort_intro_frames(intro_frames_dir):
    """Recursively collect every .jpg under `intro_frames_dir` in natural order.

    Paths are ordered with a natural-sort key: each run of ASCII digits in the
    path is compared as an integer, so ``frame_2.jpg`` comes before
    ``frame_10.jpg``. For zero-padded names this is the same order a plain
    string sort would give.

    Args:
        intro_frames_dir: Root directory to walk.

    Returns:
        A list of file paths (each is `root` joined with the file name, as
        produced by `os.walk`). Empty if nothing matches or the directory does
        not exist, since `os.walk` yields nothing for a missing path.
    """
    def natural_key(path):
        # re.split with one capture group alternates text / digits / text ...,
        # so odd indices are always the digit runs. Converting by position
        # keeps str-vs-str and int-vs-int comparisons aligned.
        parts = re.split(r'([0-9]+)', path)
        key = [int(part) if index % 2 else part for index, part in enumerate(parts)]
        # The raw path breaks ties such as "01" vs "1" deterministically.
        return key, path

    intro_frames = [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(intro_frames_dir)
        for name in files
        if name.endswith('.jpg')
    ]
    intro_frames.sort(key=natural_key)
    return intro_frames


In [None]:
def process_main_frames(main_frames_dir, intro_frames, test_dir, threshold,
                        max_frame_number=70):
    """Copy early main frames that match any intro frame into `test_dir`.

    Every .jpg under `main_frames_dir` (recursively) is visited in sorted path
    order. Only files whose name stem parses as an integer no greater than
    `max_frame_number` are considered. Each one is compared against
    `intro_frames` using `compare_images` (defined elsewhere in this file). On
    the first match the frame is copied to `test_dir` under
    ``<stem>_<8 hex chars>.jpg``.

    Progress is tracked per directory. Once a directory has produced its first
    match, two consecutive non-matching frames mark the end of its intro: the
    directory and the number of frames seen since that first match are printed
    and the rest of the directory is skipped.

    Args:
        main_frames_dir: Root directory containing the frames to scan.
        intro_frames: Iterable of intro frame paths to compare against.
        test_dir: Existing directory that receives the matching frames.
        threshold: Similarity threshold forwarded to `compare_images`.
        max_frame_number: Highest frame number (inclusive) to consider.

    Returns:
        None. Results are the copied files and the printed progress lines.
    """
    all_files = sorted(
        os.path.join(root, name)
        for root, _dirs, files in os.walk(main_frames_dir)
        for name in files
        if name.endswith('.jpg')
    )

    # Directories whose intro has already ended.
    finished_dirs = set()
    # Per-directory tracking so one directory's state never leaks into the
    # next (e.g. when an intro runs right up to the frame cut-off).
    progress = {}

    for main_frame_path in all_files:
        frame_dir, frame_name = os.path.split(main_frame_path)
        stem = os.path.splitext(frame_name)[0]

        try:
            file_num = int(stem)
        except ValueError:
            # Not a numbered frame (e.g. "thumb.jpg"); it cannot be placed
            # relative to the cut-off, so leave it out instead of aborting.
            continue

        if file_num > max_frame_number or frame_dir in finished_dirs:
            continue
        print(main_frame_path)

        # Compare with the intro frames, stopping at the first match.
        found = False
        for intro_frame in intro_frames:
            if compare_images(main_frame_path, intro_frame, threshold):
                unique_name = f"{stem}_{uuid.uuid4().hex[:8]}.jpg"
                destination_path = os.path.join(test_dir, unique_name)
                shutil.copy(main_frame_path, destination_path)
                print(f"\tCopied: {main_frame_path} to {destination_path}")
                found = True
                break

        state = progress.setdefault(
            frame_dir, {"started": False, "misses": 0, "frames": 0}
        )
        if found:
            state["started"] = True
        if not state["started"]:
            # Still before the first matching frame of this directory.
            continue

        # Counted from the first match onwards, trailing misses included.
        state["frames"] += 1
        state["misses"] = 0 if found else state["misses"] + 1

        if state["misses"] >= 2:
            # Two misses in a row after the intro started: intro is over.
            finished_dirs.add(frame_dir)
            print(f"{frame_dir}\t{state['frames']}")
            del progress[frame_dir]

                
        


In [None]:
def _rgb_histogram(image_path):
    """Return the histogram of the image at `image_path` after RGB conversion."""
    # The context manager closes the opened image deterministically instead
    # of relying on garbage collection; this matters when many frames are
    # compared in a loop.
    with Image.open(image_path) as image:
        return np.array(image.convert('RGB').histogram())


def compare_images(image1_path, image2_path, threshold):
    """Report whether two images have sufficiently correlated colour histograms.

    Both images are converted to RGB, their histograms are taken, and the
    correlation coefficient between the two histogram vectors
    (`np.corrcoef`) is compared against `threshold`.

    Args:
        image1_path: Path of the first image.
        image2_path: Path of the second image.
        threshold: Minimum correlation for the images to count as similar.

    Returns:
        True if the correlation is at least `threshold`, otherwise False.
        False is also returned when the correlation is undefined (NaN), which
        happens when either histogram has zero variance.
    """
    hist1 = _rgb_histogram(image1_path)
    hist2 = _rgb_histogram(image2_path)

    # A constant histogram makes the correlation 0/0; silence the resulting
    # floating-point warning and handle the NaN explicitly below.
    with np.errstate(divide='ignore', invalid='ignore'):
        correlation = np.corrcoef(hist1, hist2)[0, 1]

    if np.isnan(correlation):
        return False

    # Plain bool rather than numpy.bool_ so callers get a native value.
    return bool(correlation >= threshold)

def process_images(intro_frames_dir, main_frames_dir, test_dir, threshold):
    """Find intro-like frames in `main_frames_dir` and copy them to `test_dir`.

    Steps:
      1. Make sure `test_dir` exists.
      2. Collect the intro frames recursively with
         `find_and_sort_intro_frames`.
      3. Hand them to `process_main_frames`, which does the comparison and
         copying.

    Args:
        intro_frames_dir: Directory holding the reference intro frames.
        main_frames_dir: Directory holding the frames to scan.
        test_dir: Output directory for matching frames (created if missing).
        threshold: Similarity threshold forwarded to `process_main_frames`.

    Raises:
        FileNotFoundError: If `intro_frames_dir` does not exist.
        NotADirectoryError: If `intro_frames_dir` exists but is not a directory.
    """
    # Ensure the output directory exists.
    os.makedirs(test_dir, exist_ok=True)

    # Fail loudly on a bad intro path. The recursive walk below would
    # otherwise yield nothing and every frame would silently fail to match.
    if not os.path.exists(intro_frames_dir):
        raise FileNotFoundError(
            f"Intro frames directory not found: {intro_frames_dir}"
        )
    if not os.path.isdir(intro_frames_dir):
        raise NotADirectoryError(
            f"Intro frames path is not a directory: {intro_frames_dir}"
        )

    # Step 1: find and sort intro frames (recursive).
    intro_frames = find_and_sort_intro_frames(intro_frames_dir)

    # Step 2: compare the main frames against them.
    process_main_frames(main_frames_dir, intro_frames, test_dir, threshold)



In [None]:
# Run configuration: input/output locations and the similarity cut-off.
intro_frames_dir = "/media/coof/standardHDD/IntroFramesJPG"  # Update this path
main_frames_dir = "/media/coof/standardHDD/SampleMainFrames"    # Update this path
test_dir = "/media/coof/standardHDD/test"                  # Update this path
threshold = .6  # Minimum histogram correlation (see compare_images) for two frames to count as a match
# WARNING: this removes everything already inside test_dir before the run.
create_or_empty_directory(test_dir)
process_images(intro_frames_dir, main_frames_dir, test_dir, threshold)