Metadata also includes the timestamp each frame was extracted from

In [None]:
import os
from pathlib import Path
import cv2
import pandas as pd
from tqdm import tqdm
from ultralytics import YOLO
from PIL import Image
import imagehash

# All-in-one pipeline: for every bird folder, sample frames from its videos,
# keep only frames where YOLO detects something, drop near-duplicates with
# pHash, save the survivors as PNG and log them in a metadata CSV.

input_dir = r"F:\Bowerbird-ID_dataset"  # Directory containing one sub-folder per bird
output_dir = "Fully_processed_frames"  # Root directory for the kept frames
yolo_model_path = "yolo11m-seg.pt"  # Path to YOLO model
sampling_interval = 240  # Inspect one frame out of every 240
max_frames_per_bird = 200  # Stop collecting for a bird once this many frames are kept
similarity_threshold = 5  # A frame is kept only if its pHash distance to every kept frame exceeds this

metadata_csv = os.path.join(output_dir, "processed_frames_metadata.csv")
os.makedirs(output_dir, exist_ok=True)

# Write the header only when the file does not exist yet; later writes append.
if not os.path.exists(metadata_csv):
    with open(metadata_csv, 'w') as f:
        f.write("Bird ID,Video Name,Frame Name,Timestamp (s)\n")  # Headers of the metadata file

# Load YOLO
yolo_model = YOLO(yolo_model_path)

# Iterate over the bird folders
bird_folders = [f for f in Path(input_dir).iterdir() if f.is_dir()]
for bird_folder in tqdm(bird_folders, desc="Processing bird folders"):
    bird_name = bird_folder.name  # Folder name is used as the bird ID, e.g. "B02"
    bird_output_dir = os.path.join(output_dir, bird_name)
    os.makedirs(bird_output_dir, exist_ok=True)

    valid_instances = 0  # Count of valid and unique frames for this bird
    unique_hashes = []  # pHash values of the frames kept so far for this bird

    print(f"\nProcessing bird: {bird_name}")

    # Iterate over videos in the bird folder
    video_files = list(bird_folder.glob("*.MP4"))
    for video_file in tqdm(video_files, desc=f"Processing videos for {bird_name}", leave=False):
        if valid_instances >= max_frames_per_bird:
            break

        cap = cv2.VideoCapture(str(video_file))
        frame_count = 0  # Index of the next frame to be grabbed

        try:
            while cap.isOpened() and valid_instances < max_frames_per_bird:
                # grab() advances to the next frame; retrieve() is only called
                # for the sampled frames, so skipped frames are never returned.
                if not cap.grab():
                    break
                frame_index = frame_count
                frame_count += 1

                if frame_index % sampling_interval != 0:
                    continue

                ret, frame = cap.retrieve()
                if not ret:
                    break

                # YOLO detection
                # NOTE(review): any detection above the confidence threshold
                # counts here, whatever its class — confirm that filtering on
                # the bird class is not needed.
                result = yolo_model.predict(frame, conf=0.6, verbose=False)
                if not result[0].boxes:
                    continue

                # Convert frame to PIL Image for pHash (OpenCV frames are BGR)
                pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                frame_hash = imagehash.phash(pil_image)

                # Keep the frame only if it differs enough from every kept frame
                if not all(frame_hash - h > similarity_threshold for h in unique_hashes):
                    continue

                frame_name = f"{video_file.stem}_frame{frame_index}.png"
                frame_path = os.path.join(bird_output_dir, frame_name)
                # imwrite reports failure through its return value; skip the
                # frame so the metadata never lists a file that was not saved.
                if not cv2.imwrite(frame_path, frame):
                    print(f"Warning: could not write frame to {frame_path}")
                    continue

                # Capture position as reported by OpenCV, converted from ms to s
                timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000

                # Append metadata to file
                with open(metadata_csv, 'a') as f:
                    f.write(f"{bird_name},{video_file.name},{frame_name},{timestamp}\n")

                # Update unique hash and frame counter
                unique_hashes.append(frame_hash)
                valid_instances += 1
        finally:
            # Release the capture even if detection, hashing or writing raised
            cap.release()

    if valid_instances < max_frames_per_bird:
        print(f"Only {valid_instances} valid frames were collected for bird {bird_name}.")

print(f"\nProcessed frames saved in: {output_dir}")
print(f"Metadata saved at: {metadata_csv}")

Processing bird folders:   0%|          | 0/16 [00:00<?, ?it/s]


Processing bird: B02


Processing bird folders:   6%|▋         | 1/16 [22:56<5:44:08, 1376.55s/it]


Processing bird: B03


Processing bird folders:  12%|█▎        | 2/16 [49:42<5:52:44, 1511.77s/it]


Processing bird: B04


Processing bird folders:  19%|█▉        | 3/16 [2:08:31<10:45:48, 2980.66s/it]


Processing bird: B07


Processing bird folders:  25%|██▌       | 4/16 [2:20:17<6:56:31, 2082.59s/it] 


Processing bird: B11


Processing bird folders:  31%|███▏      | 5/16 [3:08:28<7:15:16, 2374.21s/it]


Processing bird: B18


Processing bird folders:  38%|███▊      | 6/16 [3:25:41<5:19:41, 1918.19s/it]


Processing bird: B23


Processing bird folders:  44%|████▍     | 7/16 [3:44:42<4:09:37, 1664.16s/it]


Processing bird: B29


Processing bird folders:  50%|█████     | 8/16 [6:12:28<8:47:34, 3956.85s/it]


Processing bird: B30


Processing bird folders:  56%|█████▋    | 9/16 [6:58:46<6:58:37, 3588.15s/it]


Processing bird: B31


Processing bird folders:  62%|██████▎   | 10/16 [7:58:56<5:59:30, 3595.07s/it]


Processing bird: B50


Processing bird folders:  69%|██████▉   | 11/16 [8:18:15<3:57:26, 2849.29s/it]


Processing bird: B52


Processing bird folders:  75%|███████▌  | 12/16 [8:32:06<2:29:01, 2235.31s/it]


Processing bird: B05


Processing bird folders:  81%|████████▏ | 13/16 [9:02:05<1:45:09, 2103.13s/it]


Processing bird: B26


Processing bird folders:  88%|████████▊ | 14/16 [9:16:58<57:55, 1737.64s/it]  


Processing bird: B47


Processing bird folders:  94%|█████████▍| 15/16 [9:32:03<24:46, 1486.63s/it]


Processing bird: B49


Processing bird folders: 100%|██████████| 16/16 [9:46:58<00:00, 2201.15s/it]


Processed frames saved in: Fully_processed_frames
Metadata saved at: Fully_processed_frames\processed_frames_metadata.csv





The script iterates through the folders to find videos for each bird:
* Using OpenCV for frame extraction at a specified interval
* Filter out redundant frames through Perceptual Hashing (pHash). I decided to start with pHash as it is faster and less computationally expensive, and since I don't need clustering. If it fails, I will consider other feature-matching alternatives such as ORB, SIFT, or SURF (within a k-means algorithm) for more advanced comparisons.
* Save the selected frames into bird-specific folders

It also generates a CSV recording:
* Original video name
* Timestamp of the extracted frame in the original video
* Filename of the frame

This is debatable, but as of now I have decided to filter similar frames (using pHash) that have not been mask segmented. In that way, the similarity is checked considering how close/far birds are to the camera, or where in the frame they appear, rather than focusing strictly on the posture, which I assume would be the case if I filtered for similar frames after the image has been cropped.
In general, pHash works best when applied to full images with consistent backgrounds, as it captures overall image structure. Removing the background would probably lead to incorrectly marking unique frames as duplicates

## Frame extraction

In [None]:
import os
from pathlib import Path
import cv2
import csv
from tqdm import tqdm 

# Raw frame extraction: for every bird folder, save every
# `sampling_interval`-th frame of its videos as PNG and record one metadata
# row per saved frame.

input_dir = r"F:\Bowerbird-ID_dataset"  # Path to the hard drive containing all valid videos
output_dir = r"C:\Users\Athena\Documents\Bowerbird-ID\3_Frame_sampling\1_Raw_extracted_frames"
sampling_interval = 120  # Extract a frame every 120 frames
max_frames_per_bird = 300  # Max frames per bird folder
max_videos_per_bird = 100  # Process at most 100 videos per bird folder

os.makedirs(output_dir, exist_ok=True)
csv_data = []  # One [bird, video, timestamp, frame name] row per saved frame

# Iterate through bird folders directly in the input directory
for bird_folder in tqdm(Path(input_dir).iterdir()):
    if not bird_folder.is_dir():  # Ignore loose files next to the bird folders
        continue

    bird_name = bird_folder.name  # Get bird ID (e.g., "B02")
    bird_output_dir = os.path.join(output_dir, bird_name)
    os.makedirs(bird_output_dir, exist_ok=True)

    # Counted per bird (not per video) so that max_frames_per_bird really
    # limits the number of frames written into this bird's folder.
    saved_count = 0
    video_count = 0

    for video_file in bird_folder.glob("*.MP4"):
        if video_count >= max_videos_per_bird or saved_count >= max_frames_per_bird:
            break

        cap = cv2.VideoCapture(str(video_file))
        frame_count = 0  # Index of the next frame to be grabbed
        video_name = video_file.stem

        try:
            while cap.isOpened() and saved_count < max_frames_per_bird:
                # grab() advances to the next frame; retrieve() is only called
                # for the frames that are actually saved.
                if not cap.grab():
                    break
                frame_index = frame_count
                frame_count += 1

                # Save frames at the specified sampling interval
                if frame_index % sampling_interval != 0:
                    continue

                ret, frame = cap.retrieve()
                if not ret:
                    break

                # Capture position as reported by OpenCV, converted from ms to s
                timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
                frame_name = f"{video_name}_frame{frame_index}.png"
                frame_path = os.path.join(bird_output_dir, frame_name)

                # imwrite reports failure through its return value; do not
                # record metadata for a frame that was not written.
                if not cv2.imwrite(frame_path, frame):
                    print(f"Warning: could not write frame to {frame_path}")
                    continue

                csv_data.append([bird_name, video_file.name, timestamp, frame_name])
                saved_count += 1
        finally:
            # Release the capture even if reading or writing raised
            cap.release()

        video_count += 1

# Save metadata to CSV
metadata_csv = os.path.join(output_dir, "raw_frames_metadata.csv")
with open(metadata_csv, mode='w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["Bird ID", "Video Name", "Timestamp", "Frame Name"])  # Include Bird ID
    writer.writerows(csv_data)

print(f"Frame extraction complete. Metadata saved to {metadata_csv}")

## YOLO filtering

In [None]:
from ultralytics import YOLO
import pandas as pd
import os
import shutil
from tqdm import tqdm

# YOLO filtering: run detection on every raw frame listed in the metadata CSV.
# Frames with at least one detection are copied into a per-bird folder and
# kept in the metadata; the others are copied into a single flat
# "filtered out" folder.

yolo_model = YOLO('yolo11m.pt')

raw_metadata_csv = "/Users/sarah/Bowerbird-ID/3_Frame_sampling/1_Raw_extracted_frames/raw_frames_metadata.csv"
raw_frames_dir = "/Users/sarah/Bowerbird-ID/3_Frame_sampling/1_Raw_extracted_frames"  # Directory containing raw frames with subfolders
filtered_frames_dir = "/Users/sarah/Bowerbird-ID/3_Frame_sampling/2_YOLO_filtered_frames"  # Directory for valid frames
filtered_out_frames_dir = "/Users/sarah/Bowerbird-ID/3_Frame_sampling/Filtered_out_frames_no_bird"  # Directory for invalid frames

os.makedirs(filtered_frames_dir, exist_ok=True)
os.makedirs(filtered_out_frames_dir, exist_ok=True)

metadata = pd.read_csv(raw_metadata_csv)

filtered_metadata = []  # Metadata rows of the frames that passed detection

print("Running YOLO on raw frames...")
for _, row in tqdm(metadata.iterrows(), total=len(metadata)):
    frame_name = row["Frame Name"]
    bird_id = row["Bird ID"]

    frame_path = os.path.join(raw_frames_dir, bird_id, frame_name)
    if not os.path.exists(frame_path):
        print(f"Warning: Frame not found at {frame_path}")
        continue

    # Run YOLO det
    # NOTE(review): any detection above the confidence threshold counts here,
    # whatever its class — confirm that filtering on the bird class is not needed.
    results = yolo_model.predict(frame_path, conf=0.3, verbose=False)

    if not results[0].boxes:
        # Nothing detected: copy straight into the "filtered out" directory
        # (no bird ID subfolder)
        invalid_frame_path = os.path.join(filtered_out_frames_dir, frame_name)
        shutil.copy(frame_path, invalid_frame_path)
        continue

    # Something detected: mirror the per-bird subdirectory structure
    filtered_subfolder_dir = os.path.join(filtered_frames_dir, bird_id)
    os.makedirs(filtered_subfolder_dir, exist_ok=True)

    # Copy valid frame to the corresponding subdirectory
    valid_frame_path = os.path.join(filtered_subfolder_dir, frame_name)
    shutil.copy(frame_path, valid_frame_path)

    # Add row to new metadata
    filtered_metadata.append(row)

# Save updated metadata for valid frames. Passing the original columns keeps
# the header row even when no frame passed, so the CSV always carries the
# columns the next stage reads.
filtered_metadata_csv = os.path.join(filtered_frames_dir, "yolodet_frames_metadata.csv")
pd.DataFrame(filtered_metadata, columns=metadata.columns).to_csv(filtered_metadata_csv, index=False)

print("Done!")
print(f"Filtered frames (with birds) saved in: {filtered_frames_dir}")
print(f"Filtered-out frames (no birds) saved in: {filtered_out_frames_dir}")
print(f"Updated metadata for valid frames saved at: {filtered_metadata_csv}")

## pHASH filtering

In [None]:
import os
import shutil

import imagehash
import pandas as pd
from PIL import Image
from tqdm import tqdm

# pHash filtering: walk the YOLO-filtered frames listed in the metadata CSV and
# keep a frame only if its perceptual hash differs by more than
# `similarity_threshold` from every frame already kept for the same bird.

filtered_metadata_csv = "/Users/sarah/Bowerbird-ID/3_Frame_sampling/2_YOLO_filtered_frames/yolodet_frames_metadata.csv"
filtered_frames_dir = "/Users/sarah/Bowerbird-ID/3_Frame_sampling/2_YOLO_filtered_frames"  # Directory for valid YOLO-filtered frames
phash_filtered_frames_dir = "/Users/sarah/Bowerbird-ID/3_Frame_sampling/3_pHASH_filtered_frames"  # Directory for pHASH-filtered frames
os.makedirs(phash_filtered_frames_dir, exist_ok=True)

# Perceptual Hashing Parameters
similarity_threshold = 5

# Load metadata
metadata = pd.read_csv(filtered_metadata_csv)

unique_metadata = []  # Metadata rows of the frames that are kept
unique_hashes = {}  # bird ID -> list of hashes of the frames kept for that bird

total_frames = len(metadata)
deleted_frames = 0  # Frames rejected as near-duplicates
missing_frames = 0  # Frames listed in the metadata but absent on disk

print("Filtering similar frames across bird folders using perceptual hashing...")
for _, row in tqdm(metadata.iterrows(), total=total_frames):
    frame_name = row["Frame Name"]
    bird_id = row["Bird ID"]

    frame_path = os.path.join(filtered_frames_dir, bird_id, frame_name)
    if not os.path.exists(frame_path):
        print(f"Warning: Frame not found at {frame_path}")
        missing_frames += 1
        continue

    # Close the image file as soon as the hash is computed
    with Image.open(frame_path) as img:
        img_hash = imagehash.phash(img)

    # Hashes are only compared within the same bird
    bird_hashes = unique_hashes.setdefault(bird_id, [])

    # Check if the hash is unique for this bird
    if all(img_hash - h > similarity_threshold for h in bird_hashes):
        # Add to unique list
        unique_metadata.append(row)
        bird_hashes.append(img_hash)

        # Create subdirectory structure for pHASH-filtered frames
        phash_subfolder_dir = os.path.join(phash_filtered_frames_dir, bird_id)
        os.makedirs(phash_subfolder_dir, exist_ok=True)

        # Copy unique frame to the corresponding subdirectory
        unique_frame_path = os.path.join(phash_subfolder_dir, frame_name)
        shutil.copy(frame_path, unique_frame_path)
    else:
        # Increment deleted frames count
        deleted_frames += 1

# Save updated metadata for pHASH-filtered frames. Passing the original columns
# keeps the header row even when nothing was kept.
phash_metadata_csv = os.path.join(phash_filtered_frames_dir, "phash_frames_metadata.csv")
pd.DataFrame(unique_metadata, columns=metadata.columns).to_csv(phash_metadata_csv, index=False)

# Count what was actually kept; frames missing on disk are neither kept nor deleted.
kept_frames = len(unique_metadata)
deleted_percentage = (deleted_frames / total_frames) * 100 if total_frames > 0 else 0

print("\nFrame Filtering Summary:")
print(f"Total frames processed: {total_frames}")
print(f"Frames kept: {kept_frames}")
print(f"Frames deleted: {deleted_frames}")
if missing_frames:
    print(f"Frames missing on disk: {missing_frames}")
print(f"Percentage of frames deleted: {deleted_percentage:.2f}%")
print(f"Unique frames saved in: {phash_filtered_frames_dir}")
print(f"Updated metadata for unique frames saved at: {phash_metadata_csv}")