<a href="https://colab.research.google.com/github/sandhyaparna/CV-Training/blob/main/notebooks/segmentation/image_collection/split_images_on_redness_score.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Split collected images into a chosen number of bins/folders based on their redness score

* A redness score is calculated for every image in a camera's folder
* Percentile thresholds are determined for the chosen number of bins
* Each image is copied into the folder that corresponds to its percentile bin


In [None]:
import os
import shutil

import cv2
import numpy as np

In [None]:
# --- Global Configuration ---
# Number of percentile folders (bins) that each camera's images are split into.
NUM_BINS = 15

# Camera IDs to process. This is an example list (182 is not included);
# replace it with your actual IDs.
cam_ids_to_process = [176, 177, 178, 179, 180, 184]

In [None]:
def get_red_score(image_path):
    """
    Return the share of an image's pixels that are red, as a percentage.

    The image is loaded with OpenCV and converted to HSV. A pixel counts as
    red when its hue lies in 0-10 or 160-179 and both its saturation and its
    value lie in 50-255. An image that cannot be read scores 0.0.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        return 0.0

    hsv_image = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)

    # Red sits at both ends of OpenCV's 0-179 hue axis, so two bands are needed.
    red_bands = (
        ([0, 50, 50], [10, 255, 255]),
        ([160, 50, 50], [179, 255, 255]),
    )
    low_mask, high_mask = (
        cv2.inRange(hsv_image, np.array(lower), np.array(upper))
        for lower, upper in red_bands
    )

    # A pixel is red if either band matched it.
    is_red = np.logical_or(low_mask, high_mask)

    height, width = bgr.shape[:2]
    return (np.sum(is_red) / (height * width)) * 100


In [None]:
def _assign_bin_indices(scores, num_bins):
    """
    Compute percentile thresholds for *scores* and the bin index of each score.

    Returns a ``(thresholds, bin_indices)`` pair. ``thresholds`` holds the
    score at each of the ``num_bins`` upper bin boundaries (the 0th percentile
    is skipped). ``bin_indices[i]`` is the index of the first threshold that
    ``scores[i]`` is less than or equal to, clamped to ``num_bins - 1``.
    """
    # e.g. for 15 bins the steps are 0, 6.67, 13.33, ..., 100.
    percentile_steps = np.linspace(0, 100, num_bins + 1)
    thresholds = np.percentile(scores, percentile_steps[1:])

    # side="left" yields the first index i with score <= thresholds[i].
    bin_indices = np.searchsorted(thresholds, scores, side="left")

    # A score above every threshold still belongs in the highest bin.
    return thresholds, np.minimum(bin_indices, num_bins - 1)


def process_cam_id(cam_id, *, num_bins=None, image_root="HighRiver_BoneCrops"):
    """
    Scores, bins, and copies images for a single cam_id based on red pixel percentage.

    Every image directly inside ``{image_root}/{cam_id}`` is scored with
    ``get_red_score``; images scoring 0 are left out. The remaining images are
    copied into ``{image_root}/{cam_id}/red_{cam_id}/{cam_id}_Q<k>`` folders,
    where ``k`` is the 1-based percentile bin of the image's score.

    Args:
        cam_id: Camera ID; used as the input sub-folder name and in the
            output folder names.
        num_bins: Number of percentile bins. Defaults to the module-level
            ``NUM_BINS`` when omitted.
        image_root: Folder that contains one sub-folder per camera ID.

    Returns:
        None. The function returns early (after printing a message) when the
        input folder is missing or no image has a score above 0.

    Raises:
        ValueError: If the bin count is smaller than 1.
    """
    # Paths are built with "/" so the printed folder names stay unchanged.
    image_folder = f"{image_root}/{cam_id}"  # Folder where original images are located
    output_folder = image_folder + f"/red_{cam_id}"  # Root of the percentile folders

    print(f"\n--- Starting processing for CAM ID: {cam_id} ---")

    # 1. Score all images and store (score, filename)
    print("1. Calculating red scores for all images...")
    # isdir (not exists): a regular file at this path would make listdir fail.
    if not os.path.isdir(image_folder):
        print(f"⚠️ Error: Input folder '{image_folder}' not found. Skipping.")
        return

    if num_bins is None:
        num_bins = NUM_BINS
    if num_bins < 1:
        raise ValueError(f"num_bins must be at least 1, got {num_bins}")

    image_extensions = (".png", ".jpg", ".jpeg", ".bmp")
    red_scores = []
    # Sorted so that runs process files in a deterministic order.
    for filename in sorted(os.listdir(image_folder)):
        if not filename.lower().endswith(image_extensions):
            continue
        score = get_red_score(os.path.join(image_folder, filename))
        # Only include images with a score > 0 for percentile distribution
        if score > 0:
            red_scores.append((score, filename))

    if not red_scores:
        print("No valid images found or no red color detected. Skipping.")
        return

    # 2. Determine percentile thresholds (Binning)
    print(f"2. Determining percentile thresholds for {num_bins} bins...")
    scores_array = np.array([score for score, _ in red_scores])
    thresholds, bin_indices = _assign_bin_indices(scores_array, num_bins)

    print("Calculated Score Thresholds (Cumulative):")
    for i, threshold in enumerate(thresholds):
        print(f"  Bin Max Score (Q{i + 1}): {threshold:.2f}%")
    print("-" * 40)

    # Create one output directory per bin before copying.
    os.makedirs(output_folder, exist_ok=True)
    bin_names = [f"{cam_id}_Q{i + 1}" for i in range(num_bins)]
    for name in bin_names:
        os.makedirs(os.path.join(output_folder, name), exist_ok=True)

    # 3. Copy images to the correct percentile bin
    print("3. Copying images to percentile folders...")
    for (_, filename), bin_index in zip(red_scores, bin_indices):
        source_path = os.path.join(image_folder, filename)
        target_folder_path = os.path.join(output_folder, bin_names[int(bin_index)])
        shutil.copy2(source_path, os.path.join(target_folder_path, filename))

    print(f"✅ Finished! Images copied to {num_bins} folders inside '{output_folder}'.")


In [None]:
if __name__ == "__main__":
    # Process every camera listed in `cam_ids_to_process` from the Global
    # Configuration section. The list is deliberately not redefined here, so
    # the configuration stays the single place to edit.
    for current_cam_id in cam_ids_to_process:
        process_cam_id(current_cam_id)

    print("\n\n*** All camera IDs processed. ***")