In [12]:
import cv2
import os
import shutil
import numpy as np

def calculate_class_histograms(sample_paths, max_samples=20):
    """
    Calculate the mean normalized hue histogram for each class using sample images.

    For every class directory, up to ``max_samples`` directory entries are taken
    in sorted filename order. Entries that OpenCV cannot decode are skipped and
    do not count towards the mean, so the result is the average over the images
    that were actually loaded.

    :param sample_paths: List of directories containing sample images for each class.
    :param max_samples: Maximum number of directory entries considered per class.
    :return: List of reference histograms (one flat 180-bin array per class),
             in the same order as ``sample_paths``.
    :raises ValueError: If ``max_samples`` is smaller than 1, or if a class
                        directory yields no readable image.
    """
    if max_samples < 1:
        raise ValueError(f"max_samples must be at least 1, got {max_samples}")

    histograms = []
    for class_path in sample_paths:
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sampled subset (and therefore the reference histogram) reproducible.
        image_files = sorted(os.listdir(class_path))[:max_samples]

        h_hist_sum = None
        loaded_count = 0
        for img_name in image_files:
            img = cv2.imread(os.path.join(class_path, img_name))
            if img is None:
                # Not an image or unreadable: skip it and do not count it.
                continue

            hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            h_hist = cv2.calcHist([hsv_img], [0], None, [180], [0, 180])  # Hue histogram
            h_hist = cv2.normalize(h_hist, h_hist).flatten()

            h_hist_sum = h_hist if h_hist_sum is None else h_hist_sum + h_hist
            loaded_count += 1

        if loaded_count == 0:
            # Fail with a clear message instead of a TypeError on `None / n`.
            raise ValueError(f"No readable images found in class directory: {class_path}")

        # Average over the images that were actually decoded, not over the
        # number of directory entries that were looked at.
        histograms.append(h_hist_sum / loaded_count)
        print(f"Computed histogram for class: {class_path}")
    return histograms

def compare_histogram(hsv_img, reference_hist):
    """
    Score how closely an image's hue distribution matches a reference histogram.

    The hue channel (channel 0) of ``hsv_img`` is binned into 180 bins over
    [0, 180), normalized with ``cv2.normalize``, flattened, and compared to
    ``reference_hist`` with the correlation metric.

    :param hsv_img: HSV image to compare.
    :param reference_hist: Reference histogram for the class.
    :return: Correlation value reported by ``cv2.compareHist``.
    """
    hue_counts = cv2.calcHist([hsv_img], [0], None, [180], [0, 180])
    hue_vector = cv2.normalize(hue_counts, hue_counts).flatten()
    return cv2.compareHist(hue_vector, reference_hist, cv2.HISTCMP_CORREL)

def filter_images_by_class(input_paths, included_paths, excluded_paths, reference_histograms, threshold=0.5):
    """
    Sort each class's images into "included" and "excluded" folders by hue similarity.

    For every class, each readable image in the input directory is compared with
    that class's reference histogram. Images scoring strictly above ``threshold``
    are copied to the included directory; all other readable images are copied
    to the excluded directory. Files OpenCV cannot decode are skipped. The
    source files are left in place.

    :param input_paths: List of input directories for each class.
    :param included_paths: List of output directories for included images.
    :param excluded_paths: List of output directories for excluded images.
    :param reference_histograms: List of reference histograms for each class.
    :param threshold: Similarity threshold to include an image.
    """
    for idx, source_dir in enumerate(input_paths):
        keep_dir = included_paths[idx]
        reject_dir = excluded_paths[idx]
        class_hist = reference_histograms[idx]

        # Make sure both destinations exist before any file is copied.
        for out_dir in (keep_dir, reject_dir):
            os.makedirs(out_dir, exist_ok=True)

        for file_name in os.listdir(source_dir):
            source_file = os.path.join(source_dir, file_name)
            bgr_img = cv2.imread(source_file)
            if bgr_img is not None:
                score = compare_histogram(cv2.cvtColor(bgr_img, cv2.COLOR_BGR2HSV), class_hist)
                # Strictly greater than: a score equal to the threshold is excluded.
                target_dir = keep_dir if score > threshold else reject_dir
                shutil.copy(source_file, os.path.join(target_dir, file_name))

        print(f"Finished processing {source_dir} with threshold {threshold}")




In [14]:


# Define paths
# Every directory tree uses the same four class sub-folders, so the path lists
# are derived from one list of folder names and three root directories.
_CLASS_FOLDERS = ["class_0", "class_1+", "class_2+", "class_3+"]

_INPUT_ROOT = r"F:\xxxxxxxxxxxxx\WSI\Patches\test_data_wsi-20-80"
_INCLUDED_ROOT = r"F:\xxxxxxxxxxxxx\color Histogram intensity\included\test"
_EXCLUDED_ROOT = r"F:\xxxxxxxxxxxxx\color Histogram intensity\excluded\test"

# Joined with an explicit backslash so the resulting strings are the same
# Windows-style paths regardless of the platform running the cell.
input_paths = [_INPUT_ROOT + "\\" + folder for folder in _CLASS_FOLDERS]

included_paths = [_INCLUDED_ROOT + "\\" + folder for folder in _CLASS_FOLDERS]

excluded_paths = [_EXCLUDED_ROOT + "\\" + folder for folder in _CLASS_FOLDERS]

# Step 1: Calculate reference histograms
# The references are built from the same directories that are filtered below.
reference_histograms = calculate_class_histograms(input_paths)

# Step 2: Filter images based on histogram similarity
filter_images_by_class(
    input_paths,
    included_paths,
    excluded_paths,
    reference_histograms,
    threshold=0.7,
)


Computed histogram for class: F:\Serajuns File\HER2 IHC\Data 20%80% from WSI\Patches\test_data_wsi-20-80\class_0
Computed histogram for class: F:\Serajuns File\HER2 IHC\Data 20%80% from WSI\Patches\test_data_wsi-20-80\class_1+
Computed histogram for class: F:\Serajuns File\HER2 IHC\Data 20%80% from WSI\Patches\test_data_wsi-20-80\class_2+
Computed histogram for class: F:\Serajuns File\HER2 IHC\Data 20%80% from WSI\Patches\test_data_wsi-20-80\class_3+
Finished processing F:\Serajuns File\HER2 IHC\Data 20%80% from WSI\Patches\test_data_wsi-20-80\class_0 with threshold 0.7
Finished processing F:\Serajuns File\HER2 IHC\Data 20%80% from WSI\Patches\test_data_wsi-20-80\class_1+ with threshold 0.7
Finished processing F:\Serajuns File\HER2 IHC\Data 20%80% from WSI\Patches\test_data_wsi-20-80\class_2+ with threshold 0.7
Finished processing F:\Serajuns File\HER2 IHC\Data 20%80% from WSI\Patches\test_data_wsi-20-80\class_3+ with threshold 0.7
