# In [5]:
import os
import cv2
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

def apply_clahe_to_image(src_path, dst_path,
                         clip_limit=2.0,
                         tile_grid_size=(8, 8)):
    """
    Apply CLAHE to the L channel of an image and write the result.

    The image is read in BGR order, converted to Lab, the L channel is
    passed through CLAHE, and the merged result is converted back to BGR
    and written to ``dst_path``. The destination directory is created
    when ``dst_path`` has a directory component.

    Args:
        src_path: Path of the image to read.
        dst_path: Path the processed image is written to.
        clip_limit: Value passed as ``clipLimit`` to ``cv2.createCLAHE``.
        tile_grid_size: Value passed as ``tileGridSize`` to
            ``cv2.createCLAHE``.

    Raises:
        OSError: If the source image cannot be read or decoded, or if
            OpenCV reports that the output could not be written.
    """
    img = cv2.imread(src_path)
    # cv2.imread signals failure by returning None instead of raising;
    # fail here with the offending path rather than inside cvtColor.
    if img is None:
        raise OSError(f"Could not read or decode image: {src_path!r}")

    lab = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(
        clipLimit=clip_limit,
        tileGridSize=tile_grid_size
    )
    # Only the L channel is equalised; a and b are merged back unchanged.
    cl = clahe.apply(l)
    limg = cv2.merge((cl, a, b))
    img_out = cv2.cvtColor(limg, cv2.COLOR_Lab2BGR)

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    dst_dir = os.path.dirname(dst_path)
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)

    # cv2.imwrite returns False when the file could not be saved.
    if not cv2.imwrite(dst_path, img_out):
        raise OSError(f"Could not write image: {dst_path!r}")

def process_dataset(input_dir, output_dir, num_workers=1):
    """
    Apply CLAHE to every image under input_dir, mirroring the tree
    into output_dir.

    input_dir is walked recursively. Every file whose name ends in
    .png, .jpg or .jpeg (case-insensitive) is passed to
    apply_clahe_to_image together with a destination path that has the
    same relative location under output_dir. The work is submitted to a
    ThreadPoolExecutor and progress is shown with tqdm.

    Args:
        input_dir: Root directory to scan for images.
        output_dir: Root directory the processed images are written to.
        num_workers: ``max_workers`` for the thread pool.

    Raises:
        NotADirectoryError: If input_dir is not an existing directory.
    """
    # os.walk yields nothing for a missing path, which would make a
    # mistyped input_dir look like a successful run with zero images.
    if not os.path.isdir(input_dir):
        raise NotADirectoryError(
            f"Input directory does not exist: {input_dir!r}"
        )

    image_exts = ('.png', '.jpg', '.jpeg')
    tasks = []
    for root, _, files in os.walk(input_dir):
        # The relative location is the same for every file in this
        # directory, so compute it once per directory.
        rel = os.path.relpath(root, input_dir)
        # relpath returns "." for the top level; skip the redundant segment.
        if rel == os.curdir:
            dst_folder = output_dir
        else:
            dst_folder = os.path.join(output_dir, rel)
        for fname in files:
            if fname.lower().endswith(image_exts):
                src = os.path.join(root, fname)
                dst = os.path.join(dst_folder, fname)
                tasks.append((src, dst))

    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        # list() drains the lazy map so the progress bar advances and any
        # exception raised by a worker is re-raised here.
        list(tqdm(
            executor.map(lambda p: apply_clahe_to_image(*p), tasks),
            total=len(tasks),
            desc="CLAHE Preprocessing"
        ))

if __name__ == "__main__":
    # Source and destination roots are machine-specific; edit before running.
    input_dir = r"G:\Mega_Project\Dataset\5_Classes_TVT\train"
    output_dir = r"D:\clahe_train"
    # One worker thread: images are processed one at a time.
    process_dataset(
        input_dir=input_dir,
        output_dir=output_dir,
        num_workers=1,
    )


# Output: CLAHE Preprocessing: 100%|██████████| 12556/12556 [03:18<00:00, 63.21it/s]
