In [1]:
import os
import cv2
import numpy as np
from pathlib import Path

# Root folders: raw scans are read from dataset_path, processed copies
# are written under output_path.
dataset_path = "dataset"
output_path = "preprocessed_dataset"

# Age-group folders expected under both roots.
subfolders = ["above40", "AdditionalAbove40", "age17-40", "AdditionalAge17-40"]

# Make sure every age-group folder exists under the output root
# (parents=True also creates the root itself; exist_ok=True makes reruns safe).
for subfolder in subfolders:
    Path(output_path, subfolder).mkdir(parents=True, exist_ok=True)

def apply_histogram_equalization(image):
    """Equalize the global histogram of a grayscale image.

    Args:
        image: Grayscale image array, passed unchanged to
            ``cv2.equalizeHist`` (which expects 8-bit single-channel input).

    Returns:
        The equalized image produced by ``cv2.equalizeHist``.
    """
    equalized = cv2.equalizeHist(image)
    return equalized

def apply_clahe(image, clip_limit=2.0, tile_grid_size=(8, 8)):
    """Enhance the contrast of a grayscale image with CLAHE.

    Args:
        image: Grayscale image array to enhance.
        clip_limit: Forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: Forwarded to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns:
        The image returned by the CLAHE object's ``apply`` method.
    """
    # A fresh CLAHE object is built per call and used exactly once.
    return cv2.createCLAHE(
        clipLimit=clip_limit,
        tileGridSize=tile_grid_size,
    ).apply(image)

def preprocess_image(image_path, output_path, age_group):
    """Preprocess a single OPG scan image and write the result to disk.

    The image is read as grayscale. When ``age_group`` contains "above40"
    (compared case-insensitively) it is enhanced with CLAHE; otherwise it
    gets standard histogram equalization.

    Args:
        image_path: Path of the image file to read (string).
        output_path: Path the processed image is written to (string).
            Note: this parameter shadows the module-level ``output_path``
            inside the function.
        age_group: Name of the age-group folder the image belongs to.

    Returns:
        None. Load and save failures are reported on stdout, not raised.
    """
    # cv2.imread returns None (rather than raising) when the file cannot be read.
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Failed to load {image_path}")
        return

    # Apply preprocessing based on age group
    if "above40" in age_group.lower():
        # Use CLAHE for above-40 group (higher osteoporosis likelihood)
        processed_img = apply_clahe(img, clip_limit=2.5, tile_grid_size=(8, 8))
    else:
        # Use standard histogram equalization for younger group
        processed_img = apply_histogram_equalization(img)

    # cv2.imwrite signals a failed write by returning False; report it the
    # same way load failures are reported instead of dropping it silently.
    if not cv2.imwrite(output_path, processed_img):
        print(f"Failed to save {output_path}")

def process_dataset(extensions=(".jpg",)):
    """Process all images in the dataset.

    For every entry in ``subfolders``, each matching image found directly
    inside ``dataset_path/<subfolder>`` is preprocessed and written under
    the same file name to ``output_path/<subfolder>``.

    Args:
        extensions: File suffixes (including the leading dot) to process.
            Matching is case-insensitive, so ".jpg" also selects ".JPG"
            regardless of platform. Defaults to JPEG files with a ".jpg"
            suffix only.

    Returns:
        None. Progress problems are reported on stdout.
    """
    wanted_suffixes = {ext.lower() for ext in extensions}

    for subfolder in subfolders:
        input_dir = Path(dataset_path) / subfolder
        output_dir = Path(output_path) / subfolder

        # Report a missing input folder instead of silently doing nothing.
        if not input_dir.is_dir():
            print(f"Skipping missing input folder {input_dir}")
            continue

        # Do not rely on the output folder having been created elsewhere.
        output_dir.mkdir(parents=True, exist_ok=True)

        # Sorted so that the processing order is deterministic across runs.
        for img_path in sorted(input_dir.iterdir()):
            if img_path.suffix.lower() not in wanted_suffixes:
                continue
            output_img_path = output_dir / img_path.name
            preprocess_image(str(img_path), str(output_img_path), subfolder)

# Script entry point: run the whole pipeline, then tell the user where the
# results are.
if __name__ == "__main__":
    process_dataset()
    # Derive the folder name from output_path so the message cannot drift
    # from the configured output location.
    print(f"Preprocessing complete. Check '{output_path}' folder.")

Preprocessing complete. Check 'preprocessed_dataset' folder.
