In [12]:
import os
import cv2
import pandas as pd
from tqdm import tqdm

def detect_haar_eyes(input_dir, output_dir, scale_factor=1.03, min_neighbors=2,
                     pad=5, output_size=(224, 224)):
    """Crop eye regions out of every image in ``input_dir`` with a Haar cascade.

    Each entry of ``input_dir`` is loaded with ``cv2.imread``, converted to
    grayscale, histogram-equalized and passed to OpenCV's bundled
    ``haarcascade_eye.xml`` detector. Every detection box is grown by ``pad``
    pixels, clamped to the image bounds, cut from the original colour image,
    resized to ``output_size`` and written to ``output_dir`` as
    ``<stem>_eye<i>.jpg``. When nothing is detected, the whole image is
    resized and written as ``<stem>_fallback.jpg`` instead.

    Args:
        input_dir: Directory whose entries are processed (in sorted order).
        output_dir: Directory receiving the crops; created if missing.
        scale_factor: ``scaleFactor`` forwarded to ``detectMultiScale``.
        min_neighbors: ``minNeighbors`` forwarded to ``detectMultiScale``.
        pad: Pixels added on every side of a detection before cropping.
        output_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        list[tuple[str, str]]: ``(filename, reason)`` pairs for entries that
        could not be loaded, fell back to the full image, or had an output
        file that could not be written.

    Raises:
        RuntimeError: If the eye cascade could not be loaded.
    """
    eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
    if eye_cascade.empty():
        # Fail before touching the filesystem instead of erroring on the first image.
        raise RuntimeError(
            "Could not load 'haarcascade_eye.xml' from cv2.data.haarcascades"
        )

    os.makedirs(output_dir, exist_ok=True)
    output_size = tuple(output_size)
    skipped_files = []

    # os.listdir order is arbitrary; sorting keeps the run and its log reproducible.
    for file in tqdm(sorted(os.listdir(input_dir))):
        file_path = os.path.join(input_dir, file)
        stem = os.path.splitext(file)[0]

        img = cv2.imread(file_path)
        if img is None:
            skipped_files.append((file, "Unreadable image"))
            continue

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        equalized = cv2.equalizeHist(gray)

        eyes = eye_cascade.detectMultiScale(
            equalized,
            scaleFactor=scale_factor,
            minNeighbors=min_neighbors,
        )

        if len(eyes) == 0:
            fallback = cv2.resize(img, output_size)
            fallback_name = stem + "_fallback.jpg"
            # cv2.imwrite reports failure through its return value, not an exception.
            if cv2.imwrite(os.path.join(output_dir, fallback_name), fallback):
                skipped_files.append((file, "No eyes detected — fallback used"))
            else:
                skipped_files.append(
                    (file, "No eyes detected — fallback could not be written")
                )
            continue

        img_h, img_w = img.shape[:2]
        for i, (x, y, w, h) in enumerate(eyes):
            # Grow the detection by `pad`, but never step outside the image.
            x1 = max(x - pad, 0)
            y1 = max(y - pad, 0)
            x2 = min(x + w + pad, img_w)
            y2 = min(y + h + pad, img_h)
            roi = img[y1:y2, x1:x2]
            roi_resized = cv2.resize(roi, output_size)
            crop_name = f"{stem}_eye{i}.jpg"
            if not cv2.imwrite(os.path.join(output_dir, crop_name), roi_resized):
                skipped_files.append((file, f"Could not write {crop_name}"))

    return skipped_files

# Project root: the parent of the current working directory
# (the notebook is expected to run from the 'notebooks' folder).
base_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Source images, crop destination and log folder all live under <base_dir>/data
input_path = os.path.join(base_dir, "data", "processed", "healthy_eye")
output_path = os.path.join(base_dir, "data", "haar_test_output", "healthy_eye")
log_path = os.path.join(base_dir, "data", "log")

# Crop the eyes; the return value lists (filename, reason) for every flagged file
skipped = detect_haar_eyes(input_path, output_path)

# Report the outcome, writing the flagged files to a CSV log when there are any
if not skipped:
    print("All eye crops successful.")
else:
    os.makedirs(log_path, exist_ok=True)
    df = pd.DataFrame(skipped, columns=["filename", "reason"])
    df.to_csv(os.path.join(log_path, "skipped_haar_images.csv"), index=False)
    print(f"Logged {len(skipped)} skipped files to 'data/log/skipped_haar_images.csv'")

100%|██████████| 172/172 [00:05<00:00, 34.21it/s]

All eye crops successful.





In [13]:
import matplotlib.pyplot as plt
import cv2
import os

def visualize_skipped_images(skipped_list, input_dir, max_images=4):
    """Show the first ``max_images`` flagged images, one matplotlib figure each.

    Args:
        skipped_list: Sequence of ``(filename, reason)`` pairs.
        input_dir: Directory the filenames are resolved against.
        max_images: Number of leading entries of ``skipped_list`` to consider.

    Entries that ``cv2.imread`` cannot load produce a printed message instead
    of a figure.
    """
    for name, why in skipped_list[:max_images]:
        picture = cv2.imread(os.path.join(input_dir, name))

        # cv2.imread signals failure by returning None rather than raising.
        if picture is None:
            print(f"Image {name} could not be loaded.")
            continue

        plt.figure(figsize=(4, 4))
        # Convert from OpenCV's BGR channel order to RGB before display.
        plt.imshow(cv2.cvtColor(picture, cv2.COLOR_BGR2RGB))
        plt.title(f"{name} — {why}")
        plt.axis('off')
        plt.show()  # called per image so each figure is shown on its own

# Rebuild the healthy-eye input folder from the project root (parent of the working directory)
base_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
input_path = os.path.join(base_dir, "data", "processed", "healthy_eye")

# Display the flagged images, loading each one from the original input folder
visualize_skipped_images(skipped_list=skipped, input_dir=input_path)