# In [None]:
import os
import threading
from concurrent.futures import ThreadPoolExecutor

import cv2
import numpy as np
import tensorflow as tf
from mtcnn import MTCNN
from tqdm import tqdm

# ==============================
# TensorFlow GPU setup
# ==============================
# Enable on-demand memory growth on every GPU TensorFlow can see. When no GPU
# is present the list is empty and the loop body never runs.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
    # Optional alternative (disabled): cap each process at 2GB, tweak as needed.
    # tf.config.experimental.set_virtual_device_configuration(
    #     gpu, [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)]
    # )

# ==============================
# Constants
# ==============================
# Target landmark coordinates inside the 112x112 aligned crop. Row order must
# match the source-point order built in align_face_to_template.
# NOTE(review): values look like the common ArcFace/InsightFace 112x112
# template - confirm the provenance.
REFERENCE_FACIAL_POINTS = np.asarray(
    (
        (38.2946, 51.6963),  # left eye
        (73.5318, 51.5014),  # right eye
        (56.0252, 71.7366),  # nose
        (41.5493, 92.3655),  # mouth, left corner
        (70.7299, 92.2041),  # mouth, right corner
    ),
    dtype=np.float32,
)

# Root directory that the __main__ section walks recursively for input images.
RAW_DIR = "/home/sandeshprasai/Final_Semester_Project/AI_Attendance_System/ai-ml-model/DataSets/processed/ThirdLab"
# Root directory for the aligned output; process_image mirrors each input's
# path relative to RAW_DIR underneath it.
# NOTE(review): this differs from RAW_DIR only in the final letter
# ("ThirdLap" vs "ThirdLab") - confirm that is intentional and not a typo.
OUTPUT_DIR = "/home/sandeshprasai/Final_Semester_Project/AI_Attendance_System/ai-ml-model/DataSets/processed/ThirdLap"

# Runs at import time: make sure the output root exists before any worker writes.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ==============================
# Global detector
# ==============================
# Lazily created MTCNN instance shared by all worker threads.
_detector = None
# Serialises first-time construction so concurrent workers build only one detector.
_detector_lock = threading.Lock()

def get_detector():
    """Return the shared MTCNN detector, constructing it on first use.

    Construction is guarded by a lock with a second check inside it, so that
    worker threads hitting this function at the same time do not each build
    their own detector.

    Returns:
        The single module-wide MTCNN instance.
    """
    global _detector
    if _detector is None:
        with _detector_lock:
            # Re-check: another thread may have finished building the detector
            # while this one was waiting for the lock.
            if _detector is None:
                # Use CPU to avoid GPU OOM on large batches.
                _detector = MTCNN(device='cpu')
    return _detector

# ==============================
# Alignment
# ==============================
def align_face_to_template(img, landmarks):
    """Warp ``img`` so its facial landmarks land on REFERENCE_FACIAL_POINTS.

    Args:
        img: Image array handed straight to ``cv2.warpAffine``.
        landmarks: Mapping with the keys 'left_eye', 'right_eye', 'nose',
            'mouth_left' and 'mouth_right'; each value is presumably an
            (x, y) pixel coordinate - verify against the detector output.

    Returns:
        The 112x112 warped image (uncovered border filled with 0), or None
        when OpenCV could not estimate a transform.
    """
    # Order must line up row-for-row with REFERENCE_FACIAL_POINTS.
    keypoint_order = ('left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right')
    detected_pts = np.array(
        [landmarks[name] for name in keypoint_order],
        dtype=np.float32,
    )

    # The second return value (inlier mask) is not needed here.
    matrix, _inliers = cv2.estimateAffinePartial2D(detected_pts, REFERENCE_FACIAL_POINTS)
    if matrix is not None:
        return cv2.warpAffine(img, matrix, (112, 112), borderValue=0)
    return None

# ==============================
# Lighting normalization
# ==============================
def solve_lighting(img):
    """Even out lighting by applying CLAHE to the luma channel only.

    The image is converted BGR -> YUV, channel 0 (Y) is equalised with CLAHE
    (clip limit 2.0, 8x8 tile grid), and the result is converted back to BGR.

    Args:
        img: Image array in the BGR channel order expected by
            ``cv2.COLOR_BGR2YUV``.

    Returns:
        A new BGR image with the equalised luma channel.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    luma_chroma = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
    # Only channel 0 is modified; the two chroma channels are left untouched.
    luma = luma_chroma[..., 0]
    luma_chroma[..., 0] = equalizer.apply(luma)
    return cv2.cvtColor(luma_chroma, cv2.COLOR_YUV2BGR)

# ==============================
# Worker
# ==============================
def process_image(img_path):
    """Detect, align and light-normalise the best face in one image.

    The output path mirrors ``img_path``'s location relative to RAW_DIR
    underneath OUTPUT_DIR. An image whose output file already exists is
    skipped, so an interrupted run can be resumed.

    Args:
        img_path: Path of the input image, located under RAW_DIR.

    Returns:
        True if the output already existed or was written successfully.
        False if the image could not be read, no face was detected, no
        alignment transform could be estimated, the output could not be
        written, or any exception was raised (the exception is printed).
    """
    try:
        relative_path = os.path.relpath(img_path, RAW_DIR)
        output_path = os.path.join(OUTPUT_DIR, relative_path)
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        if os.path.exists(output_path):
            return True

        img = cv2.imread(img_path)
        if img is None:
            return False

        # The detector is given an RGB copy; alignment below is applied to
        # the original BGR image so the saved file keeps OpenCV's channel order.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = get_detector().detect_faces(img_rgb)
        if not results:
            return False

        # With several detections, keep only the most confident one.
        best_face = max(results, key=lambda face: face['confidence'])
        aligned = align_face_to_template(img, best_face['keypoints'])
        if aligned is None:
            return False

        final_img = solve_lighting(aligned)
        # cv2.imwrite signals failure through its return value rather than an
        # exception, so it must be checked to avoid reporting a false success.
        if not cv2.imwrite(output_path, final_img):
            print(f"[ERROR] {img_path}: could not write {output_path}")
            return False
        return True

    except Exception as e:
        # Per-image boundary: one bad file must not abort the whole batch.
        print(f"[ERROR] {img_path}: {e}")
        return False

# ==============================
# Main
# ==============================
if __name__ == "__main__":
    # Recursively collect every image under RAW_DIR; the extension match is
    # case-insensitive.
    image_files = [
        os.path.join(root, file)
        for root, _, files in os.walk(RAW_DIR)
        for file in files
        if file.lower().endswith(('.jpg', '.jpeg', '.png'))
    ]

    print(f"Total images found: {len(image_files)}")

    # Use threads instead of processes; keep the pool small (1-2) for stability.
    with ThreadPoolExecutor(max_workers=2) as executor:
        # list() drains the lazy map so the progress bar advances and every
        # per-image result is kept for the summary below.
        outcomes = list(
            tqdm(
                executor.map(process_image, image_files),
                total=len(image_files),
                desc="Processing Faces"
            )
        )

    # process_image returns True for images that were written or already
    # present, and False for everything else.
    succeeded = sum(1 for ok in outcomes if ok)
    print(f"Aligned or already present: {succeeded} | failed: {len(outcomes) - succeeded}")
    print("Processing completed.")
