In [None]:
# ===============================================
# LIGHT PREPROCESSING + FACE DETECTION PIPELINE 
# (Linked with AccoID & Timestamp)
# ===============================================
# This notebook processes earlier YOLO-based "close-up person crops" and performs:
# 1. Controlled image enhancement specifically tuned for CCTV footage.
# 2. Face detection using a backend suitable for non-frontal, low-quality, or angled imagery.
# 3. Metadata recovery (AccoID + timestamp) from filenames, preserving linkage between
#    transaction time and detected faces.
# 4. Export of clean, normalized face crops prepared for DeepFace age/gender inference.
#
# This stage is critical for:
# - Maximizing facial detection accuracy under CCTV conditions.
# - Preserving temporal and transactional linkage required for the thesis dataset.
# - Producing consistent face crops suitable for downstream neural models.
from deepface import DeepFace
import cv2, os, re
import numpy as np
from tqdm import tqdm

# === CONFIGURATION ===
# INPUT_DIR: directory of YOLO-generated person crops that the processing loop reads.
# OUTPUT_DIR: directory the face crops are written to; created here if it is missing.
INPUT_DIR = "../data/crops_person/cafe_POS_person_crops_2025_11_05"
OUTPUT_DIR = "../data/crops_face/20251105/1_cafe_pos_person_faces_yunet"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# DETECTOR:
# - Passed to DeepFace.extract_faces as `detector_backend`.
# - YuNet is a lightweight CNN face detector; it was chosen here over Haar cascades
#   for CCTV material (rotations, partial occlusions, varying illumination).
DETECTOR = "yunet"

# ENFORCE_DETECTION:
# - Passed to DeepFace.extract_faces as `enforce_detection`.
# - With True, DeepFace raises an exception when no face is found instead of
#   returning a placeholder result.
# - The processing loop catches that exception and skips the image, so person
#   crops without a detectable face simply produce no output file.
ENFORCE_DETECTION = True

# MIN_CONFIDENCE:
# - Detections whose "confidence" value is below this threshold are skipped.
# - 0.65 is the chosen trade-off between recall (not missing faces) and
#   precision (avoiding false positives) on noisy CCTV frames.
MIN_CONFIDENCE = 0.65

# FACE SIZE NORMALIZATION (pixels):
# - Crops whose shorter side is below TARGET_MIN are upscaled toward TARGET_MIN;
#   they are NOT discarded.
# - The scale factor is capped so the longer side never exceeds TARGET_MAX, which
#   also means crops already larger than TARGET_MAX are shrunk to fit.
# - Aspect ratio is preserved, so very elongated crops may end up with a shorter
#   side below TARGET_MIN.
TARGET_MIN = 192
TARGET_MAX = 384

# EXPAND_RATIO:
# - Padding added on EACH side of the detected box, as a fraction of the box
#   width (left/right) and height (top/bottom); the result is clipped to the image.
# - 0.5 therefore yields a crop up to twice as wide and tall as the detection,
#   keeping context such as forehead, hairline and chin.
EXPAND_RATIO = 0.5

# === LIGHT IMAGE PREPROCESSING ===
# CCTV footage is often low-light, noisy, contrast-poor, and compressed.
# A single CLAHE operator (clip limit 2.0, 8x8 tile grid) is created once here
# and reused by preprocess_soft for every image.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

def preprocess_soft(img):
    """
    Return a gently enhanced copy of a BGR image.

    Three steps are applied in order:
    1. Local contrast: CLAHE (module-level ``clahe``) on the L channel of LAB,
       leaving the two colour channels untouched.
    2. Brightness: a gamma curve with gamma = 1.08 applied through a 256-entry
       lookup table; the exponent 1/gamma is below 1, so midtones are lifted
       slightly while 0 and 255 stay fixed.
    3. Noise: OpenCV's coloured non-local-means denoiser with small strengths.

    Args:
        img: image as loaded by ``cv2.imread`` (converted with COLOR_BGR2LAB,
            so a 3-channel BGR array is expected).

    Returns:
        The enhanced image, converted back to BGR.
    """
    # Step 1: equalise lightness only, so colours are not shifted.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2LAB))
    equalized = cv2.merge((clahe.apply(lightness), chan_a, chan_b))
    enhanced = cv2.cvtColor(equalized, cv2.COLOR_LAB2BGR)

    # Step 2: build the gamma lookup table and map every pixel through it.
    gamma = 1.08
    curve = [((level / 255.0) ** (1 / gamma)) * 255 for level in np.arange(256)]
    enhanced = cv2.LUT(enhanced, np.array(curve).astype("uint8"))

    # Step 3: light denoising (positional args after dst: 3, 3, 5, 15).
    return cv2.fastNlMeansDenoisingColored(enhanced, None, 3, 3, 5, 15)

# === FILENAME PARSING FUNCTION ===
def parse_filename_for_info(filename):
    """
    Recover the AccoID and timestamp that were embedded in a crop's filename.

    Expected pattern (anywhere in the name):
        AccoID_1147705_20251019_083301_person_0.jpg

    This keeps each face linked to its transaction and the shopper's
    time of purchase, which the time-aligned dataset depends on.

    Args:
        filename: file name (or path) to inspect.

    Returns:
        Tuple ``(acco_id, timestamp)`` of strings, where ``timestamp`` has the
        form ``YYYYMMDD_HHMMSS``. When the pattern is absent the placeholders
        ``("unknown", "00000000_000000")`` are returned so the pipeline keeps going.
    """
    found = re.search(r"AccoID_(\d+)_(\d{8}_\d{6})", filename)
    if not found:
        # Unrecognised name: fall back to placeholders instead of failing.
        return "unknown", "00000000_000000"
    return found.group(1), found.group(2)

# === PROCESSING LOOP ===
# For every person crop: enhance it, detect faces on the ENHANCED image, pad and
# resize each accepted face, and save it under its AccoID + timestamp.
# Files are processed in sorted order so runs (and collision suffixes) are deterministic.
images = sorted(f for f in os.listdir(INPUT_DIR) if f.lower().endswith((".jpg", ".png")))
print(f"Processing {len(images)} person crops...")

# Outcome counters, reported at the end so that skipped inputs are visible
# instead of disappearing silently.
n_unreadable = n_no_face = n_errors = n_saved = 0

# Number of faces already written per "AccoID_<id>_<timestamp>" base name.
# Several person crops (and several faces per crop) can share one transaction;
# without this, each new face would overwrite the previous file.
faces_per_key = {}

for img_name in tqdm(images):
    img_path = os.path.join(INPUT_DIR, img_name)
    img = cv2.imread(img_path)
    if img is None:
        # Corrupted or partially written image
        n_unreadable += 1
        continue

    # Parse metadata from the filename (placeholders are used if it does not match)
    acco_id, timestamp = parse_filename_for_info(img_name)

    # Apply controlled enhancement; both detection and the saved crop use this image
    img = preprocess_soft(img)
    h_img, w_img = img.shape[:2]

    try:
        # Pass the enhanced BGR array (not the file path) so the detector
        # actually benefits from the preprocessing above.
        detections = DeepFace.extract_faces(
            img_path=img,
            detector_backend=DETECTOR,
            enforce_detection=ENFORCE_DETECTION,
            align=False,  # Disable alignment to preserve camera pose geometry
        )
    except ValueError:
        # DeepFace reports "no face found" as ValueError when enforce_detection=True.
        n_no_face += 1
        continue
    except Exception as exc:
        # Best-effort batch run: report the failure and move on to the next image.
        n_errors += 1
        tqdm.write(f"Skipping {img_name}: {type(exc).__name__}: {exc}")
        continue

    if not detections:
        n_no_face += 1
        continue

    for face_info in detections:
        # Guard each face separately so one bad detection does not discard
        # the remaining faces of the same image.
        try:
            # Confidence filtering to prevent false positives
            if face_info.get("confidence", 1.0) < MIN_CONFIDENCE:
                continue

            area = face_info.get("facial_area", {})
            if not area:
                continue
            x, y, w, h = area["x"], area["y"], area["w"], area["h"]

            # --- Bounding box expansion ---
            # Pad each side by EXPAND_RATIO of the box size (forehead, hair,
            # jawline), then clip to the image.
            dx, dy = int(w * EXPAND_RATIO), int(h * EXPAND_RATIO)
            x1, y1 = max(x - dx, 0), max(y - dy, 0)
            x2, y2 = min(x + w + dx, w_img), min(y + h + dy, h_img)
            if x2 <= x1 or y2 <= y1:
                # Box lies outside the image; a negative end index would
                # otherwise wrap around and produce a bogus crop.
                continue
            face_crop = img[y1:y2, x1:x2]

            # --- Adaptive resizing ---
            # Upscale until the shorter side reaches TARGET_MIN, but never let
            # the longer side exceed TARGET_MAX (aspect ratio preserved).
            h_c, w_c = face_crop.shape[:2]
            scale = min(
                max(TARGET_MIN / min(h_c, w_c), 1.0),
                TARGET_MAX / max(h_c, w_c),
            )
            new_size = (max(int(w_c * scale), 1), max(int(h_c * scale), 1))
            # INTER_AREA is the recommended filter for shrinking; cubic for enlarging.
            interp = cv2.INTER_AREA if scale < 1.0 else cv2.INTER_CUBIC
            face_crop = cv2.resize(face_crop, new_size, interpolation=interp)

            # --- Save with transactional metadata ---
            # The first face of a transaction keeps the plain name; later ones
            # get a numeric suffix (_1, _2, ...) instead of overwriting it.
            base = f"AccoID_{acco_id}_{timestamp}"
            seen = faces_per_key.get(base, 0)
            out_name = f"{base}.jpg" if seen == 0 else f"{base}_{seen}.jpg"
            out_path = os.path.join(OUTPUT_DIR, out_name)
            if cv2.imwrite(out_path, face_crop, [int(cv2.IMWRITE_JPEG_QUALITY), 95]):
                faces_per_key[base] = seen + 1
                n_saved += 1
            else:
                n_errors += 1
                tqdm.write(f"Could not write {out_path}")
        except Exception as exc:
            n_errors += 1
            tqdm.write(f"Skipping a face in {img_name}: {type(exc).__name__}: {exc}")

print(f"\nEnhanced face crops saved to: {OUTPUT_DIR}")
# Count faces written by this run (the directory may contain older files).
print(f"Total cropped faces: {n_saved}")
print(
    f"Skipped inputs: {n_unreadable} unreadable, "
    f"{n_no_face} without a detected face, {n_errors} errors"
)


🧠 Processing 29214 person crops...


100%|██████████| 29214/29214 [2:11:11<00:00,  3.71it/s]


✅ Enhanced face crops saved to: ../data/crops_face/20251105/1_cafe_pos_person_faces_yunet
📸 Total cropped faces: 7625





In [None]:
# ===============================================
# LIGHT PREPROCESSING + FACE DETECTION PIPELINE
# (Linked with AccoID & Timestamp)
# ===============================================
# This script performs face detection on YOLO-generated person crops.
# It applies controlled image enhancement to improve facial visibility
# under CCTV-specific constraints (low light, compression noise, angle variation).
# Each detected face is cropped, padded, resized, and exported with its original
# transaction metadata (AccoID + timestamp), ensuring full traceability.
#
# This prepares clean, normalized inputs for subsequent DeepFace
# age/gender inference.
from deepface import DeepFace
import cv2, os, re
import numpy as np
from tqdm import tqdm

# ============================================================
# CONFIGURATION
# ============================================================
# INPUT_DIR: directory containing person-level crops extracted with YOLO.
# OUTPUT_DIR: destination for the face-only crops; created here if missing.
INPUT_DIR = "../data/crops_person/cafe_POS_person_crops_2025_11_07"
OUTPUT_DIR = "../data/crops_face/20251107/1_cafe_pos_person_faces_yunet"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Face detector selection (passed to DeepFace.extract_faces as `detector_backend`):
# YuNet was chosen for angled faces, low-resolution CCTV frames and partial
# occlusions, in preference to Haar cascades or SSD.
DETECTOR = "yunet"

# Passed to DeepFace.extract_faces as `enforce_detection`.
# With True, DeepFace raises an error if no face is detected; the processing
# loop catches it and skips that image so the run is not interrupted.
ENFORCE_DETECTION = True

# Minimum "confidence" value for accepting a detected face;
# detections below this threshold are skipped.
MIN_CONFIDENCE = 0.65

# Target dimensions (pixels) for the resized face crop:
# - crops whose shorter side is below TARGET_MIN are upscaled toward it,
# - the scale is capped so the longer side never exceeds TARGET_MAX
#   (larger crops are shrunk to fit), with aspect ratio preserved.
TARGET_MIN = 192
TARGET_MAX = 384

# Padding added on each side of the detected box, as a fraction of the box
# width/height (clipped to the image), to keep forehead, chin and hairline.
EXPAND_RATIO = 0.5

# ============================================================
# LIGHT IMAGE PREPROCESSING
# ============================================================
# CCTV footage tends to be dim, noisy, compressed, and low contrast.
# One CLAHE operator (clip limit 2.0, 8x8 tile grid) is created here and
# reused by preprocess_soft for every image.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

def preprocess_soft(img):
    """
    Lightly enhance a CCTV person crop and return the result as BGR.

    Pipeline:
    1. CLAHE (module-level ``clahe``) on the L channel in LAB space, so local
       contrast improves while the colour channels are left as they are.
    2. Gamma correction with gamma = 1.08 via a 256-entry lookup table; this
       lifts midtones slightly and keeps pure black and white unchanged.
    3. Fast non-local-means denoising (colour variant) with small strengths.

    Args:
        img: 3-channel BGR image, e.g. from ``cv2.imread``.

    Returns:
        Enhanced BGR image.
    """
    # --- 1. Contrast: operate on lightness only ---
    channels = list(cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2LAB)))
    channels[0] = clahe.apply(channels[0])
    out = cv2.cvtColor(cv2.merge(tuple(channels)), cv2.COLOR_LAB2BGR)

    # --- 2. Brightness: gamma lookup table over all 256 intensity levels ---
    gamma = 1.08
    table = []
    for value in np.arange(256):
        table.append(((value / 255.0) ** (1 / gamma)) * 255)
    out = cv2.LUT(out, np.array(table).astype("uint8"))

    # --- 3. Noise: mild denoising for compression and low-light artifacts ---
    out = cv2.fastNlMeansDenoisingColored(out, None, 3, 3, 5, 15)
    return out

# ============================================================
# FILENAME PARSING FUNCTION
# ============================================================
def parse_filename_for_info(filename):
    """
    Extract the AccoID and timestamp embedded in a filename.

    The naming convention comes from the YOLO frame extraction step:
        AccoID_<id>_<YYYYMMDD_HHMMSS>.jpg
    (extra text before or after the pattern is tolerated).

    Recovering these fields keeps every face crop linked to its transaction,
    which the time-aligned demographic dataset relies on.

    Args:
        filename: file name (or path) to inspect.

    Returns:
        ``(acco_id, timestamp)`` as strings; ``("unknown", "00000000_000000")``
        when the filename does not follow the convention.
    """
    hit = re.search(r"AccoID_(\d+)_(\d{8}_\d{6})", filename)
    # Both capture groups are returned as-is; unexpected names get placeholders.
    acco_id, timestamp = hit.groups() if hit else ("unknown", "00000000_000000")
    return acco_id, timestamp

# ============================================================
# PROCESSING LOOP
# ============================================================
# For every person crop: enhance it, detect faces on the ENHANCED image, pad and
# resize each accepted face, and save it under its AccoID + timestamp.
# Files are listed in sorted order so runs (and collision suffixes) are deterministic.
images = sorted(f for f in os.listdir(INPUT_DIR) if f.lower().endswith((".jpg", ".png")))
print(f"Processing {len(images)} person crops...")

# Outcome counters, reported at the end so skipped inputs remain visible.
n_unreadable = n_no_face = n_errors = n_saved = 0

# Faces already written per "AccoID_<id>_<timestamp>" base name. Several person
# crops (and several faces per crop) can belong to one transaction; without
# this bookkeeping each new face would overwrite the previous file.
faces_per_key = {}

for img_name in tqdm(images):
    img_path = os.path.join(INPUT_DIR, img_name)
    img = cv2.imread(img_path)
    if img is None:
        # Skip corrupted files or incomplete writes
        n_unreadable += 1
        continue

    # Recover transaction metadata from filename
    acco_id, timestamp = parse_filename_for_info(img_name)

    # Apply gentle enhancement; detection and the saved crop both use this image
    img = preprocess_soft(img)
    h_img, w_img = img.shape[:2]

    try:
        # Pass the enhanced BGR array (not the file path) so the detector
        # actually sees the preprocessed image.
        # align=False preserves the original face orientation.
        detections = DeepFace.extract_faces(
            img_path=img,
            detector_backend=DETECTOR,
            enforce_detection=ENFORCE_DETECTION,
            align=False,
        )
    except ValueError:
        # DeepFace reports "no face found" as ValueError when enforce_detection=True.
        n_no_face += 1
        continue
    except Exception as exc:
        # Full-day batch run: report the failure and carry on with the next image.
        n_errors += 1
        tqdm.write(f"Skipping {img_name}: {type(exc).__name__}: {exc}")
        continue

    if not detections:
        n_no_face += 1
        continue

    for face_info in detections:
        # Guard each face separately so one bad detection does not discard
        # the remaining faces of the same image.
        try:
            # Skip low-confidence detections to avoid false positives
            if face_info.get("confidence", 1.0) < MIN_CONFIDENCE:
                continue

            # Extract bounding box from DeepFace output
            area = face_info.get("facial_area", {})
            if not area:
                continue
            x, y, w, h = area["x"], area["y"], area["w"], area["h"]

            # ---------------------------------------------------------
            # ADAPTIVE CROP WITH PADDING
            # ---------------------------------------------------------
            # Pad each side by EXPAND_RATIO of the box size (forehead, jawline,
            # hairline), then clip to the image bounds.
            dx, dy = int(w * EXPAND_RATIO), int(h * EXPAND_RATIO)
            x1, y1 = max(x - dx, 0), max(y - dy, 0)
            x2, y2 = min(x + w + dx, w_img), min(y + h + dy, h_img)
            if x2 <= x1 or y2 <= y1:
                # Box lies outside the image; a negative end index would
                # otherwise wrap around and produce a bogus crop.
                continue
            face_crop = img[y1:y2, x1:x2]

            # ---------------------------------------------------------
            # ADAPTIVE RESIZING
            # ---------------------------------------------------------
            # Upscale until the shorter side reaches TARGET_MIN, but never let
            # the longer side exceed TARGET_MAX (aspect ratio preserved).
            # Separate names keep the bbox w/h distinct from the crop size.
            crop_h, crop_w = face_crop.shape[:2]
            scale = min(
                max(TARGET_MIN / min(crop_h, crop_w), 1.0),
                TARGET_MAX / max(crop_h, crop_w),
            )
            new_size = (max(int(crop_w * scale), 1), max(int(crop_h * scale), 1))
            # INTER_AREA is the recommended filter for shrinking; cubic for enlarging.
            interp = cv2.INTER_AREA if scale < 1.0 else cv2.INTER_CUBIC
            face_crop = cv2.resize(face_crop, new_size, interpolation=interp)

            # ---------------------------------------------------------
            # SAVE WITH ACCOID + TIMESTAMP
            # ---------------------------------------------------------
            # The first face of a transaction keeps the plain name; later ones
            # get a numeric suffix (_1, _2, ...) instead of overwriting it.
            base = f"AccoID_{acco_id}_{timestamp}"
            seen = faces_per_key.get(base, 0)
            out_name = f"{base}.jpg" if seen == 0 else f"{base}_{seen}.jpg"
            out_path = os.path.join(OUTPUT_DIR, out_name)
            if cv2.imwrite(out_path, face_crop, [int(cv2.IMWRITE_JPEG_QUALITY), 95]):
                faces_per_key[base] = seen + 1
                n_saved += 1
            else:
                n_errors += 1
                tqdm.write(f"Could not write {out_path}")
        except Exception as exc:
            n_errors += 1
            tqdm.write(f"Skipping a face in {img_name}: {type(exc).__name__}: {exc}")

print(f"\nEnhanced face crops saved to: {OUTPUT_DIR}")
# Count faces written by this run (the directory may contain older files).
print(f"Total cropped faces: {n_saved}")
print(
    f"Skipped inputs: {n_unreadable} unreadable, "
    f"{n_no_face} without a detected face, {n_errors} errors"
)


🧠 Processing 27340 person crops...


100%|██████████| 27340/27340 [2:05:01<00:00,  3.64it/s]


✅ Enhanced face crops saved to: ../data/crops_face/20251107/1_cafe_pos_person_faces_yunet
📸 Total cropped faces: 7500



