# Landmark Extraction & Normalisation – Quick Test  
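This notebook loads a few sample images, extracts the 21 MediaPipe hand landmarks,
normalises them (centred on the wrist, scaled by the largest per-axis peak-to-peak
range) and flattens them to a 65-element vector: the 20 non-wrist landmarks × 3
coordinates, plus 5 wrist-to-fingertip distances.  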
We will visualise the landmarks and ensure the helper functions work before
processing the full dataset.


In [1]:
from pathlib import Path
import random
import cv2
import mediapipe as mp
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [5]:
# Module-level MediaPipe Hands detector, built once so that the per-image
# helper can call MP_HANDS.process(...) without re-creating the model.
MP_HANDS = mp.solutions.hands.Hands(
    static_image_mode=True,        # inputs are independent still images
    max_num_hands=1,               # only the first detected hand is used downstream
    model_complexity=1,
    min_detection_confidence=0.5,
)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1751813581.483099   15214 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1751813581.495900   15214 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [6]:
# ───────────────────────── single-image → vector ─────────────────────────────
def one_image_to_vector(img_path: Path):
    """
    Turn a single image file into a ``(label, features)`` pair.

    The label is the name of the directory that contains ``img_path``.

    On success the second element is a list of 65 floats:
      • 60 values – x/y/z of every landmark except the wrist, after moving the
        wrist to the origin and dividing by the largest per-axis peak-to-peak
        range of the hand
      • 5 values  – Euclidean distance from the wrist to each fingertip
        (thumb, index, middle, ring, pinky), measured in the same scaled space

    On failure the second element is a reason string instead:
      • "class_nothing" – the label is 'nothing' (the image is never read)
      • "corrupt_image" – ``cv2.imread`` returned ``None``
      • "no_hand"       – the detector reported no hand landmarks
    """
    label = img_path.parent.name
    if label == "nothing":
        return label, "class_nothing"

    bgr = cv2.imread(str(img_path))
    if bgr is None:
        return label, "corrupt_image"

    # The detector is fed RGB, whereas OpenCV loads images as BGR.
    detection = MP_HANDS.process(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
    if not detection.multi_hand_landmarks:
        return label, "no_hand"

    # One row per landmark (x, y, z); row 0 is the wrist.
    first_hand = detection.multi_hand_landmarks[0]
    coords = np.array(
        [[pt.x, pt.y, pt.z] for pt in first_hand.landmark],
        dtype=np.float32,
    )

    # Translate so the wrist sits at the origin.
    coords -= coords[0]

    # Divide by the widest extent over the three axes; a zero extent
    # falls back to 1.0 so the division is always defined.
    span = np.max(np.ptp(coords, axis=0))
    if not span:
        span = 1.0
    coords /= span

    # 20 non-wrist landmarks × 3 coordinates = 60 values.
    coord_features = coords[1:].flatten().tolist()

    # Wrist → fingertip distances; the wrist is the origin, so the norm of
    # each (already scaled) tip row is the distance itself.
    tip_distances = []
    for tip_index in (4, 8, 12, 16, 20):          # thumb, index, middle, ring, pinky
        tip_distances.append(float(np.linalg.norm(coords[tip_index])))

    # 60 coordinates + 5 distances = 65 features.
    return label, coord_features + tip_distances


In [7]:
# Smoke test: featurise a small random batch of class-"A" images and check
# that StandardScaler standardises the resulting 65-D feature matrix.
K = 20       # number of images to sample
SEED = 42    # fixed seed so a fresh Restart & Run All picks the same images

# Sort the paths: rglob order is not guaranteed, and a seeded sample is only
# reproducible when it is drawn from a stable ordering.
all_imgs = sorted(Path("../data/filtered/clean/A").rglob("*.jpg"))
if not all_imgs:
    raise FileNotFoundError("No .jpg images found under ../data/filtered/clean/A")

# random.sample raises ValueError when asked for more items than exist, so
# cap the batch size at the number of images actually available.
batch = random.Random(SEED).sample(all_imgs, min(K, len(all_imgs)))

vecs = []
fail_reasons = {"class_nothing": 0, "no_hand": 0, "corrupt_image": 0}

for p in batch:
    _label, result = one_image_to_vector(p)
    if isinstance(result, list):  # good result: a feature vector
        vecs.append(result)
    else:                         # failure: result is a reason string
        fail_reasons[result] = fail_reasons.get(result, 0) + 1

print("Failures:", fail_reasons)

# With zero vectors np.stack fails, and with a single vector every feature has
# zero variance, so the unit-std check below could never pass. Fail early with
# a message that points at the real cause.
if len(vecs) < 2:
    raise RuntimeError(
        f"Only {len(vecs)} of {len(batch)} sampled images produced a feature "
        f"vector; need at least 2 to check scaling. Failures: {fail_reasons}"
    )

X_batch = np.stack(vecs)
print("Batch matrix:", X_batch.shape)   # (≤K, 65)

scaler  = StandardScaler().fit(X_batch)
X_scaled = scaler.transform(X_batch)

means = X_scaled.mean(axis=0)
stds  = X_scaled.std(axis=0)

print("Max |mean|:", np.abs(means).max())
print("Min/std:", stds.min(), "Max/std:", stds.max())  # all ~1

# NOTE(review): a feature that is constant across the whole batch would keep
# std 0 after scaling and trip the second assertion — presumably not expected
# for real landmark data; confirm if this ever fires.
assert np.allclose(means, 0, atol=1e-6)
assert np.allclose(stds, 1, atol=1e-6)
print("✔️ StandardScaler behaves as expected with the 65-D features.")

W0000 00:00:1751813586.903945   15211 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Batch matrix: (20, 65)
Max |mean|: 4.71289673953379e-15
Min/std: 0.9999999999999998 Max/std: 1.0000000000000002
✔️ StandardScaler behaves as expected with the 65-D features.
