In [3]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from sklearn.preprocessing import normalize


In [4]:
# Folder holding this person's enrollment images; it is passed to
# extract_person_embeddings() further down.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
PERSON_FOLDER = "/home/sandeshprasai/Projects/Final_Semester_Project/AI_Attendance_System/ai-ml-model/DataSets/SandeshPrasai"
PERSON_NAME = "Sandesh Prasai"   # used as the file-name prefix of the saved "<name>_embedding.npy"


In [5]:
import tensorflow as tf

def l2_norm(x, axis=1):
    """Return ``x`` L2-normalised along ``axis`` via ``tf.math.l2_normalize``.

    Passed to ``load_model`` as a custom object under the name ``"l2_norm"``.
    """
    unit_length = tf.math.l2_normalize(x, axis=axis)
    return unit_length

class ArcFace(tf.keras.layers.Layer):
    """Keras layer shell that stores the ArcFace hyper-parameters.

    As written here the class defines no weights and no ``call``; it only keeps
    ``num_classes``, ``margin`` and ``scale`` and round-trips them through
    ``get_config``. It is passed to ``load_model`` as a custom object under the
    name ``"ArcFace"``.
    """

    def __init__(self, num_classes, margin=0.5, scale=64, **kwargs):
        super().__init__(**kwargs)
        self.num_classes, self.margin, self.scale = num_classes, margin, scale

    def get_config(self):
        """Return the base layer config extended with the three hyper-parameters."""
        base_config = super().get_config()
        return {
            **base_config,
            "num_classes": self.num_classes,
            "margin": self.margin,
            "scale": self.scale,
        }


In [6]:
def load_preprocessed_face(img_path):
    """Read an image file and return it as a 112x112 RGB float32 array.

    Args:
        img_path: Path handed to ``cv2.imread``.

    Returns:
        The image converted BGR -> RGB, resized to 112x112 with
        ``cv2.INTER_AREA`` and divided by 255.0 as ``float32``.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for the path.
    """
    bgr = cv2.imread(img_path)
    if bgr is None:
        raise ValueError("Image not readable")

    # OpenCV loads BGR; convert before resizing, same order as before.
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    # The size is forced to 112x112 regardless of the input dimensions.
    resized = cv2.resize(rgb, (112, 112), interpolation=cv2.INTER_AREA)

    return resized.astype(np.float32) / 255.0


In [7]:
def extract_person_embeddings(folder_path, embedding_model):
    """Embed every .jpg/.jpeg/.png image found directly inside ``folder_path``.

    Args:
        folder_path: Directory listed with ``os.listdir`` (not recursive).
        embedding_model: Object exposing ``predict(batch, verbose=0)``.

    Returns:
        ``np.array`` built from the flattened per-image predictions. Images
        that raise during loading or prediction are reported with a
        ``[SKIPPED]`` line and left out.
    """
    image_suffixes = ('.jpg', '.png', '.jpeg')

    image_files = []
    for entry in os.listdir(folder_path):
        if entry.lower().endswith(image_suffixes):
            image_files.append(entry)

    print(f"Found {len(image_files)} images")

    collected = []
    for img_name in tqdm(image_files, desc="Extracting embeddings"):
        full_path = os.path.join(folder_path, img_name)

        try:
            face = load_preprocessed_face(full_path)
            batch = face[np.newaxis, ...]  # add the leading batch axis

            prediction = embedding_model.predict(batch, verbose=0)
            collected.append(prediction.flatten())

        except Exception as err:
            # Best-effort: one bad image must not abort the whole folder.
            print(f"[SKIPPED] {img_name}: {err}")

    return np.array(collected)


In [8]:
# Load the saved Keras embedding model. `l2_norm` and `ArcFace` are supplied as
# custom objects so those names can be resolved while the file is deserialized.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
embedding_model = tf.keras.models.load_model(
    r"/home/sandeshprasai/Projects/Final_Semester_Project/AI_Attendance_System/ai-ml-model/src/models/RestNet50/final_year_project_face_recognition/embedding_model.keras",
    custom_objects={
        "l2_norm": l2_norm,
        "ArcFace": ArcFace
    },
    compile=False   # IMPORTANT: load without compiling; this notebook only calls predict()
)


I0000 00:00:1769749179.400738   22276 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4309 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 6GB Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


In [9]:
# Embed every image in PERSON_FOLDER with the loaded model; the result holds
# one flattened embedding per successfully processed image.
person_embeddings = extract_person_embeddings(
    PERSON_FOLDER,
    embedding_model
)

print("Raw embeddings shape:", person_embeddings.shape)


Found 9 images


I0000 00:00:1769749182.583411   22541 service.cc:152] XLA service 0x7239a0015a90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1769749182.584409   22541 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 6GB Laptop GPU, Compute Capability 8.6
2026-01-30 10:44:42.728692: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1769749183.310896   22541 cuda_dnn.cc:529] Loaded cuDNN version 91002
I0000 00:00:1769749185.723785   22541 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
Extracting embeddings: 100%|██████████| 9/9 [00:05<00:00,  1.71it/s]

Raw embeddings shape: (9, 512)





In [10]:
# L2-normalise the embeddings with scikit-learn's `normalize`.
# NOTE: this rebinds `person_embeddings`, so the raw model outputs are no
# longer available after this cell has run.
person_embeddings = normalize(person_embeddings, norm="l2")


In [11]:
# Average the per-image embeddings into a single reference vector.
# NOTE(review): the mean is stored as-is, without re-normalising it; the
# matching code in this notebook compares vectors with cosine similarity.
person_embedding_mean = np.mean(person_embeddings, axis=0)

print("Final embedding shape:", person_embedding_mean.shape)


Final embedding shape: (512,)


In [12]:
# Persist the reference vector as "<PERSON_NAME>_embedding.npy". The path is
# relative, so the file is written to the current working directory.
np.save(f"{PERSON_NAME}_embedding.npy", person_embedding_mean)


## Match Embeddings 

In [13]:
import cv2
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


In [14]:
# Load the enrolled reference embedding from disk.
# NOTE(review): absolute, machine-specific path; presumably the file written by
# the np.save cell above when the working directory is this folder — confirm.
REFERENCE_EMB_PATH = "/home/sandeshprasai/Projects/Final_Semester_Project/AI_Attendance_System/ai-ml-model/notebook/Verson_3_RestNet+ArcFace/Sandesh Prasai_embedding.npy"
reference_embedding = np.load(REFERENCE_EMB_PATH)

print("Reference embedding shape:", reference_embedding.shape)


Reference embedding shape: (512,)


In [15]:
from mtcnn import MTCNN

# Face detector shared by the verification code below; its detect_faces()
# results are read for 'confidence', 'keypoints' and 'box'.
detector = MTCNN(device="cpu")


In [16]:
# Target (x, y) positions of the five landmarks in the 112x112 aligned crop.
# Row order must match the source points built in align_face_to_template().
REFERENCE_FACIAL_POINTS = np.asarray(
    (
        (38.2946, 51.6963),  # paired with 'left_eye'
        (73.5318, 51.5014),  # paired with 'right_eye'
        (56.0252, 71.7366),  # paired with 'nose'
        (41.5493, 92.3655),  # paired with 'mouth_left'
        (70.7299, 92.2041),  # paired with 'mouth_right'
    ),
    dtype=np.float32,
)


In [17]:
def align_face_to_template(img, landmarks):
    """Warp ``img`` so its five landmarks land on ``REFERENCE_FACIAL_POINTS``.

    Args:
        img: Image passed unchanged to ``cv2.warpAffine``.
        landmarks: Mapping with the keys 'left_eye', 'right_eye', 'nose',
            'mouth_left' and 'mouth_right', each giving one point.

    Returns:
        The 112x112 warped image (border filled with 0), or ``None`` when
        ``cv2.estimateAffinePartial2D`` yields no transform.
    """
    landmark_order = ('left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right')
    src_pts = np.array([landmarks[key] for key in landmark_order], dtype=np.float32)

    # Only the transform is needed; the second return value is discarded.
    tform = cv2.estimateAffinePartial2D(src_pts, REFERENCE_FACIAL_POINTS)[0]
    if tform is not None:
        return cv2.warpAffine(img, tform, (112, 112), borderValue=0)

    return None


In [18]:
def solve_lighting(img):
    """Apply CLAHE to the first (Y) channel of a BGR image and return BGR.

    The image is converted BGR -> YUV, channel 0 is equalised with
    ``clipLimit=2.0`` and an 8x8 tile grid, and the result is converted back.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    luma, chroma_u, chroma_v = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2YUV))
    balanced = cv2.merge((equalizer.apply(luma), chroma_u, chroma_v))
    return cv2.cvtColor(balanced, cv2.COLOR_YUV2BGR)


In [19]:
def prepare_face_for_embedding(face_bgr):
    """Turn a BGR face image into a one-element float32 RGB batch.

    The image is converted BGR -> RGB, cast to ``float32``, divided by 255.0
    and given a leading batch axis.
    """
    rgb = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2RGB)
    scaled = rgb.astype(np.float32) / 255.0
    return scaled[np.newaxis, ...]


In [22]:
# Live verification: read frames from the IP-camera stream, embed the most
# confident detected face and compare it with `reference_embedding`.
cap = cv2.VideoCapture("http://192.168.1.88:4747/video")

THRESHOLD = 0.65  # start value (we can tune later)

print("Press 'q' to quit")

# try/finally guarantees the stream and the preview window are released even
# when the cell is interrupted or a frame raises, so no separate cleanup cell
# is needed to recover.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered: the stream did not open, ended or dropped.
            break

        # The detector is given an RGB copy; drawing and alignment use the BGR frame.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = detector.detect_faces(rgb)

        if results:
            best_face = max(results, key=lambda x: x['confidence'])

            aligned = align_face_to_template(frame, best_face['keypoints'])

            if aligned is not None:
                aligned = solve_lighting(aligned)

                face_input = prepare_face_for_embedding(aligned)

                emb = embedding_model.predict(face_input, verbose=0).flatten()
                emb_norm = np.linalg.norm(emb)

                # A zero or non-finite norm would turn the embedding into NaNs,
                # which makes cosine_similarity raise; skip such a frame.
                if np.isfinite(emb_norm) and emb_norm > 0:
                    emb = emb / emb_norm  # L2 normalize

                    similarity = cosine_similarity(
                        emb.reshape(1, -1),
                        reference_embedding.reshape(1, -1)
                    )[0][0]

                    label = "MATCH" if similarity >= THRESHOLD else "NO MATCH"

                    x, y, w, h = best_face['box']
                    cv2.rectangle(frame, (x, y), (x+w, y+h), (0,255,0), 2)

                    # Clamp the text origin so the label stays visible when
                    # the box touches the top or left edge of the frame.
                    cv2.putText(
                        frame,
                        f"{label} | sim={similarity:.3f}",
                        (max(x, 0), max(y-10, 20)),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.6,
                        (0,255,0),
                        2
                    )

        cv2.imshow("Face Verification", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


Press 'q' to quit


In [21]:
# Manual cleanup: release the video capture and close all OpenCV windows.
# Requires `cap` to already exist in the kernel (created by the live loop cell).
cap.release()
cv2.destroyAllWindows()

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from mtcnn import MTCNN
from sklearn.metrics.pairwise import cosine_similarity

# ==============================
# PATHS
# ==============================
# Folder with the enrollment images for this person.
# NOTE(review): absolute, machine-specific paths below — adjust before running elsewhere.
PERSON_FOLDER = "/home/sandeshprasai/Projects/Final_Semester_Project/AI_Attendance_System/ai-ml-model/DataSets/SandeshPrasai"
# The reference embedding is written into the same folder as the images.
SAVE_EMB_PATH = os.path.join(PERSON_FOLDER, "SandeshPrasai_embedding.npy")

# Saved Keras embedding model loaded below.
MODEL_PATH = "/home/sandeshprasai/Projects/Final_Semester_Project/AI_Attendance_System/ai-ml-model/src/models/RestNet50/final_year_project_face_recognition/embedding_model.keras"

# NOTE(review): this host (…1.81) differs from the URL used by the earlier
# live-verification cell (…1.88) — confirm which one is current.
CAMERA_URL = "http://192.168.1.81:4747/video"
# Cosine similarity at or above this value is labelled "MATCH".
THRESHOLD = 0.65

# ==============================
# ARC FACE HELPERS
# ==============================
def l2_norm(x, axis=1):
    """L2-normalise ``x`` along ``axis`` using ``tf.math.l2_normalize``.

    Supplied to ``load_model`` as the custom object ``"l2_norm"``.
    """
    normalised = tf.math.l2_normalize(x, axis=axis)
    return normalised

class ArcFace(tf.keras.layers.Layer):
    """Hyper-parameter holder registered as the ``"ArcFace"`` custom object.

    This definition has no weights and no ``call``; it stores ``num_classes``,
    ``margin`` and ``scale`` and exposes them through ``get_config``.
    """

    def __init__(self, num_classes, margin=0.5, scale=64, **kwargs):
        super().__init__(**kwargs)
        self.num_classes, self.margin, self.scale = num_classes, margin, scale

    def get_config(self):
        """Return the parent config plus the three stored hyper-parameters."""
        parent_config = super().get_config()
        return {
            **parent_config,
            "num_classes": self.num_classes,
            "margin": self.margin,
            "scale": self.scale,
        }

# ==============================
# LOAD MODEL
# ==============================
# Load the embedding model from MODEL_PATH. `l2_norm` and `ArcFace` are passed
# as custom objects so those names can be resolved during deserialization.
# compile=False: the model is loaded without compiling; below it is only used
# through predict().
embedding_model = tf.keras.models.load_model(
    MODEL_PATH,
    custom_objects={"l2_norm": l2_norm, "ArcFace": ArcFace},
    compile=False
)

# ==============================
# FACE PREPROCESSING (CANONICAL)
# ==============================
# Face detector used by preprocess_face(); its detect_faces() results are read
# for 'confidence' and 'keypoints'.
detector = MTCNN(device="cpu")

# Destination (x, y) coordinates of the five landmarks inside the 112x112
# aligned crop; row order mirrors the source points in align_face_to_template().
REFERENCE_FACIAL_POINTS = np.asarray(
    (
        (38.2946, 51.6963),  # paired with 'left_eye'
        (73.5318, 51.5014),  # paired with 'right_eye'
        (56.0252, 71.7366),  # paired with 'nose'
        (41.5493, 92.3655),  # paired with 'mouth_left'
        (70.7299, 92.2041),  # paired with 'mouth_right'
    ),
    dtype=np.float32,
)

def align_face_to_template(img, landmarks):
    """Map the five facial landmarks onto ``REFERENCE_FACIAL_POINTS`` and warp.

    Args:
        img: Image handed to ``cv2.warpAffine`` as-is.
        landmarks: Mapping providing 'left_eye', 'right_eye', 'nose',
            'mouth_left' and 'mouth_right' points.

    Returns:
        A 112x112 warped image with a 0-valued border, or ``None`` if
        ``cv2.estimateAffinePartial2D`` does not return a transform.
    """
    point_names = ('left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right')
    src_pts = np.array([landmarks[name] for name in point_names], dtype=np.float32)

    # The second return value of the estimator is not used.
    tform = cv2.estimateAffinePartial2D(src_pts, REFERENCE_FACIAL_POINTS)[0]
    if tform is not None:
        return cv2.warpAffine(img, tform, (112, 112), borderValue=0)

    return None

def solve_lighting(img):
    """Equalise the first (Y) channel of a BGR image with CLAHE.

    Converts BGR -> YUV, applies CLAHE (``clipLimit=2.0``, 8x8 tiles) to
    channel 0 only, and converts the result back to BGR.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    luma, chroma_u, chroma_v = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2YUV))
    balanced = cv2.merge((equalizer.apply(luma), chroma_u, chroma_v))
    return cv2.cvtColor(balanced, cv2.COLOR_YUV2BGR)

def preprocess_face(img_bgr):
    """Detect, align and normalise the most confident face in a BGR image.

    Args:
        img_bgr: BGR image; an RGB copy is given to the detector while the
            BGR original is used for alignment.

    Returns:
        A float32 RGB array with a leading batch axis, divided by 255.0,
        produced from the aligned and CLAHE-equalised face.

    Raises:
        ValueError: "No face detected" when the detector returns nothing,
            "Alignment failed" when no transform could be estimated.
    """
    detections = detector.detect_faces(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
    if not detections:
        raise ValueError("No face detected")

    # Only the highest-confidence detection is used.
    top_face = max(detections, key=lambda det: det['confidence'])

    warped = align_face_to_template(img_bgr, top_face['keypoints'])
    if warped is None:
        raise ValueError("Alignment failed")

    equalised_rgb = cv2.cvtColor(solve_lighting(warped), cv2.COLOR_BGR2RGB)
    scaled = equalised_rgb.astype(np.float32) / 255.0
    return scaled[np.newaxis, ...]

# ==============================
# STEP 1: ENROLLMENT (FOLDER)
# ==============================
# Enrollment: embed every readable image in PERSON_FOLDER, average the
# unit-length embeddings and save the re-normalised mean as the reference.
embeddings = []

image_files = [
    f for f in os.listdir(PERSON_FOLDER)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
]

print(f"Found {len(image_files)} images for enrollment")

for img_name in tqdm(image_files, desc="Extracting embeddings"):
    img_path = os.path.join(PERSON_FOLDER, img_name)
    img = cv2.imread(img_path)

    if img is None:
        # Report unreadable files the same way as every other skipped image.
        print(f"[SKIPPED] {img_name}: Image not readable")
        continue

    try:
        face_input = preprocess_face(img)
        emb = embedding_model.predict(face_input, verbose=0).flatten()
        emb_norm = np.linalg.norm(emb)
        # Dividing by a zero / non-finite norm would inject NaNs into the mean.
        if not np.isfinite(emb_norm) or emb_norm == 0:
            raise ValueError("Degenerate embedding (zero or non-finite norm)")
        embeddings.append(emb / emb_norm)
    except Exception as e:
        # Best-effort: one bad image must not abort the whole enrollment.
        print(f"[SKIPPED] {img_name}: {e}")

# Without this guard np.mean() of an empty array yields NaN and that NaN would
# be saved as the reference embedding.
if not embeddings:
    raise RuntimeError(
        f"No usable enrollment images in {PERSON_FOLDER}; reference embedding not saved"
    )

embeddings = np.array(embeddings)
reference_embedding = np.mean(embeddings, axis=0)

reference_norm = np.linalg.norm(reference_embedding)
if not np.isfinite(reference_norm) or reference_norm == 0:
    raise RuntimeError("Mean embedding has zero or non-finite norm; reference embedding not saved")
reference_embedding /= reference_norm

np.save(SAVE_EMB_PATH, reference_embedding)
print("Reference embedding saved:", SAVE_EMB_PATH)

# ==============================
# STEP 2: LIVE CAMERA VERIFICATION
# ==============================
# Live verification: embed the face in each camera frame and compare it with
# `reference_embedding`; the result is drawn in the top-left corner.
cap = cv2.VideoCapture(CAMERA_URL)
print("Press 'q' to quit")

# Message of the most recently reported per-frame failure (avoids flooding the
# output with one identical line per frame).
last_frame_error = None

# try/finally guarantees the stream and the preview window are released even
# when the cell is interrupted or an unexpected error escapes the loop.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered: the stream did not open, ended or dropped.
            break

        try:
            face_input = preprocess_face(frame)
            emb = embedding_model.predict(face_input, verbose=0).flatten()
            emb = emb / np.linalg.norm(emb)

            similarity = cosine_similarity(
                emb.reshape(1, -1),
                reference_embedding.reshape(1, -1)
            )[0][0]

            label = "MATCH" if similarity >= THRESHOLD else "NO MATCH"
            color = (0, 255, 0) if label == "MATCH" else (0, 0, 255)

            cv2.putText(
                frame,
                f"{label} | sim={similarity:.3f}",
                (30, 40),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                color,
                2
            )

        except Exception as err:
            # Still best-effort per frame (the preview keeps running), but the
            # failure is reported whenever its message changes instead of
            # being discarded silently.
            frame_error = f"{type(err).__name__}: {err}"
            if frame_error != last_frame_error:
                print(f"[FRAME SKIPPED] {frame_error}")
                last_frame_error = frame_error

        cv2.imshow("Face Verification", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


Found 9 images for enrollment


Extracting embeddings:   0%|          | 0/9 [00:00<?, ?it/s]