In [39]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from sklearn.preprocessing import normalize


In [40]:
import tensorflow as tf
from tensorflow.keras import layers

@tf.keras.utils.register_keras_serializable(package="Custom")
class L2Normalize(layers.Layer):
    """Keras layer that L2-normalizes its input along a configurable axis.

    Registered under the "Custom" package so a saved model containing this
    layer can be deserialized by name.
    """

    def __init__(self, axis=1, **kwargs):
        """Store the normalization axis; other kwargs go to the base ``Layer``."""
        super().__init__(**kwargs)
        self.axis = axis

    def call(self, inputs):
        """Return ``inputs`` scaled to unit L2 norm along ``self.axis``."""
        normalized = tf.math.l2_normalize(inputs, axis=self.axis)
        return normalized

    def get_config(self):
        """Return the base layer config extended with this layer's ``axis``."""
        return {**super().get_config(), "axis": self.axis}


In [41]:
# Absolute path to the saved Keras embedding model.
# NOTE(review): this is a machine-specific path; it must be edited before the
# notebook can run anywhere else.
MODEL_PATH = "/home/sandeshprasai/Projects/Final_Semester_Project/AI_Attendance_System/ai-ml-model/src/models/ImageNetModel/face_embedding.keras"

# Load the model without compiling it (compile=False). The custom L2Normalize
# layer class is passed explicitly so it can be resolved during deserialization.
embedding_model = tf.keras.models.load_model(
    MODEL_PATH,
    custom_objects={"L2Normalize": L2Normalize},
    compile=False
)

print("✅ ImageNet embedding model loaded successfully")
# Print the layer-by-layer summary of the loaded model.
embedding_model.summary()


✅ ImageNet embedding model loaded successfully


In [42]:
def preprocess_face(img_bgr, target_size=(112, 112)):
    """Convert an OpenCV image into a model-ready batch of one.

    Args:
        img_bgr: Image array in OpenCV channel order. BGR ``(H, W, 3)`` is the
            normal case; grayscale ``(H, W)`` and BGRA ``(H, W, 4)`` are also
            accepted and converted to 3-channel RGB.
        target_size: ``(width, height)`` handed to ``cv2.resize``. Defaults to
            ``(112, 112)``.

    Returns:
        ``float32`` array of shape ``(1, height, width, 3)`` in RGB order with
        pixel values divided by 255 (i.e. in [0, 1] for 8-bit input).

    Raises:
        ValueError: If ``img_bgr`` is ``None``, empty, or does not have a
            supported number of dimensions/channels.
    """
    # Fail early with a readable message instead of an opaque cv2.error.
    if img_bgr is None:
        raise ValueError("preprocess_face: image is None")

    img = np.asarray(img_bgr)
    if img.size == 0 or img.ndim not in (2, 3):
        raise ValueError(
            f"preprocess_face: unsupported image shape {img.shape}"
        )

    # Pick the colour conversion that yields 3-channel RGB.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
    else:
        raise ValueError(
            f"preprocess_face: unsupported channel count {img.shape[2]}"
        )

    img = cv2.resize(img, tuple(target_size), interpolation=cv2.INTER_AREA)
    img = img.astype(np.float32) / 255.0
    # Add the leading batch axis: (H, W, 3) -> (1, H, W, 3).
    return np.expand_dims(img, axis=0)


In [43]:
from sklearn.preprocessing import normalize
import os
from tqdm import tqdm

# Folder holding the enrollment images for one person, and that person's name.
# NOTE(review): PERSON_FOLDER is a machine-specific absolute path.
PERSON_FOLDER = "/home/sandeshprasai/Projects/Final_Semester_Project/AI_Attendance_System/ai-ml-model/DataSets/SandeshPrasai"
PERSON_NAME = "Sandesh Prasai"

def extract_person_embedding(folder_path, model):
    """Compute one reference embedding for a person from a folder of images.

    Every ``.jpg`` / ``.jpeg`` / ``.png`` file in ``folder_path`` is read with
    OpenCV, passed through ``preprocess_face`` and embedded with ``model``.
    The per-image embeddings are L2-normalized and averaged.

    NOTE(review): images are fed whole (no face detection or alignment happens
    here); presumably they are already cropped faces — confirm.

    Args:
        folder_path: Directory containing the person's images.
        model: Object exposing ``predict(batch, verbose=0)`` that returns one
            embedding per input row.

    Returns:
        1-D array: the mean of the L2-normalized per-image embeddings. The
        mean itself is not re-normalized.

    Raises:
        ValueError: If no image in the folder could be read.
    """
    # Sorted so the processing order does not depend on the filesystem.
    image_files = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(('.jpg', '.png', '.jpeg'))
    )

    print(f"Found {len(image_files)} images")

    embeddings = []
    unreadable = []

    for img_name in tqdm(image_files):
        img = cv2.imread(os.path.join(folder_path, img_name))

        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            unreadable.append(img_name)
            continue

        face_input = preprocess_face(img)
        emb = model.predict(face_input, verbose=0)[0]
        embeddings.append(emb)

    if unreadable:
        print(f"Skipped {len(unreadable)} unreadable image(s): {unreadable}")

    # Without this guard the failure surfaces later as an unrelated
    # array-shape error from the normalization step.
    if not embeddings:
        raise ValueError(f"No readable images found in {folder_path!r}")

    embeddings = normalize(np.array(embeddings), axis=1)
    return embeddings.mean(axis=0)


In [44]:
# Build the reference embedding from the images in PERSON_FOLDER and save it
# as "<PERSON_NAME>_imagenet_embedding.npy" (path is relative, so it lands in
# the current working directory).
reference_embedding = extract_person_embedding(PERSON_FOLDER, embedding_model)
np.save(f"{PERSON_NAME}_imagenet_embedding.npy", reference_embedding)

print("✅ Reference embedding saved:", reference_embedding.shape)


Found 9 images


100%|██████████| 9/9 [00:03<00:00,  2.95it/s]

✅ Reference embedding saved: (512,)





In [45]:
# Target (x, y) pixel positions of five facial landmarks inside the 112x112
# aligned crop, in the order used by align_face_to_template: left eye,
# right eye, nose, left mouth corner, right mouth corner.
# NOTE(review): values look like the widely used ArcFace/InsightFace 112x112
# five-point template — confirm the source.
REFERENCE_FACIAL_POINTS = np.array([
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [41.5493, 92.3655],
    [70.7299, 92.2041]
], dtype=np.float32)

def align_face_to_template(img, landmarks):
    """Warp a face so its landmarks land on ``REFERENCE_FACIAL_POINTS``.

    Args:
        img: Source image containing the face.
        landmarks: Mapping with the keys ``left_eye``, ``right_eye``, ``nose``,
            ``mouth_left`` and ``mouth_right``, each an ``(x, y)`` pair in the
            pixel coordinates of ``img``.

    Returns:
        The 112x112 aligned crop, or ``None`` when the landmarks are missing,
        malformed or non-finite, or when no transform could be estimated.
    """
    landmark_order = ('left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right')

    # A missing key, a None mapping/point, or ragged points would raise before
    # the shape check below; treat all of them as "cannot align".
    try:
        src_pts = np.array(
            [landmarks[name] for name in landmark_order],
            dtype=np.float32,
        )
    except (KeyError, TypeError, ValueError):
        return None

    if src_pts.shape != (5, 2) or not np.isfinite(src_pts).all():
        return None

    # Estimate the partial-affine transform (rotation, uniform scale,
    # translation) mapping the detected landmarks onto the template.
    tform, _ = cv2.estimateAffinePartial2D(src_pts, REFERENCE_FACIAL_POINTS)
    if tform is None:
        return None

    return cv2.warpAffine(img, tform, (112, 112), borderValue=0)


In [46]:
from sklearn.preprocessing import normalize
import os
from tqdm import tqdm

# NOTE(review): these repeat the PERSON_FOLDER / PERSON_NAME values assigned in
# the earlier enrollment cell; presumably kept so this cell runs on its own.
PERSON_FOLDER = "/home/sandeshprasai/Projects/Final_Semester_Project/AI_Attendance_System/ai-ml-model/DataSets/SandeshPrasai"
PERSON_NAME = "Sandesh Prasai"

def extract_person_embedding(folder_path, model):
    """Compute one reference embedding for a person from a folder of images.

    Every ``.jpg`` / ``.jpeg`` / ``.png`` file in ``folder_path`` is read with
    OpenCV, passed through ``preprocess_face`` and embedded with ``model``.
    The per-image embeddings are L2-normalized and averaged.

    NOTE(review): this redefines a function of the same name from an earlier
    cell. Images are still fed whole (no detection/alignment) — confirm
    whether alignment was meant to be added here.

    Args:
        folder_path: Directory containing the person's images.
        model: Object exposing ``predict(batch, verbose=0)`` that returns one
            embedding per input row.

    Returns:
        1-D array: the mean of the L2-normalized per-image embeddings. The
        mean itself is not re-normalized.

    Raises:
        ValueError: If no image in the folder could be read.
    """
    # Sorted so the processing order does not depend on the filesystem.
    image_files = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(('.jpg', '.png', '.jpeg'))
    )

    print(f"Found {len(image_files)} images")

    embeddings = []
    unreadable = []

    for img_name in tqdm(image_files):
        img = cv2.imread(os.path.join(folder_path, img_name))

        # cv2.imread signals failure by returning None rather than raising.
        if img is None:
            unreadable.append(img_name)
            continue

        face_input = preprocess_face(img)
        emb = model.predict(face_input, verbose=0)[0]
        embeddings.append(emb)

    if unreadable:
        print(f"Skipped {len(unreadable)} unreadable image(s): {unreadable}")

    # Without this guard the failure surfaces later as an unrelated
    # array-shape error from the normalization step.
    if not embeddings:
        raise ValueError(f"No readable images found in {folder_path!r}")

    embeddings = normalize(np.array(embeddings), axis=1)
    return embeddings.mean(axis=0)


In [47]:
# Longest-side length, in pixels, that frames are resized to before detection.
DETECT_SIZE = 320  # 240–360 is ideal

def resize_for_detection(frame, target=320):
    """Resize ``frame`` so its longest side is ``target`` pixels.

    The aspect ratio is preserved (up to integer truncation of the new size).

    Args:
        frame: Image array whose first two dimensions are height and width.
        target: Desired length of the longest side, in pixels.

    Returns:
        ``(resized, scale)`` where ``scale = target / max(height, width)``.
        Dividing a coordinate in ``resized`` by ``scale`` maps it back to
        ``frame``.

    Raises:
        ValueError: If ``frame`` has a zero-sized side or ``target`` is not
            positive.
    """
    h, w = frame.shape[:2]
    if h <= 0 or w <= 0:
        raise ValueError(f"resize_for_detection: empty frame with shape {frame.shape}")
    if target <= 0:
        raise ValueError(f"resize_for_detection: target must be positive, got {target}")

    scale = target / max(h, w)
    # int() truncates, so a very thin frame could otherwise get a 0-pixel side,
    # which cv2.resize rejects.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    resized = cv2.resize(frame, (new_w, new_h))
    return resized, scale


In [48]:
# Initial state for sparse face detection.
# NOTE(review): the live verification cell assigns all three names again
# before its loop starts.
DETECT_EVERY = 5   # detect once every 5 frames
# Number of frames read so far.
frame_count = 0
# Most recent detection result; None means no face is currently cached.
cached_face = None


In [49]:
from mtcnn import MTCNN
from sklearn.metrics.pairwise import cosine_similarity
import time

# --- Setup -------------------------------------------------------------
detector = MTCNN(device="cpu")

reference_embedding = np.load("Sandesh Prasai_imagenet_embedding.npy")
# Scale the stored reference to unit length so the dot product below is a
# cosine similarity. The norm is floored to avoid a division by zero.
reference_embedding = reference_embedding / max(
    np.linalg.norm(reference_embedding), 1e-12
)

THRESHOLD = 0.9      # minimum cosine similarity reported as "MATCH"
DETECT_EVERY = 5     # run the face detector once every N frames
DETECT_SIZE = 320    # longest side of the downscaled detection frame

cap = cv2.VideoCapture("http://192.168.1.75:4747/video")

frame_count = 0
cached_face = None    # last detection, reused between detector runs
cached_scale = 1.0    # resize factor that belongs to cached_face

print("Press 'q' to quit")

# try/finally guarantees the stream and the preview window are released even
# if detection, alignment or inference raises mid-loop.
try:
    if not cap.isOpened():
        # The loop below would exit immediately anyway; say why.
        print("⚠️ Could not open the video stream")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # --- Face detection (sparse) ---
        # Detect on a downscaled copy every DETECT_EVERY frames, or on every
        # frame while no face is cached.
        if frame_count % DETECT_EVERY == 0 or cached_face is None:
            small, scale = resize_for_detection(frame, DETECT_SIZE)
            rgb_small = cv2.cvtColor(small, cv2.COLOR_BGR2RGB)
            results = detector.detect_faces(rgb_small)

            if results:
                # Keep only the most confident detection.
                cached_face = max(results, key=lambda x: x['confidence'])
                cached_scale = scale
            else:
                cached_face = None

        # --- Face verification ---
        if cached_face:
            box = cached_face['box']
            keypoints = cached_face['keypoints']

            # Rescale landmarks from the detection frame to the full frame.
            kp = {k: (int(v[0] / cached_scale), int(v[1] / cached_scale))
                  for k, v in keypoints.items()}

            aligned = align_face_to_template(frame, kp)

            if aligned is not None:
                face_input = preprocess_face(aligned)

                emb = embedding_model.predict(face_input, verbose=0)[0]
                # Floor the norm so an all-zero embedding yields similarity 0
                # instead of NaN.
                emb = emb / max(np.linalg.norm(emb), 1e-12)

                similarity = float(np.dot(emb, reference_embedding))

                label = "MATCH" if similarity >= THRESHOLD else "NO MATCH"

                x, y, w, h = [int(v / cached_scale) for v in box]
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(
                    frame,
                    f"{label} | sim={similarity:.3f}",
                    # Keep the label on-screen when the box touches the top edge.
                    (x, max(y - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6,
                    (0, 255, 0),
                    2
                )

        cv2.imshow("ImageNet Face Verification", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


Press 'q' to quit


2026-02-04 12:16:11.176942: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: INVALID_ARGUMENT: Incompatible shapes: [0,24,24,3] vs. [1,1,1,28]
2026-02-04 12:16:43.701530: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: INVALID_ARGUMENT: Incompatible shapes: [0,24,24,3] vs. [1,1,1,28]
