In [40]:
import os
import glob
import cv2
from tensorflow.keras.models import load_model
import tensorflow as tf
import numpy as np

from tensorflow.keras.applications.resnet50 import preprocess_input as preprocess_resnet50
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input as preprocess_inception_resnet_v2
from tensorflow.keras.applications.inception_v3 import preprocess_input as preprocess_inception_v3
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as preprocess_mobilenet_v2
from tensorflow.keras.applications.vgg16 import preprocess_input as preprocess_vgg16
from tensorflow.keras.applications.vgg19 import preprocess_input as preprocess_vgg19
from tensorflow.keras.applications.mobilenet_v3 import preprocess_input as preprocess_mobilenet_v3
from tensorflow.keras.applications.efficientnet import preprocess_input as preprocess_efficientnet

import mediapipe as mp

In [42]:
def get_preprocess_function(base_model_type):
    """Look up the Keras ``preprocess_input`` function for a backbone name.

    Args:
        base_model_type: One of 'ResNet50', 'InceptionResNetV2', 'InceptionV3',
            'MobileNetV2', 'MobileNetV3', 'VGG16', 'VGG19', 'ResNet50V2' or
            'EfficientNet'.

    Returns:
        The ``preprocess_input`` callable imported at the top of the notebook
        for the requested backbone.

    Raises:
        ValueError: If ``base_model_type`` is not one of the names above.
    """
    # (name, function) pairs, checked in order with ``==``.
    backbone_table = (
        ('ResNet50', preprocess_resnet50),
        ('InceptionResNetV2', preprocess_inception_resnet_v2),
        ('InceptionV3', preprocess_inception_v3),
        ('MobileNetV2', preprocess_mobilenet_v2),
        ('MobileNetV3', preprocess_mobilenet_v3),
        ('VGG16', preprocess_vgg16),
        ('VGG19', preprocess_vgg19),
        # NOTE(review): ResNet50V2 reuses the InceptionResNetV2 function because
        # no resnet_v2 import exists in this notebook -- confirm both apply the
        # same input scaling before relying on it.
        ('ResNet50V2', preprocess_inception_resnet_v2),
        ('EfficientNet', preprocess_efficientnet),
    )
    for backbone_name, preprocess_fn in backbone_table:
        if base_model_type == backbone_name:
            return preprocess_fn
    raise ValueError(
        'Invalid base_model_type.')

In [43]:
def preprocess(file, input_shape):
    """Read an image file and return it as a resized RGB batch of one.

    Args:
        file: Path of the image to load.
        input_shape: Target size handed straight to ``cv2.resize`` (OpenCV
            interprets it as ``(width, height)``).

    Returns:
        The resized RGB image with a leading batch axis of length 1.

    Raises:
        OSError: If OpenCV cannot read ``file``.
    """
    face = cv2.imread(file)
    if face is None:
        # cv2.imread reports a missing or undecodable file by returning None;
        # fail here with the path instead of inside cvtColor.
        raise OSError(f"cv2.imread could not read image: {file}")
    face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    face = cv2.resize(face, input_shape)
    return np.expand_dims(face, axis=0)

In [44]:
def import_ref_embedding(model, input_shape, preprocess_input, ref_dir='./ref_data/min_data'):
    """Embed every reference ``*.jpg`` image found under ``ref_dir``.

    The directory is expected to contain one sub-directory per identity; the
    sub-directory name is used as the label of every image inside it.

    Args:
        model: Object whose ``predict(batch, verbose=0)`` returns the embedding.
        input_shape: Size forwarded to ``preprocess`` for resizing.
        preprocess_input: Callable applied to the image batch before prediction.
        ref_dir: Root directory of the reference images. Defaults to the
            location the notebook originally hard-coded.

    Returns:
        ``(embeddings, labels)``: two parallel lists holding the flattened
        embedding and the sub-directory name for each image.
    """
    ref_embeddings = []
    ref_labels = []
    # Sorted traversal keeps the embedding order stable between runs
    # (os.listdir / glob make no ordering guarantee).
    for label in sorted(os.listdir(ref_dir)):
        label_dir = os.path.join(ref_dir, label)
        if not os.path.isdir(label_dir):
            # Stray files next to the identity folders carry no images.
            continue
        for image_path in sorted(glob.glob(os.path.join(label_dir, '*.jpg'))):
            # Same preprocessing chain as the one applied to live frames.
            face = preprocess_input(preprocess(image_path, input_shape))
            embedding = model.predict(face, verbose=0)
            ref_embeddings.append(embedding.flatten())
            ref_labels.append(label)

    return ref_embeddings, ref_labels

In [45]:
def l2_normalize(x):
    """Divide ``x`` by its L2 norm (square root of the sum of squared entries)."""
    squared_total = np.sum(np.multiply(x, x))
    norm = np.sqrt(squared_total)
    return x / norm

def findEuclideanDistance(source_representation, test_representation):
    """Return the Euclidean distance between two representations.

    The distance is the square root of the summed squared element-wise
    differences of the two inputs.
    """
    delta = source_representation - test_representation
    return np.sqrt(np.sum(np.multiply(delta, delta)))

def predict(feature, all_features, labels):
    """Find the reference feature closest to ``feature``.

    Args:
        feature: Query feature vector.
        all_features: Reference features, one per row (distances are reduced
            along axis 1).
        labels: Indexable collection of labels parallel to ``all_features``.

    Returns:
        ``(name, distance)``: the label of the nearest reference row and its
        Euclidean distance to ``feature``.
    """
    offsets = feature - all_features
    distances = np.sqrt(np.sum(np.square(offsets), axis=1))
    nearest = np.argmin(distances)
    return labels[nearest], distances[nearest]

## REAL TIME TRIPLET LOSS

In [49]:
# Function to recognize faces
def recognize_faces(image, face_boxes, model, ref_features, ref_labels, preprocess_input, threshold=0.45, input_shape=(224, 224)):
    """Label each detected face by its nearest reference embedding.

    Note: a later cell of this notebook defines another ``recognize_faces``
    with a different signature; whichever cell ran last wins.

    Args:
        image: Frame to crop faces from. It should use the same channel order
            as the images behind ``ref_features`` (the notebook's
            ``preprocess`` converts reference images to RGB).
        face_boxes: Iterable of ``(x, y, width, height)`` pixel boxes.
        model: Object whose ``predict(batch, verbose=0)`` returns embeddings.
        ref_features: Reference embeddings, one per row.
        ref_labels: Labels parallel to ``ref_features``.
        preprocess_input: Callable applied to the resized crop.
        threshold: Largest distance still accepted as a match.
        input_shape: Size passed to ``cv2.resize`` for each crop.

    Returns:
        ``[recognized_faces, distances]``: two lists with exactly one entry
        per box in ``face_boxes``. Unmatched or uncroppable faces get the
        label ``"3"``; uncroppable ones get an infinite distance.
    """
    unknown_label = "3"
    # Converted once instead of once per face.
    label_array = np.array(ref_labels)
    img_h, img_w = image.shape[:2]

    recognized_faces = []
    distances = []
    for x, y, width, height in face_boxes:
        # Clamp to the frame: a negative start index would otherwise wrap
        # around and crop the wrong region.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + width, img_w), min(y + height, img_h)

        if x1 <= x0 or y1 <= y0:
            # Keep a placeholder so results stay aligned with face_boxes
            # (callers zip the two together).
            recognized_faces.append(unknown_label)
            distances.append(float("inf"))
            continue

        face = cv2.resize(image[y0:y1, x0:x1], input_shape)
        face = preprocess_input(face)
        face = np.expand_dims(face, axis=0)

        embedding = model.predict(face, verbose=0)

        predicted_label, distance = predict(embedding[0], ref_features, label_array)
        print(predicted_label, distance)

        recognized_faces.append(predicted_label if distance <= threshold else unknown_label)
        distances.append(distance)

    return [recognized_faces, distances]


In [47]:
BASE_MODEL_TYPE = 'EfficientNet'

# The checkpoint directory is named after the backbone, so derive the path from
# BASE_MODEL_TYPE; the loaded weights and the preprocessing function then cannot drift apart.
model = load_model("triplet_loss_trained_models/" + BASE_MODEL_TYPE)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, epsilon=1e-01)
model.compile(optimizer=optimizer)
preprocess_input = get_preprocess_function(BASE_MODEL_TYPE)

# Embed the reference gallery once; the real-time loop compares live faces against it.
ref_embeddings, ref_labels = import_ref_embedding(model, (224,224), preprocess_input)
if not ref_embeddings:
    # Without references the nearest-neighbour search fails later with an opaque numpy error.
    raise RuntimeError("No reference images found; cannot build the embedding gallery.")



In [50]:
# MediaPipe initialization and webcam loop for the triplet-loss (embedding) model.
# `mp` is imported in the notebook's first cell.

# Read the names from the text file into a list; line i is the display name for label i
with open("names.txt", "r") as file:
    names = file.read().splitlines()

preprocess_input = get_preprocess_function(BASE_MODEL_TYPE)

mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.2)

# Webcam capture
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    print("Could not open camera index 1")

# Counter for frames
frame_count = 0
labels, distances = [], []

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = face_detection.process(frame_rgb)

        ih, iw, _ = frame.shape
        face_boxes = []
        if result.detections:
            for detection in result.detections:
                bboxC = detection.location_data.relative_bounding_box
                x, y, w, h = int(bboxC.xmin * iw), int(bboxC.ymin * ih), int(bboxC.width * iw), int(bboxC.height * ih)
                face_boxes.append((x, y, w, h))
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # Recognition only runs every 50th frame; in between, the previous
        # labels are re-drawn on the freshly detected boxes.
        if frame_count % 50 == 0:
            # Use frame_rgb: the reference embeddings were computed from RGB
            # images, and this copy has no rectangles drawn over the faces.
            labels, distances = recognize_faces(frame_rgb, face_boxes, model, ref_embeddings, ref_labels, preprocess_input)

        for label, distance, box in zip(labels, distances, face_boxes):
            x, y, w, h = box
            label_index = int(label)
            distance = round(distance, 3)
            # Labels above 2 (recognize_faces emits "3") mean "unknown"; the
            # extra bound avoids an IndexError when names.txt is short.
            if 0 <= label_index <= 2 and label_index < len(names):
                caption = names[label_index] + " " + str(distance)
            else:
                caption = "unknown"
            cv2.putText(frame, caption, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

        cv2.imshow('Real-time Face Recognition', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        frame_count += 1
finally:
    # Release the camera, detector and window even if a frame raised.
    cap.release()
    face_detection.close()
    cv2.destroyAllWindows()


1 0.6762822
1 0.6146554
1 0.8045504
0 0.41431624
0 0.32232332
0 0.45401248
1 0.7953351
2 0.6980446
2 0.5940066
0 0.6692596
0 0.3220486
0 0.43268064
0 0.3841468
2 1.0813482
2 0.6971509
2 1.0820616
2 0.91803443
1 1.0088573
2 0.8168856
1 1.0025078
2 0.91954404
1 0.79533505
2 0.92445254
1 0.8231786
1 0.8026843
2 0.9379284
1 0.8504975
2 0.92943674
1 0.9208703
2 0.80041546
1 0.74787515
1 0.5494098
0 0.4932686
0 0.37488303
1 0.92369896
1 0.85063636
2 0.9383852
1 0.718068
1 0.8446412
1 0.8803817
1 0.8614351
1 0.68205714
1 1.0134676
1 0.58958405
0 0.34409785
1 1.019573
1 0.8219104


## REAL TIME CROSS ENTROPY

In [35]:
# Function to recognize faces
def recognize_faces(image, face_boxes, model, preprocess_input , threshold=0.90, input_shape=(224, 224)):
    """Classify each detected face with a softmax-style classifier.

    Note: this definition shadows the embedding-based ``recognize_faces``
    defined earlier in the notebook.

    Args:
        image: Frame to crop faces from.
        face_boxes: Iterable of ``(x, y, width, height)`` pixel boxes.
        model: Object whose ``predict(batch, verbose=0)`` returns one row of
            class scores per input image.
        preprocess_input: Callable applied to the resized crop.
        threshold: Smallest top score still accepted as a match.
        input_shape: Size passed to ``cv2.resize`` for each crop.

    Returns:
        ``(recognized_faces, probabilities)``: two lists with exactly one
        entry per box in ``face_boxes``. Accepted faces carry the integer
        class index and its score as a float; rejected or uncroppable faces
        carry the label ``"3"`` and a score of 0.0.
    """
    unknown_label = "3"
    img_h, img_w = image.shape[:2]

    recognized_faces = []
    probabilities = []
    for x, y, width, height in face_boxes:
        # Clamp to the frame: a negative start index would otherwise wrap
        # around and crop the wrong region.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + width, img_w), min(y + height, img_h)

        if x1 <= x0 or y1 <= y0:
            # Keep a placeholder so results stay aligned with face_boxes
            # (callers zip the two together).
            recognized_faces.append(unknown_label)
            probabilities.append(0.0)
            continue

        face = cv2.resize(image[y0:y1, x0:x1], input_shape)
        face = preprocess_input(face)
        face = np.expand_dims(face, axis=0)

        # A single image was submitted, so row 0 holds its class scores.
        class_scores = model.predict(face, verbose=0)[0]
        best_class = int(np.argmax(class_scores))
        # Plain float rather than a shape-(1,) array, so it prints as a number.
        best_score = float(class_scores[best_class])

        if best_score >= threshold:
            recognized_faces.append(best_class)
            probabilities.append(best_score)
        else:
            recognized_faces.append(unknown_label)
            probabilities.append(0.0)

    return recognized_faces, probabilities


In [36]:
# Set the backbone explicitly for this section: on a top-to-bottom run
# BASE_MODEL_TYPE is still the value from the triplet-loss section, which would
# pair this checkpoint with another backbone's preprocessing.
# NOTE(review): 'VGG16' is inferred from the checkpoint file name -- confirm it
# matches the preprocessing used when the classifier was trained.
BASE_MODEL_TYPE = 'VGG16'

model = load_model("cross_entropy_trained_models/" + BASE_MODEL_TYPE + ".h5" )
preprocess_input = get_preprocess_function(BASE_MODEL_TYPE)

In [39]:
# MediaPipe initialization and webcam loop for the cross-entropy (classifier) model.
# `mp` is imported in the notebook's first cell.

# Read the names from the text file into a list; line i is the display name for class i
with open("names.txt", "r") as file:
    names = file.read().splitlines()

# NOTE(review): BASE_MODEL_TYPE must name the backbone of the currently loaded `model`.
preprocess_input = get_preprocess_function(BASE_MODEL_TYPE)

mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.2)

# Webcam capture
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    print("Could not open camera index 1")

# Counter for frames
frame_count = 0
labels, probabilities = [], []

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = face_detection.process(frame_rgb)

        ih, iw, _ = frame.shape
        face_boxes = []
        if result.detections:
            for detection in result.detections:
                bboxC = detection.location_data.relative_bounding_box
                x, y, w, h = int(bboxC.xmin * iw), int(bboxC.ymin * ih), int(bboxC.width * iw), int(bboxC.height * ih)
                face_boxes.append((x, y, w, h))
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # Classification only runs every 50th frame; in between, the previous
        # labels are re-drawn on the freshly detected boxes.
        if frame_count % 50 == 0:
            # Use frame_rgb: it has no rectangles drawn over the faces.
            # NOTE(review): assumes the classifier was trained on RGB input -- confirm.
            labels, probabilities = recognize_faces(frame_rgb, face_boxes, model, preprocess_input)

        for label, probability, box in zip(labels, probabilities, face_boxes):
            x, y, w, h = box
            label_index = int(label)
            # Accept either a scalar or a one-element array and show 3 decimals.
            shown_probability = round(float(np.ravel(probability)[0]), 3)
            # Labels above 2 (recognize_faces emits "3") mean "unknown"; the
            # extra bound avoids an IndexError when names.txt is short.
            if 0 <= label_index <= 2 and label_index < len(names):
                caption = names[label_index] + " " + str(shown_probability)
            else:
                caption = "unknown"
            cv2.putText(frame, caption, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

        cv2.imshow('Real-time Face Recognition', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        frame_count += 1
finally:
    # Release the camera, detector and window even if a frame raised.
    cap.release()
    face_detection.close()
    cv2.destroyAllWindows()


