<a href="https://colab.research.google.com/github/oscarB1nar10/face_recognition/blob/main/Face_recognition_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import os
import cv2
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import normalize
from tensorflow.keras.models import load_model

### Connect to Google drive

In [12]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
! git status

fatal: not a git repository (or any of the parent directories): .git


## Load the pretrained model

In [13]:
# Load the InceptionV3 model with pre-trained weights
# include_top = false, the top of the model, which is responsable for the actual calssification is not loaded.
# This is typically done when we want to use the model for feature extraction in a transfer learning scenario.
base_model = InceptionV3(include_top=False, weights='imagenet', pooling='avg')

## Preprocess the images

In [14]:
def normalize_image(image, target_size=(160, 160)):
    resized_image = cv2.resize(image, target_size)
    # Convert the image colors from BGR to RGB
    resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
    normalized_image = resized_image / 255.0
    return normalized_image

## Load and preprocess the images that we are going to use as database images of encodings and labels

In [15]:
# Function to load and preprocess images
def load_and_preprocess_images(dataset_path):
  image_paths = [] # List to store the image paths
  labels = [] # List to store the labels corresponding to each image
  encodings = [] # List to store the generated encodings

  # Loop through the dataset folder
  for label in os.listdir(dataset_path):
    label_path = os.path.join(dataset_path, label)
    if os.path.isdir(label_path):
      # Loop through the images in the subfolder
      for img_name in os.listdir(label_path):
        img_path = os.path.join(label_path, img_name)
        if img_path.endswith('.jpg') or img_path.endswith('.png'):
          image_paths.append(img_path)
          labels.append(label)

  # Loop through the image paths and generate encodings
  for img_path in image_paths:
    img = cv2.imread(img_path) # Read the image from the file path
    preprocessed_img = normalize_image(img)
    # Add an extra dimension to the image array to make it a batch of one image
    # The model expects input in the format (batch_size, height, width, channels)
    image_batch = np.expand_dims(preprocessed_img, axis=0)
    encodings.append(normalize(base_model.predict(image_batch)))

  return image_paths, labels, np.array(encodings)

In [16]:
# Set the path to the images we want to generate the encodings
dataset_path = '/content/drive/MyDrive/face_images/dataset_embeddings/'
# Generate encodings for the dataset images
image_paths_dataset, labels, dataset_encodings = load_and_preprocess_images(dataset_path)

print(f"dataset_encodings: {dataset_encodings}")

dataset_embeddings: [[[0.02463774 0.00540078 0.00528919 ... 0.0016229  0.02033683 0.01174251]]

 [[0.0081503  0.02689975 0.00161638 ... 0.01613967 0.00532081 0.01586023]]

 [[0.00221358 0.00983824 0.0057775  ... 0.00389638 0.02416536 0.0210307 ]]

 ...

 [[0.01782076 0.00353921 0.         ... 0.01039482 0.00392762 0.00398203]]

 [[0.00520398 0.00842023 0.00317693 ... 0.00382666 0.         0.00943276]]

 [[0.04431464 0.0282218  0.00721014 ... 0.00333184 0.         0.0347878 ]]]


## Validate the model accuracy based on L2 distance between the database embeddings and the test embeddings

In [9]:
def validate_model(validation_embeddings, validation_labels, threshold):
  correct_predictions = 0

  for i in range(len(validation_embeddings)):
    # Calculate distances with the dataset embeddings
    distances = []
    #min_dis = 100
    for emb in dataset_embeddings:
      #distance = euclidean_distance(validation_embeddings[i], emb)
      distance = np.linalg.norm(tf.subtract(validation_embeddings[i], emb))
      # if distance < min_dis:
      #   min_dis = distance
      distances.append(distance)

    # Get the index of the closest embedding and it's label
    min_index = np.argmin(distances)
    predicted_label = labels[min_index]

    print(f"predicted_label: {predicted_label}, validation_labels[i]: {validation_labels[i]}")

    print(f"distances[min_index]: {distances[min_index]}, threshold: {threshold}")

    # Check if the prediction is correct
    if distances[min_index] <= threshold and predicted_label == validation_labels[i]:
      correct_predictions += 1
      print(f"correct_predictions: {correct_predictions}")

  accurary = correct_predictions / len(validation_embeddings)

  return accurary

In [10]:
# Set the path to the images we want to generate the embeddings
validation_path = '/content/drive/MyDrive/face_images/test_images/test_images_formatted/'
# Generate embeddings for the test images
image_paths_validation, validation_labels, validation_embeddings = load_and_preprocess_images(validation_path)

print(f"validation_embeddings: {validation_embeddings}")

validation_embeddings: [[[0.00717166 0.01723029 0.00242884 ... 0.         0.00205334 0.02068376]]

 [[0.00043814 0.00093427 0.00912452 ... 0.0654472  0.00024958 0.02449374]]

 [[0.01744594 0.01521931 0.00328817 ... 0.0418946  0.00875952 0.03257682]]

 ...

 [[0.02048094 0.01981575 0.         ... 0.0085188  0.04169054 0.02208139]]

 [[0.02138359 0.01594866 0.         ... 0.04295041 0.02152747 0.00302541]]

 [[0.02433598 0.01250073 0.01261056 ... 0.00968251 0.05111663 0.02832301]]]


## Accuracy

In [11]:
threshold = 0.7
accuracy = validate_model(validation_embeddings, validation_labels, threshold)
print(f"Validation accuracy: {accuracy * 100}%")

predicted_label: Oscar, validation_labels[i]: Oscar
distances[min_index]: 0.5040819048881531, threshold: 0.7
correct_predictions: 1
predicted_label: Gladys, validation_labels[i]: Gladys
distances[min_index]: 0.646894097328186, threshold: 0.7
correct_predictions: 2
predicted_label: Alejandro, validation_labels[i]: Juan Jose
distances[min_index]: 0.7093230485916138, threshold: 0.7
predicted_label: Juan Jose, validation_labels[i]: Juan Jose
distances[min_index]: 0.5822619199752808, threshold: 0.7
correct_predictions: 3
predicted_label: Alejandro, validation_labels[i]: Alejandro
distances[min_index]: 0.38698285818099976, threshold: 0.7
correct_predictions: 4
predicted_label: Oscar, validation_labels[i]: Guillermo
distances[min_index]: 0.6380272507667542, threshold: 0.7
predicted_label: Dianna, validation_labels[i]: Dianna
distances[min_index]: 0.3461225926876068, threshold: 0.7
correct_predictions: 5
Validation accuracy: 71.42857142857143%
