# Introduction

<p>
    I was inspired by <a href="https://towardsdatascience.com/real-time-face-recognition-an-end-to-end-project-b738bb0f7348">this</a> article to try my hand at facial recognition. This notebook contains an app in two parts. The first uses a webcam and face detection to take mug shots of the user which are used to train a recognizer. The second part also uses face detection, but now uses the trained recognizer to predict whether the detected faces are in the training set.
</p>

| ![s](register_demo.gif)<br/><center>Face detection</center> | ![f](recognition_demo.gif)<br/><center>Face detection with facial recognition</center> |
| -- | -- |

## Requirements

<p>This notebook was written with Python 3.8.0 and the following libraries. If you have difficulties running it with other versions, try using a virtual environment and running this cell.</p>

<p><strong>Note:</strong> The code depends on a local webcam, so this won't work in Google Colab or other web-based environments.</p>

In [None]:
!pip3 install opencv-contrib-python==4.8.0.76 imutils==0.5.4 matplotlib==3.7.3 numpy==1.24.4 imageio==2.31.5

## Imports

In [None]:
import datetime
import os
import random
import re
import time
from typing import Any, List, Tuple, Union

import cv2
import imageio
import imutils
import matplotlib.pyplot as plt
import numpy as np
from imutils.video import VideoStream

%matplotlib inline

## File paths

Let's download haarcascade_frontalface_default.xml, used for face detection, if we haven't already, and set some file paths that we'll need later.

In [None]:
# Local copy of the Haar cascade model used for face detection, and where to fetch it from
CASCADE_PATH = os.path.join(os.getcwd(), "haarcascade_frontalface_default.xml")
CASCADE_URL = "https://raw.githubusercontent.com/kipr/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml"

# Download the cascade only if it isn't already present. urllib is used rather
# than shelling out to wget so the cell also works where wget isn't installed.
if not os.path.exists(CASCADE_PATH):
    import urllib.request

    urllib.request.urlretrieve(CASCADE_URL, CASCADE_PATH)

# File the trained recognizer data is loaded from
FACE_DATA = os.path.join(os.getcwd(), "face_data.yml")

# Directory holding the captured user mug shots
SAVE_PATH = os.path.join(os.getcwd(), "user_pics")

# File name of a saved mug shot: {0} is the user ID, {1} the picture index
PIC_FILE_TEMPLATE = "user_{0}_pic_{1}.jpg"
# Regex for those file names (group 1 captures the user ID).
# Raw strings are used so "\d" is not treated as an (invalid) string escape.
PIC_FILE_RE = PIC_FILE_TEMPLATE.format(r"(\d+)", r"\d+")

## Saving Images

These methods cover the saving of the user pics and gif recordings.

In [None]:
def get_next_user_id() -> int:
    """
    Return the next available user ID (first is 1)

    The result is the smallest positive integer not already used as the user ID
    of a picture file in SAVE_PATH, so a gap left by a removed user is reused.
    Returns 1 if the save directory does not exist yet.
    """

    try:
        files = os.listdir(SAVE_PATH)
    except FileNotFoundError:
        return 1

    # Collect the IDs numerically. Walking the file names in sorted order is not
    # reliable: "user_10_..." sorts before "user_1_...", so with ten or more
    # users an ID that is already taken could be handed out again.
    used_ids = set()
    for file in files:
        match = re.search(PIC_FILE_RE, file)
        if match:
            used_ids.add(int(match.group(1)))

    next_id = 1
    while next_id in used_ids:
        next_id += 1
    return next_id


def save_user_images(img_list: List[np.ndarray]) -> None:
    """
    Save the given list of images to the save directory with a unique prefix

    Each image is written to SAVE_PATH as PIC_FILE_TEMPLATE.format(user_id, index),
    where user_id comes from get_next_user_id(). Afterwards the images (or 10
    random ones if there are more than that) are displayed in a two-row grid.
    Both greyscale (2-D) and BGR colour images can be displayed.
    """

    # Make sure the save directory exists
    os.makedirs(SAVE_PATH, exist_ok=True)

    # Save the mug shots with a unique user ID
    next_id = get_next_user_id()
    for i, img in enumerate(img_list):
        cv2.imwrite(os.path.join(SAVE_PATH, PIC_FILE_TEMPLATE.format(next_id, i)), img)

    # Display the captured mug shots (or 10 random ones if there are more than that)
    max_displayed_shots = 10
    shown = img_list
    if len(shown) > max_displayed_shots:
        shown = random.sample(shown, k=max_displayed_shots)
    if not shown:
        return  # Nothing to display

    rows = 2
    cols = (len(shown) + rows - 1) // rows  # Round up so an odd number of images still all fit
    fig = plt.figure(figsize=(10, 6))
    for i, img in enumerate(shown):
        fig.add_subplot(rows, cols, i + 1)
        plt.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)
        if img.ndim == 2:
            # Greyscale image: a BGR->RGB conversion needs 3 channels, so show it with a grey colour map instead
            plt.imshow(img, cmap="gray", vmin=0, vmax=255)
        else:
            # Because CV2 captures in BGR, but plt uses RGB
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()
    plt.close()


def save_gif(frames: List[np.ndarray],
             name: str) -> None:
    """
    Resize each frame to a width of 300 and convert it from BGR to RGB,
    then write the resulting frames to `name` as a gif
    """

    gif_frames = []
    for frame in frames:
        small_frame = imutils.resize(frame, width=300)
        gif_frames.append(cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB))

    imageio.mimwrite(name, gif_frames, fps=30, loop=0)

# Video Capture

In [None]:
# Number of mug shots collected when registering a user
MAX_SHOTS = 20

# A mug shot is only captured when a random value in [0, 1) exceeds this
CAPTURE_THRESHOLD = 0.9

# A user only counts as recognised when the confidence is below this
RECOGNITION_THRESHOLD = 50

# Width the captured frame is resized to
FRAME_WIDTH = 600

# Modes (each value is also the index of that mode's prompt in MESSAGES)
MODE_WAITING, MODE_LEFT_RIGHT, MODE_UP_DOWN, MODE_AUTHENTICATING = range(4)

MESSAGES = [
    "Press S when ready to begin",    # MODE_WAITING
    "Move your head left and right",  # MODE_LEFT_RIGHT
    "Move your head up and down",     # MODE_UP_DOWN
    "",                               # MODE_AUTHENTICATING
]

def add_text(frame: np.ndarray,
             message: str) -> None:
    """
    Write the message in white, horizontally centred, 20 pixels above the bottom edge of the frame
    """

    # Text options
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    font_scale, line_width, white = 1, 2, (255, 255, 255)

    # Only the rendered text width is needed to centre the message
    (text_width, _), _ = cv2.getTextSize(message, font_face, font_scale, line_width)
    frame_height, frame_width = frame.shape[0], frame.shape[1]
    origin = (int((frame_width - text_width) / 2), frame_height - 20)

    cv2.putText(frame, message, origin, font_face, font_scale, white, line_width)


def capture_frame(stream: VideoStream,
                  cascade: cv2.CascadeClassifier) -> Tuple[np.ndarray, np.ndarray, Any]:
    """
    Get the next frame from the stream, mirror it, resize it, and convert it to greyscale
    Find the faces in the frame
    Return frame, grayscale version, and bounding boxes of faces

    The bounding boxes are whatever cascade.detectMultiScale() returns; each one
    unpacks as (x, y, width, height).
    Raises RuntimeError if the stream did not provide a frame.
    """

    raw_frame = stream.read()
    if raw_frame is None:
        # Fail with a clear message instead of a cryptic cv2 error from flip()
        raise RuntimeError("No frame received from the video stream; is the webcam available?")

    frame = cv2.flip(raw_frame, 1)  # Flip code 1 mirrors the image horizontally
    frame = imutils.resize(frame, width=FRAME_WIDTH)
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    faces = cascade.detectMultiScale(
        gray_frame,
        scaleFactor=1.2,
        minNeighbors=5,
        minSize=(100, 100)
    )

    return frame, gray_frame, faces


def get_mug_shot(frame: np.ndarray,
                 gray_frame: np.ndarray,
                 face: Tuple[int]) -> Union[np.array, None]:
    """
    Outline the face on the frame, then randomly decide whether to capture it.
    Returns the face region of the greyscale frame when the random draw
    exceeds CAPTURE_THRESHOLD, otherwise None
    """

    # Corners of the bounding box
    left, top, box_width, box_height = face
    right, bottom = left + box_width, top + box_height

    # Outline the face in white
    cv2.rectangle(frame, (left, top), (right, bottom), (255, 255, 255), 2)

    # No capture unless the random draw beats the threshold
    if np.random.random() <= CAPTURE_THRESHOLD:
        return None

    return gray_frame[top:bottom, left:right]


def main(current_mode: int,
         recognizer: cv2.face.LBPHFaceRecognizer = None) -> None:
    """
    Run the webcam loop until the user quits or registration completes

    MODE_WAITING:                  Detect faces and draw bounding boxes
    MODE_UP_DOWN|MODE_LEFT_RIGHT:  Sample mug shots if single face detected
    MODE_AUTHENTICATING:           Detect faces and predict label/confidence

    current_mode is the starting mode and must be MODE_WAITING (register a new
    user) or MODE_AUTHENTICATING (which also needs a recognizer); anything else
    raises ValueError. FileNotFoundError is raised if the face cascade cannot
    be loaded from CASCADE_PATH.

    Keys: s = start capturing (while waiting), p = save a screen grab,
          r = start/stop recording frames for a gif, q = quit
    """

    # Set the frame title by mode, rejecting anything the loop below cannot run
    if current_mode == MODE_WAITING:
        frame_title = "Register New User"
    elif current_mode == MODE_AUTHENTICATING:
        if recognizer is None:
            raise ValueError("A recognizer is required for MODE_AUTHENTICATING")
        frame_title = "Authenticate User"
    else:
        raise ValueError(f"main() must start in MODE_WAITING or MODE_AUTHENTICATING, got {current_mode}")

    # Initialise the classifier for detecting faces (before the webcam, so a bad path doesn't leave the camera open)
    cascade = cv2.CascadeClassifier(CASCADE_PATH)
    if cascade.empty():
        raise FileNotFoundError(f"Could not load the face cascade from {CASCADE_PATH}")

    # Mug shots collected while registering a user
    mug_shots = []

    # Keep a list of frames for a gif
    all_frames = []

    # Whether to save frames to produce a gif
    recording = red_circle = False

    # Initialise the webcam
    stream = VideoStream(src=0, framerate=30).start()

    try:
        while True:
            # Capture the frame, a gray scale version, and any faces in it
            frame, gray_frame, faces = capture_frame(stream, cascade)

            # Waiting to begin capturing new user images; put a bounding box around all faces
            if current_mode == MODE_WAITING:
                for (x_pos, y_pos, width, height) in faces:
                    cv2.rectangle(frame, (x_pos, y_pos), (x_pos + width, y_pos + height), (255, 255, 255), 2)

            # In capture mode (LEFT_RIGHT|UP_DOWN) and only 1 face
            elif current_mode <= MODE_UP_DOWN and len(faces) == 1:
                # Add the mug shot (if it exists) to the list; check for moving to next mode/finishing
                mug_shot = get_mug_shot(frame, gray_frame, faces[0])
                if mug_shot is not None:
                    mug_shots.append(mug_shot)
                    if len(mug_shots) == int(MAX_SHOTS / 2):
                        current_mode = MODE_UP_DOWN
                    elif len(mug_shots) == MAX_SHOTS:
                        save_user_images(mug_shots)
                        break

            # Trying to recognize known users
            elif current_mode == MODE_AUTHENTICATING:
                for (x_pos, y_pos, width, height) in faces:
                    # Predict a label and confidence for each face detected
                    label, confidence = recognizer.predict(gray_frame[y_pos:y_pos + height, x_pos:x_pos + width])

                    # Only treat the face as a known user when the confidence is below the threshold
                    if label >= 0 and confidence < RECOGNITION_THRESHOLD:
                        shade = int(255 / (label + 1))  # Quick and dirty way to get a different colour per user
                        colour = (shade, shade, 255)
                        caption = f"User {label} ({round(confidence, 2)})"
                    else:
                        colour = (255, 255, 255)
                        caption = f"Unknown ({round(confidence, 2)})"

                    # Draw a rectangle around this face with the predicted label/unknown and confidence
                    cv2.rectangle(frame, (x_pos, y_pos), (x_pos + width, y_pos + height), colour, 2)
                    cv2.putText(frame, caption, (x_pos + 5, y_pos + height + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, colour, 2)

            # Add appropriate message to the frame
            add_text(frame, MESSAGES[current_mode])

            # If recording is True, add a blinking red circle to the top right and keep the frame
            if recording:
                # Blinking red circle: toggled every 30 recorded frames
                if len(all_frames) % 30 == 0:
                    red_circle = not red_circle
                if red_circle:
                    all_frames.append(np.copy(frame))  # Save a copy without the red circle
                    cv2.circle(frame, (FRAME_WIDTH - 20, 20), 10, (0, 0, 255), -1)
                else:
                    all_frames.append(frame)

            # Display the frame
            cv2.imshow(frame_title, frame)

            # Check for a key press
            key = cv2.waitKey(1) & 0xFF

            # User is ready and presses 's'
            if current_mode == MODE_WAITING and key == ord("s"):
                current_mode = MODE_LEFT_RIGHT

            # Save a screen grab of the frame named for the current time
            elif key == ord("p"):
                name = f"{datetime.datetime.now().time().strftime('%H_%M_%S')}.jpg"
                cv2.imwrite(os.path.join(os.getcwd(), name), frame)

            # Start/stop recording
            elif key == ord("r"):
                recording = not recording
                red_circle = recording

            # Quit
            elif key == ord("q"):
                break

        # If frames have been recorded, save them as a gif
        if all_frames:
            gif_path = os.path.join(os.getcwd(), "recording.gif")
            save_gif(all_frames, gif_path)

    finally:
        # Tidy up CV2 and the VideoStream, even if something above raised
        cv2.destroyAllWindows()
        stream.stop()


def register_user() -> None:
    """
    Start the capture loop in MODE_WAITING to register a new user
    """
    main(current_mode=MODE_WAITING)


def authenticate_user(recognizer: cv2.face.LBPHFaceRecognizer) -> None:
    """
    Start the capture loop in MODE_AUTHENTICATING using the given recognizer
    """
    main(current_mode=MODE_AUTHENTICATING, recognizer=recognizer)

# Face Detection

## Haar Cascade Classifier
<p>
    OpenCV provides a pre-trained model, <a href="https://github.com/kipr/opencv/blob/master/data/haarcascades/haarcascade_frontalface_default.xml">haarcascade_frontalface_default.xml</a>, for use with <a href="https://docs.opencv.org/4.8.0/db/d28/tutorial_cascade_classifier.html">cv2.CascadeClassifier</a>.
    The Haar Cascade was proposed by Paul Viola and Michael Jones in their 2001 paper <a href="https://www.khoury.northeastern.edu/home/vip/teach/DMcourse/3_dim_reduction/notes_slides/viola-cvpr-01.pdf">Rapid Object Detection using a Boosted Cascade of Simple Features</a>. There's an excellent explanation in <a href="https://medium.com/analytics-vidhya/haar-cascades-explained-38210e57970d">this</a> article, but to summarise, the technique computes <a href="https://en.wikipedia.org/wiki/Haar-like_feature">Haar features</a> over an <a href="https://en.wikipedia.org/wiki/Summed-area_table">integral image</a>, and uses a modified <a href="https://en.wikipedia.org/wiki/AdaBoost">AdaBoost</a> to create a series of cascading classifiers. At each stage of the cascade, if a region is determined not to contain an object, it does not progress to the next stage. This is a type of attentional mechanism to make the algorithm focus on the regions most likely to contain a face.
</p>

<p>
    The <em>register_user()</em> method in the next cell starts the video capture, with an example in this gif. The face detected by the Haar Cascade is surrounded with a bounding box, and mug shots are taken randomly to get a variety of angles.<br/>
    <img src="register_demo.gif"/>
    
</p>
<p>
    Here is a sample of the captured images, which will be used to train the recognizer.
    <img src="mug_shot_samples.png"/>
</p>

In [None]:
# Start the webcam capture loop in registration mode (MODE_WAITING)
register_user()

# Facial Recognition

## Local Binary Pattern Histogram (LBPH)
<p>
The LBPH face recognizer, implemented in OpenCV as <a href="https://docs.opencv.org/4.8.0/df/d25/classcv_1_1face_1_1LBPHFaceRecognizer.html">cv2.face.LBPHFaceRecognizer</a>, uses a combination of texture analysis and pattern recognition to recognize and differentiate faces.

LBPH captures the unique texture of a person's face by breaking down the image into smaller regions and analyzing the patterns within those regions. These patterns, represented as binary values, are then used to construct histograms for each face in a training dataset.

When it comes to recognizing a face, LBPH computes the pattern and histogram for a test image and compares it to the patterns and histograms of known faces in the training dataset.

LBPH is particularly robust to variations in lighting, facial expressions, and minor changes in pose.

Please check out <a href="https://towardsdatascience.com/face-recognition-how-lbph-works-90ec258c3d6b">this</a> article for a more in-depth explanation.
</p>
<p>
There are a <a href="https://docs.opencv.org/4.8.0/df/d25/classcv_1_1face_1_1LBPHFaceRecognizer.html#ac33ba992b16f29f2824761cea5cd5fc5">few parameters</a> to consider when creating the recognizer, but let's stick with the defaults because they affect performance in fairly predictable ways.
</p>

In [None]:
def create_recognizer(face_data_path: str = None) -> cv2.face.LBPHFaceRecognizer:
    """
    Build a new LBPHFaceRecognizer.
    When a data path is supplied, the recognizer reads that file before being returned
    """

    lbph = cv2.face.LBPHFaceRecognizer_create()

    # Without a data path the recognizer is returned as created
    if not face_data_path:
        return lbph

    lbph.read(face_data_path)
    return lbph

## Training

<p>
    The mug shots saved in the earlier step are loaded and the integer label extracted from the filename.
    The <a href="https://docs.opencv.org/4.8.0/dd/d65/classcv_1_1face_1_1FaceRecognizer.html#ac8680c2aa9649ad3f55e27761165c0d6"><i>train()</i></a> method (inherited from <a href="https://docs.opencv.org/4.8.0/dd/d65/classcv_1_1face_1_1FaceRecognizer.html">cv2.face.FaceRecognizer</a>) accepts the mug shots taken earlier, along with their labels, and computes the LBPH for each. The recognizer data can then be written to a YAML file containing the histograms and the labels.

</p>

In [None]:
def load_users() -> Tuple[List[np.ndarray], np.ndarray]:
    """
    Return the user images and labels

    Every file in SAVE_PATH whose name matches PIC_FILE_RE is read as a
    greyscale image; its label is the user ID taken from the file name.
    Files that cannot be read as an image are skipped with a warning.
    The labels are returned as an int32 array, in the same order as the images.
    """
    import warnings

    faces = []
    labels = []

    for file in sorted(os.listdir(SAVE_PATH)):
        match = re.search(PIC_FILE_RE, file)
        if not match:
            continue

        # Read in grayscale image; imread gives None when the file can't be decoded
        img = cv2.imread(os.path.join(SAVE_PATH, file), cv2.IMREAD_GRAYSCALE)
        if img is None:
            warnings.warn(f"Skipping unreadable image: {file}")
            continue
        faces.append(np.array(img, "uint8"))

        # Get the ID of the user from the filename
        labels.append(int(match.group(1)))

    return faces, np.array(labels, dtype=np.int32)


# Load the user images and labels
faces, labels = load_users()

# Create and train the recognizer
recognizer = create_recognizer()
recognizer.train(faces, labels)

# Write the recognizer data to file so it can be loaded from FACE_DATA when authenticating
recognizer.write(FACE_DATA)

## Authenticating
<p>
This step loads the previously computed LBPHs and compares them to the faces detected by the webcam.
</p>

![f](recognition_demo.gif)

In [None]:
# Build a recognizer from the saved face data, then start recognising faces from the webcam
recognizer = create_recognizer(face_data_path=FACE_DATA)
authenticate_user(recognizer=recognizer)

# Experiments

## Photo of User

What's the prediction confidence when presented with a photo of a registered user?

## Number of neighbours

# Conclusion