<a href="https://colab.research.google.com/github/stschoberg/deeplearning/blob/main/facial_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Facial Recognition Using MTCNN, FaceNet, and K-Means Clustering

This notebook uses a combination of transfer learning with CNNs and classical machine learning algorithms to groups faces of the same identity together. Given a dataset of images, we want to be able to cluster individuals together across the entire set. In other words, we want to be able to say with a high degree of accuracy that person A is present in picture 1, 32, 53, 34, and 87. Its important to note early on that this is an unsupervised learning problem. The algorithm must be able to group faces with the same identity together without any labels. 

Below is a brief overview of the process needed to accomplish this.



1.   Determine the location of faces in all photos from the dataset.
2.   Using the coordinates of each face, extract those faces from the original photos.
3. Create an embedding of that face (a feature vector) that is unique to that face. 
4. Cluster those feature vectors together to create groups of the same person. 



In [None]:
!pip3 install mtcnn
!pip3 install opencv-contrib-python
!pip3 install pillow
!pip install git+git://github.com/PnS2019/pnslib.git
!pip3 install deepface

In [11]:
from google.colab import drive # Import photos from Google Photos
from mtcnn.mtcnn import MTCNN # Facial detection
from keras.models import load_model # Load pretrained models into tf (transfer learning)
from PIL import Image # Image manipulation package
import numpy as np
from pnslib import utils # Download certain feature detection models (eyes, mouths)
from matplotlib import pyplot as plt
import cv2

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
facenet = load_model('/content/gdrive/MyDrive/cv/facenet_keras.h5')
print(facenet.inputs)
print(facenet.outputs)

[<KerasTensor: shape=(None, 160, 160, 3) dtype=float32 (created by layer 'input_1')>]
[<KerasTensor: shape=(None, 128) dtype=float32 (created by layer 'Bottleneck_BatchNorm')>]


In [12]:
# Takes in a filepath, opens the image, and returns the pixels as an np array
def open_prepare_image(file):
  image = Image.open(file).convert('RGB')
  pixels = np.asarray(image)

  return pixels

In [13]:
# Reads in pxl array, outputs bounding box of all detected faces [x, y, width, height]
def get_face_boxes(pxls):
  detector = MTCNN()
  results = detector.detect_faces(pxls)
  confident = filter(lambda res: res['confidence'] > 0.95, results)
  return [box_coords['box'] for box_coords in confident]

In [14]:
# Converts bounding box coords [x, y, width, height] to two points
def calc_faces_coords(face_boxes):
  face_coords = []

  for face in face_boxes:
    x1, y1, width, height = face
    x1, y1 = abs(x1), abs(y1)
    x2, y2 = x1 + width, y1 + height
    face_coords.append([x1, y1, x2, y2])

  return face_coords


In [15]:
# Given the coordinates of a face and pxls, extracts the face from the image and resizes it accordingly
def faces_from_coords(face_coords, pxls, required_size=(160,160)):
  faces = []
  for coord in face_coords:
    x1, y1, x2, y2 = coord
    face = pxls[y1:y2, x1:x2]
    image = Image.fromarray(face)
    image = image.resize(required_size)
    faces.append(np.asarray(image))

  return faces

In [16]:
y = np.array([
     'alec', 'julia', 'erin', 'u', 'u',
     'u', 'u', 'u', 'u', 'u',
     'u', 'ross', 'ben', 'alec', 'ben',
     'erin', 'julia', 'pat', 'tony', 'sam',
     'u', 'collin', 'u', 'u', 'u',
     'u', 'u', 'alec', 'ben', 'julia',
     'collin', 'erin', 'tony', 'u', 'pat',
     'u', 'u', 'sam', 'u', 'u',
     'u', 'alec', 'tony', 'pat', 'ben',
     'erin', 'u', 'sam', 'u', 'collin',
     'julia', 'u', 'u', 'u', 'u',
     'sam', 'erin', 'alec', 'remi', 'sam',
     'alec', 'erin', 'tony', 'julia', 'max',
     'sam', 'collin', 'sam', 'tony', 'erin',
     'julia', 'alec', 'max', 'u', 'u',
     'alec', 'sam', 'sam', 'sam', 'amy',
     'collin', 'sam', 'max', 'alec', 'u',
     'erin', 'collin', 'sam', 'max', 'u',
     'alec', 'u', 'sam', 'erin', 'u',
     'max', 'max', 'sam', 'max', 'u',
     'erin', 'sam', 'jack', 'max', 'u',
     'sam', 'benny', 'erin', 'max', 'sam',
     'jack', 'max', 'sam', 'max', 'u',
     'dillon', 'ross', 'max', 'dillon', 'ross',
     'sam', 'sam', 'jack', 'max', 'ross',
     'max', 'sam', 'jack', 'max', 'u',
     'ross', 'max', 'sam', 'ross', 'ross',
     'u', 'alec', 'ross', 'sam', 'pat',
     'sam', 'benny', 'u', 'julia', 'sam',
     'sam', 'benny', 'julia', 'benny', 'sam',
     'julia', 'pat', 'sam', 'benny', 'julia',
     'sam', 'benny', 'alec', 'max', 'erin',
     'collin', 'phillipe', 'ehaab', 'sam', 'ross',
     'julia', 'pat', 'u', 'tony', 'tony',
     'u', 'benny', 'collin', 'ross', 'sam',
     'max', 'phillipe', 'erin', 'ehaab', 'alec',
     'pat', 'julia', 'jack', 'tony', 'u',
     'collin', 'phillipe', 'alec', 'ehaab', 'max',
     'julia', 'benny', 'erin', 'sam', 'ross',
     'pat', 'tony', 'jack', 'max', 'sam',
     'collin', 'alec', 'erin', 'phillipe', 'ehaab',
     'ross', 'julia', 'sam', 'max', 'benny',
     'collin', 'pat', 'u', 'tony', 'sam', 
     'erin', 'max', 'sam', 'alec', 'ehaab',
     'collin', 'ross', 'benny', 'julia', 'phillipe',
     'pat', 'u', 'tony', 'jack', 'sam',
     'collin', 'erin', 'max', 'sam', 'alec',
     'ehaab', 'collin', 'ross', 'benny', 'julia',
     'phillipe', 'pat', 'u', 'tony', 'jack',
     'sam', 'collin', 'erin', 'ehaab', 'max',
     'benny', 'sam', 'ehaab', 'alec', 'pat',
     'tony', 'ross', 'jack', 'julia', 'collin',
     'u', 'ehaab', 'alec', 'sam', 'ross',
     'max', 'erin', 'pat', 'julia', 'jack',
     'tony', 'benny', 'jack', 'u', 'u'])


This block below extracts all the faces from the provided images. It stores faces as a pixel array in *faces* and the bounding box for each face in *face_boxes_all*. 

In [None]:
imgs = !ls '/content/gdrive/MyDrive/cv/imgs'
imgs = ' '.join(imgs).split()
faces = []
face_boxes_all = []
for img in imgs:
  print(img)
  pxls = open_prepare_image('/content/gdrive/MyDrive/cv/imgs/' + img)
  face_boxes = get_face_boxes(pxls)
  face_coords = calc_faces_coords(face_boxes)
  faces_final = faces_from_coords(face_coords, pxls)
  faces = faces + faces_final
  face_boxes_all = face_boxes_all + face_boxes

faces = np.asarray(faces)
face_boxes_all = np.asarray(face_boxes_all)

Some faces in the images are unidentifiable due to poor image quality. The unsupervised algorithm should still be able to cluster these however we have no way to determine if they were classified correctly, so we remove them. 

In [18]:
unknowns = np.argwhere(y=='u').flatten()
y_identifiable = np.delete(y, unknowns)
faces_identifiable = np.delete(faces, unknowns, axis=0)
face_boxes_identifiable = np.delete(face_boxes_all, unknowns, axis=0)

In [19]:
faces_float = faces_identifiable.astype('float32')
mean, std = faces_float.mean(), faces_float.std()
faces_standardized = (faces_float - mean)/std

yhat = facenet.predict(faces_standardized)

In [24]:
from sklearn.cluster import KMeans
from sklearn.metrics.cluster import homogeneity_score

kmeans = KMeans(n_clusters=len(set(y_identifiable)),init='k-means++',n_init=100, random_state=42).fit_predict(yhat)
kmeans

array([ 2,  5, 12,  2,  2, 15, 16,  9,  6,  8, 12,  3, 13, 15, 16,  6, 13,
        9, 12,  8,  3, 15, 12,  8, 16,  9,  3, 13,  6,  3,  9, 15,  2,  3,
       15,  9, 12,  6,  7,  3,  2,  3, 12,  9,  6, 15,  7, 15,  3,  3,  3,
        2, 14,  3,  7, 15,  2,  2,  3,  7, 15,  3,  9,  5,  7,  3,  7,  5,
       13,  2,  2,  3,  4,  9,  7, 13,  2,  2, 13,  2,  2,  2,  5,  5,  2,
        2, 13,  2,  2,  5,  5, 13,  2,  2,  5, 14,  2,  5,  5,  2,  2,  3,
        2,  3,  4,  2,  3,  3,  4,  2,  4,  3,  2,  2,  3,  4,  2,  3,  4,
       11,  7,  9, 14,  5,  1,  0, 10,  6,  8,  5,  2, 14, 14, 10,  0,  7,
        5,  9,  1, 11,  8,  6,  5,  5, 14,  5, 11,  1,  7,  6, 14,  9,  0,
       10,  8,  5,  5,  7,  3,  2, 11,  9,  5,  1, 10,  6,  0,  5, 14, 14,
        8,  5,  3,  9,  7,  0, 11,  1, 14, 10, 14,  6,  5,  8,  5,  5,  3,
       14,  9,  7,  0, 11,  1, 14, 10, 14,  6,  5,  8,  5,  5,  3, 14,  9,
        1,  7, 14,  0,  5, 11,  8,  5, 10,  5,  5, 14,  1, 11,  0, 10,  7,
        5,  8,  5,  2,  5

In [25]:
 homogeneity_score(y_identifiable, kmeans)

0.7014832262841287

In [1]:
show_cluster(np.argwhere(kmeans==0).flatten())

NameError: ignored

In [26]:
for cluster in range(len(set(y_identifiable))):
  print('cluster: ', cluster)
  print(y_identifiable[np.where(kmeans == cluster)])

cluster:  0
['sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam']
cluster:  1
['ehaab' 'ehaab' 'ehaab' 'ehaab' 'ehaab' 'ehaab' 'ehaab' 'ehaab']
cluster:  2
['alec' 'ross' 'ben' 'remi' 'collin' 'amy' 'erin' 'collin' 'jack' 'max'
 'jack' 'max' 'max' 'dillon' 'ross' 'ross' 'sam' 'jack' 'max' 'jack' 'max'
 'sam' 'alec' 'ross' 'pat' 'julia' 'julia' 'julia' 'pat' 'julia' 'tony'
 'collin' 'jack']
cluster:  3
['sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam'
 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam' 'sam'
 'sam' 'sam']
cluster:  4
['benny' 'benny' 'benny' 'benny' 'benny' 'benny']
cluster:  5
['julia' 'max' 'erin' 'max' 'dillon' 'ross' 'max' 'ross' 'ross' 'ross'
 'phillipe' 'tony' 'phillipe' 'jack' 'tony' 'phillipe' 'tony' 'jack'
 'phillipe' 'max' 'tony' 'phillipe' 'tony' 'jack' 'phillipe' 'tony' 'jack'
 'ehaab' 'tony' 'jack' 'julia' 'erin' 'julia' 'tony' 'jack']
cluster:  6
['julia' 'julia' 'julia' 'julia' 'julia' 'julia' 'julia' 'julia' 'julia'
 'ju

It appears as if clusters 2, 5, and 14 have the most incorrect classifications. Lets investigate those further. 

In [2]:
cluster2 = np.argwhere(kmeans==2).flatten()
cluster5 = np.argwhere(kmeans==5).flatten()
cluster14 = np.argwhere(kmeans==14).flatten()

def show_cluster(cluster):
  plt.figure(figsize = (20,160))

  for ix, face in enumerate(cluster):
    plt.subplot(55, 5, ix+1)
    plt.axis('off')
    plt.imshow(faces_identifiable[face])

  plt.show()

NameError: ignored

Cluster2 seems to contain high res images, but many are flipped or rotated. Per the [Facenet paper](https://arxiv.org/pdf/1503.03832.pdf), the algorithm works best when faces are aligned (all in the same rotation). As you'll see below, face alignment can be a tricky problem. 

In [None]:
show_cluster(cluster2)

Cluster5 contains lots of low-res and rotated images. There isn't much we can do about the low quality, but maybe rotating some of the images to the proper orientation may help. The same goes for Cluster14.

In [None]:
show_cluster(cluster5)

In [None]:
show_cluster(cluster14)

In [None]:
import dlib
predictor5 = dlib.shape_predictor('/content/gdrive/MyDrive/cv/shape_predictor_5_face_landmarks.dat')
predictor68 = dlib.shape_predictor('/content/gdrive/MyDrive/cv/shape_predictor_68_face_landmarks.dat')

landmarks = []

for ix, face in enumerate(faces_identifiable):
  x, y, w, h = face_boxes_identifiable[ix]
  dlib_rect = dlib.rectangle(0, 0, 160, 160)
  detected_landmarks = predictor68(face, dlib_rect)
  landmarks.append(detected_landmarks.parts())

landmarks_arr = np.array([[(p.x, p.y) for p in face] for face in landmarks])
landmarks_arr

In [None]:
plt.figure(figsize = (20,160))

for ix, face in enumerate(faces_identifiable):

  for landmark in landmarks_arr[ix]:
    pos = (landmark[0], landmark[1])
    cv2.circle(face, pos, 5, (255,0,0))


  plt.subplot(55, 5, ix+1)
  plt.axis('off')
  plt.imshow(face)

plt.show()



In [None]:
from matplotlib import pyplot as plt
mouth_detector = cv2.CascadeClassifier(utils.get_haarcascade_path('haarcascade_mcs_mouth.xml'))
eye_detector = cv2.CascadeClassifier(utils.get_haarcascade_path('haarcascade_eye.xml'))
eye_glass_detector = cv2.CascadeClassifier(utils.get_haarcascade_path('haarcascade_eye_tree_eyeglasses.xml'))
img_gray = cv2.cvtColor(faces_identifiable[3], cv2.COLOR_BGR2GRAY)

eyes = eye_glass_detector.detectMultiScale(img_gray)
# mouth = mouth_detector.detectMultiScale(img_gray)
# five = predictor(gray)

index = 0
for (eye_x, eye_y, eye_w, eye_h) in eyes:
   if index == 0:
      eye_1 = (eye_x, eye_y, eye_w, eye_h)
   elif index == 1:
      eye_2 = (eye_x, eye_y, eye_w, eye_h)
 
   cv2.rectangle(faces_identifiable[3],(eye_x, eye_y),(eye_x+eye_w, eye_y+eye_h), (255,0,0), 2)
   index = index + 1

plt.imshow(faces_identifiable[3])

In [None]:
eyes = []
mouth = []
for face in faces_identifiable:
  img_gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
  eyes.append(eye_detector.detectMultiScale(img_gray, 1.05))

In [None]:
from matplotlib import pyplot as plt
plt.figure(figsize = (20,160))

for ix, face in enumerate(faces):

  for (eye_x, eye_y, eye_w, eye_h) in eyes[ix]:
    cv2.rectangle(face, (eye_x, eye_y),(eye_x+eye_w, eye_y+eye_h), (255,0,0), 2)

  plt.subplot(55, 5, ix+1)
  plt.axis('off')
  plt.imshow(face)

plt.show()

IndexError: ignored

<Figure size 1440x11520 with 0 Axes>

In [None]:
from sklearn.neighbors import NearestNeighbors

nbrs = NearestNeighbors(n_neighbors=19,n_iterations=100, algorithm='ball_tree').fit(yhat)

distances, indices = nbrs.kneighbors(yhat)

TypeError: ignored