# Face Recognition Using Eigenfaces

The following algorithm takes people's images with different emotions, and then uses the concept of Eigneface to find out similarity among them and further identify them. 

The image dataset is provided with repo, which is taken from [Yale Face Dataset](https://www.face-rec.org/databases/). Upload the whole image dataset here, and the following code generates a training and (exclusive) test set out of it. 

Note: The dataset used here is Yale Face Database (contains 165 grayscale images in GIF format of 15 individuals)

In [4]:
from google.colab import files
uploaded = files.upload()

Saving subject01.centerlight to subject01.centerlight
Saving subject01.glasses to subject01.glasses
Saving subject01.happy to subject01.happy
Saving subject01.leftlight to subject01.leftlight
Saving subject01.noglasses to subject01.noglasses
Saving subject01.normal to subject01.normal
Saving subject01.rightlight to subject01.rightlight
Saving subject01.sad to subject01.sad
Saving subject01.sleepy to subject01.sleepy
Saving subject01.surprised to subject01.surprised
Saving subject01.wink to subject01.wink
Saving subject02.centerlight to subject02.centerlight
Saving subject02.glasses to subject02.glasses
Saving subject02.happy to subject02.happy
Saving subject02.leftlight to subject02.leftlight
Saving subject02.noglasses to subject02.noglasses
Saving subject02.normal to subject02.normal
Saving subject02.rightlight to subject02.rightlight
Saving subject02.sad to subject02.sad
Saving subject02.sleepy to subject02.sleepy
Saving subject02.surprised to subject02.surprised
Saving subject02.win

Creating separate groups of images to be used as training and test sets.

In [12]:
!mkdir data
!mv subject* data/.
!rm -r training_data/
!mkdir training_data
!touch training_data/info.txt
!rm -r test_data/
!mkdir test_data
!touch test_data/test.txt

mkdir: cannot create directory ‘data’: File exists
mv: cannot stat 'subject*': No such file or directory


In [13]:
import os 
import shutil
import numpy
import cv2
from random import randint

all_img = open("training_data/info.txt",'w')
all_sub = []
a = 1
test_img = open("test_data/test.txt",'w')
test_sub = []
t = 1

# The image of each of the subjects are named as subjecti.emotions.png
emotions = ['centerlight','glasses','happy','leftlight','noglasses','normal','rightlight','sad','sleepy','surprised','wink']
for i in ["subject01.","subject02.","subject03.","subject04.","subject05.","subject06.","subject07.","subject08.","subject09.","subject10.","subject11.","subject12.","subject13.","subject14.","subject15."]:
  test = [randint(0,10),randint(0,10)]
  
  #Randomly selecting the images for the test set
  if test[0]==test[1]: 
    if test[0]==10: test[1] = 9
    else: test[1] = test[0]+1
  for j in range(11):
    if j in test:
      cap = cv2.VideoCapture(os.path.join('data',i+emotions[j]))
      ret, frame = cap.read()
      if (ret):
        #The algorithm works on gray scale images 
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cv2.imwrite('test_data/'+str(t)+'.jpg',gray)
        test_sub.append(i[7:9]+"\n")
        t = t+1
    else:
      cap = cv2.VideoCapture(os.path.join('data',i+emotions[j]))
      ret, frame = cap.read()
      if (ret):
        #The algorithm works on gray scale images 
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cv2.imwrite('training_data/'+str(a)+'.jpg',gray)
        all_sub.append(i[7:9]+"\n")
        a = a+1
      
all_img.writelines(all_sub)
all_img.close()
test_img.writelines(test_sub)
test_img.close()
print(str(a+t-2)+" images in total, are loaded")

165 images in total, are loaded


Eigenfaces are extracted from the training data set and used to identify the images further. Principal component analysis is used to reduce the data in computation without compromising the output quality to a large extent. 

The number of components to be retained is decided internally based on the thershold provided, which is percentage (i.e. out of 100) value of the amount of information to be retained, and is calculated as the sum over the retained components, of the fractional magnitude of eigenvalue.  

In [14]:
import os 
import numpy as np
import cv2
from google.colab.patches import cv2_imshow

#Getting the Viola Jones Classifier - pretrained classifier
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# The test can be performed over either of the training or test sets
r_file = open('test_data/test.txt','r')
# r_file = open('training_data/info.txt','r')
result = r_file.readlines()
r_file.close()

l_file = open('training_data/info.txt','r')
labels = l_file.readlines()
l_file.close()

#Reading the dataset of images
def load_img(folder):
  images = []
  num = len(os.listdir(folder))
  for i in range(num-1):
    img = cv2.imread(folder+'/'+str(i+1)+'.jpg')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    images.append(img)
  return images

#Loading the dataset
images = load_img('training_data')
print("Training Dataset Loaded ........")

#Detection of faces using Voila Jones Cascade Classifier
def detect_face(img):
  # cv2_imshow(img)
  roi = []
  faces = face_cascade.detectMultiScale(img, 1.3, 5)
  if(np.size(faces) > 0):
    #If face detected, pass only the region of interest
    for (x,y,w,h) in faces:
      img = img[y:y+h, x:x+w]
      roi = [x,y,w,h]
    return img,roi
  else:
    #If no face detected
    return [],roi 
    
faces = []
rect = []
size = []
for img in images:
  face,roi = detect_face(img)
  if(np.size(face) > 0):
    faces.append(face)
    rect.append(roi)
    size.append(np.size(face,1))
frame_size = np.max(size)

# The cropped face area sizes may differ, thus resizing them to the same size
for i in range(len(faces)):
  faces[i] = cv2.resize(faces[i],(frame_size,frame_size))  
  faces[i] = faces[i].reshape(frame_size*frame_size,1)

print("Faces detected .........")

# cv2_imshow(faces[10])
# comment out the reshaping command, to check the cropped out image 

def my_face_recognizer(faces, labels):
  # Stack of n^2 by 1 matrices, m in number 
  faces = np.array(faces)
  n = np.size(faces,1)
  m = np.size(faces,0)
  
  faces = faces[:,:,0].T
  
  # Finding the sample mean (Average face)
  frame_size = int(np.sqrt(n))
  avg_face = np.average(faces, axis=1).reshape((n,1))
  # avg_face = avg_face.reshape((frame_size,frame_size))
  # The average face formed can be seen here
  # cv2_imshow(avg_face)

  print("Training Started ...........")
  # Normalising the faces
  norm_faces = faces-avg_face

  # Finding the Covariance matrix
  # Performing A.T * A so that the size of the matrix is limited to M^2
  # and does not blow up to N^2
  Cov = np.matmul(norm_faces.T, norm_faces)
  # Using numpy function to compute the eigenvalues and eigenvectors of the reduced covariance matrix computed here
  values, vectors = np.linalg.eig(Cov)
  vectors = np.append(vectors, np.reshape(values,(np.size(vectors,0),1)), axis=1)
  vectors = vectors[np.argsort(vectors[:, -1])[::-1]]
  values = vectors[:,-1]
  vectors = vectors[:,:-1]
  
  for i in range(len(values)-1):
    if(values[i]<values[i+1]):
      print("Error!!!!!!!!!!!")

  # Computing the eigenvectors of the actual covariance matrix
  vectors = np.matmul(norm_faces,vectors)
  
  #Deciding k
  tot_eig = np.sum(values)
  weightage = [(i/tot_eig)*100 for i in values]
  coverage = np.cumsum(weightage)
  threshold = 97
  # %coverage of the variance, decides the number of Principal components to be used for the Dimensionality reduction
  k = 1
  while(coverage[k-1]<threshold):
    k = k+1
  print("K is decided to be: "+str(k))

  values = values[len(values)-k:]
  vectors = vectors[:,np.size(vectors,1)-k:]

  weights = np.matmul(vectors.T,norm_faces)
  # weights = np.linalg.lstsq(vectors, norm_faces, rcond=-1)[0]
  #Instead of solving the matrix equation, a simple matrix multiplication would also work, as eigenvectors involved are orthogonal to each other

  # trained = np.matmul(vectors, weights)
  trained_recognizer_model = {}
  trained_recognizer_model["Average face"] = avg_face
  trained_recognizer_model["Weights"] = weights
  trained_recognizer_model["Eigenvector"] = vectors
  trained_recognizer_model["Eigenvalue"] = values
  return trained_recognizer_model

trained_recognizer_model = my_face_recognizer(faces, labels)
print("Training completed ...........") 

def recognize(face,trained_recognizer_model):
  # cv2_imshow(face)
  face = face.reshape(frame_size*frame_size,1)
  norm_face = face - trained_recognizer_model["Average face"]
  test_weights = np.matmul(trained_recognizer_model["Eigenvector"].T,norm_face)
  # test_weights = np.linalg.lstsq(trained_recognizer_model["Eigenvector"], norm_face, rcond=-1)[0]
  dis = np.linalg.norm(trained_recognizer_model["Weights"]-test_weights, axis=0)
  match = np.where(dis == np.min(dis))
  return match
  
def test(test_data):
  test_images = load_img(test_data)
  total = 0
  correct = 0
  for i in range(len(test_images)):
    img = test_images[i]
    face,roi = detect_face(img)
    if (np.size(face) > 0):
      total = total+1
      face = cv2.resize(face,(frame_size,frame_size))
      name = recognize(face,trained_recognizer_model)
      # cv2_imshow(face)
      # The actual result, and the predicted one, are printed by the following command
      print(result[i] + " : " + labels[name[0][0]])
      if (result[i] == labels[name[0][0]]):
         correct = correct+1
  else:
      print("No face in the image")
  return correct/total

print("Testing started ..........")
accuracy = test('test_data')
# accuracy = test('training_data')
print("Testing completed............")
print("Accuracy is: "+str(accuracy*100)+" %")

Training Dataset Loaded ........
Faces detected .........
Training Started ...........
K is decided to be: 73
Training completed ...........
Testing started ..........
01
 : 01

01
 : 01

02
 : 02

02
 : 02

03
 : 03

03
 : 08

04
 : 04

04
 : 03

05
 : 05

05
 : 05

06
 : 02

06
 : 06

07
 : 07

07
 : 03

08
 : 08

08
 : 10

09
 : 09

09
 : 09

10
 : 03

10
 : 10

11
 : 11

11
 : 11

12
 : 12

12
 : 12

13
 : 13

14
 : 14

14
 : 04

15
 : 15

15
 : 15

No face in the image
Testing completed............
Accuracy is: 75.86206896551724 %
