# Face Recognition with PCA

## Import the libraries

In [1]:
import zipfile
import cv2
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA

In [5]:
ls

faces.zip  sample_data/


In [12]:
ls

faces.zip  sample_data/


In [8]:
pwd

'/content'

# Read the facial images from the zip file

In [None]:
# Load every .pgm image stored in the archive into a dict keyed by the
# image's path inside the zip.
faces = {}
with zipfile.ZipFile("faces.zip") as archive:
    # Anything that is not a .pgm file is not a face picture and is skipped.
    pgm_members = [name for name in archive.namelist() if name.endswith(".pgm")]
    for member in pgm_members:
        with archive.open(member) as handle:
            raw_bytes = np.frombuffer(handle.read(), np.uint8)
        # Decode from the in-memory bytes; had the files been extracted
        # to disk, cv2.imread(member) could be used instead.
        faces[member] = cv2.imdecode(raw_bytes, cv2.IMREAD_GRAYSCALE)

## Display some of the images

In [None]:
# Preview the last 16 loaded faces on a 4x4 grid, filled column by column.
fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(8, 10))
faceimages = list(faces.values())[-16:]
for i in range(16):
    col, row = divmod(i, 4)
    axes[row][col].imshow(faceimages[i], cmap="gray")
print("Showing sample faces")
plt.show()


In [None]:

# The shape of the first image is reused later to reshape flattened
# vectors back into pictures (assumes all images share this shape).
all_faces = list(faces.values())
faceshape = all_faces[0].shape

# A class is the top-level folder of each image path inside the zip.
classes = {path.split("/")[0] for path in faces}

print('Dimension of the facial image:', faceshape)
print('Number of classes : ', len(classes))
print(" Number of images : ", len(faces))


#### Take all images as training data except the last image of folder 39 and all images of folder 40 (test images).

In [16]:

# Collect the training set: one flattened pixel vector and one folder
# label per image. Held out as test images are every image under s40/
# and the single image s39/10.pgm.
facematrix = []
facelabel = []

for path, image in faces.items():
    held_out = path.startswith("s40/") or path == "s39/10.pgm"
    if not held_out:
        facematrix.append(image.flatten())
        facelabel.append(path.split("/")[0])
  

In [17]:
# Convert the list of flattened training images into an N x M matrix,
# where N is the number of images and M is the total number of pixels
# per image.

facematrix = np.array(facematrix)

## Apply PCA

In [None]:
# Fit PCA on the training matrix. No component count is passed here;
# the components to keep are selected afterwards by slicing.
pca = PCA()
pca.fit(facematrix)

In [None]:
# Number of leading principal components to keep.
n_components = 50
# The first n_components rows of the fitted components; each row is
# later reshaped to faceshape and displayed as an "eigenface".
eigenfaces = pca.components_[:n_components]

## Plot the explained variance to check how many components to retain

In [None]:
# Per-component share of the variance, read from PCA's
# explained_variance_ratio_ attribute.
exp_var_pca = pca.explained_variance_ratio_

# Running total of those shares; drawn as a step curve so the number of
# components needed for a given variance level can be read off the plot.
cum_sum_eigenvalues = np.cumsum(exp_var_pca)

# Both series have one entry per principal component, so they share an x axis.
component_index = range(len(exp_var_pca))
plt.bar(
    component_index,
    exp_var_pca,
    alpha=0.5,
    align='center',
    label='Individual explained variance',
)
plt.step(
    component_index,
    cum_sum_eigenvalues,
    where='mid',
    label='Cumulative explained variance',
)
plt.xlabel('Principal component index')
plt.ylabel('Explained variance ratio')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

In [None]:
# Draw the first 16 eigenfaces on a 4x4 grid, filled column by column.
# Each eigenface is a flat vector, so it is reshaped back to the image shape.
fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(8, 10))
for i in range(16):
    col, row = divmod(i, 4)
    eigenface_image = eigenfaces[i].reshape(faceshape)
    axes[row][col].imshow(eigenface_image, cmap="gray")
print("Showing the eigenfaces")
plt.show()

## Projecting data to lower dimension

In [None]:
# Project the mean-centred training images onto the eigenfaces.
# The result is a K x N matrix: one column of K weights per training
# image (K = number of eigenfaces, N = number of training images).
weights = eigenfaces @ (facematrix - pca.mean_).T
# Bare expression: in a notebook its value is shown as the cell output.
weights.shape

## Face Match

## Correct Match

In [None]:
# Query with s39/10.pgm, the one image of subject s39 that was left out
# of the training set.
query = faces["s39/10.pgm"].reshape(1, -1)
# Project the mean-centred query onto the eigenfaces: a K x 1 column of weights.
query_weights = eigenfaces @ (query - pca.mean_).T

# Distance from the query to every training image in eigenface space.
# The K x 1 query column broadcasts against the K x N training weights,
# and the norm over axis 0 yields one distance per training image.
# (Fixed: this previously referenced the undefined name `query_weight`.)
euclidean_distance = np.linalg.norm(weights - query_weights, axis=0)

# The nearest training image decides the predicted label.
best_match = np.argmin(euclidean_distance)
print("Best match %s with Euclidean distance %f" % (facelabel[best_match], euclidean_distance[best_match]))

In [None]:
# Show the query image side by side with its nearest training image.
fig, axes = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(8, 6))
panels = [
    ("Query", query),
    ("Best match", facematrix[best_match]),
]
for ax, (title, pixels) in zip(axes, panels):
    # Both images are stored flat, so restore the 2-D image shape first.
    ax.imshow(pixels.reshape(faceshape), cmap="gray")
    ax.set_title(title)
plt.show()

## Mismatch

In [None]:
# Query with an image of subject s40. Every s40 image was excluded from
# the training set, so the best match can only carry another subject's label.
query = faces["s40/1.pgm"].reshape(1, -1)
# Project the mean-centred query onto the eigenfaces: a K x 1 column of weights.
query_weights = eigenfaces @ (query - pca.mean_).T

# Distance from the query to every training image in eigenface space.
# The K x 1 query column broadcasts against the K x N training weights,
# and the norm over axis 0 yields one distance per training image.
# (Fixed: this previously referenced the undefined name `query_weight`.)
euclidean_distance = np.linalg.norm(weights - query_weights, axis=0)

# The nearest training image decides the predicted label.
best_match = np.argmin(euclidean_distance)
print("Best match %s with Euclidean distance %f" % (facelabel[best_match], euclidean_distance[best_match]))

In [None]:
# Show the query image side by side with the training image it was matched to.
fig, axes = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(8, 6))
titled_images = (
    ("Query", query),
    ("Best match", facematrix[best_match]),
)
for axis, (caption, flat_image) in zip(axes, titled_images):
    # Images are kept as flat vectors; reshape to 2-D before drawing.
    axis.imshow(flat_image.reshape(faceshape), cmap="gray")
    axis.set_title(caption)
plt.show()