# Import Libraries

In [1]:
import os
import cv2
import numpy as np
import matplotlib as plt

# Load Dataset

In [19]:
def load_images(img_path):
    """Read an image file and return it together with its grayscale version.

    Args:
        img_path: Path handed to ``cv2.imread``.

    Returns:
        A ``(img, gray)`` pair, where ``img`` is the image as read by
        ``cv2.imread`` and ``gray`` is its ``COLOR_BGR2GRAY`` conversion.
        ``(None, None)`` is returned, after printing a message, when
        ``cv2.imread`` yields ``None``.
    """
    color = cv2.imread(img_path)
    if color is not None:
        return color, cv2.cvtColor(color, cv2.COLOR_BGR2GRAY)

    # cv2.imread signals a failed read by returning None instead of raising.
    print("error load image")
    return None, None

# Root folder of the dataset. Every file found below it is treated as an
# image, and its label is the name of the folder that directly contains it.
dataset_path = 'dataset/images'
images = []  # grayscale images, one entry per file that could be read
labels = []  # containing-folder name for the image at the same index

for root, dirs, files in os.walk(dataset_path):
    # Folders without files contribute no iterations, so no explicit
    # emptiness check is needed.
    for f in files:
        # load_images returns (color, gray); only the grayscale image is kept.
        _, image = load_images(os.path.join(root, f))
        if image is None:
            # File could not be read as an image: skip it.
            continue
        images.append(image)
        # os.path.basename understands the platform's path separator, whereas
        # splitting on '/' returns the whole path on Windows.
        labels.append(os.path.basename(root))

# check data length
print(f"label's length is {len(labels)}")
print(f"image's length is {len(images)}")


label's length is 70
image's length is 70


# Face Detection

In [23]:
# define model
# Frontal-face Haar cascade loaded from the cascade directory exposed by
# cv2.data.haarcascades.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# CascadeClassifier does not raise when the XML file cannot be loaded; it just
# stays empty and detectMultiScale later fails with an opaque assertion.
# Fail here instead, with a message that names the actual problem.
if face_cascade.empty():
    raise RuntimeError(
        "could not load Haar cascade: "
        + cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )

# face detection function
def detect_faces(img, scale_factor=1.1, min_neighbors=5, min_size=(30, 30)):
    """Run the module-level ``face_cascade`` detector on ``img``.

    Args:
        img: Image passed unchanged to ``face_cascade.detectMultiScale``.
        scale_factor: Forwarded as ``scaleFactor``.
        min_neighbors: Forwarded as ``minNeighbors``.
        min_size: Forwarded as ``minSize``.

    Returns:
        Whatever ``detectMultiScale`` returns. Callers in this file unpack
        each entry as ``(x, y, w, h)``.
    """
    detection_params = {
        "scaleFactor": scale_factor,
        "minNeighbors": min_neighbors,
        "minSize": min_size,
    }
    return face_cascade.detectMultiScale(img, **detection_params)

# face cropping function
def crop_faces(img, faces, return_all = False):
    """Cut face regions out of ``img``.

    Args:
        img: Array indexed as ``img[row, column]``.
        faces: Sized iterable of ``(x, y, w, h)`` rectangles.
        return_all: When true, crop every rectangle in order; otherwise crop
            only the rectangle with the largest ``w * h`` (the first one
            wins on ties).

    Returns:
        ``(cropped_faces, selected_faces)``: the list of cropped regions and
        the list of ``(x, y, w, h)`` tuples they were taken from. Both lists
        are empty when ``faces`` is empty.
    """
    if len(faces) == 0:
        return [], []

    def _area(rect):
        return rect[2] * rect[3]

    if return_all:
        boxes = [(x, y, w, h) for x, y, w, h in faces]
    else:
        x, y, w, h = max(faces, key=_area)
        boxes = [(x, y, w, h)]

    # Rows are selected by y and columns by x.
    crops = [img[top:top + height, left:left + width]
             for left, top, width, height in boxes]
    return crops, boxes

# Face Recognition

In [26]:
# resize and Flatten
# Target size handed to cv2.resize for every face crop.
face_size = (128, 128)

def resize_and_flatten(face):
    """Resize ``face`` to ``face_size`` and return it as a flat array.

    Args:
        face: Image array accepted by ``cv2.resize``.

    Returns:
        The result of calling ``.flatten()`` on the resized image.
    """
    return cv2.resize(face, face_size).flatten()

## Prepare Training Testing Data

In [31]:
# make dataset
# x collects one flattened face vector per usable image, y the matching label.
x, y = [], []

for image, label in zip(images, labels):
    faces = detect_faces(image)
    # Without return_all, crop_faces yields at most one crop (the largest box).
    cropped_faces, _ = crop_faces(image, faces)
    if not cropped_faces:
        # No face detected in this image: leave it out of the dataset.
        continue
    face_flattened = resize_and_flatten(cropped_faces[0])
    x.append(face_flattened)
    y.append(label)

# Convert the accumulated Python lists to NumPy arrays.
x = np.array(x)
y = np.array(y)

In [32]:
# split train and test
from sklearn.model_selection import train_test_split

# 30% of the samples go to the test set; stratify=y splits per label, and the
# fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [34]:
# preprocess image using mean centering 
from sklearn.base import BaseEstimator, TransformerMixin

class MeanCentering(BaseEstimator, TransformerMixin):
    """Transformer that subtracts the mean computed over axis 0 during ``fit``."""

    def fit(self, x, y=None):
        """Store the mean of ``x`` along axis 0 as ``self.mean_face``.

        ``y`` is accepted but not used. Returns ``self``.
        """
        mean_over_samples = np.mean(x, axis=0)
        self.mean_face = mean_over_samples
        return self

    def transform(self, x):
        """Return ``x`` with ``self.mean_face`` subtracted.

        ``fit`` must have been called first, since it is what sets
        ``self.mean_face``.
        """
        centered = x - self.mean_face
        return centered