In [None]:
import os
import cv2
import numpy as np

In [None]:
# Face preprocessing: for every identity folder under input_dir, detect the face
# in each .jpg, crop it, resize it, gamma-correct it and write it to the matching
# folder under output_dir using the same filename.

# Loading the face cascade classifier
face_cascade = cv2.CascadeClassifier("./lbpcascade_frontalface.xml")
if face_cascade.empty():
    # CascadeClassifier does not raise on a bad path; it just stays empty and
    # fails later inside detectMultiScale with a much less helpful message.
    raise FileNotFoundError("Could not load cascade file ./lbpcascade_frontalface.xml")

input_dir = "./raw_dataset/"
output_dir = "./preprocessed_dataset/"

# Fixed output size (width, height) of every saved face crop
face_size = (110, 110)

# Gamma value used for the brightness correction
gamma = 1.5

# Looping over the identity folders and processing each image inside them
for name in os.listdir(input_dir):
    person_dir = os.path.join(input_dir, name)

    # Stray files next to the identity folders are ignored; listing them as a
    # directory would raise.
    if not os.path.isdir(person_dir):
        continue

    # Create the output directory if it doesn't exist
    output_folder = os.path.join(output_dir, name)
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(person_dir):
        if not filename.endswith('.jpg'):
            continue

        # Loading the input image; imread returns None for unreadable files
        img = cv2.imread(os.path.join(person_dir, filename))
        if img is None:
            continue

        # Converting the image to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Detecting the faces in the image using the face cascade classifier
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        # Skipping the image if no faces are detected
        if len(faces) == 0:
            continue

        # Only one crop can be stored per input file (the output path is the
        # input filename), so keep the largest detection instead of letting
        # whichever face happens to come last overwrite the others.
        x, y, w, h = max(faces, key=lambda box: box[2] * box[3])

        # Cropping the face region and resizing it to the fixed size
        face_img = cv2.resize(gray[y:y+h, x:x+w], face_size)

        # Normalizing the pixel values of the face image to be between 0 and 1
        face_img = face_img.astype(float) / 255.0

        # Applying gamma correction on the already-normalized image. (A second
        # division by 255 here would only rescale the values, which the
        # min-max normalization below cancels out again.)
        corrected = cv2.pow(face_img, gamma)

        # Stretching the result back to the full 8-bit range
        face_img = cv2.normalize(corrected, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)

        # Saving the face image to the output directory
        output_filename = os.path.join(output_folder, filename)
        cv2.imwrite(output_filename, face_img)

In [None]:
from skimage.feature import hog, local_binary_pattern
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Training hyper-parameters. num_epochs drives the training loop; batch_size is
# not referenced by the cells visible in this notebook.
batch_size, num_epochs = 128, 5

# Root folder of the pre-processed face crops written by the preprocessing cell
input_dir = "./preprocessed_dataset/"

In [None]:
def image_generator(input_dir):
    """Yield one ``(features, labels)`` pair per sub-directory of ``input_dir``.

    Every ``.jpg`` in a sub-directory is read as a grayscale image and turned
    into a feature vector made of its HOG descriptor followed by its flattened
    uniform-LBP map. The label is parsed from the filename: the parts before
    the first two underscores, or only the first part when the name has a
    single underscore.

    Images and labels are collected in the same pass, so a file that is skipped
    (unreadable image or a filename without an underscore) is dropped from both
    and ``features[i]`` always corresponds to ``labels[i]``.

    Parameters
    ----------
    input_dir : str
        Directory containing one sub-directory of images per identity.

    Yields
    ------
    features_batch : numpy.ndarray
        2-D array with one row per successfully loaded image.
    labels_batch : list of str
        Labels in the same order as the rows of ``features_batch``.

    Notes
    -----
    All images in a sub-directory are assumed to share the same size;
    otherwise the per-image vectors cannot be stacked into a 2-D array.
    """
    # Defining the HOG parameters
    block_size = (8, 8)
    cell_size = (4, 4)
    num_bins = 9

    # Defining the LBP parameters
    radius = 3
    num_points = 8 * radius

    for name in os.listdir(input_dir):
        person_dir = os.path.join(input_dir, name)
        if not os.path.isdir(person_dir):
            continue

        hog_features = []
        lbp_features = []
        labels_batch = []

        for filename in os.listdir(person_dir):
            if not filename.endswith(".jpg"):
                continue
            image_path = os.path.join(person_dir, filename)

            # Deriving the label first: a filename without any underscore
            # cannot be labelled, so there is no point in loading the image.
            split_filename = filename.split("_")
            if len(split_filename) < 2:
                print(f"Error processing filename: {image_path}")
                continue
            firstname, lastname = split_filename[:2]
            if len(split_filename) == 2:
                label = f"{firstname}"
            else:
                label = f"{firstname}_{lastname}"

            # Loading the pre-processed face image
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                print(f"Error loading image: {image_path}")
                continue

            # Extracting HOG features
            hog_feat = hog(img, orientations=num_bins, pixels_per_cell=cell_size, cells_per_block=block_size, block_norm='L2-Hys', feature_vector=True)

            # Extracting LBP features
            lbp_feat = local_binary_pattern(img, num_points, radius, method='uniform')

            hog_features.append(hog_feat)
            lbp_features.append(lbp_feat.flatten())
            labels_batch.append(label)

        # Skipping this directory if no usable image was found
        if not labels_batch:
            continue

        # Concatenating the HOG and LBP arrays along axis 1 (one row per image)
        features_batch = np.concatenate([np.array(hog_features), np.array(lbp_features)], axis=1)

        yield features_batch, labels_batch

In [None]:
# Training the SVM model
svm_model = SVC(kernel='linear')

# Feature extraction and the train/test split do not depend on the epoch (the
# files are the same and random_state is fixed), so they are computed once.
face_features = []
face_labels = []

# Processing each batch of images using the generator
for features_batch, labels_batch in image_generator(input_dir):
    face_features.append(features_batch)
    face_labels.append(labels_batch)

if not face_features:
    # np.concatenate on an empty list fails with an unrelated-looking message.
    raise ValueError(f"No usable face images found under {input_dir}")

# Flatten the per-directory features and labels into single arrays
features_flat = np.concatenate(face_features, axis=0)
labels_flat = np.concatenate(face_labels, axis=0)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features_flat, labels_flat, test_size=0.2, random_state=42)

# Print the shapes of the training and testing sets
print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)

for epoch in range(num_epochs):
    # NOTE: SVC.fit retrains from scratch on every call, so each epoch
    # reproduces the same model and the same metrics.
    svm_model.fit(X_train, y_train)

    # Evaluating the SVM classifier on the training and testing sets
    y_pred_train = svm_model.predict(X_train)
    train_acc = accuracy_score(y_train, y_pred_train)

    y_pred_test = svm_model.predict(X_test)
    test_acc = accuracy_score(y_test, y_pred_test)
    test_prec = precision_score(y_test, y_pred_test, average='weighted')
    test_rec = recall_score(y_test, y_pred_test, average='weighted')
    test_f1 = f1_score(y_test, y_pred_test, average='weighted')

    # Report the computed metric values (not the metric functions themselves)
    print(f"Epoch {epoch+1} train acc: {train_acc:.2f} acc: {test_acc:.2f} precision: {test_prec:.2f} recall: {test_rec:.2f} f1: {test_f1:.2f}")

In [None]:
import matplotlib.pyplot as plt

# Visualize results: up to nine test samples with their predicted and true label.
#
# X_test holds feature vectors (HOG descriptor followed by the flattened LBP
# map), not images, so a row cannot be passed to imshow directly. The LBP map
# at the tail of each row has one value per pixel and is reshaped back into a
# 2-D picture of the face texture.
# NOTE(review): assumes the 110x110 crops written by the preprocessing cell;
# adjust face_shape if the image size changes.
face_shape = (110, 110)
num_lbp_values = face_shape[0] * face_shape[1]

fig, axs = plt.subplots(3, 3, figsize=(8, 8))
axs = axs.ravel()

# zip stops at the shortest input, i.e. after nine panels or when the test set
# runs out, whichever comes first.
for ax, feature_row, pred_label, true_label in zip(axs, X_test, y_pred_test, y_test):
    lbp_map = feature_row[-num_lbp_values:].reshape(face_shape)
    ax.imshow(lbp_map, cmap='gray')
    ax.set_title(f"Predicted label: {pred_label}\nTrue label: {true_label}")

# Hide the axes of every panel, including the ones left empty
for ax in axs:
    ax.axis('off')

plt.tight_layout()

In [None]:
# Persist the fitted SVM to disk so it can be reloaded without retraining
import joblib

joblib.dump(value=svm_model, filename='face_recognition_model.pkl')

In [None]:
# Import the necessary libraries.
import cv2
import numpy as np
from sklearn.svm import SVC

# ---------------------------------------------------------------------------
# Helpers. They are defined before the driver code below so that the cell runs
# top-to-bottom on a fresh kernel.
# ---------------------------------------------------------------------------

# Window size (width, height) of OpenCV's default-constructed HOGDescriptor.
_HOG_WINDOW_SIZE = (64, 128)

# (row, column) offsets of the eight neighbours that form one LBP code,
# clockwise starting at the top-left pixel.
_LBP_NEIGHBOUR_OFFSETS = ((-1, -1), (-1, 0), (-1, 1), (0, 1), (1, 1), (1, 0), (1, -1), (0, -1))


def _as_uint8(gray):
    """Return ``gray`` as ``uint8``; other dtypes are min-max rescaled to 0..255."""
    if gray.dtype == np.uint8:
        return gray
    return cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)


# Extract HOG features from an image.
def extract_hog_features(image, bounding_box):
    """Return the HOG descriptor of the face inside ``bounding_box``.

    Parameters
    ----------
    image : numpy.ndarray
        2-D grayscale image.
    bounding_box : sequence of int
        ``(x, y, w, h)`` as returned by ``detectMultiScale``.

    Returns
    -------
    numpy.ndarray
        1-D HOG feature vector. The crop is resized to the descriptor's window
        size first, so the length is the same for every face.
    """
    (x, y, w, h) = bounding_box
    face = _as_uint8(np.asarray(image)[y:y + h, x:x + w])
    face = cv2.resize(face, _HOG_WINDOW_SIZE)
    descriptor = cv2.HOGDescriptor()
    # compute() returns the descriptor values; ravel() gives a flat vector
    # whatever array layout the installed OpenCV version uses.
    return descriptor.compute(face).ravel()


# Extract LBP features from an image.
def extract_lbp_features(image, bounding_box):
    """Return a normalized 256-bin LBP histogram of the face inside ``bounding_box``.

    Each interior pixel is compared with its eight direct neighbours; a
    neighbour that is greater than or equal to the centre sets one bit of the
    pixel's 8-bit code. The histogram of those codes is scaled to sum to 1.

    Parameters
    ----------
    image : numpy.ndarray
        2-D grayscale image.
    bounding_box : sequence of int
        ``(x, y, w, h)`` as returned by ``detectMultiScale``.

    Returns
    -------
    numpy.ndarray
        1-D float array of length 256. All zeros when the crop is smaller than
        3x3 pixels and therefore has no interior pixel.
    """
    (x, y, w, h) = bounding_box
    face = np.asarray(image)[y:y + h, x:x + w]
    if face.ndim != 2 or face.shape[0] < 3 or face.shape[1] < 3:
        return np.zeros(256, dtype=float)

    rows, cols = face.shape
    centre = face[1:-1, 1:-1]
    codes = np.zeros(centre.shape, dtype=np.uint8)
    for bit, (dy, dx) in enumerate(_LBP_NEIGHBOUR_OFFSETS):
        neighbour = face[1 + dy:rows - 1 + dy, 1 + dx:cols - 1 + dx]
        codes |= (neighbour >= centre).astype(np.uint8) << np.uint8(bit)

    histogram = np.bincount(codes.ravel(), minlength=256).astype(float)
    return histogram / histogram.sum()


# Train a SVM classifier.
def train_svm_classifier(features, label):
    """Fit and return a linear ``SVC``.

    Parameters
    ----------
    features : array-like
        Feature matrix with one row per sample; a single 1-D vector is treated
        as one sample.
    label : array-like
        One label per sample. ``SVC`` needs at least two distinct labels.
    """
    svm_classifier = SVC(kernel='linear')
    svm_classifier.fit(np.atleast_2d(features), np.ravel(label))
    return svm_classifier


# Evaluate the performance of an SVM classifier.
def evaluate_svm_classifier(svm_classifier, features, label):
    """Return the mean accuracy of ``svm_classifier`` on ``features`` / ``label``."""
    accuracy = svm_classifier.score(np.atleast_2d(features), np.ravel(label))
    return accuracy


# ---------------------------------------------------------------------------
# Driver
# ---------------------------------------------------------------------------

# Load the LFW face dataset.
lfw_dataset = np.load('lfw_dataset.npz')

# An NpzFile reads the array again on every key access, so in-place edits of
# lfw_dataset['images'][i] are lost. Fetch each array once and work on that.
lfw_images = lfw_dataset['images']
lfw_labels = lfw_dataset['labels']

# Detect the faces in the images using the face cascade classifier.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
if face_cascade.empty():
    raise FileNotFoundError("Could not load cascade file haarcascade_frontalface_default.xml")

lfw_features = []
lfw_targets = []
for image, label in zip(lfw_images, lfw_labels):
    # Convert the image to grayscale. The cascade needs 8-bit input, so the
    # image is not divided by 255 before detection.
    # NOTE(review): assumes colour images are stored in BGR channel order - confirm.
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = _as_uint8(gray)

    # Detect faces in the image.
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)

    # For every detected face, build one combined HOG + LBP feature vector.
    for bounding_box in faces:
        hog_features = extract_hog_features(gray, bounding_box)
        lbp_features = extract_lbp_features(gray, bounding_box)
        lfw_features.append(np.concatenate((hog_features, lbp_features)))
        lfw_targets.append(label)

# A classifier cannot be fitted on a single sample, so one SVM is trained on
# the features of all detected faces.
if len(np.unique(lfw_targets)) < 2:
    raise ValueError("Need detected faces from at least two different labels to train the SVM")

features = np.array(lfw_features)
svm_classifier = train_svm_classifier(features, lfw_targets)

# Evaluate the performance of the SVM classifier (on its own training data).
print('Accuracy:', evaluate_svm_classifier(svm_classifier, features, lfw_targets))