In [2]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split

In [3]:
# Define the path to the folder containing the images
source_folder = "./labeled_faces"

# Define the target size for resizing the images
target_size = (64, 64)  

def split_data(subjects=40, imgs=15, target_size=target_size, source_folder=source_folder):
    X_train = []
    y_train = []
    X_test = []
    y_test = []

    for subject in range(1, subjects + 1):
        for image in range(1, 10 + 1):    # for images for each subject   
            # Construct the image path
            image_path = os.path.join(source_folder, f"s{subject:02d}_{image:02d}.jpg")
            X_train.append(image_path)
            y_train.append(subject)
    
    for subject in range(1, subjects + 1):
        for image in range(11, imgs + 1):       
            # Construct the image path
            image_path = os.path.join(source_folder, f"s{subject:02d}_{image:02d}.jpg")
            X_test.append(image_path)
            y_test.append(subject)

    

    # Read and resize images using cv2
    X_train_images = np.array(
        [cv2.resize(cv2.imread(image_path), target_size) for image_path in X_train])
    X_test_images = np.array(
        [cv2.resize(cv2.imread(image_path), target_size) for image_path in X_test])

    return X_train_images, X_test_images, y_train, y_test


# Split the data
X_train_images, X_test_images, y_train, y_test = split_data()

# Output the shapes of train and test sets
print("Train set shape:", X_train_images.shape)
print("Test set shape:", X_test_images.shape)

Train set shape: (400, 64, 64, 3)
Test set shape: (200, 64, 64, 3)


In [4]:
print(len(y_train))
print(len(y_test))

400
200


In [5]:
def calculate_eigenfaces(images):
    num_images = len(images)
    image_shape = images[0].shape
    image_size = np.prod(image_shape)
    flattened_images = images.reshape(num_images, -1)

    data_matrix = flattened_images.T

    mean_image = np.mean(data_matrix, axis=1)

    centered_data = data_matrix - mean_image[:, np.newaxis]

    covariance_matrix = np.cov(centered_data)

    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    idx = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[idx]
    eigenvectors = eigenvectors[:, idx]

    normalized_eigenvectors = eigenvectors / \
        np.linalg.norm(eigenvectors, axis=0)

    return normalized_eigenvectors, eigenvalues, mean_image, centered_data


def select_eigenvectors(normalized_eigenvectors, eigenvalues, target_variance=0.9):
    explained_variance_ratio = np.cumsum(eigenvalues) / np.sum(eigenvalues)

    num_components = np.argmax(explained_variance_ratio >= target_variance) + 1
    selected_eigenvectors = normalized_eigenvectors[:, :num_components]

    return selected_eigenvectors


def transform_data(centered_data, selected_eigenvectors):
    transformed_data = np.dot(selected_eigenvectors.T, centered_data)

    return transformed_data


def train_pca(X_train, target_variance=0.9):
    normalized_eigenvectors, eigenvalues, mean_image, centered_data = calculate_eigenfaces(
        X_train)
    eigenfaces = select_eigenvectors(
        normalized_eigenvectors, eigenvalues, target_variance=target_variance)
    
    return eigenfaces, mean_image, centered_data


In [6]:
eigenfaces, mean_image, centered_data = train_pca(X_train_images)


In [7]:
print(mean_image.shape)
print(eigenfaces.shape)
print(centered_data.shape)


(12288,)
(12288, 113)
(12288, 400)


In [8]:
def predict_pca(test_image, eigenfaces, mean_image, transformed_data):

    # Resize the test image to match the target size
    test_image_resized = cv2.resize(test_image, (64, 64))

    # Flatten the resized test image
    test_image_vector = test_image_resized.flatten()


    # Center the test image by subtracting the mean image
    centered_test_image = test_image_vector - mean_image


    # Project the centered test image onto the eigenfaces
    test_image_transformed = np.dot(eigenfaces.T, centered_test_image)
    
    # Calculate Euclidean distances between the transformed test image and training images
    distances = np.linalg.norm(transformed_data - test_image_transformed[:, np.newaxis], axis=0)
    # print(transformed_data.shape)
    
    # Find the index of the closest match
    closest_index = np.argmin(distances)
    # print("closest_index:", closest_index)

    predicted_subject = closest_index // 10 + 1
    print("predicted_subject", predicted_subject)

    return predicted_subject

In [9]:
transformed_data = transform_data(centered_data, eigenfaces)

In [10]:
predicted_labels = []
for test_image in X_test_images:
    predicted_labels.append(predict_pca(test_image, eigenfaces, mean_image, transformed_data))

predicted_subject 35
predicted_subject 1
predicted_subject 1
predicted_subject 1
predicted_subject 33
predicted_subject 39
predicted_subject 2
predicted_subject 2
predicted_subject 39
predicted_subject 2
predicted_subject 3
predicted_subject 27
predicted_subject 3
predicted_subject 3
predicted_subject 3
predicted_subject 4
predicted_subject 4
predicted_subject 4
predicted_subject 4
predicted_subject 4
predicted_subject 16
predicted_subject 5
predicted_subject 4
predicted_subject 5
predicted_subject 5
predicted_subject 6
predicted_subject 6
predicted_subject 6
predicted_subject 6
predicted_subject 6
predicted_subject 7
predicted_subject 7
predicted_subject 1
predicted_subject 7
predicted_subject 7
predicted_subject 8
predicted_subject 8
predicted_subject 8
predicted_subject 8
predicted_subject 1
predicted_subject 9
predicted_subject 33
predicted_subject 1
predicted_subject 8
predicted_subject 9
predicted_subject 38
predicted_subject 10
predicted_subject 10
predicted_subject 6
predicted_

In [11]:
def evaluate_pca_predictions(test_images, eigenfaces, mean_image, transformed_data, y_test):
    correct_predictions = 0
    total_predictions = len(test_images)

    for i, test_image in enumerate(test_images):
        predicted_index = predict_pca(test_image, eigenfaces, mean_image, transformed_data)
        if predicted_index == y_test[i]:
            correct_predictions += 1
    accuracy = correct_predictions / total_predictions
    return accuracy

accuracy = evaluate_pca_predictions(X_test_images, eigenfaces, mean_image, transformed_data, y_test)
print("PCA Accuracy:", accuracy * 100)

predicted_subject 35
predicted_subject 1
predicted_subject 1
predicted_subject 1
predicted_subject 33
predicted_subject 39
predicted_subject 2
predicted_subject 2
predicted_subject 39
predicted_subject 2
predicted_subject 3
predicted_subject 27
predicted_subject 3
predicted_subject 3
predicted_subject 3
predicted_subject 4
predicted_subject 4
predicted_subject 4
predicted_subject 4
predicted_subject 4
predicted_subject 16
predicted_subject 5
predicted_subject 4
predicted_subject 5
predicted_subject 5
predicted_subject 6
predicted_subject 6
predicted_subject 6
predicted_subject 6
predicted_subject 6
predicted_subject 7
predicted_subject 7
predicted_subject 1
predicted_subject 7
predicted_subject 7
predicted_subject 8
predicted_subject 8
predicted_subject 8
predicted_subject 8
predicted_subject 1
predicted_subject 9
predicted_subject 33
predicted_subject 1
predicted_subject 8
predicted_subject 9
predicted_subject 38
predicted_subject 10
predicted_subject 10
predicted_subject 6
predicted_

In [13]:
len(predicted_labels)

200

In [14]:
print(y_test)
print(max(predicted_labels))
print(min(predicted_labels))

[1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40]
40
1


In [15]:
# Calculate accuracy
correct_predictions = sum(1 for pred_label, true_label in zip(predicted_labels, y_test) if pred_label == true_label)
total_predictions = len(y_test)
accuracy = correct_predictions / total_predictions

print("Accuracy:", accuracy * 100)


Accuracy: 76.5


In [17]:
def save_pca_model(data_dict, save_folder):
    for d in data_dict.keys():
        np.save(os.path.join(save_folder, f'{d}.npy'), data_dict[d])


# Example usage:
# Save the PCA model
save_folder = "./saved_model"
save_pca_model({"eigenfaces": eigenfaces,
                "mean_image": mean_image,
                "centered_data": centered_data}, save_folder)

In [18]:
# Loading Function
def load_pca_model(load_folder):
    # Example:
    # Load transformed data
    transformed_data = np.load(os.path.join(
        load_folder, 'transformed_data.npy'))

    return transformed_data


# Example usage:
# Load the PCA model
# load_folder = "./saved_model"
# transformed_data = load_pca_model(load_folder)