In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

### My Implementation

In [2]:
def pca_eigenfaces(images, target_variance=0.9):
    """Fit an eigenface (PCA) basis to a stack of equally sized images.

    Every image is flattened into one column of a (num_pixels, num_images)
    data matrix, the mean image is subtracted, and the principal axes of the
    centred data are taken from a symmetric eigendecomposition.

    When there are fewer images than pixels, the decomposition is performed on
    the small (num_images, num_images) matrix ``X.T X / (n - 1)`` instead of
    the (num_pixels, num_pixels) covariance matrix ``X X.T / (n - 1)``.  Both
    have the same non-zero eigenvalues, and the pixel-space eigenvectors are
    recovered as ``X v`` followed by normalisation.

    Parameters
    ----------
    images : array-like, shape (num_images, ...)
        Stack of images (an ndarray or a sequence of equally shaped arrays).
        The first axis indexes images; all remaining axes are flattened.
        Values are converted to float64.
    target_variance : float, default 0.9
        Fraction of the total variance the selected components must explain.
        The smallest leading set of components reaching this fraction is
        kept.  If no prefix reaches it (e.g. a value above 1, or rounding
        when 1.0 is requested), every component with non-negligible variance
        is kept.

    Returns
    -------
    transformed_data : ndarray, shape (num_components, num_images)
        Projection of each centred image onto the selected eigenvectors.
    selected_eigenvectors : ndarray, shape (num_pixels, num_components)
        Unit-norm eigenvectors (eigenfaces), ordered by decreasing eigenvalue.
        The sign of each eigenvector is arbitrary.
    mean_image : ndarray, shape (num_pixels,)
        Flattened mean image that was subtracted from the data.

    Raises
    ------
    ValueError
        If ``images`` is empty.

    Notes
    -----
    Components whose eigenvalue is below 1e-10 times the largest eigenvalue
    are treated as numerical noise and dropped.  If the data has no variance
    at all (all images identical), zero components are returned.
    """
    images = np.asarray(images)
    if images.ndim == 0 or images.size == 0:
        raise ValueError("images must contain at least one non-empty image")

    print("Reshaping images to 1D vectors...")
    num_images = images.shape[0]
    # float64 up front so the mean/centering never happens in a narrow dtype.
    flattened_images = images.reshape(num_images, -1).astype(np.float64)
    print("Reshaping images complete.")

    print("Constructing data matrix...")
    data_matrix = flattened_images.T  # columns are images
    num_pixels = data_matrix.shape[0]
    print("Constructing data matrix complete.")

    print("Calculating mean image...")
    mean_image = np.mean(data_matrix, axis=1)
    print("Mean image calculation complete.")

    print("Subtracting mean image from all images...")
    centered_data = data_matrix - mean_image[:, np.newaxis]
    print("Subtraction complete.")

    print("Calculating covariance matrix...")
    # Unbiased (n - 1) scaling; guarded so a single image does not divide by 0.
    degrees_of_freedom = max(num_images - 1, 1)
    # Decompose whichever of X.T X / X X.T is smaller.
    use_gram_matrix = num_images < num_pixels
    if use_gram_matrix:
        covariance_matrix = np.dot(centered_data.T, centered_data) / degrees_of_freedom
    else:
        covariance_matrix = np.dot(centered_data, centered_data.T) / degrees_of_freedom
    print("Covariance matrix calculation complete.")

    print("Calculating eigenvalues and eigenvectors...")
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
    print("Eigenvalues and eigenvectors calculation complete.")

    print("Sorting eigenvectors by eigenvalues...")
    idx = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[idx]
    eigenvectors = eigenvectors[:, idx]
    print("Sorting complete.")

    print("Normalizing eigenvectors...")
    if use_gram_matrix:
        # Lift eigenvectors of the small matrix back into pixel space.
        eigenvectors = np.dot(centered_data, eigenvectors)
    norms = np.linalg.norm(eigenvectors, axis=0)
    # Drop directions whose variance is rounding noise (including slightly
    # negative eigenvalues); their lifted vectors cannot be normalised reliably.
    noise_threshold = max(eigenvalues[0], 0.0) * 1e-10
    keep = (eigenvalues > noise_threshold) & (norms > 0)
    eigenvalues = eigenvalues[keep]
    normalized_eigenvectors = eigenvectors[:, keep] / norms[keep]
    print("Normalization complete.")

    print("Calculating cumulative explained variance ratio...")
    if eigenvalues.size:
        explained_variance_ratio = np.cumsum(eigenvalues) / np.sum(eigenvalues)
    else:
        explained_variance_ratio = np.empty(0)
    print("Calculation complete.")

    print("Selecting eigenvectors to achieve target variance...")
    reached_target = explained_variance_ratio >= target_variance
    if reached_target.any():
        num_components = int(np.argmax(reached_target)) + 1
    else:
        # np.argmax on an all-False mask would return 0 and silently pick a
        # single component; keep everything that carries variance instead.
        num_components = int(eigenvalues.size)
    selected_eigenvectors = normalized_eigenvectors[:, :num_components]
    print("Selection complete.")

    print("Mapping all images to new components...")
    transformed_data = np.dot(selected_eigenvectors.T, centered_data)
    print("Mapping complete.")

    return transformed_data, selected_eigenvectors, mean_image

# Example usage:
# Assuming 'images' is a numpy array of shape (num_images, height, width) containing grayscale images
# transformed_data, eigenfaces, mean_image = pca_eigenfaces(images)

In [3]:
train_dir = "splitted_dataset/train"
# os.listdir returns entries in arbitrary order; sort so that the row order of
# `images` (and its pairing with `images_labels`) is identical on every run.
images_labels = sorted(
    f for f in os.listdir(train_dir) if os.path.isfile(os.path.join(train_dir, f))
)


def _read_resized_gray(path, size=(90, 90)):
    """Read `path` as a grayscale image and resize it to `size`.

    Raises ValueError if the file cannot be decoded as an image.
    """
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if image is None:
        raise ValueError(f"Could not read image file: {path}")
    return cv2.resize(image, size)


images = np.array(
    [_read_resized_gray(os.path.join(train_dir, name)) for name in images_labels]
)

In [4]:
# Sanity check on the loaded stack: one 90x90 grayscale image per training file.
images.shape

(110, 90, 90)

In [5]:
# Fit the eigenface basis on the training images with the default target_variance (0.9).
transformed_data, eigenfaces, mean_image = pca_eigenfaces(images)

Reshaping images to 1D vectors...
Reshaping images complete.
Constructing data matrix...
Constructing data matrix complete.
Calculating mean image...
Mean image calculation complete.
Subtracting mean image from all images...
Subtraction complete.
Calculating covariance matrix...
Covariance matrix calculation complete.
Calculating eigenvalues and eigenvectors...
Eigenvalues and eigenvectors calculation complete.
Sorting eigenvectors by eigenvalues...
Sorting complete.
Normalizing eigenvectors...
Normalization complete.
Calculating cumulative explained variance ratio...
Calculation complete.
Selecting eigenvectors to achieve target variance...
Selection complete.
Mapping all images to new components...
Mapping complete.


In [7]:
def save_pca_model(transformed_data, selected_eigenvectors, mean_image, save_folder):
    """Write the three arrays of a fitted PCA model to `save_folder` as .npy files.

    Parameters
    ----------
    transformed_data : array-like
        Saved as ``transformed_data.npy``.
    selected_eigenvectors : array-like
        Saved as ``selected_eigenvectors.npy``.
    mean_image : array-like
        Saved as ``mean_image.npy``.
    save_folder : str
        Destination directory.  It is created (including parents) if it does
        not exist; an empty string means the current working directory.
    """
    # np.save does not create missing directories, so make sure the target
    # exists first.  os.makedirs("") would raise, hence the guard.
    if save_folder:
        os.makedirs(save_folder, exist_ok=True)

    # Save transformed data
    np.save(os.path.join(save_folder, 'transformed_data.npy'), transformed_data)

    # Save selected eigenvectors
    np.save(os.path.join(save_folder, 'selected_eigenvectors.npy'),
            selected_eigenvectors)

    # Save mean image
    np.save(os.path.join(save_folder, 'mean_image.npy'), mean_image)


# Persist the fitted PCA model. Note that this cell actually runs (it is not
# just an example): it writes the three .npy files into `save_folder`.
save_folder = "./saved_model"
save_pca_model(transformed_data, eigenfaces, mean_image, save_folder)

In [None]:
def load_pca_model(load_folder):
    """Read a saved PCA model back from `load_folder`.

    Loads ``transformed_data.npy``, ``selected_eigenvectors.npy`` and
    ``mean_image.npy`` (in that order) from the folder and returns the three
    arrays as a tuple ``(transformed_data, selected_eigenvectors, mean_image)``.
    """
    artifact_files = (
        'transformed_data.npy',
        'selected_eigenvectors.npy',
        'mean_image.npy',
    )
    # Load each artifact in the order the caller expects them back.
    transformed_data, selected_eigenvectors, mean_image = [
        np.load(os.path.join(load_folder, file_name))
        for file_name in artifact_files
    ]
    return transformed_data, selected_eigenvectors, mean_image


# Example usage:
# Load the PCA model
# load_folder = "./saved_model"
# transformed_data, selected_eigenvectors, mean_image = load_pca_model(load_folder)

### OpenCV Function

---

## Dataset split



In [6]:
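# NOTE: dataset-splitting script, kept commented out for reference only.
# The first half (up to the second `dataset_folder = "cropped_faces"` line) is an
# abandoned, non-working draft; only the second half is a complete script.
# The split relies on unseeded `random.sample`, so re-running it would produce a
# different train/test partition.
# import os
# import random
# import shutil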

# # Define the path to the folder containing all images
# dataset_folder = "cropped_faces"

# # Define the number of images to select from each subject for testing
# num_test_images_per_subject = 4

# # Define the number of subjects to exclude randomly
# num_excluded_subjects = 10

# # Get a list of all images in the dataset folder
# images = [f for f in os.listdir(dataset_folder) if os.path.isfile(
#     os.path.join(dataset_folder, f))]

# # Extract subjects' names from the filenames
# subjects = list(set([image.split("_")[0] for image in images]))

# # Randomly select subjects to exclude
# excluded_subjects = random.sample(
#     subjects, min(num_excluded_subjects, len(subjects)))

# # Initialize lists to store paths of training and testing images
# train_images = []
# test_images = []

# for image in images:
#     sub = image.split["_"][0]


# # Iterate over each image
# for sub in subjects:
#     if sub not in excluded_subjects:
#         # sub_images =
#         [f'{sub}_0{i}' for i in random.sample(range(1, 10), 4)]
#         test_images.extend(tst)
#         train_images.extend()
#     # Check if the subject is not in the excluded list
#     if subject not in excluded_subjects:
#         # Randomly select images for testing
#         if random.random() < num_test_images_per_subject / len(images):
#             test_images.append(image)
#         else:
#             train_images.append(image)


# # # Define the paths for the train and test folders
# # train_folder = "splitted_dataset/train"
# # test_folder = "splitted_dataset/test"

# # # Create train and test folders if they don't exist
# # os.makedirs(train_folder, exist_ok=True)
# # os.makedirs(test_folder, exist_ok=True)

# # # Copy train images to the train folder
# # for image in train_images:
# #     src = os.path.join(dataset_folder, image)
# #     dst = os.path.join(train_folder, image)
# #     shutil.copy(src, dst)

# # # Copy test images to the test folder
# # for image in test_images:
# #     src = os.path.join(dataset_folder, image)
# #     dst = os.path.join(test_folder, image)
# #     shutil.copy(src, dst)
# dataset_folder = "cropped_faces"

# images = [f for f in os.listdir(dataset_folder) if os.path.isfile(
#     os.path.join(dataset_folder, f))]

# train = []
# test = []
# randx = random.sample(range(1, 16), 4)

# for i in images:
#     if int(i[4:6]) in randx:
#         test.append(i)
#     else:
#         train.append(i)

# # Define the paths for the train and test folders
# train_folder = "splitted_dataset/train"
# test_folder = "splitted_dataset/test"

# # Create train and test folders if they don't exist
# os.makedirs(train_folder, exist_ok=True)
# os.makedirs(test_folder, exist_ok=True)

# # Copy train images to the train folder
# for image in train:
#     src = os.path.join(dataset_folder, image)
#     dst = os.path.join(train_folder, image)
#     shutil.copy(src, dst)

# # Copy test images to the test folder
# for image in test:
#     src = os.path.join(dataset_folder, image)
#     dst = os.path.join(test_folder, image)
#     shutil.copy(src, dst)