In [37]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score

print("Libraries Imported.")

Libraries Imported.


In [38]:
def load_custom_dataset(dataset_path):
    images = []
    labels = []
    names = []
    
    # Check if the folder exists
    if not os.path.exists(dataset_path):
        print(f"Error: The folder '{dataset_path}' does not exist. Please unzip your dataset.")
        return None, None, None

    print("Loading images from your dataset...")
    # Loop through each person's folder
    for label_id, person_name in enumerate(sorted(os.listdir(dataset_path))):
        person_folder = os.path.join(dataset_path, person_name)
        
        # Skip if it's not a folder (like .DS_Store on Mac)
        if not os.path.isdir(person_folder):
            continue
            
        names.append(person_name)
        
        # Loop through each image in the person's folder
        for image_name in os.listdir(person_folder):
            image_path = os.path.join(person_folder, image_name)
            
            # Read the image in Grayscale (Black & White is better for PCA)
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            
            if img is not None:
                # Resize all images to a fixed size (e.g., 64x64 pixels)
                # This is critical so all images have the same number of "features"
                img = cv2.resize(img, (64, 64))
                
                # Flatten the 2D image into a 1D list of numbers
                images.append(img.flatten())
                labels.append(label_id)

    print(f"Successfully loaded {len(images)} images for {len(names)} people.")
    return np.array(images), np.array(labels), np.array(names)

# RUN THE FUNCTION
# Ensure your folder is named 'dataset' and is in the same place as this file
X, y, target_names = load_custom_dataset("dataset")

Loading images from your dataset...
Successfully loaded 450 images for 9 people.


In [39]:
# Split data: 70% Training, 30% Testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Compute PCA (Eigenfaces)
# We reduce the data to 50 essential features (components)
n_components = 50
print(f"Extracting the top {n_components} Eigenfaces...")
pca = PCA(n_components=n_components, whiten=True).fit(X_train)

# Transform the images into "Eigenface" data
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

print("PCA Projection done.")

Extracting the top 50 Eigenfaces...
PCA Projection done.


  Q, _ = normalizer(A @ Q)
  Q, _ = normalizer(A @ Q)
  Q, _ = normalizer(A @ Q)
  Q, _ = normalizer(A.T @ Q)
  Q, _ = normalizer(A.T @ Q)
  Q, _ = normalizer(A.T @ Q)
  Q, _ = qr_normalizer(A @ Q)
  Q, _ = qr_normalizer(A @ Q)
  Q, _ = qr_normalizer(A @ Q)
  B = Q.T @ M
  B = Q.T @ M
  B = Q.T @ M
  U = Q @ Uhat
  U = Q @ Uhat
  U = Q @ Uhat
  X_transformed = X @ self.components_.T
  X_transformed = X @ self.components_.T
  X_transformed = X @ self.components_.T
  X_transformed -= xp.reshape(self.mean_, (1, -1)) @ self.components_.T
  X_transformed -= xp.reshape(self.mean_, (1, -1)) @ self.components_.T
  X_transformed -= xp.reshape(self.mean_, (1, -1)) @ self.components_.T
  X_transformed = X @ self.components_.T
  X_transformed = X @ self.components_.T
  X_transformed = X @ self.components_.T
  X_transformed -= xp.reshape(self.mean_, (1, -1)) @ self.components_.T
  X_transformed -= xp.reshape(self.mean_, (1, -1)) @ self.components_.T
  X_transformed -= xp.reshape(self.mean_, (1, -1)

In [40]:
# Train the Neural Network to recognize the faces
print("Training Neural Network...")
clf = MLPClassifier(hidden_layer_sizes=(1024,), batch_size=256, verbose=True, max_iter=1000).fit(X_train_pca, y_train)
print("Training Complete!")

Training Neural Network...
Iteration 1, loss = 2.19896798
Iteration 2, loss = 1.96285374
Iteration 3, loss = 1.77247801
Iteration 4, loss = 1.61563718
Iteration 5, loss = 1.47524308
Iteration 6, loss = 1.34635092
Iteration 7, loss = 1.22841637
Iteration 8, loss = 1.12114068
Iteration 9, loss = 1.02415123
Iteration 10, loss = 0.93783067
Iteration 11, loss = 0.86138646
Iteration 12, loss = 0.79090630
Iteration 13, loss = 0.73029673
Iteration 14, loss = 0.67362210
Iteration 15, loss = 0.62258982
Iteration 16, loss = 0.57581611
Iteration 17, loss = 0.53442209
Iteration 18, loss = 0.49671844
Iteration 19, loss = 0.46216455
Iteration 20, loss = 0.43076263
Iteration 21, loss = 0.40107481
Iteration 22, loss = 0.37443801
Iteration 23, loss = 0.34967013
Iteration 24, loss = 0.32701131
Iteration 25, loss = 0.30671292
Iteration 26, loss = 0.28754757
Iteration 27, loss = 0.27023760
Iteration 28, loss = 0.25405819
Iteration 29, loss = 0.23881408
Iteration 30, loss = 0.22508489
Iteration 31, loss = 0

  ret = a @ b
  ret = a @ b
  ret = a @ b


Iteration 53, loss = 0.07232545
Iteration 54, loss = 0.06949873
Iteration 55, loss = 0.06673233
Iteration 56, loss = 0.06415651
Iteration 57, loss = 0.06175215
Iteration 58, loss = 0.05946778
Iteration 59, loss = 0.05730229
Iteration 60, loss = 0.05524849
Iteration 61, loss = 0.05334496
Iteration 62, loss = 0.05146347
Iteration 63, loss = 0.04972183
Iteration 64, loss = 0.04808342
Iteration 65, loss = 0.04654411
Iteration 66, loss = 0.04507508
Iteration 67, loss = 0.04371315
Iteration 68, loss = 0.04240759
Iteration 69, loss = 0.04117341
Iteration 70, loss = 0.03996986
Iteration 71, loss = 0.03876973
Iteration 72, loss = 0.03766700
Iteration 73, loss = 0.03662865
Iteration 74, loss = 0.03561648
Iteration 75, loss = 0.03461817
Iteration 76, loss = 0.03373339
Iteration 77, loss = 0.03285956
Iteration 78, loss = 0.03198735
Iteration 79, loss = 0.03116343
Iteration 80, loss = 0.03031813
Iteration 81, loss = 0.02950578
Iteration 82, loss = 0.02878254
Iteration 83, loss = 0.02808210
Iteratio

In [41]:
# Check Accuracy
y_pred = clf.predict(X_test_pca)
print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print(classification_report(y_test, y_pred, target_names=target_names))

# Helper function to plot images
def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
    # Create the figure window
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    
    # Loop through the grid spots
    for i in range(n_row * n_col):
        # SAFETY CHECK: Stop if we run out of images!
        if i >= len(images):
            break
            
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i], size=12)
        plt.xticks(())
        plt.yticks(())

Accuracy: 57.78%
              precision    recall  f1-score   support

       Aamir       0.57      0.52      0.55        23
        Ajay       0.85      0.58      0.69        19
      Akshay       0.47      0.62      0.53        13
        Alia       0.55      0.60      0.57        10
     Amitabh       0.85      0.85      0.85        13
     Deepika       0.39      0.50      0.44        14
       Disha       0.28      0.70      0.40        10
      Farhan       1.00      0.53      0.69        19
      Ileana       0.86      0.43      0.57        14

    accuracy                           0.58       135
   macro avg       0.65      0.59      0.59       135
weighted avg       0.67      0.58      0.60       135



  ret = a @ b
  ret = a @ b
  ret = a @ b
