In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split


# Load the dataset
def load_data():
    data = pd.read_csv("data/csv/lfw_allnames.csv")
    image_paths = []
    labels = []

    for _, row in data.iterrows():
        person_name = row["name"]
        num_images = int(row["images"])

        # Construct folder paths and image paths
        folder_path = f"data/lfw-deepfunneled/{person_name.replace(' ', '_')}"
        for i in range(1, num_images + 1):
            image_paths.append(f"{folder_path}/{person_name}_{i:04d}.jpg")
            labels.append(person_name)

    return image_paths, labels


def prepare_data():
    image_paths, labels = load_data()

    # Split the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        image_paths, labels, test_size=0.2, random_state=42
    )

    print(f"Training set size: {len(X_train)}")
    print(f"Test set size: {len(X_test)}")

    return X_train, X_test, y_train, y_test


# Preprocess and split the data
X_train, X_test, y_train, y_test = prepare_data()


Training set size: 10586
Test set size: 2647


In [2]:
from keras_facenet import FaceNet
from keras.preprocessing import image
import numpy as np

# Initialize FaceNet
embedder = FaceNet()


def get_image_embedding(img_path):
    # Load and preprocess the image
    img = image.load_img(
        img_path, target_size=(160, 160)
    )  # FaceNet requires 160x160 images
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)

    # Get the embedding using FaceNet
    embedding = embedder.embeddings(img_array)
    return embedding.flatten()


def extract_embeddings(image_paths):
    embeddings = []
    for path in image_paths:
        embedding = get_image_embedding(path)
        embeddings.append(embedding)
    return np.array(embeddings)


# Extract embeddings for training and test datasets
print("Extracting embeddings for training data...")
embeddings_train = extract_embeddings(X_train)

print("Extracting embeddings for test data...")
embeddings_test = extract_embeddings(X_test)



Extracting embeddings for training data...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━

## Train an SVM classifier 

In [5]:
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib

# Encode the labels (person names)
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Train an SVM classifier
print("Training SVM classifier...")
svm_model = SVC(kernel='linear', probability=True)
svm_model.fit(embeddings_train, y_train_encoded)

# Save the trained model and label encoder
joblib.dump(svm_model, "models/face_recognition_svm_model.pkl")
joblib.dump(label_encoder, "models/label_encoder.pkl")

# Evaluate the model on the test set
y_pred = svm_model.predict(embeddings_test)
accuracy = accuracy_score(y_test_encoded, y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


ValueError: y contains previously unseen labels: 'Tim_Curry'

## Load the Model and make predictions

In [6]:
# Load the trained SVM model and label encoder
svm_model = joblib.load("models/face_recognition_svm_model.pkl")
label_encoder = joblib.load("models/label_encoder.pkl")

def predict_face(image_path):
    # Get the embedding of the new image
    embedding = get_image_embedding(image_path)
    embedding = np.expand_dims(embedding, axis=0)

    # Predict the label for the input image
    predicted_label = svm_model.predict(embedding)
    predicted_name = label_encoder.inverse_transform(predicted_label)

    print(f"Predicted Person: {predicted_name[0]}")

# Test the prediction function on a new image
predict_face("path_to_new_image.jpg")  # Change the path to the actual image you want to test


FileNotFoundError: [Errno 2] No such file or directory: 'models/face_recognition_svm_model.pkl'