In [23]:
# Install DeepFace if not installed
# pip install deepface

import os
import numpy as np
from tqdm import tqdm
from deepface import DeepFace
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
import joblib  # To save and load the model
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [28]:
# -------------------
# Step 1: Prepare paths
# -------------------

dataset_path = r"C:\Users\niraj.meshram\Desktop\Data Science\Machine Learning\Exercise\PythonProject\Project_Celebrity_Image_Detection\model\dataset"  # Updated path
model_name = "ArcFace"    # Or "VGG-Face", "Facenet512", etc.

# -------------------
# Step 2: Load data and extract embeddings
# -------------------

X = []  # Feature Vectors
y = []  # Labels (Celebrity names)

print("Extracting face embeddings...")
for celebrity_name in tqdm(os.listdir(dataset_path)):
    celebrity_folder = os.path.join(dataset_path, celebrity_name)
    
    if not os.path.isdir(celebrity_folder):
        continue  # Skip files, only process folders

    for image_name in os.listdir(celebrity_folder):
        image_path = os.path.join(celebrity_folder, image_name)

        try:
            # DeepFace will handle face detection + feature extraction
            embedding = DeepFace.represent(img_path=image_path, model_name=model_name, enforce_detection=False)[0]["embedding"]
            X.append(embedding)
            y.append(celebrity_name)
        except Exception as e:
            print(f"Failed on {image_path}: {e}")

X = np.array(X)
y = np.array(y)

print(f"Total embeddings extracted: {len(X)}")


Extracting face embeddings...


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [05:58<00:00, 59.81s/it]

Total embeddings extracted: 366





In [29]:
# -------------------
# Step 3: Encode labels
# -------------------

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [32]:
# -------------------
# Step 4: Train/Test Split
# -------------------

X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


In [33]:
# -------------------
# Step 5: Train classifier
# -------------------

classifier = SVC(kernel='linear', probability=True)
classifier.fit(X_train, y_train)

In [34]:
# -------------------
# Step 6: Evaluate
# -------------------

y_pred = classifier.predict(X_test)
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

                 precision    recall  f1-score   support

   lionel_messi       0.92      0.73      0.81        15
maria_sharapova       0.62      0.44      0.52        18
  roger_federer       0.56      0.71      0.62         7
serena_williams       0.47      0.58      0.52        12
    virat_kohli       0.72      0.82      0.77        22

       accuracy                           0.66        74
      macro avg       0.65      0.66      0.65        74
   weighted avg       0.68      0.66      0.66        74



In [35]:
joblib.dump(classifier, "celebrity_classifier.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")

print("Model and label encoder saved successfully.")

Model and label encoder saved successfully.


In [42]:
# -------------------
# Step 8: Prediction function
# -------------------

def predict_celebrity(image_path):
    # Load saved model and label encoder
    classifier = joblib.load("celebrity_classifier.pkl")
    label_encoder = joblib.load("label_encoder.pkl")
    
    try:
        # Extract embedding
        embedding = DeepFace.represent(img_path=image_path, model_name=model_name, enforce_detection=False)[0]["embedding"]
        embedding = np.array(embedding).reshape(1, -1)
        
        # Predict
        pred = classifier.predict(embedding)[0]
        proba = classifier.predict_proba(embedding).max()
        celebrity_name = label_encoder.inverse_transform([pred])[0]
        
        print(f"Prediction: {celebrity_name} ({proba*100:.2f}% confidence)")
        return celebrity_name
    except Exception as e:
        print(f"Prediction failed: {e}")
        return None

# Example usage:
predict_celebrity(r"C:\Users\niraj.meshram\Desktop\Data Science\Machine Learning\Exercise\PythonProject\Project_Celebrity_Image_Detection\model\test_images\5c6becd02628985d2a2ee2a2.JFIF")

Prediction: roger_federer (94.88% confidence)


'roger_federer'