In [None]:
# Download the face-detection dataset from Kaggle using the kaggle CLI.
# NOTE(review): presumably requires Kaggle API credentials to be configured — confirm.
# NOTE(review): the training cell reads images from 'dataset/friends', but no
# unzip/extract step is visible in this notebook — verify that the download ends
# up at that path before training.
!kaggle datasets download -d iamtushara/face-detection-dataset

In [None]:
pip uninstall opencv-python
pip install opencv-contrib-python numpy

In [None]:
import cv2
import os
import numpy as np
from PIL import Image # Pillow library for easier image handling
import pickle # To save the name mapping

# Path to the dataset containing subfolders for each person
dataset_path = 'dataset/friends'
# Path where the trained model will be saved
trainer_path = 'trainer'
model_file = os.path.join(trainer_path, 'lbph_model.yml')
label_map_file = os.path.join(trainer_path, 'labels.pickle')

# Ensure the trainer directory exists. `exist_ok=True` makes this idempotent
# (safe to re-run the cell) and avoids the check-then-create race.
os.makedirs(trainer_path, exist_ok=True)

# We'll use a Haar Cascade classifier to detect faces within the training images
# This ensures LBPH is trained only on face regions
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_detector = cv2.CascadeClassifier(cascade_path)
# Fail fast if the cascade did not load: otherwise every detection call would
# error inside the per-image try/except during training and be reported only
# as individual image failures.
if face_detector.empty():
    raise RuntimeError(f"Error loading Haar Cascade from {cascade_path}")

# Initialize LBPH face recognizer (requires the opencv-contrib build for `cv2.face`)
recognizer = cv2.face.LBPHFaceRecognizer_create()

def get_images_and_labels(dataset_path):
    """
    Collect grayscale face crops and integer labels for LBPH training.

    Walks ``dataset_path`` recursively, uses the name of each image's parent
    directory as the person's label, detects faces with the module-level
    ``face_detector`` and stores every detected face region together with the
    integer ID assigned to that label.

    Side effect: the ``{label_name: id}`` mapping is pickled to the
    module-level ``label_map_file``.

    Args:
        dataset_path: Root directory to scan for image files.

    Returns:
        tuple: ``(face_samples, ids)`` where ``face_samples`` is a list of 2-D
        ``uint8`` arrays (one per detected face) and ``ids`` is a NumPy array
        holding the label ID of each sample, in the same order.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    image_paths = []
    for root, dirs, files in os.walk(dataset_path):
        # Sort in place so the traversal order -- and therefore the integer ID
        # each person receives -- is reproducible across runs and platforms.
        dirs.sort()
        for file in sorted(files):
            # Compare case-insensitively so files such as "IMG_01.JPG" are kept.
            if file.lower().endswith(image_extensions):
                image_paths.append(os.path.join(root, file))

    face_samples = []
    ids = []
    label_ids = {}  # Dictionary to map person names to integer labels
    current_id = 0

    print("Preparing training data...")
    for image_path in image_paths:
        # The person's name is the name of the directory holding the image
        label_name = os.path.basename(os.path.dirname(image_path))

        # Assign an integer ID if this is a new person
        if label_name not in label_ids:
            label_ids[label_name] = current_id
            current_id += 1
        person_id = label_ids[label_name]

        try:
            # Open with Pillow and convert to grayscale; the context manager
            # closes the underlying file handle once the pixels are read.
            with Image.open(image_path) as source_image:
                pil_image = source_image.convert('L')
            image_np = np.array(pil_image, 'uint8')

            # Detect faces in the training image
            faces = face_detector.detectMultiScale(image_np, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

            for (x, y, w, h) in faces:
                # Extract the face ROI (Region of Interest); every detection in
                # the image is attributed to the folder's person.
                face_roi = image_np[y:y+h, x:x+w]
                face_samples.append(face_roi)
                ids.append(person_id)

        except Exception as e:
            # Best effort: report the unreadable image and keep going.
            print(f"Error processing image {image_path}: {e}")

    print(f"\nFound {len(face_samples)} face samples for training.")
    print(f"Label map created: {label_ids}")

    # Save the label map (name -> id) so recognition can translate IDs back to names
    with open(label_map_file, 'wb') as f:
        pickle.dump(label_ids, f)
    print(f"Label map saved to {label_map_file}")

    return face_samples, np.array(ids)

# --- Main Training Execution ---
# Build the training set: face crops plus their integer label IDs.
faces, ids = get_images_and_labels(dataset_path)

if faces:
    # At least one face crop was collected: fit the recognizer and persist it.
    print("\nTraining LBPH model...")
    recognizer.train(faces, ids)
    recognizer.write(model_file)  # Save the trained model
    print(f"LBPH model trained and saved to {model_file}")
else:
    # Nothing to train on; leave any previously saved model untouched.
    print("No faces found in the dataset. Please check dataset structure and images.")

In [None]:
import cv2
import numpy as np
import os
import pickle

# --- Configuration ---
cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
recognizer_model_path = 'trainer/lbph_model.yml'
label_map_path = 'trainer/labels.pickle'
recognition_confidence_threshold = 70 # Adjust based on testing (lower value means stricter match)

# --- Load Models and Data ---
# Failures below raise instead of calling exit(): inside a notebook exit()
# shuts down the kernel, while an exception just stops the cell with a
# readable traceback (and gives a non-zero status when run as a script).

# Load Haar Cascade for face detection
face_detector = cv2.CascadeClassifier(cascade_path)
if face_detector.empty():
    raise RuntimeError(f"Error loading Haar Cascade from {cascade_path}")
print("Haar Cascade face detector loaded.")

# Load the trained LBPH recognizer
recognizer = cv2.face.LBPHFaceRecognizer_create()
if not os.path.exists(recognizer_model_path):
    raise FileNotFoundError(
        f"Error: Trained model not found at {recognizer_model_path}. "
        "Please run the train_lbph.py script first."
    )
recognizer.read(recognizer_model_path)
print(f"LBPH recognizer model loaded from {recognizer_model_path}")

# Load the label map (name -> id) and invert it (id -> name)
if not os.path.exists(label_map_path):
    raise FileNotFoundError(
        f"Error: Label map not found at {label_map_path}. "
        "Please run the train_lbph.py script first."
    )
# SECURITY: pickle.load can execute arbitrary code; only load a label map
# that was produced by your own training run.
with open(label_map_path, 'rb') as f:
    og_label_ids = pickle.load(f)
# Invert the dictionary to map id -> name
id_to_name = {v: k for k, v in og_label_ids.items()}
print(f"Label map loaded: {id_to_name}")


# --- Initialize Webcam ---
# Device index 0 is passed to VideoCapture to select the camera.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Release the handle and raise rather than exit(): in a notebook exit()
    # shuts down the kernel instead of merely stopping this cell.
    cap.release()
    raise RuntimeError("Error: Could not open webcam.")

print("\nStarting real-time detection and recognition...")
print("Press 'q' to quit.")

# The font never changes between frames, so select it once.
font = cv2.FONT_HERSHEY_SIMPLEX

# try/finally guarantees the camera and the display window are released even
# if a frame raises or the kernel is interrupted mid-loop; otherwise the
# webcam stays locked until the kernel is restarted.
try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame.")
            break

        # Convert to grayscale for detection and recognition
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect faces using Haar Cascade
        faces = face_detector.detectMultiScale(
            gray,
            scaleFactor=1.1,      # How much the image size is reduced at each image scale
            minNeighbors=5,       # How many neighbors each candidate rectangle should have to retain it
            minSize=(40, 40)      # Minimum possible object size. Objects smaller than this are ignored
        )

        # Process each detected face
        for (x, y, w, h) in faces:
            # Extract the face ROI (Region of Interest) in grayscale
            face_roi_gray = gray[y:y+h, x:x+w]

            # Perform recognition using the trained LBPH model
            id_, confidence = recognizer.predict(face_roi_gray)

            # Defaults used when the match is too weak
            name = "Unknown"
            display_color = (0, 0, 255) # Red for Unknown

            # LBPH confidence is a distance: 0 is a perfect match, so a value
            # below the threshold means a sufficiently good match.
            if confidence < recognition_confidence_threshold:
                if id_ in id_to_name:
                    name = id_to_name[id_]
                    display_color = (0, 255, 0) # Green for known
                else:
                    # This case should ideally not happen if label map is correct
                    print(f"Warning: Recognized ID {id_} not found in label map.")
                    name = f"ID {id_}?" # Display the ID if name is missing

            # One label format for every outcome: "<name> (<distance>)"
            display_text = f"{name} ({confidence:.2f})"

            # --- Draw bounding box and text on the original color frame ---
            cv2.rectangle(frame, (x, y), (x + w, y + h), display_color, 2)

            # Put the label above the box, or just inside it when the box is
            # too close to the top edge for the text to fit.
            text_y = y - 10 if y - 10 > 10 else y + 10
            cv2.putText(frame, display_text, (x, text_y), font, 0.6, display_color, 2)

        # Display the resulting frame
        cv2.imshow('Real-Time Face Detection and Recognition', frame)

        # Break the loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # --- Cleanup ---
    print("Releasing resources...")
    cap.release()
    cv2.destroyAllWindows()
    print("Done.")