# Step 1: Import necessary libraries

In [None]:
import os
import cv2
import numpy as np
import pickle
from matplotlib import pyplot as plt
from mtcnn.mtcnn import MTCNN
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Step 2: Data Augmentation

In [None]:
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [5]:
datagen

<keras.src.legacy.preprocessing.image.ImageDataGenerator at 0x20698be8210>

In [None]:
# Step 3: Function to extract faces using MTCNN

In [None]:
detector = MTCNN()

In [3]:
detector

<mtcnn.mtcnn.MTCNN at 0x20697ef9510>

In [None]:
def extract_face(image, required_size=(160, 160)):
    results = detector.detect_faces(image)
    if len(results) == 0:
        return None
    x1, y1, width, height = results[0]['box']
    x1, y1 = abs(x1), abs(y1)
    x2, y2 = x1 + width, y1 + height
    face = image[y1:y2, x1:x2]
    face = cv2.resize(face, required_size)
    return face

In [None]:
# Step 4: Load and augment images

In [None]:
def load_images_from_folder(folder, augment=False):
    images = []
    labels = []
    for person_name in os.listdir(folder):
        person_folder = os.path.join(folder, person_name)
        if not os.path.isdir(person_folder):
            continue
        for filename in os.listdir(person_folder):
            img_path = os.path.join(person_folder, filename)
            img = cv2.imread(img_path)
            face = extract_face(img)
            if face is not None:
                images.append(face)
                labels.append(person_name)
                if augment:
                    face = face.reshape((1, ) + face.shape)
                    for i, batch in enumerate(datagen.flow(face, batch_size=1)):
                        augmented_img = batch[0].astype('uint8')
                        images.append(augmented_img)
                        labels.append(person_name)
                        save_augmented_image(augmented_img, person_name, filename, i)
                        if i >= 4:  # Augment 5 images per original image
                            break
    return np.array(images), np.array(labels)

In [None]:
def save_augmented_image(image, person_name, original_filename, idx):
    output_folder = os.path.join("augmented_data", person_name)
    os.makedirs(output_folder, exist_ok=True)
    base_name = os.path.splitext(original_filename)[0]
    output_path = os.path.join(output_folder, f"{base_name}_aug_{idx}.png")
    cv2.imwrite(output_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

In [None]:
# Step 5: Load dataset with augmentation

In [None]:
dataset_path = "data"
images, labels = load_images_from_folder(dataset_path, augment=True)

In [21]:
images

array([[[[  5,   7,   8],
         [  4,   5,   7],
         [  0,   1,   3],
         ...,
         [  1,   1,   1],
         [  0,   0,   0],
         [  0,   0,   0]],

        [[  5,   7,   8],
         [  4,   6,   8],
         [ 10,  11,  15],
         ...,
         [  2,   2,   2],
         [  0,   0,   0],
         [  0,   0,   0]],

        [[  1,   2,   3],
         [  1,   2,   4],
         [ 15,  17,  21],
         ...,
         [  2,   2,   2],
         [  0,   0,   0],
         [  0,   0,   0]],

        ...,

        [[  2,   2,   2],
         [  2,   2,   2],
         [  1,   1,   1],
         ...,
         [ 80, 112, 177],
         [ 74, 107, 176],
         [ 67, 102, 175]],

        [[  2,   2,   2],
         [  2,   2,   2],
         [  1,   1,   1],
         ...,
         [ 79, 112, 180],
         [ 76, 110, 180],
         [ 71, 105, 179]],

        [[  2,   2,   2],
         [  2,   2,   2],
         [  2,   2,   2],
         ...,
         [ 75, 108, 182],
        

In [8]:
labels

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [None]:
# Step 6: Encode labels

In [None]:
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)
labels = to_categorical(labels)

In [9]:
label_encoder

In [10]:
labels

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [None]:
# Step 7: Split the dataset

In [None]:
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

In [12]:
X_train, X_test, y_train, y_test

(array([[[[ 42,  42,  26],
          [ 42,  42,  26],
          [ 42,  42,  26],
          ...,
          [ 51,  72,  94],
          [ 47,  69,  86],
          [ 47,  69,  82]],
 
         [[ 42,  42,  26],
          [ 42,  42,  26],
          [ 42,  42,  26],
          ...,
          [ 51,  75, 100],
          [ 46,  71,  91],
          [ 44,  69,  85]],
 
         [[ 42,  42,  26],
          [ 42,  42,  26],
          [ 42,  42,  26],
          ...,
          [ 51,  77, 106],
          [ 46,  73,  96],
          [ 45,  72,  90]],
 
         ...,
 
         [[ 82, 114,  99],
          [ 82, 114,  99],
          [ 82, 114,  99],
          ...,
          [ 29,  39,  32],
          [ 24,  33,  27],
          [ 19,  27,  20]],
 
         [[ 82, 114,  99],
          [ 82, 114,  99],
          [ 82, 114,  99],
          ...,
          [ 29,  35,  31],
          [ 25,  32,  26],
          [ 19,  26,  19]],
 
         [[ 82, 114,  99],
          [ 82, 114,  99],
          [ 82, 114,  99],
   

In [None]:
# Step 8: Define the model

In [None]:
model = Sequential([
    Flatten(input_shape=(160, 160, 3)),
    Dense(100, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Step 9: Train the model

In [None]:
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

In [13]:
model

<Sequential name=sequential, built=True>

In [None]:
# Step 10: Save the model using pickle

In [None]:
model_path = "face_recognition_model.pkl"
with open(model_path, 'wb') as f:
    pickle.dump(model, f)

In [None]:
# Step 11: Load the model using pickle

In [None]:
with open(model_path, 'rb') as f:
    model = pickle.load(f)

In [14]:
# Step 12: Real-time face recognition
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    faces = detector.detect_faces(frame)
    for face in faces:
        x, y, width, height = face['box']
        x, y = abs(x), abs(y)
        x2, y2 = x + width, y + height
        face_crop = frame[y:y2, x:x2]
        face_crop = cv2.resize(face_crop, (160, 160))
        face_crop = face_crop.astype('float32') / 255.0
        face_crop = np.expand_dims(face_crop, axis=0)
        
        # Predict the class
        prediction = model.predict(face_crop)
        class_index = np.argmax(prediction)
        class_name = label_encoder.classes_[class_index]
        
        # Draw bounding box and label
        cv2.rectangle(frame, (x, y), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, class_name, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    
    cv2.imshow('Face Recognition', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12

In [None]:
# Step 13: Print necessary details for reference

In [2]:
print("Dataset Path:", dataset_path)
print("Number of Original Images:", len(os.listdir(dataset_path)))
print("Number of Augmented Images:", len(images) - len(os.listdir(dataset_path)))
print("Model Path:", model_path)
print("Classes:", label_encoder.classes_)

Dataset Path: data
Number of Original Images: 7
Number of Augmented Images: 1805
Model Path: face_recognition_model.pkl
Classes: ['Cristiano_Ronaldo' 'Kamal_Ghansala' 'Mahesh_Semwal' 'Narendra_Modi'
 'Rohit_Sharma' 'Selena_Gomez' 'Virat_Kohli']
