In [1]:
from imutils import paths
import numpy as np
import argparse
import imutils
import pickle
import cv2
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
import argparse
import time
from imutils.video import FPS
from imutils.video import VideoStream


# Start

In [7]:
print("Loading Face Detector...")
protoPath = "face_detection_model/deploy.prototxt"
modelPath = "face_detection_model/res10_300x300_ssd_iter_140000.caffemodel"
imagePaths = list(paths.list_images("dataset"))

Loading Face Detector...


In [2]:
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)
print("Loading Face Recognizer...")
embedder = cv2.dnn.readNetFromTorch("openface_nn4.small2.v1.t7")
print("Quantifying Faces...")
knownEmbeddings = []
knownNames = []
total = 0
for (i, imagePath) in enumerate(imagePaths):
	if (i%50 == 0):
		print("Processing image {}/{}".format(i, len(imagePaths)))
	name = imagePath.split(os.path.sep)[-2]
	image = cv2.imread(imagePath)
	image = imutils.resize(image, width=600)
	(h, w) = image.shape[:2]

	# construct a blob from the image
	imageBlob = cv2.dnn.blobFromImage(
		cv2.resize(image, (300, 300)), 1.0, (300, 300),
		(104.0, 177.0, 123.0), swapRB=False, crop=False)
	detector.setInput(imageBlob)
	detections = detector.forward()

	if len(detections) > 0:
		
		i = np.argmax(detections[0, 0, :, 2])
		confidence = detections[0, 0, i, 2]

		
		if confidence > 0.5:
			box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
			(startX, startY, endX, endY) = box.astype("int")

			face = image[startY:endY, startX:endX]
			(fH, fW) = face.shape[:2]

			if fW < 20 or fH < 20:
				continue

			
			faceBlob = cv2.dnn.blobFromImage(face, 1.0 / 255,
				(96, 96), (0, 0, 0), swapRB=True, crop=False)
			embedder.setInput(faceBlob)
			vec = embedder.forward()

			knownNames.append(name)
			knownEmbeddings.append(vec.flatten())
			total += 1

# dump the facial embeddings + names to disk
print("[INFO] serializing {} encodings...".format(total))
data = {"embeddings": knownEmbeddings, "names": knownNames}
f = open("output/embeddings.pickle", "wb")
f.write(pickle.dumps(data))
f.close()

Loading Face Detector...
Loading Face Recognizer...
Quantifying Faces...
Processing image 0/200
Processing image 50/200
Processing image 100/200
Processing image 150/200
[INFO] serializing 200 encodings...


In [3]:
print("[INFO] loading face embeddings...")
data = pickle.loads(open("output/embeddings.pickle", "rb").read())

# encode the labels
print("[INFO] encoding labels...")
le = LabelEncoder()
labels = le.fit_transform(data["names"])
print("[INFO] training model...")
recognizer = SVC(C=1.0, kernel="linear", probability=True)
recognizer.fit(data["embeddings"], labels)
f = open("output/recognizer", "wb")
f.write(pickle.dumps(recognizer))
f.close()
f = open("output/le.pickle", "wb")
f.write(pickle.dumps(le))
f.close()

[INFO] loading face embeddings...
[INFO] encoding labels...
[INFO] training model...


In [4]:
print("Loading Face Detector...")
protoPath = "face_detection_model/deploy.prototxt"
modelPath = "face_detection_model/res10_300x300_ssd_iter_140000.caffemodel"
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# load serialized face embedding model
print("Loading Face Recognizer...")
embedder = cv2.dnn.readNetFromTorch("openface_nn4.small2.v1.t7")

# load the actual face recognition model along with the label encoder
recognizer = pickle.loads(open("output/recognizer", "rb").read())
le = pickle.loads(open("output/le.pickle", "rb").read())

# initialize the video stream, then allow the camera sensor to warm up
print("Starting Video Stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)

# start the FPS throughput estimator
fps = FPS().start()

# loop over frames from the video file stream
while True:
	# grab the frame from the threaded video stream
	frame = vs.read()

	# resize the frame to have a width of 600 pixels (while maintaining the aspect ratio), and then grab the image dimensions
	frame = imutils.resize(frame, width=600)
	(h, w) = frame.shape[:2]

	# construct a blob from the image
	imageBlob = cv2.dnn.blobFromImage(
		cv2.resize(frame, (300, 300)), 1.0, (300, 300),
		(104.0, 177.0, 123.0), swapRB=False, crop=False)

	# apply OpenCV's deep learning-based face detector to localize faces in the input image
	detector.setInput(imageBlob)
	detections = detector.forward()

	# loop over the detections
	for i in range(0, detections.shape[2]):
		# extract the confidence (i.e., probability) associated with the prediction
		confidence = detections[0, 0, i, 2]

		# filter out weak detections
		if confidence > 0.5:
			# compute the (x, y)-coordinates of the bounding box for the face
			box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
			(startX, startY, endX, endY) = box.astype("int")

			# extract the face ROI
			face = frame[startY:endY, startX:endX]
			(fH, fW) = face.shape[:2]

			# ensure the face width and height are sufficiently large
			if fW < 20 or fH < 20:
				continue

			# construct a blob for the face ROI, then pass the blob through our face embedding model to obtain the 128-d quantification of the face
			faceBlob = cv2.dnn.blobFromImage(face, 1.0 / 255,
				(96, 96), (0, 0, 0), swapRB=True, crop=False)
			embedder.setInput(faceBlob)
			vec = embedder.forward()

			# perform classification to recognize the face
			preds = recognizer.predict_proba(vec)[0]
			j = np.argmax(preds)
			proba = preds[j]
			name = le.classes_[j]

			# draw the bounding box of the face along with the associated probability
			text = "{}: {:.2f}%".format(name, proba * 100)
			y = startY - 10 if startY - 10 > 10 else startY + 10
			cv2.rectangle(frame, (startX, startY), (endX, endY),
				(0, 0, 255), 2)
			cv2.putText(frame, text, (startX, y),
				cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

	# update the FPS counter
	fps.update()

	# show the output frame
	cv2.imshow("Frame", frame)
	key = cv2.waitKey(1) & 0xFF

	# if the `q` key was pressed, break from the loop
	if key == ord("q"):
		break

# stop the timer and display FPS information
fps.stop()
print("Elasped time: {:.2f}".format(fps.elapsed()))
print("Approx. FPS: {:.2f}".format(fps.fps()))

# cleanup
cv2.destroyAllWindows()
vs.stop()


Loading Face Detector...
Loading Face Recognizer...
Starting Video Stream...
Elasped time: 57.64
Approx. FPS: 12.16


# END

# Dataset Creater

In [5]:
import cv2
import os

def capture_images(output_folder, label, num_images=100):
    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Open the webcam
    cap = cv2.VideoCapture(0)

    # Counter for the number of captured images
    count = 0

    while count < num_images:
        # Capture frame-by-frame
        ret, frame = cap.read()

        # Display the frame
        cv2.imshow('Capture Images', frame)

        # Save the image to the output folder
        image_filename = os.path.join(output_folder, f"{label}_{count}.png")
        cv2.imwrite(image_filename, frame)

        # Increment the counter
        count += 1

        # Break the loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release the webcam and close the OpenCV window
    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    # Set the output folder and label
    output_folder = "D:\\Mtech 2022-24\\Semester 3\\Project\\face-recognition-using-deep-learning\\dataset\\vatsal"
    label = "vatsal"  # Change this label as needed

    # Specify the number of images to capture
    num_images = 100

    # Capture images
    capture_images(output_folder, label, num_images)


In [8]:
# Training Phase
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
import pickle

# Load pre-trained face embeddings and names
data = pickle.loads(open("output/embeddings.pickle", "rb").read())

# Encode labels
le = LabelEncoder()
labels = le.fit_transform(data["names"])

# Train SVM model
recognizer = SVC(C=1.0, kernel="linear", probability=True)
recognizer.fit(data["embeddings"], labels)

# Save the trained model and label encoder
with open("output/recognizer.pickle", "wb") as f:
    f.write(pickle.dumps(recognizer))

with open("output/le.pickle", "wb") as f:
    f.write(pickle.dumps(le))

# Recognition Phase
import cv2
import numpy as np

# Load trained face recognition model and label encoder
recognizer = pickle.loads(open("output/recognizer.pickle", "rb").read())
le = pickle.loads(open("output/le.pickle", "rb").read())

# Initialize video capture
vs = cv2.VideoCapture(0)

while True:
    # Capture frame from video stream
    ret, frame = vs.read()

    # Preprocess the frame (resize, convert to grayscale, etc.)

    # Perform face detection

    # Extract face region

    # Preprocess face for embedding

    # Pass face through embedding model

    # Use trained recognizer to predict person's nameq

    # Display bounding box and name on the frame

    # Check for exit key press

# Release video stream
vs.release()
cv2.destroyAllWindows()


KeyboardInterrupt: 

In [9]:
import cv2
import pickle
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder

# Load the trained face recognition model and label encoder
recognizer = pickle.loads(open("output/recognizer.pickle", "rb").read())
le = pickle.loads(open("output/le.pickle", "rb").read())

# Initialize video capture
vs = cv2.VideoCapture(0)

while True:
    # Capture frame from the video stream
    ret, frame = vs.read()

    # Perform face detection using a pre-trained face detector
    # This part needs to be replaced with an actual face detection mechanism
    # For example, you can use OpenCV's pre-trained Haarcascades classifier
    # https://github.com/opencv/opencv/tree/master/data/haarcascades
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5)

    # Loop over detected faces
    for (x, y, w, h) in faces:
        # Extract face region
        face = frame[y:y + h, x:x + w]

        # Preprocess face for embedding
        # This part needs to be replaced with your face preprocessing logic

        # Pass face through the embedding model
        # This part needs to be replaced with your face embedding logic

        # Placeholder for the face embedding vector
        vec = np.random.rand(128)  # Replace with actual embedding

        # Use the trained recognizer to predict the person's name
        preds = recognizer.predict_proba([vec])[0]
        j = np.argmax(preds)
        proba = preds[j]
        name = le.classes_[j]

        # Display bounding box and name on the frame
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        text = f"{name}: {proba * 100:.2f}%"
        cv2.putText(frame, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)

    # Display the output frame
    cv2.imshow("Face Recognition", frame)

    # Check for exit key press
    key = cv2.waitKey(1) & 0xFF
    if key == ord("q"):
        break

# Release video stream
vs.release()
cv2.destroyAllWindows()


error: OpenCV(4.8.1) D:\a\opencv-python\opencv-python\opencv\modules\objdetect\src\cascadedetect.cpp:1689: error: (-215:Assertion failed) !empty() in function 'cv::CascadeClassifier::detectMultiScale'
