In [5]:
# import the necessary packages
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import numpy as np
import imutils
import time
import cv2
import os

In [6]:
def detect_and_predict_mask(frame, faceNet, maskNet):
	# grab the dimensions of the frame and then construct a blob
	# from it

    (h, w) = frame.shape[:2] # Frame usually contains (h,w,channel=3) ,Here It will take height and weight.
    blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224),(104.0, 177.0, 123.0)) #Scale Factor: 1.0 (no scaling of pixel values) , Mean Values: (104.0, 177.0, 123.0) (subtract these mean values from each channel).

	# pass the blob through the network and obtain the face detections
    faceNet.setInput(blob) #faceNet.setInput(blob) sets this blob as the input to the face detection network.
    detections = faceNet.forward() # runs the network to get face detections.
    print(detections.shape)  #detections.shape is (1, 1, 200, 7) ,1: There is one batch (as typically expected) ,1: The detection layer output,200: The network predicts up to 200 potential detections per image.
#7: For each detection, the array contains 7 values: batch index, class ID, confidence score, and the coordinates of the bounding box (startX, startY, endX, endY)

	# initialize our list of faces, their corresponding locations,
	# and the list of predictions from our face mask network
    faces = []
    locs = []
    preds = []

	# loop over the detections
    for i in range(0, detections.shape[2]):
		# extract the confidence (i.e., probability) associated with
		# the detection
        confidence = detections[0, 0, i, 2] # It gives 0 to 1 value and [0, 0, i, 2] this 2 refers to confidence score

		# filter out weak detections by ensuring the confidence is
		# greater than the minimum confidence
        if confidence > 0.5: 
			# compute the (x, y)-coordinates of the bounding box for
			# the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])  # detections[0, 0, i, 3:7] contains (startX, startY, endX, endY) : Purpose: extract the dimensions for face 
            (startX, startY, endX, endY) = box.astype("int")

			# ensure the bounding boxes fall within the dimensions of the frame.
            (startX, startY) = (max(0, startX), max(0, startY)) # Ensure dimensions are within the range (no negative)
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))

			# extract the face ROI, convert it from BGR to RGB channel
			# ordering, resize it to 224x224, and preprocess it
            face = frame[startY:endY, startX:endX] #This face already contain h and w ,It will fetch particular h and w. From frame ,It will take face image.
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)  # changes from bgr to rgb.
            face = cv2.resize(face, (224, 224)) # 224,224 is target image size and defined in training dataset.
            face = img_to_array(face) #Image 224*224 to flatten
            face = preprocess_input(face) #This step ensures the face image is properly scaled or normalized according to the model’s requirements.

			# add the face and bounding boxes to their respective
			# lists
            faces.append(face) 
            locs.append((startX, startY, endX, endY)) #In summary, these lists (faces and locs) are used to manage and process multiple face detections in a single frame efficiently.

	# only make a predictions if at least one face was detected
    if len(faces) > 0:
		# for faster inference we'll make batch predictions on *all*
		# faces at the same time rather than one-by-one predictions
		# in the above `for` loop
        faces = np.array(faces, dtype="float32") # process follow before base model creation 0r preprocess technique.
        preds = maskNet.predict(faces, batch_size=32) 

	# return a 2-tuple of the face locations and their corresponding
	# locations
    return (locs, preds)

In [10]:
# load our serialized face detector model from disk
prototxtPath = r"C:\HopeAI\7.Deep Learning\3.FaceMask\Facemask\face_detector\deploy.prototxt"  #specifies the layers and structure of the neural network.
weightsPath = r"C:\HopeAI\7.Deep Learning\3.FaceMask\Facemask\face_detector\res10_300x300_ssd_iter_140000.caffemodel" # trained weights of the network.
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)  #OpenCV library's deep neural network (dnn) module. It is used to load a pre-trained network from specified files

# load the face mask detector model from disk
maskNet = load_model(r"C:\HopeAI\7.Deep Learning\3.FaceMask\Facemask\Face_Mask_Detector.h5")

# initialize the video stream
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)

[INFO] starting video stream...


In [None]:
# loop over the frames from the video stream
#while True:
    # grab the frame from the threaded video stream and resize it
    # to have a maximum width of 400 pixels
    frame = vs.read()
    frame = imutils.resize(frame, width=400)

    # detect faces in the frame and determine if they are wearing a
    # face mask or not
    (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)

    # loop over the detected face locations and their corresponding
    # locations
    for (box, pred) in zip(locs, preds):
        # unpack the bounding box and predictions
        (startX, startY, endX, endY) = box
        (mask, withoutMask) = pred

        # determine the class label and color we'll use to draw
        # the bounding box and text
        label = "Mask" if mask > withoutMask else "No Mask"    	#if mask > withoutMask:  label = "Mask  else:  label = "No Mask"
        color = (0, 255, 0) if label == "Mask" else (0, 0, 255)

        # include the probability in the label
        #label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
        
        # display the label and bounding box rectangle on the output
        # frame
        if(label=="Mask"):
            cv2.putText(frame,"Mask: You are allowed", (startX, startY - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
        elif(label=="No Mask"):
            lab="No Mask: You are not allowed"
            cv2.putText(frame, lab, (startX, startY - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# do a bit of cleanup
cv2.destroyAllWindows()

vs.release()

(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200