**IMPORTING THE REQUIRED PACKAGES**

In [2]:
import os
import cv2
import time
import imutils  # used for image processing like resizing,scaling,rotation etc
import numpy as np  # mathematical op.

In [3]:
from imutils.video import VideoStream
from tensorflow.keras.models import load_model  #use any pretrained model
from tensorflow.keras.preprocessing.image import img_to_array   # converting image to array or in the same format which can be used by mobilenet model
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input  # preprocess input makes the image precessable by mobilenet

**LOADING THE FACE DETECTION MODEL**

In [4]:
# WE CAN ALSO USE THE HOG CLASSIFIER OR HAARCASCADE FOR DETECTING THE FACE

# but here we use transfer learning with pretrained model
prototxtPath = r"face_detector\deploy.prototxt"
weightsPath = r"face_detector\res10_300x300_ssd_iter_140000.caffemodel"
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)

**LOADING THE MASK DETECTION MODEL**

In [5]:
# load the face mask detector model from disk
maskNet = load_model("facemask_detector.model")

**DEFINING THE FUNCTION FOR FINDING FACE AND MASK DETECTION AND PREDICTING THE CATEGORY**
1. Finding dimensions of the frame and then constructing a blob from it
2. Passing the blob through our network and obtaining the face detections
3. Initializing our list of faces, getting thier corresponding locations and the list of predictions from our face mask network
4. Looping over to find the detections
5. Filtering out weak detections by setting threshold for minimum confidence
6. Ensuring the bounding boxes fall within the dimensions of the frame
7. Extracting the face ROI, converting it from BGR to RGB, resize it to 224x224, and preprocessing it
8. Adding the face and bounding boxes to their respective lists
9. Making batch predictions on all faces at the same time
10. Returning a tuple with face locations and their corresponding locations

In [6]:
def find_and_predict_facemask(frame, faceNet, maskNet):
    # finding dimensions of the frame and then constructing a blob from it
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224),
        (104.0, 177.0, 123.0))

    # passing the blob through our network and obtaining the face detections
    faceNet.setInput(blob)
    detections = faceNet.forward()
    print(detections.shape)

    # initializing our list of faces, getting thier corresponding locations and the list of predictions from our face mask network
    faces = []
    locs = []
    preds = []

    # looping over to find the detections
    for i in range(0, detections.shape[2]):
        # finding the confidence associated with the detection
        confidence = detections[0, 0, i, 2]

        # filtering out weak detections by setting threshold for minimum confidence
        if confidence > 0.5:
            # computing the (x, y)coordinates of the bounding box for the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # ensuring the bounding boxes fall within the dimensions of the frame
            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))

            # extract the face ROI, converting it from BGR to RGB, resize it to 224x224, and preprocessing it
            face = frame[startY:endY, startX:endX]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face, (224, 224))
            face = cv2.flip(face,1)
            face = img_to_array(face)
            face = preprocess_input(face)

            # adding the face and bounding boxes to their respective lists
            faces.append(face)
            locs.append((startX, startY, endX, endY))

    # only make a predictions if at least one face was detected
    if len(faces) > 0:
        # make batch predictions on all faces at the same time
        faces = np.array(faces, dtype="float32")
        preds = maskNet.predict(faces, batch_size=32)

    # return a tuple with face locations and their corresponding locations
    return (locs, preds)

**STARTING THE VIDEO AND RUNNING OUR FUNCTION**
1. Grabbing the frame from the threaded video stream and resize it to have a maximum width of 800 pixels
2. Detecting faces in the frame and checking if they are wearing face mask or not
3. Looping over the detected face locations and their corresponding locations
4. Determine the class label and color we'll use to draw the bounding box and text
5. Displaying the label and bounding box rectangle on the output frame

In [8]:
# initialize the video stream
print("WAIT..! starting video stream...")
vs = VideoStream(src=0).start()
while True:
    # grabbing the frame from the threaded video stream and resize it to have a maximum width of 400 pixels
    frame = vs.read()
    frame = imutils.resize(frame, width=400)
    frame = cv2.flip(frame,1)

    # detecting faces in the frame and checking if they are wearing face mask or not
    (locs, preds) = find_and_predict_facemask(frame, faceNet, maskNet)

    # looping over the detected face locations and their corresponding locations
    for (box, pred) in zip(locs, preds):
        # unpacking the bounding box and predictions
        (startX, startY, endX, endY) = box
        (mask, withoutMask) = pred

        # determine the class label and color we'll use to draw the bounding box and text
        label = "MASK" if mask > withoutMask else "NO MASK"
        color = (0, 255, 0) if label == "MASK" else (0, 0, 255)

        # include the probability in the label
        # label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)

        # displaying the label and bounding box rectangle on the output frame
        cv2.putText(frame, label, (startX, startY - 10),
        cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
        cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

    # showing the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break
cv2.destroyAllWindows()
vs.stop()

WAIT..! starting video stream...
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 200, 7)
(1, 1, 