In [1]:
import numpy as np
import cv2
import imutils
import time
import os
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream

# Loading the pre-trained FaceNet and MaskNet Models

In [2]:
# Build the model paths with os.path.join instead of hard-coded backslashes:
# backslash-separated paths only resolve on Windows, and the '\M' in the
# original non-raw MaskNet path string is an invalid escape sequence.
path1 = os.path.join('FaceNetSavedModel', '1.prototxt')
path2 = os.path.join('FaceNetSavedModel', '2.caffemodel')

# Face detector, loaded through OpenCV's dnn module from the two files above.
FaceNet = cv2.dnn.readNet(path1, path2)

# Mask classifier, loaded through Keras' load_model.
MaskNet = load_model(os.path.join('MaskNetSavedModel', 'MaskNet.model'))

# Function responsible for detecting faces and predicting whether a mask is worn

In [3]:
def DetectFacePredictMask(frame, FaceNet, MaskNet, min_confidence=0.5):
    """Detect faces in ``frame`` and run the mask classifier on each of them.

    Parameters
    ----------
    frame : numpy.ndarray or None
        Image indexed as ``frame[row, col, channel]``. Each crop is converted
        with ``cv2.COLOR_BGR2RGB``, so the frame is presumably in BGR order
        (TODO confirm against the capture source).
    FaceNet : object
        Face detector exposing ``setInput(blob)`` and ``forward()``. The
        result of ``forward()`` is read as ``detections[0, 0, i, 2]`` for the
        confidence and ``detections[0, 0, i, 3:7]`` for the box, which is
        scaled by the frame width/height.
    MaskNet : object
        Classifier exposing ``predict(batch, batch_size=...)``.
    min_confidence : float, optional
        Detections whose confidence is not strictly greater than this value
        are ignored. Defaults to 0.5.

    Returns
    -------
    tuple
        ``(locations, predictions)``. ``locations`` is a list of
        ``(X_start, Y_start, X_end, Y_end)`` integer tuples clamped to the
        frame. ``predictions`` is whatever ``MaskNet.predict`` returns for the
        stacked face crops, or an empty list when no usable face was found.
        Both sequences are in the same order.
    """
    # Nothing to analyse (e.g. the capture device returned no frame).
    if frame is None or frame.size == 0:
        return ([], [])

    (height, width) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224), (104.0, 177.0, 123.0))

    FaceNet.setInput(blob)
    detections = FaceNet.forward()

    faces = []
    locations = []
    predictions = []

    # Loop-invariant factor that maps the detector's box values to pixels.
    scale = np.array([width, height, width, height])

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if not confidence > min_confidence:
            continue

        box = detections[0, 0, i, 3:7] * scale
        (X_start, Y_start, X_end, Y_end) = (int(v) for v in box.astype('int'))

        # Clamp the box to the frame.
        (X_start, Y_start) = (max(0, X_start), max(0, Y_start))
        (X_end, Y_end) = (min(width - 1, X_end), min(height - 1, Y_end))

        # A box that lies outside the frame (or is inverted) yields an empty
        # crop, on which cv2.cvtColor / cv2.resize would fail; skip it.
        if X_end <= X_start or Y_end <= Y_start:
            continue

        face = frame[Y_start:Y_end, X_start:X_end]
        face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
        face = cv2.resize(face, (224, 224))
        face = img_to_array(face)
        face = preprocess_input(face)

        faces.append(face)
        locations.append((X_start, Y_start, X_end, Y_end))

    # Classify all crops in a single batched call.
    if len(faces) > 0:
        faces = np.array(faces, dtype='float32')
        predictions = MaskNet.predict(faces, batch_size=32)

    return (locations, predictions)

# Main loop: detects faces, predicts whether a mask is worn, and displays the result on screen

In [None]:
video_stream = VideoStream(src=0).start()

# Create and size the display window once; doing it on every frame is
# redundant work and keeps resetting the size if the user resizes the window.
cv2.namedWindow('MaskNet', cv2.WINDOW_NORMAL)
cv2.resizeWindow('MaskNet', 800, 600)

try:
    while True:
        frame = video_stream.read()
        if frame is None:
            # imutils.resize would fail on a missing frame; stop cleanly instead.
            print('No frame received from the video stream; stopping.')
            break
        frame = imutils.resize(frame, width=400)

        (locations, predictions) = DetectFacePredictMask(frame, FaceNet, MaskNet)

        for (box, pred) in zip(locations, predictions):
            (X_start, Y_start, X_end, Y_end) = box

            # The thresholding below treats each prediction as one value;
            # presumably the probability that a mask is worn (TODO confirm
            # against the MaskNet output layer). np.squeeze + float raises
            # if the prediction holds more than one value.
            mask_prob = float(np.squeeze(pred))

            if mask_prob >= 0.5:
                label = f"Mask: {mask_prob * 100:.2f}%"
                color = (0, 255, 0)
            else:
                label = f"No Mask: {(1 - mask_prob) * 100:.2f}%"
                color = (0, 0, 255)

            # Keep the text origin inside the frame for faces near the
            # top/left border (the original offsets could go negative).
            text_origin = (max(0, X_start - 55), max(20, Y_start - 10))

            cv2.putText(frame, label, text_origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
            cv2.rectangle(frame, (X_start, Y_start), (X_end, Y_end), color, 2)

        cv2.imshow('MaskNet', frame)
        key = cv2.waitKey(1) & 0xFF

        # quit using 'Q'
        if key == ord('q'):
            break
finally:
    # Always release the window and the capture thread, even when an
    # exception (or a kernel interrupt) aborts the loop.
    cv2.destroyAllWindows()
    video_stream.stop()