# In [62]:
# ########################################################
# Author: José A. Ferreira Queimada
# e-mail: joseafq@hotmail.com
# Description: 
#    Detect if a medical face mask is ON or OFF applying
#    a pre-trained ResNet50-caffemodel to a video stream
# ########################################################
# Credits on the model: 
#   Author: Didi Chuxing
#   Github: https://github.com/didi/maskdetection
#   Description:
#         Developed by DiDi AI team, the mask detection technology is based on DFS face detection algorithm 
#         and the face attributes recognition algorithm DiDi employs on its platform. 
#         It uses weighted loss function and data augmentation methods to deal with different mask types and 
#         uneven mask data during the day and the night.
#         The system can identify non-mask images with 99.5 per cent accuracy.
#         The model was trained on a dataset of 200,000 faces to ensure its robustness.
# ########################################################


# Import the necessary libraries
import numpy as np
import cv2 

# Labels of the network, keyed by the class id that the detection loop
# derives from the model output.
classNames = {0: 'NO FACE MASK', 1: 'FACE MASK ON'}

# Caffe network definition and pre-trained weights (relative paths, so they
# are looked up from the current working directory).
args_prototxt = "face_mask.prototxt"
args_weights = "face_mask.caffemodel"

# Load the Caffe model BEFORE acquiring the camera: if loading fails, the
# capture device has not been opened yet and therefore cannot be left held
# open (which matters in a long-lived interpreter/notebook session).
net = cv2.dnn.readNetFromCaffe(args_prototxt, args_weights)

# Open video file or capture device (0 = default camera).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail early with a clear message instead of crashing later on an empty
    # frame inside the processing loop.
    cap.release()
    raise RuntimeError("Could not open video capture device 0")

# Read the video frame by frame and pass each frame to the network for a
# prediction. The dnn module makes it easy to run a deep learning network
# from OpenCV.

# Settings that do not change between frames are set up once, outside the loop.
# Why 224x224?... take a look at the face_mask.prototxt "dimensions" section.
# It's the first one.
input_size = (224, 224)
# Per-channel mean values handed to blobFromImage for mean subtraction.
mean_values = (104, 117, 123)
# Accuracy limit: scores below it are reported as 'NO FACE MASK'.
the_limit = 0.9
font = cv2.FONT_HERSHEY_SIMPLEX
this_is_red = (0, 0, 255)    # OpenCV colours are given in BGR order
this_is_green = (0, 255, 0)
window_title = "FACE MASK DETECTOR - Press 'q' to exit"

try:
    while True:
        # Capture frame-by-frame; stop when no frame could be grabbed (device
        # unavailable/disconnected or end of a video file) instead of
        # crashing on an empty frame.
        ret, frame = cap.read()
        if not ret or frame is None:
            break

        # The network requires a fixed input size, so the frame is resized to
        # 224x224 pixels for prediction; the original frame is kept for display.
        frame_resized = cv2.resize(frame, input_size)
        blob = cv2.dnn.blobFromImage(frame_resized, 1, input_size, mean_values, False)

        # Set the input blob on the network and run the prediction
        net.setInput(blob)
        detections = net.forward()

        # Flatten the scores of the (single) input image and take the top one.
        # NOTE(review): the thresholding below treats that score as the
        # "mask on" probability, i.e. it assumes the model emits one value per
        # frame - confirm against face_mask.prototxt.
        scores = np.ravel(detections[0])
        idx = int(np.argmax(scores))
        top_score = scores[idx]

        # Decide once, on a scalar, and reuse the result for label and colour
        # (comparing the whole array would raise for multi-element outputs).
        class_id = 0 if top_score < the_limit else 1
        text_color = this_is_red if class_id == 0 else this_is_green

        # Setup text
        text = "{}, {:.2f}%".format(classNames[class_id], top_score * 100)

        # Get boundary of this text.
        # NOTE(review): the size is measured at font scale 1 while the text is
        # drawn at scale 0.7; left as-is to keep the on-screen position.
        textsize = cv2.getTextSize(text, font, 1, 2)[0]

        # Get coords based on boundary: fixed row near the top of the frame
        textX = int((frame.shape[1] - textsize[0]) / 1.4)
        textY = 35

        # Draw the text, coloured according to the outcome of the model
        cv2.putText(frame, text, (textX, textY), font, 0.7, text_color, 2)

        # Display the annotated frame
        cv2.imshow(window_title, frame)

        # If 'q' is pressed, finish the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and close the windows, even if an error
    # interrupted the loop.
    cap.release()
    cv2.destroyAllWindows()