# Face Mask Detection using MobileNet

In [1]:
#Defining our data generators
from __future__ import print_function
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
import os

img_rows, img_cols = 128,128
batch_size = 16

train_data_dir = './dataset/train'
validation_data_dir = './dataset/validation'

# Let's use some data augmentation and define our generators
train_datagen = ImageDataGenerator(
      rescale=1./255,
      rotation_range=30,
      shear_range=0.3,
      zoom_range=0.3,
      width_shift_range=0.4,
      height_shift_range=0.4,
      horizontal_flip=True,
      fill_mode='nearest')
 
validation_datagen = ImageDataGenerator(rescale=1./255)
 
train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_rows, img_cols),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True)

validation_generator = validation_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_rows, img_cols),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True)

train_images = train_generator.samples
validation_images = validation_generator.samples

Using TensorFlow backend.


Found 3146 images belonging to 2 classes.
Found 700 images belonging to 2 classes.


# Loading the MobileNet

In [2]:
"""
    We will use the pretrained MobileNet to which we will add our own network of FC layers.
    We then train the new model formed for the additional layers.

"""
from keras.applications import MobileNet

# Lets load the MobileNet model without the top or FC layers
MobileNet = MobileNet(weights = 'imagenet', 
                 include_top = False, 
                 input_shape = (img_rows, img_cols, 3))

# Layers are set to be trainable as True by default but lets make them untrainable
for layer in MobileNet.layers:
    layer.trainable = False
    
# Let's check our layers 
for (i,layer) in enumerate(MobileNet.layers):
    print(str(i) + " "+ layer.__class__.__name__, layer.trainable)

0 InputLayer False
1 ZeroPadding2D False
2 Conv2D False
3 BatchNormalization False
4 ReLU False
5 DepthwiseConv2D False
6 BatchNormalization False
7 ReLU False
8 Conv2D False
9 BatchNormalization False
10 ReLU False
11 ZeroPadding2D False
12 DepthwiseConv2D False
13 BatchNormalization False
14 ReLU False
15 Conv2D False
16 BatchNormalization False
17 ReLU False
18 DepthwiseConv2D False
19 BatchNormalization False
20 ReLU False
21 Conv2D False
22 BatchNormalization False
23 ReLU False
24 ZeroPadding2D False
25 DepthwiseConv2D False
26 BatchNormalization False
27 ReLU False
28 Conv2D False
29 BatchNormalization False
30 ReLU False
31 DepthwiseConv2D False
32 BatchNormalization False
33 ReLU False
34 Conv2D False
35 BatchNormalization False
36 ReLU False
37 ZeroPadding2D False
38 DepthwiseConv2D False
39 BatchNormalization False
40 ReLU False
41 Conv2D False
42 BatchNormalization False
43 ReLU False
44 DepthwiseConv2D False
45 BatchNormalization False
46 ReLU False
47 Conv2D False
48 Batc

In [3]:
def addTopModelMobileNet(bottom_model):
    """
    
        Creates the head of the model that will be 
        placed ontop of the bottom layers
    
    """

    top_model = bottom_model.output
    top_model = GlobalAveragePooling2D()(top_model)
    top_model = Dense(512,activation='relu')(top_model)
    top_model = Dense(128,activation='relu')(top_model)
    top_model = Dense(64,activation='relu')(top_model)
    
    top_model = Dense(2,activation='sigmoid')(top_model)
    return top_model


#Combining the model
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers.normalization import BatchNormalization
from keras.models import Model

FC_Head = addTopModelMobileNet(MobileNet)

model = Model(inputs = MobileNet.input, outputs = FC_Head)

print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 128, 128, 3)       0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 129, 129, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 64, 64, 32)        864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 64, 64, 32)        128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 64, 64, 32)        0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 64, 64, 32)        288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 64, 64, 32)        128       
__________

# Training the model

In [4]:
#Training our model
#One could do some tweakings!
from keras.optimizers import RMSprop, SGD,Adadelta
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

                     
checkpoint = ModelCheckpoint("face_mask_detector.h5",
                             monitor="val_loss",
                             mode="min",
                             save_best_only = True,
                             verbose=1)

earlystop = EarlyStopping(monitor = 'val_loss', 
                          min_delta = 0, 
                          patience = 3,
                          verbose = 1,
                          restore_best_weights = True)

reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.1, patience = 2, verbose = 1, min_delta = 0.0001)

callbacks = [earlystop, checkpoint, reduce_lr]

nb_train_samples = train_images
nb_validation_samples = validation_images
batch_size = 16
epochs = 20

model.compile(loss = 'categorical_crossentropy',
                  optimizer = 'rmsprop',
                  metrics = ['accuracy'])

history = model.fit_generator(
    train_generator,
    steps_per_epoch = nb_train_samples // batch_size,
    epochs = epochs,
    callbacks = callbacks,
    validation_data = validation_generator,
    validation_steps = nb_validation_samples // batch_size)


Epoch 1/20
 39/196 [====>.........................] - ETA: 1:27 - loss: 0.7290 - acc: 0.5994

  'to RGBA images')



Epoch 00001: val_loss improved from inf to 0.61789, saving model to face_mask_detector.h5
Epoch 2/20

Epoch 00002: val_loss improved from 0.61789 to 0.37627, saving model to face_mask_detector.h5
Epoch 3/20

Epoch 00003: val_loss did not improve from 0.37627
Epoch 4/20

Epoch 00004: val_loss improved from 0.37627 to 0.34036, saving model to face_mask_detector.h5
Epoch 5/20

Epoch 00005: val_loss did not improve from 0.34036
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.34036

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 7/20
Restoring model weights from the end of the best epoch

Epoch 00007: val_loss did not improve from 0.34036
Epoch 00007: early stopping


# Testing the model

In [5]:
# Loading our model
from keras.models import load_model
classifier = load_model('face_mask_detector.h5')

In [8]:
#Test on realtime video
from os import listdir
from os.path import isfile, join
import os
import cv2
import numpy as np

# A function that puts the predicted class lables on the parametric image frames
def draw_label(image, point, label, font=cv2.FONT_HERSHEY_SIMPLEX,
               font_scale=0.8, thickness=1):
    size = cv2.getTextSize(label, font, font_scale, thickness)[0]
    x, y = point
    cv2.rectangle(image, (x, y - size[1]), (x + size[0], y), (255, 0, 0), cv2.FILLED)
    cv2.putText(image, label, point, font, font_scale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
    
#Define our prediction dictionary
face_classes = {0: 'with_mask', 1: 'without_mask'}
img_size = 128

detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

cap = cv2.VideoCapture(0)

while True:
    
    ret, frame = cap.read()
    preprocessed_faces = []           
 
    input_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img_h, img_w, _ = np.shape(frame)
    detected = detector.detectMultiScale(frame)
    faces = np.empty((len(detected), img_size, img_size, 3))
   
    if len(detected) > 0:
        
        for f in detected:
            
            # Obtain the coordinates of the detected face and draw a bounding box
            x1, y1, w, h = [v for v in f]
            cv2.rectangle(frame, (x1, y1), (x1+w,y1+h), (255, 0, 0), 2)
            face =  frame[y1:y1+h, x1:x1+w, :]
            face = cv2.resize(face, (img_rows,img_cols))
            face = face.astype("float32") / 255.0
            face = np.expand_dims(face, axis=0)
            preprocessed_faces.append(face)

        
        # Make predictions for the detected face 
        face_labels = []
        for i, d in enumerate(detected):
            preds = classifier.predict(preprocessed_faces[i])[0]
            face_labels.append(face_classes[np.argmax(preds,axis=0)])
            print(preds)
        
        # Putting labels on frames
        for i, d in enumerate(detected):
            label = "{}".format(face_labels[i])
            print(label)
            draw_label(frame, (x1,y1), label)
    
    #Display the results
    cv2.imshow("Face Mask Recognition", frame)
    if cv2.waitKey(1) == 13: #13 is the Enter Key
        break

cap.release()
cv2.destroyAllWindows()      

[1.576768e-04 9.999989e-01]
without_mask
[2.6911663e-04 9.9999893e-01]
without_mask
[4.7888530e-05 9.9999964e-01]
without_mask
[1.255241e-04 9.999994e-01]
without_mask
[0.00301612 0.999997  ]
without_mask
[1.7908857e-04 9.9999857e-01]
without_mask
[0.00255569 0.99999094]
without_mask
[7.0412440e-04 9.9999774e-01]
without_mask
[1.4995376e-04 9.9999869e-01]
without_mask
[2.3343964e-05 9.9999976e-01]
without_mask
[1.9685271e-04 9.9999785e-01]
without_mask
[0.01742642 0.99998105]
without_mask
[0.03843908 0.999964  ]
without_mask
[0.03059829 0.9999485 ]
without_mask
[0.00670263 0.99998224]
without_mask
[0.00302822 0.99999475]
without_mask
[0.00114709 0.9999944 ]
without_mask
[1.5519311e-04 9.9999917e-01]
without_mask
[2.2079781e-05 9.9999976e-01]
without_mask
[1.0655540e-05 9.9999976e-01]
without_mask
[1.9795671e-06 9.9999976e-01]
without_mask
[1.2678189e-05 9.9999952e-01]
without_mask
[3.758627e-05 9.999995e-01]
without_mask
[6.9458777e-04 9.9999917e-01]
without_mask
[1.4203678e-04 9.99997

[1.5199448e-04 9.9999809e-01]
without_mask
[4.3713844e-06 9.9999976e-01]
without_mask
[1.1152334e-06 9.9999988e-01]
without_mask
[0.00106093 0.9999938 ]
without_mask
[2.5963371e-05 9.9999917e-01]
without_mask
[7.8373705e-04 9.9999154e-01]
without_mask
[2.9515013e-05 9.9999964e-01]
without_mask
[3.690771e-05 9.999995e-01]
without_mask
[4.2618063e-04 9.9999523e-01]
without_mask
[9.271666e-05 9.999989e-01]
without_mask
[1.7294467e-04 9.9999869e-01]
without_mask
[0.00154102 0.9999933 ]
without_mask
[0.00279597 0.9999887 ]
without_mask
[4.3586144e-05 9.9999928e-01]
without_mask
[1.8569910e-05 9.9999964e-01]
without_mask
[9.564017e-06 9.999994e-01]
without_mask
[0.02515906 0.99996424]
without_mask
[4.4292808e-04 9.9999714e-01]
without_mask
[0.00167587 0.99999356]
without_mask
[7.4042374e-04 9.9999607e-01]
without_mask
[8.700983e-05 9.999988e-01]
without_mask
[0.01024633 0.9999777 ]
without_mask
[0.00274657 0.9999956 ]
without_mask
[3.3894341e-04 9.9999785e-01]
without_mask
[0.00783181 0.9999

[6.633980e-05 9.999981e-01]
without_mask
[3.3450917e-06 9.9999952e-01]
without_mask
[4.029209e-05 9.999976e-01]
without_mask
[6.8841800e-05 9.9999917e-01]
without_mask
[0.00256899 0.9999908 ]
without_mask
[4.9973969e-05 9.9999857e-01]
without_mask
[1.0464421e-04 9.9999821e-01]
without_mask
[2.2751144e-04 9.9999797e-01]
without_mask
[5.394118e-05 9.999993e-01]
without_mask
[6.1531227e-06 9.9999976e-01]
without_mask
[2.4598357e-05 9.9999917e-01]
without_mask
[6.362889e-05 9.999995e-01]
without_mask
[2.7133554e-05 9.9999964e-01]
without_mask
[2.2962579e-04 9.9999917e-01]
without_mask
[5.162734e-05 9.999995e-01]
without_mask
[9.796721e-06 9.999999e-01]
without_mask
[1.0137172e-06 1.0000000e+00]
without_mask
[4.928766e-06 9.999999e-01]
without_mask
[5.1737511e-06 9.9999976e-01]
without_mask
[8.8385132e-06 9.9999976e-01]
without_mask
[4.05294e-07 1.00000e+00]
without_mask
[9.977749e-05 9.999993e-01]
without_mask
[4.5433192e-04 9.9999785e-01]
without_mask
[2.0709865e-06 9.9999988e-01]
without

[0.9998878  0.02043229]
with_mask
[0.9974583  0.54708934]
with_mask
[0.9992945  0.17451793]
with_mask
[0.9998685  0.04324101]
with_mask
[0.9989808  0.25153863]
with_mask
[0.9995845  0.09880426]
with_mask
[0.99787307 0.6154733 ]
with_mask
[0.9994696  0.17481188]
with_mask
[0.9998425  0.02063629]
with_mask
[0.9999701  0.00632897]
with_mask
[0.99948716 0.12431333]
with_mask
[0.99971443 0.04964601]
with_mask
[0.99976784 0.05131686]
with_mask
[0.999759   0.06519525]
with_mask
[0.9998596  0.02623267]
with_mask
[0.99983656 0.03825286]
with_mask
[0.9999201  0.01549731]
with_mask
[0.9998185 0.0431028]
with_mask
[0.99974674 0.06538045]
with_mask
[0.9995461  0.10752773]
with_mask
[0.9998989  0.01752444]
with_mask
[0.99992204 0.01695182]
with_mask
[0.9953766  0.74369794]
with_mask
[0.9993467  0.17320594]
with_mask
[0.99997056 0.0061856 ]
with_mask
[0.99838305 0.51887095]
with_mask
[0.99998796 0.00136133]
with_mask
[0.9995925 0.10598  ]
with_mask
[0.99966943 0.07101982]
with_mask
[0.99953735 0.1267

<img src = 'test.png'>

<img src = 'test2.png'>

In [9]:
#You can use some images for testing also using this part of the code
from os import listdir
from os.path import isfile, join
import os
import cv2
import numpy as np

# A function that puts the predicted class lables on the parametric image frames
def draw_label(image, point, label, font=cv2.FONT_HERSHEY_SIMPLEX,
               font_scale=0.8, thickness=1):
    size = cv2.getTextSize(label, font, font_scale, thickness)[0]
    x, y = point
    cv2.rectangle(image, (x, y - size[1]), (x + size[0], y), (255, 0, 0), cv2.FILLED)
    cv2.putText(image, label, point, font, font_scale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
    
#Define our prediction dictionary
face_classes = {0: 'with_mask', 1: 'without_mask'}
img_size = 128

detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    
frame = cv2.imread('test2.png')
preprocessed_faces = []           

input_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
img_h, img_w, _ = np.shape(frame)
detected = detector.detectMultiScale(frame)
faces = np.empty((len(detected), img_size, img_size, 3))

if len(detected) > 0:

    for f in detected:

        # Obtain the coordinates of the detected face and draw a bounding box
        x1, y1, w, h = [v for v in f]
        cv2.rectangle(frame, (x1, y1), (x1+w,y1+h), (255, 0, 0), 2)
        face =  frame[y1:y1+h, x1:x1+w, :]
        face = cv2.resize(face, (img_rows,img_cols))
        face = face.astype("float32") / 255.0
        face = np.expand_dims(face, axis=0)
        preprocessed_faces.append(face)


    # Make predictions for the detected face 
    face_labels = []
    for i, d in enumerate(detected):
        preds = classifier.predict(preprocessed_faces[i])[0]
        face_labels.append(face_classes[np.argmax(preds,axis=0)])
        print(preds)

    # Putting labels on frames
    for i, d in enumerate(detected):
        label = "{}".format(face_labels[i])
        print(label)
        draw_label(frame, (x1,y1), label)

# Display the results
cv2.imshow("Face Recognition", frame)
cv2.waitKey(0) == 13


cap.release()
cv2.destroyAllWindows()      

[0.99976736 0.05862271]
with_mask
