In [1]:
from __future__ import print_function
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
import os

img_rows, img_cols = 48,48
batch_size = 16

train_data_dir = './datasets/train'
validation_data_dir = './datasets/validation'

# Let's use some data augmentation and define our generators
train_datagen = ImageDataGenerator(
      rescale=1./255,
      rotation_range=30,
      shear_range=0.3,
      zoom_range=0.3,
      width_shift_range=0.4,
      height_shift_range=0.4,
      horizontal_flip=True,
      fill_mode='nearest')
 
validation_datagen = ImageDataGenerator(rescale=1./255)
 
train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_rows, img_cols),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True)

validation_generator = validation_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_rows, img_cols),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True)

train_images = train_generator.samples
validation_images = validation_generator.samples

Using TensorFlow backend.


Found 1320 images belonging to 2 classes.
Found 60 images belonging to 2 classes.


In [2]:
#Make a dictionary for later use in predictions
class_labels = validation_generator.class_indices
class_labels = {v: k for k, v in class_labels.items()}
classes = list(class_labels.values())
print(class_labels)

{0: 'with_mask', 1: 'without_mask'}


In [5]:
#Let us create our model
import numpy as np
import tensorflow as tf
from keras.models import Sequential,Model
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense,Dropout,Softmax,Flatten,Activation,BatchNormalization
import keras.backend as K

model = Sequential()

model.add(Conv2D(32, (3, 3),padding = 'same',input_shape=(img_rows,img_cols,3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3),padding = 'same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

model.add(Conv2D(64, (3, 3),padding = 'same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3),padding = 'same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
# model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('softmax'))

print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 48, 48, 32)        896       
_________________________________________________________________
activation_5 (Activation)    (None, 48, 48, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 48, 48, 32)        9248      
_________________________________________________________________
activation_6 (Activation)    (None, 48, 48, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 24, 24, 32)        0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 24, 24, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 24, 24, 64)        18496     
__________

In [6]:
#Training our model
#This model has been trained only for one epoch. One could do some tweakings!
from keras.optimizers import RMSprop, SGD,Adadelta
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

                     
checkpoint = ModelCheckpoint("mask_detector.h5",
                             monitor="val_loss",
                             mode="min",
                             save_best_only = True,
                             verbose=1)

earlystop = EarlyStopping(monitor = 'val_loss', 
                          min_delta = 0, 
                          patience = 3,
                          verbose = 1,
                          restore_best_weights = True)

reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.1, patience = 2, verbose = 1, min_delta = 0.0001)

callbacks = [earlystop, checkpoint, reduce_lr]

nb_train_samples = train_images
nb_validation_samples = validation_images
batch_size = 16
epochs = 20

model.compile(loss = 'categorical_crossentropy',
                  optimizer = RMSprop(0.001),
                  metrics = ['accuracy'])

history = model.fit_generator(
    train_generator,
    steps_per_epoch = nb_train_samples // batch_size,
    epochs = epochs,
    callbacks = callbacks,
    validation_data = validation_generator,
    validation_steps = nb_validation_samples // batch_size)


Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.67165, saving model to mask_detector.h5
Epoch 2/20

Epoch 00002: val_loss improved from 0.67165 to 0.32809, saving model to mask_detector.h5
Epoch 3/20

Epoch 00003: val_loss improved from 0.32809 to 0.22598, saving model to mask_detector.h5
Epoch 4/20

Epoch 00004: val_loss improved from 0.22598 to 0.08052, saving model to mask_detector.h5
Epoch 5/20

Epoch 00005: val_loss did not improve from 0.08052
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.08052

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 7/20

Epoch 00007: val_loss improved from 0.08052 to 0.04744, saving model to mask_detector.h5
Epoch 8/20

Epoch 00008: val_loss did not improve from 0.04744
Epoch 9/20

Epoch 00009: val_loss improved from 0.04744 to 0.02957, saving model to mask_detector.h5
Epoch 10/20

Epoch 00010: val_loss did not improve from 0.02957
Epoch 11/20

Epoch 00011: val_loss did not improve from 0.02957

In [7]:
# Loading our model
from keras.models import load_model
classifier = load_model('mask_detector.h5')

In [8]:
#Test on realtime video
from os import listdir
from os.path import isfile, join
import os
import cv2
import numpy as np
import dlib


def draw_label(image, point, label, font=cv2.FONT_HERSHEY_SIMPLEX,
               font_scale=0.8, thickness=1):
    size = cv2.getTextSize(label, font, font_scale, thickness)[0]
    x, y = point
    cv2.rectangle(image, (x, y - size[1]), (x + size[0], y), (255, 0, 0), cv2.FILLED)
    cv2.putText(image, label, point, font, font_scale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
    
#Define our prediction dictionary
face_classes = class_labels
img_size = 48

# Use the efficient dlib's face detector
detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

#I have used my webcam, one may use a '.mp4' video as well
cap = cv2.VideoCapture(0)

while True:
    
    ret, frame = cap.read()
#     frame = cv2.resize(frame, None, fx=0.5, fy=0.5, interpolation = cv2.INTER_LINEAR)
    preprocessed_faces = []           
 
    input_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img_h, img_w, _ = np.shape(frame)
    detected = detector.detectMultiScale(frame)
    faces = np.empty((len(detected), img_size, img_size, 3))
   
    if len(detected) > 0:
        
        for f in detected:
            
            # Obtain the coordinates of the detected face and draw a bounding box
            x1, y1, w, h = [v for v in f]
            cv2.rectangle(frame, (x1, y1), (x1+w,y1+h), (255, 0, 0), 2)
            face =  frame[y1:y1+h, x1:x1+w, :]
            face = cv2.resize(face, (img_rows,img_cols))
            face = face.astype("float32") / 255.0
            face = np.expand_dims(face, axis=0)
            preprocessed_faces.append(face)

        
        # Make predictions for the detected face 
        face_labels = []
        for i, d in enumerate(detected):
            preds = classifier.predict(preprocessed_faces[i])[0]
            face_labels.append(face_classes[np.argmax(preds,axis=0)])
            print(preds)
        
        # Display the results
        for i, d in enumerate(detected):
            label = "{}".format(face_labels[i])
            print(label)
            draw_label(frame, (x1,y1), label)

    cv2.imshow("Face Recognition", frame)
    if cv2.waitKey(1) == 13: #13 is the Enter Key
        break

cap.release()
cv2.destroyAllWindows()      

[0.2065365 0.7934635]
without_mask
[0.20390283 0.79609716]
without_mask
[0.20927751 0.7907225 ]
without_mask
[0.22880322 0.7711968 ]
without_mask
[0.22098978 0.77901024]
without_mask
[0.21871413 0.7812859 ]
without_mask
[0.22765528 0.7723447 ]
without_mask
[0.22780749 0.7721925 ]
without_mask
[0.2327372 0.7672628]
without_mask
[0.23141903 0.768581  ]
without_mask
[0.2268251 0.7731749]
without_mask
[0.21381614 0.7861839 ]
without_mask
[0.23821789 0.76178217]
without_mask
[0.21517904 0.7848209 ]
without_mask
[0.22794914 0.7720509 ]
without_mask
[0.23563401 0.764366  ]
without_mask
[0.22540908 0.77459097]
without_mask
[0.2277178 0.7722822]
without_mask
[0.21367157 0.7863284 ]
without_mask
[0.22511165 0.77488834]
without_mask
[0.22859232 0.77140766]
without_mask
[0.23085694 0.76914304]
without_mask
[0.22225648 0.7777435 ]
without_mask
[0.21932782 0.78067213]
without_mask
[0.2112049 0.7887951]
without_mask
[0.2219059 0.7780941]
without_mask
[0.2167231 0.7832769]
without_mask
[0.2260017  0.7

[0.3200945 0.6799055]
without_mask
[0.36663407 0.633366  ]
without_mask
[0.34088245 0.6591176 ]
without_mask
[0.31899896 0.68100107]
without_mask
[0.3311673 0.6688327]
without_mask
[0.3640564  0.63594353]
without_mask
[0.4254463  0.57455367]
without_mask
[0.35932404 0.640676  ]
without_mask
[0.42682576 0.5731742 ]
without_mask
[0.37352172 0.62647825]
without_mask
[0.426008 0.573992]
without_mask
[0.32456744 0.67543256]
without_mask
[0.34960344 0.6503966 ]
without_mask
[0.33295286 0.66704714]
without_mask
[0.31913507 0.6808649 ]
without_mask
[0.3076988 0.6923012]
without_mask
[0.30706906 0.69293094]
without_mask
[0.35838485 0.64161515]
without_mask
[0.27920195 0.72079813]
without_mask
[0.32593423 0.67406577]
without_mask
[0.28554648 0.7144535 ]
without_mask
[0.2608309  0.73916906]
without_mask
[0.23681833 0.7631816 ]
without_mask
[0.24696761 0.75303245]
without_mask
[0.2512642 0.7487358]
without_mask
[0.19673419 0.80326587]
without_mask
[0.22626613 0.77373385]
without_mask
[0.22905113 0

In [14]:
#Test on realtime video
from os import listdir
from os.path import isfile, join
import os
import cv2
import numpy as np
import dlib


def draw_label(image, point, label, font=cv2.FONT_HERSHEY_SIMPLEX,
               font_scale=0.8, thickness=1):
    size = cv2.getTextSize(label, font, font_scale, thickness)[0]
    x, y = point
    cv2.rectangle(image, (x, y - size[1]), (x + size[0], y), (255, 0, 0), cv2.FILLED)
    cv2.putText(image, label, point, font, font_scale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
    
#Define our prediction dictionary
face_classes = class_labels
img_size = 48

# Use the efficient dlib's face detector
detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

#I have used my webcam, one may use a '.mp4' video as well
# cap = cv2.VideoCapture(0)

# while True:
    
frame = cv2.imread('5.jpg')
#     frame = cv2.resize(frame, None, fx=0.5, fy=0.5, interpolation = cv2.INTER_LINEAR)
preprocessed_faces = []           

input_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
img_h, img_w, _ = np.shape(frame)
detected = detector.detectMultiScale(frame)
faces = np.empty((len(detected), img_size, img_size, 3))

if len(detected) > 0:

    for f in detected:

        # Obtain the coordinates of the detected face and draw a bounding box
        x1, y1, w, h = [v for v in f]
        cv2.rectangle(frame, (x1, y1), (x1+w,y1+h), (255, 0, 0), 2)
        face =  frame[y1:y1+h, x1:x1+w, :]
        face = cv2.resize(face, (img_rows,img_cols))
        face = face.astype("float32") / 255.0
        face = np.expand_dims(face, axis=0)
        preprocessed_faces.append(face)


    # Make predictions for the detected face 
    face_labels = []
    for i, d in enumerate(detected):
        preds = classifier.predict(preprocessed_faces[i])[0]
        face_labels.append(face_classes[np.argmax(preds,axis=0)])
        print(preds)

    # Display the results
    for i, d in enumerate(detected):
        label = "{}".format(face_labels[i])
        print(label)
        draw_label(frame, (x1,y1), label)

cv2.imshow("Face Recognition", frame)
cv2.waitKey(0) == 13


cap.release()
cv2.destroyAllWindows()      