## Project - Face Mask Detection
- **Data Preparation & Preprocessing**

In [8]:
# import libraries
import os
import numpy as np
import pandas as pd
import cv2
import gc#garbage collect, as we are working on big data, to stop the computer from not responding, we will clean the data/garbage collections using gc
from tqdm import tqdm # will be used for the loops
from glob import glob

# os and glob are used to collect the image file paths; each image is labelled with the name of the folder it is stored in.

### Collecting & Labelling our data
- Collecting all data
- labeling

In [9]:
# Each sub-folder of 'Data' is one class and its name is used as the label.
# Keep only directories (a stray file such as .DS_Store or Thumbs.db would
# otherwise be treated as a class) and sort them so the class order does not
# depend on the file system.
dirs = sorted(
    entry for entry in os.listdir('Data')
    if os.path.isdir(os.path.join('Data', entry))
)
print(dirs)

['Mask', 'Mask_Chin', 'Mask_Mouth_Chin', 'Mask_Nose_Mouth']


In [16]:
path = glob('./Data/Mask/*.jpg') # sanity check: list the .jpg files in the 'Mask' folder to confirm the data is where we expect it

In [17]:
path # display the matched paths. NOTE(review): this echoes the whole list; a slice such as path[:5] would keep the output short

['./Data/Mask\\00000.jpg',
 './Data/Mask\\00001.jpg',
 './Data/Mask\\00002.jpg',
 './Data/Mask\\00003.jpg',
 './Data/Mask\\00004.jpg',
 './Data/Mask\\00005.jpg',
 './Data/Mask\\00006.jpg',
 './Data/Mask\\00007.jpg',
 './Data/Mask\\00008.jpg',
 './Data/Mask\\00009.jpg',
 './Data/Mask\\00010.jpg',
 './Data/Mask\\00011.jpg',
 './Data/Mask\\00012.jpg',
 './Data/Mask\\00013.jpg',
 './Data/Mask\\00014.jpg',
 './Data/Mask\\00015.jpg',
 './Data/Mask\\00016.jpg',
 './Data/Mask\\00017.jpg',
 './Data/Mask\\00018.jpg',
 './Data/Mask\\00019.jpg',
 './Data/Mask\\00020.jpg',
 './Data/Mask\\00021.jpg',
 './Data/Mask\\00022.jpg',
 './Data/Mask\\00023.jpg',
 './Data/Mask\\00024.jpg',
 './Data/Mask\\00025.jpg',
 './Data/Mask\\00026.jpg',
 './Data/Mask\\00027.jpg',
 './Data/Mask\\00028.jpg',
 './Data/Mask\\00029.jpg',
 './Data/Mask\\00030.jpg',
 './Data/Mask\\00031.jpg',
 './Data/Mask\\00032.jpg',
 './Data/Mask\\00033.jpg',
 './Data/Mask\\00034.jpg',
 './Data/Mask\\00035.jpg',
 './Data/Mask\\00036.jpg',
 

In [4]:
# Build two parallel lists: the path of every image and, at the same index,
# its class label (the name of the folder the image is stored in).
images_path = []
labels = []
for folder in dirs:
    # glob() returns files in arbitrary, OS-dependent order; sorting makes the
    # dataset order reproducible between runs and machines.
    path = sorted(glob('./Data/{}/*.jpg'.format(folder)))
    label = [folder] * len(path)  # one label per image found in this folder
    images_path.extend(path)
    labels.extend(label)

In [14]:
set(labels)
# The distinct class labels, i.e. the names of the folders the images were read from

{'Mask', 'Mask_Chin', 'Mask_Mouth_Chin', 'Mask_Nose_Mouth'}

In [18]:
print(images_path)
# Paths of all the images collected from every class folder.
# NOTE(review): this prints the entire list; len(images_path) or a short slice would keep the output readable.

['./Data/Mask\\00000.jpg', './Data/Mask\\00001.jpg', './Data/Mask\\00002.jpg', './Data/Mask\\00003.jpg', './Data/Mask\\00004.jpg', './Data/Mask\\00005.jpg', './Data/Mask\\00006.jpg', './Data/Mask\\00007.jpg', './Data/Mask\\00008.jpg', './Data/Mask\\00009.jpg', './Data/Mask\\00010.jpg', './Data/Mask\\00011.jpg', './Data/Mask\\00012.jpg', './Data/Mask\\00013.jpg', './Data/Mask\\00014.jpg', './Data/Mask\\00015.jpg', './Data/Mask\\00016.jpg', './Data/Mask\\00017.jpg', './Data/Mask\\00018.jpg', './Data/Mask\\00019.jpg', './Data/Mask\\00020.jpg', './Data/Mask\\00021.jpg', './Data/Mask\\00022.jpg', './Data/Mask\\00023.jpg', './Data/Mask\\00024.jpg', './Data/Mask\\00025.jpg', './Data/Mask\\00026.jpg', './Data/Mask\\00027.jpg', './Data/Mask\\00028.jpg', './Data/Mask\\00029.jpg', './Data/Mask\\00030.jpg', './Data/Mask\\00031.jpg', './Data/Mask\\00032.jpg', './Data/Mask\\00033.jpg', './Data/Mask\\00034.jpg', './Data/Mask\\00035.jpg', './Data/Mask\\00036.jpg', './Data/Mask\\00037.jpg', './Data/Mas

### Face Detection & Cropping
- Face Detection - We will use the pre-trained deep neural network model
- Cropping

OpenCV has a module called DNN (Deep Neural Network). We will use its pre-trained Face Detection (FP16) model.

#### To Download

res10_300x300_ssd_iter_140000_fp16.caffemodel - 

https://github.com/opencv/opencv_3rdparty/raw/19512576c112aa2c7b6328cb0e8d589a4a90a26d/res10_300x300_ssd_iter_140000_fp16.caffemodel

deploy.prototxt - 

https://github.com/opencv/opencv/blob/master/samples/dnn/face_detector/deploy.prototxt

In [32]:
# Pick one sample image to develop and test the face detection on.
img_path = images_path[1]
img = cv2.imread(img_path)  # read the image with OpenCV (BGR channel order)
# cv2.imread() does not raise on failure: it returns None when the file is
# missing or cannot be decoded, so fail here with a clear message instead of
# an obscure error in a later cell.
if img is None:
    raise FileNotFoundError('Could not read image: {}'.format(img_path))

In [23]:
cv2.imshow('original',img) # open a window titled 'original' showing the image we are working on
cv2.waitKey() # wait for a key press before continuing
cv2.destroyAllWindows() # close the window(s) opened above

Now we will detect the face using OpenCV's Deep Neural Network module and crop the image to the detected bounding box.

In [24]:
# Loading the pre-trained face detection model (Caffe format): the first argument is the
# network definition (prototxt), the second is the file with the trained weights (caffemodel).
# Alternatives can be TensorFlow or Torch models.
face_detection_model = cv2.dnn.readNetFromCaffe('./models/deploy.prototxt.txt',
                                                './models/res10_300x300_ssd_iter_140000_fp16.caffemodel')

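Now we need to calculate the blob from the image, i.e. resize it and apply RGB mean subtraction.

In general image processing, a blob is a large object or anything bright on a dark background: a group of connected pixel values that forms a region distinguishable from its background, and image processing can be used to detect such blobs. In OpenCV's DNN module, however, a "blob" is the 4-dimensional array (batch, channels, height, width) that `cv2.dnn.blobFromImage` builds from an image after resizing, mean subtraction and optional channel swapping. It is the input format the network expects, and it is what we compute here.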

In [25]:
def face_detection_dnn(img, min_confidence=0.5):
    """Detect a face in ``img`` and return the cropped face region.

    The image is turned into a 300x300 mean-subtracted blob, passed through the
    module-level ``face_detection_model`` and the first detection whose
    confidence exceeds ``min_confidence`` is cropped out of a copy of the image.

    Parameters
    ----------
    img : numpy.ndarray or None
        Image as returned by ``cv2.imread`` (height, width, channels).
        ``None`` (what ``cv2.imread`` returns for an unreadable file) and
        empty arrays are accepted and yield ``None``.
    min_confidence : float, optional
        Detections with a confidence score at or below this value are ignored.
        Defaults to 0.5.

    Returns
    -------
    numpy.ndarray or None
        The cropped region of interest, or ``None`` when there is no usable
        detection.
    """
    # cv2.imread() signals failure by returning None; nothing to detect then.
    if img is None or img.size == 0:
        return None

    image = img.copy()  # work on a copy so the caller's image is never touched
    h, w = image.shape[:2]  # only height and width are needed, not the depth

    # Resize to 300x300 (the input size the model was trained for) and subtract
    # the per-channel mean values.
    # NOTE(review): OpenCV's sample configuration for this face detector appears
    # to use mean (104, 177, 123) without swapping channels - confirm before
    # changing; the values are kept as they were so detections stay the same.
    blob = cv2.dnn.blobFromImage(image, 1, (300, 300), (104, 117, 123), swapRB=True)

    face_detection_model.setInput(blob)
    detections = face_detection_model.forward()  # forward pass
    # detections.shape -> (1, 1, N, 7); for each of the N candidate boxes,
    # index 2 is the confidence score and indices 3..6 are the box corners
    # (x1, y1, x2, y2) expressed as fractions of the image width/height.

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > min_confidence:
            # Scale the relative coordinates back to pixel coordinates.
            box = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype(int)
            # The network can place corners slightly outside the image. A
            # negative start index would wrap around in NumPy slicing and give
            # an empty or wrong crop, so clamp the box to the image bounds.
            x1, y1 = max(box[0], 0), max(box[1], 0)
            x2, y2 = min(box[2], w), min(box[3], h)
            if x2 <= x1 or y2 <= y1:
                continue  # degenerate box, try the next detection
            return image[y1:y2, x1:x2]  # keep only the region of interest

    return None  # no face detected

In [26]:
img_roi = face_detection_dnn(img) # try the function on the sample image: the result is the cropped face, or None when no face is detected

In [27]:
# NOTE(review): img_roi is None when no face was detected, and imshow cannot display None.
cv2.imshow('roi',img_roi) # the cropped face region returned by face_detection_dnn
cv2.imshow('original',img) # the unmodified input image, for comparison
cv2.waitKey() # wait for a key press before continuing
cv2.destroyAllWindows() # close both windows

### Blob from Image (RGB Mean Subtraction):
- Blob from image (Feature extraction process)

In [40]:
def datapreprocess(img):
    """Crop the face out of ``img`` and return it as a normalised 100x100 image.

    Steps: detect and crop the face with ``face_detection_dnn``, build a
    100x100 mean-subtracted blob from the crop, rearrange it to
    (height, width, channels), clip negative values to 0 and scale the result
    by its maximum so that all values lie in the range 0 to 1.

    Parameters
    ----------
    img : numpy.ndarray or None
        Image as returned by ``cv2.imread``. ``None`` is accepted.

    Returns
    -------
    numpy.ndarray or None
        Array of shape (100, 100, 3), or ``None`` when the image is missing
        or no face could be cropped from it.
    """
    # cv2.imread() returns None for unreadable files; treat it like "no face".
    if img is None:
        return None

    face = face_detection_dnn(img)
    # An empty crop cannot be resized into a blob, so treat it like "no face".
    if face is None or face.size == 0:
        return None

    # blob.shape -> (1, 3, 100, 100): (batch, channels, height, width)
    blob = cv2.dnn.blobFromImage(face, 1, (100, 100), (104, 117, 123), swapRB=True)

    # Drop the batch axis and move the channels last: (3, 100, 100) -> (100, 100, 3).
    # This gives the same result as transposing the squeezed blob, rotating it
    # 90 degrees clockwise and flipping it horizontally.
    face_hwc = np.ascontiguousarray(np.transpose(blob[0], (1, 2, 0)))

    # Mean subtraction produces negative values: clip them to 0, then scale by
    # the maximum to get values between 0 and 1.
    peak = face_hwc.max()
    if peak <= 0:
        # Nothing is above the mean; dividing by the maximum would produce NaN.
        return np.zeros_like(face_hwc)
    return np.maximum(face_hwc, 0) / peak


### Applying the same preprocessing to all the images in the dataset and appending the results to a list

In [29]:
#len(images_path) -> 10000, took nearly 13 minutes to process the changes below

# Preprocess every image and keep the result together with its label.
# Images that cannot be read or in which no face is detected are skipped, so
# data_img and label_img always stay aligned index by index.
data_img = []
label_img = []
# zip() has no length, so pass total= explicitly to let tqdm show a percentage and an ETA.
progress = tqdm(zip(images_path, labels), total=len(images_path), desc='preprocessing')
for i, (path, label) in enumerate(progress, start=1):
    img = cv2.imread(path)  # returns None when the file is missing or unreadable
    if img is not None:
        process_img = datapreprocess(img)
        if process_img is not None:
            data_img.append(process_img)
            label_img.append(label)

    if i % 100 == 0:  # every 100 iterations, free the garbage memory
        gc.collect()

preprocessing: 10000it [12:56, 12.89it/s]


In [41]:
X = np.array(data_img) # stack the preprocessed images into a single array (one entry per image along the first axis)
y = np.array(label_img) # the matching class labels, one per retained image

In [34]:
X.shape, y.shape # sanity check: the first dimension of both must match (one label per image)

((9959, 100, 100, 3), (9959,))

In [35]:
# np.savez() does not create missing folders, so make sure the output directory exists first.
# NOTE(review): the images were read from 'Data' but the archive goes to './data';
# on a case-sensitive file system these are two different folders - confirm which is intended.
os.makedirs('./data', exist_ok=True)
# Save both arrays into a single .npz archive. They are passed positionally,
# so they are stored under the default keys 'arr_0' (X) and 'arr_1' (y).
np.savez('./data/data_preprocess.npz',X,y)

## END