# Import Packages

In [1]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from pylab import rcParams

In [2]:
#-- Dataset loader --#

def fetch_images(path, label=None, image_ext='jpg'):
    image_names = [x for x in os.listdir(path) if x.split('.')[-1] in image_ext]
    if len(image_names) == 0: raise Exception('No files found')
    bga_images = [cv2.imread(path + '/' + img) for img in  image_names]
    images = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in bga_images]
    return images, [[label, image_names[i]] for i in range(len(images))]

def fetch_dataset(path, image_ext=['jpg','png']):
    # Get all folders in the path var
    all_folders = [fol for fol in os.listdir(path)]
    # Get only subfolders that contains files with image_ext
    folders = [fol for fol in all_folders if len([x for x in os.listdir(path + '/' + fol) if x.split('.')[-1] in image_ext]) > 0]
    # Throw error if no files are found
    if len(folders) == 0: raise Exception('No files found')
    print(f'Found the following folders: {folders}')
    data_list, label_list = [], []
    for folder in folders:
        data, label = fetch_images(path + '/' + folder, folder, image_ext)
        data_list = data_list + data
        label_list = label_list + label
    return data_list, label_list


#-- Data visualizer --#

def visualizer(x, y=None, grid=None, font=None):
    if not font:
        font = {'font.family': 'Arial', 'font.size' : 12}
    rcParams.update(font)
    if not grid or 1 in grid:
        fig = plt.figure(figsize=(6,6))
        plt.tick_params(axis='both', bottom=False, left=False, labelbottom=False, labelleft=False)
        plt.imshow(x, cmap="Greys")
        if y: plt.title(y)
    else:
        fig, axes = plt.subplots(grid[0], grid[1],figsize=(10,10))
        for row in axes:
            for axe in row:
                axe.tick_params(axis='both', bottom=False, left=False, labelbottom=False, labelleft=False)
                r = np.random.randint(len(x))
                axe.imshow(x[r], cmap="Greys")
                if y: axe.set_title(y[r])
                #plt.tight_layout()
    plt.show()

In [3]:
#-- Image processing methods --#

def convertToRGB(img):
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def detect_faces(f_cascade, colored_img, scaleFactor = 1.1):
    #just making a copy of image passed, so that passed image is not changed
    img_copy = colored_img.copy()
    
    #convert the test image to gray image as opencv face detector expects gray images
    gray = cv2.cvtColor(img_copy, cv2.COLOR_RGB2GRAY)
    
    #let's detect multiscale (some images may be closer to camera than others) images
    faces = f_cascade.detectMultiScale(gray, scaleFactor=scaleFactor, minNeighbors=5)
    
    #go over list of faces and draw them as rectangles on original colored img
    for (x, y, w, h) in faces:
        cv2.rectangle(img_copy, (x, y), (x+w, y+h), (0, 255, 0), 1)
        
    return img_copy

def crop_face(f_cascade, img, scaleFactor = 1.1):
    # convert the image to gray image as opencv face detector expects gray images
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # let's detect multiscale (some images may be closer to camera than others) images
    faces = f_cascade.detectMultiScale(gray, scaleFactor=scaleFactor, minNeighbors=5)
    try:
        x, y, w, h = faces[0]
    except IndexError:
        return None
    return img[y:y+h, x:x+w]

In [4]:
def forge_dataset_pipeline(dataset_path,
                            new_path,
                            haar_face_cascade,
                            drop=100,
                            image_type=['jpg','png']):

    # Validate input
    assert os.path.exists(dataset_path)
    assert type(new_path) == str
    assert len(new_path) in range(1,20)
    assert type(drop) == int
    assert 0 < drop
    assert type(haar_face_cascade) == cv2.CascadeClassifier
    assert type(image_type) == list

    print('Loading dataset')
    data, label = fetch_dataset(dataset_path, image_type)

    print('Cropping dataset')
    data_crop, label_crop = [], []
    for i, j in zip(data, label):
        crop = crop_face(haar_face_cascade, i)
        if type(crop) != type(None):
            if len(crop) > drop:
                data_crop.append(crop)
                label_crop.append(j)

    print('Saving dataset')
    for image, label in zip(data_crop, label_crop):
        if not os.path.exists('dataset/' + new_path + '/' + label[0]):
            os.makedirs('dataset/' + new_path + '/' + label[0])
            print(f'Creating folder: {new_path}/{label[0]}')
        cv2.imwrite('dataset/' + new_path + '/' + label[0] + '/' + label[1], convertToRGB(image))

    #return data_crop, label_crop

# Image Processing

In [5]:
!dir data\haarcascades

 Volume in drive E is Homura
 Volume Serial Number is 9A5F-4CD0

 Directory of e:\Users\Anti\Documents\Noroff\3rd Year\Bachelor Project (UC3BPR201)\project\mask-detection-system\data\haarcascades

08/11/2021  12:53    <DIR>          .
08/11/2021  12:53    <DIR>          ..
08/11/2021  09:49           341.406 haarcascade_eye.xml
08/11/2021  09:49           601.661 haarcascade_eye_tree_eyeglasses.xml
08/11/2021  09:49           411.388 haarcascade_frontalcatface.xml
08/11/2021  09:49           382.918 haarcascade_frontalcatface_extended.xml
08/11/2021  09:49           676.709 haarcascade_frontalface_alt.xml
08/11/2021  09:49           540.616 haarcascade_frontalface_alt2.xml
08/11/2021  09:49         2.689.040 haarcascade_frontalface_alt_tree.xml
08/11/2021  09:49           930.127 haarcascade_frontalface_default.xml
08/11/2021  09:49           476.827 haarcascade_fullbody.xml
08/11/2021  09:49           195.369 haarcascade_lefteye_2splits.xml
08/11/2021  09:49            47.775 haarcasc

In [6]:
# Set face detection classifier
haar_face_cascade = cv2.CascadeClassifier('data\haarcascades\haarcascade_frontalface_alt2.xml')

# Dataset

In [7]:
forge_dataset_pipeline('dataset/FaceMask Dataset', 'FMD_filter', haar_face_cascade)

Loading dataset
Found the following folders: ['Mask', 'No Mask']
Cropping dataset
Saving dataset
Creating folder: FMD_filter/Mask
Creating folder: FMD_filter/No Mask


In [8]:
forge_dataset_pipeline('dataset/Face Mask Detection Dataset', 'FMDD_filter', haar_face_cascade)

Loading dataset
Found the following folders: ['without_mask', 'with_mask']
Cropping dataset
Saving dataset
Creating folder: FMDD_filter/without_mask
Creating folder: FMDD_filter/with_mask


In [15]:
from random import randrange
images = os.listdir('dataset/Combined/No Mask')
im_keep = []

l = 605 - 61

for i in range(l):
    r = randrange(0, len(images))
    im_keep.append(images.pop(r))

for i in os.listdir('dataset/Combined/No Mask'):
    if i not in im_keep:
        os.remove('dataset/Combined/No Mask/' + i)
