In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import classification_report
import shutil
import os

In [2]:
!pip install split-folders
!pip install gdown

Collecting split-folders
  Downloading split_folders-0.4.3-py3-none-any.whl (7.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.4.3
Collecting gdown
  Downloading gdown-3.13.0.tar.gz (9.3 kB)
  Installing build dependencies ... [?25l- \ | / - done
[?25h  Getting requirements to build wheel ... [?25l- done
[?25h    Preparing wheel metadata ... [?25l- done
Building wheels for collected packages: gdown
  Building wheel for gdown (PEP 517) ... [?25l- done
[?25h  Created wheel for gdown: filename=gdown-3.13.0-py3-none-any.whl size=9034 sha256=d56a3b462c17df8237089679bc66207ffc8e5cb33c75af2798a901c2b1646198
  Stored in directory: /root/.cache/pip/wheels/2f/2a/2f/86449b6bdbaa9aef873f68332b68be6bfbc386b9219f47157d
Successfully built gdown
Installing collected packages: gdown
Successfully installed gdown-3.13.0


In [3]:
import gdown

# download files for opencv nn ssd face detector model

hello = '1-0pbqawPfRSSG1NYDNbdmmEfSSn7wM7N'
url = 'https://drive.google.com/uc?id=' + hello
gdown.download(url, 'deploy.prototxt', quiet=False)

hello = '1dDbOtqSm8hnKVvKM2lDN9e_6fEFV5Ros'
url = 'https://drive.google.com/uc?id=' + hello
gdown.download(url, 'res10_300x300_ssd_iter_140000_fp16.caffemodel', quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1-0pbqawPfRSSG1NYDNbdmmEfSSn7wM7N
To: /kaggle/working/deploy.prototxt
100%|██████████| 28.1k/28.1k [00:00<00:00, 30.0MB/s]
Downloading...
From: https://drive.google.com/uc?id=1dDbOtqSm8hnKVvKM2lDN9e_6fEFV5Ros
To: /kaggle/working/res10_300x300_ssd_iter_140000_fp16.caffemodel
5.35MB [00:00, 186MB/s]


'res10_300x300_ssd_iter_140000_fp16.caffemodel'

In [4]:
# load the face detector model
modelFile = "res10_300x300_ssd_iter_140000_fp16.caffemodel"
configFile = "deploy.prototxt"
net = cv2.dnn.readNetFromCaffe(configFile, modelFile)

In [5]:
!mkdir no_split
!mkdir no_split/correct
!mkdir no_split/incorrect

In [6]:
!ls ../input/cabani-correctly-masked/

__notebook__.ipynb  __results__.html   correct
__output__.json     __results___files  custom.css


In [7]:
from tqdm import tqdm

# crop the faces out from cabani dataset as the images will have wider range compared to xzhangyang's dataset
# make the training dataset same as those detected faces later in practical testing phase

output = {
    'correct': ['./cabani/correct/'], 
    'incorrect': [
        './cabani/Mask_Mouth_Chin/', 
        './cabani/Mask_Chin/', 
        './cabani/Mask_Nose_Mouth/'
    ]
}

detection_failed = dict()

for path, input_folders in output.items():
    output_path = os.path.join('no_split', path)
    
    for input_folder in input_folders:
        a = os.listdir(input_folder)
        
        detection_failed[input_folder] = 0
        
        print(input_folder)
        for image_p in tqdm(a):
            image_path = os.path.join(input_folder, image_p)
            image = cv2.imread(image_path)

            blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))
            net.setInput(blob)
            detections = net.forward()

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            h, w, _ = image.shape

            i = 0
            confidence = detections[0, 0, i, 2]
            box = detections[0, 0, i, 3:]
            box = box * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            if endX > w or endX > h or startX < 0 or startY < 0:
                detection_failed[input_folder] += 1
                continue

            face = image[startY:endY, startX:endX]

            plt.imsave(os.path.join(output_path, image_p), face)
            
        print(input_folder, detection_failed[input_folder])

  0%|          | 1/10000 [00:00<28:30,  5.85it/s]

../input/cabani-correctly-masked/correct/


100%|██████████| 10000/10000 [14:49<00:00, 11.24it/s]


../input/cabani-correctly-masked/correct/ 4032


  0%|          | 2/10000 [00:00<16:01, 10.40it/s]

../input/cabani-incorrectly-masked/Mask_Mouth_Chin/


100%|██████████| 10000/10000 [16:42<00:00,  9.97it/s]


../input/cabani-incorrectly-masked/Mask_Mouth_Chin/ 1695


  0%|          | 1/4836 [00:00<08:33,  9.42it/s]

../input/cabani-incorrectly-masked/Mask_Chin/


100%|██████████| 4836/4836 [08:17<00:00,  9.73it/s]


../input/cabani-incorrectly-masked/Mask_Chin/ 558


  0%|          | 2/6245 [00:00<09:26, 11.02it/s]

../input/cabani-incorrectly-masked/Mask_Nose_Mouth/


100%|██████████| 6245/6245 [09:36<00:00, 10.83it/s]

../input/cabani-incorrectly-masked/Mask_Nose_Mouth/ 2033





In [8]:
# download unmasked dataset from XZhangYang's dataset

import gdown

hello = '1_vaq_6opGjkWWk_bNa6kEyYfFy3PDMgi'

url = 'https://drive.google.com/uc?id=' + hello
gdown.download(url, 'no_mask.zip', quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1_vaq_6opGjkWWk_bNa6kEyYfFy3PDMgi
To: /kaggle/working/no_mask.zip
65.3MB [00:00, 65.4MB/s]


'no_mask.zip'

In [9]:
!unzip -q no_mask

In [10]:
# combine the two datasets
e = shutil.copytree("./unmasked", "./no_split/no_mask")

In [11]:
# download masked dataset from XZhangYang's dataset

import gdown

hello = '1uXCJJiRdnTYd2u1158uxSpUNpSHHbu5l'

url = 'https://drive.google.com/uc?id=' + hello
gdown.download(url, 'masked.zip', quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1uXCJJiRdnTYd2u1158uxSpUNpSHHbu5l
To: /kaggle/working/masked.zip
34.3MB [00:00, 98.1MB/s]


'masked.zip'

In [12]:
!unzip -q masked

In [13]:
# combine the two datasets
for masked_image in os.listdir('masked'):
    shutil.copy(os.path.join('masked', masked_image), os.path.join('./no_split/correct/', 'xzy_'+masked_image))

In [14]:
# split the dataset into train, test and val
import splitfolders
splitfolders.ratio('no_split', output="split", seed=69, ratio=(.7, 0.15,0.15)) 

Copying files: 34966 files [00:06, 5526.99 files/s]
