In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import cv2
import tensorflow as tf
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

### Preparing the Dataset

In [2]:
# ---- Configuration ----------------------------------------------------------
SEED       = 42   # seeds the shuffle and the train/test split so reruns give the same partition
BATCH_SIZE = 65
IMG_SIZE   = (128, 128)

# Fractions of the full dataset assigned to each partition.
percent_train = 0.7
percent_val   = 0.1
percent_test  = 0.2
assert np.isclose(percent_train + percent_val + percent_test, 1.0), \
    'percent_train + percent_val + percent_test must equal 1'

IMAGES_DIR  = '../DatasetCleaning/'
USAGE_DIRS  = [ 'croppedface_no_mask', 'croppedface_with_mask', 'croppedface_with_mask_incorrect' ]
TYPE_DIRS   = { 'invalid' : ['croppedface_other_covering', 'croppedface_no_mask'], 
                'valid'   : ['croppedface_with_mask', 'croppedface_with_mask_incorrect'] }


def _labelled_images(dir_name, label):
    """Pair every entry of ``IMAGES_DIR/dir_name`` with ``label``.

    Returns a list of ``('<dir_name>/<entry>', label)`` tuples. Entries are
    sorted because ``os.listdir`` returns them in arbitrary order, which would
    otherwise make the seeded shuffle below depend on the filesystem.
    """
    entries = sorted(os.listdir(os.path.join(IMAGES_DIR, dir_name)))
    return [('{}/{}'.format(dir_name, entry), label) for entry in entries]


# ---- Collect (filename, label) pairs ----------------------------------------
img_usage_pairs = []
for usage_dir in USAGE_DIRS:
    # The usage label is the directory name without its 'cropped' prefix,
    # e.g. 'croppedface_no_mask' -> 'face_no_mask'.
    img_usage_pairs.extend(_labelled_images(usage_dir, usage_dir[len('cropped') : ]))

img_type_pairs = []
for type_name, type_dirs in TYPE_DIRS.items():
    for type_dir in type_dirs:
        img_type_pairs.extend(_labelled_images(type_dir, type_name))

mask_usage_df = pd.DataFrame(img_usage_pairs, columns = ['filename', 'usage']).set_index('filename')
mask_type_df  = pd.DataFrame(img_type_pairs, columns = ['filename', 'type']).set_index('filename')

# Right join: keep every image that has a type label, even when it has no
# usage label (the images that only appear in a TYPE_DIRS directory).
mask_usage_type_df = mask_usage_df.join(mask_type_df, how = 'right')

# The only images without a usage label must be the ones in
# croppedface_other_covering; verify that instead of merely eyeballing it.
missing_per_column = mask_usage_type_df.isna().sum()
print(missing_per_column)
n_other_covering = sum(path.startswith('croppedface_other_covering/') for path, _label in img_type_pairs)
assert missing_per_column['usage'] == n_other_covering, \
    'images without a usage label should be exactly those in croppedface_other_covering'

# Faces with a non-mask covering count as "no mask" for the usage label.
mask_usage_type_df['usage'] = mask_usage_type_df['usage'].fillna('face_no_mask')

print()
print(mask_usage_type_df.value_counts())

# Combined target consumed by the generators: '<usage>--<type>'.
mask_usage_type_df['class'] = mask_usage_type_df['usage']  + '--' + mask_usage_type_df['type']
mask_usage_type_df = shuffle(mask_usage_type_df.reset_index(), random_state = SEED)

# Stratify on the combined class so the rare classes are represented in the
# test set in the same proportion as in the full dataset.
mask_train, mask_test = train_test_split(mask_usage_type_df,
                                         test_size    = percent_test,
                                         stratify     = mask_usage_type_df['class'],
                                         random_state = SEED)

usage    1372
type        0
dtype: int64

usage                     type   
face_with_mask            valid      4172
face_no_mask              invalid    2938
face_with_mask_incorrect  valid       150
dtype: int64


In [3]:
# mask_train holds (1 - percent_test) of the data, so the validation share is
# re-expressed as a fraction of mask_train: percent_val / (1 - percent_test).
train_val_split = percent_val / (1 - percent_test)

# One generator serves both subsets: it rescales pixel values by 1/255 and
# reserves `train_val_split` of the rows it is given for the validation subset.
# NOTE(review): the model below uses an ImageNet-pretrained ResNet50; confirm
# that plain 1/255 rescaling (rather than the ResNet50 preprocess_input) is intended.
mask_image_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale          = 1.0 / 255,
    validation_split = train_val_split,
)

# Arguments common to the training and validation iterators.
flow_options = dict(
    directory   = IMAGES_DIR,
    x_col       = 'filename',
    y_col       = 'class',
    target_size = IMG_SIZE,
    batch_size  = BATCH_SIZE,
)

train_ds = mask_image_gen.flow_from_dataframe(mask_train, subset = "training",   **flow_options)
val_ds   = mask_image_gen.flow_from_dataframe(mask_train, subset = "validation", **flow_options)

Found 5082 validated image filenames belonging to 3 classes.
Found 726 validated image filenames belonging to 3 classes.


### Face mask usage classifier

In [4]:
# ResNet50 feature extractor without its classification head; pooling = 'max'
# makes the backbone end in a global max-pooling layer.
resnet_backbone = tf.keras.applications.ResNet50(
    include_top = False,
    input_shape = (*IMG_SIZE, 3),
    pooling     = 'max',
)

# Classifier: backbone -> Flatten -> Dense(128, relu) -> Dense(3, softmax).
# NOTE(review): with global pooling the backbone output should already be 2-D,
# so the Flatten layer looks redundant -- kept to leave the layer stack unchanged.
# NOTE(review): the 3 output units must match the number of classes reported by
# the data generators ("belonging to 3 classes"); confirm if the labels change.
usage_model = tf.keras.models.Sequential()
usage_model.add(resnet_backbone)
usage_model.add(tf.keras.layers.Flatten())
usage_model.add(tf.keras.layers.Dense(128, activation = 'relu'))
usage_model.add(tf.keras.layers.Dense(3, activation = 'softmax'))

usage_model.compile(
    optimizer = 'adam',
    loss      = tf.losses.CategoricalCrossentropy(),
    metrics   = ['accuracy'],
)

In [None]:
# Train for 10 epochs on the training iterator, evaluating on the validation
# iterator after each epoch; the returned History object is kept in `history`.
history = usage_model.fit(
    x               = train_ds,
    validation_data = val_ds,
    epochs          = 10,
)