<a href="https://colab.research.google.com/github/linlih/CovidFaceMaskDetector/blob/master/Covid_Face_Mask_Detector%EF%BC%88Keras_Implementation%EF%BC%89.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from pathlib import Path

import pandas as pd
from google_drive_downloader import GoogleDriveDownloader as gdd
from tqdm import tqdm

In [0]:
# Fetch the training archive from the shared Google Drive file and unzip it
# next to the downloaded zip.
datasetPath = Path('data') / 'mask.zip'
gdd.download_file_from_google_drive(
    file_id='1UlOk6EtiaXTHylRUx2mySgvJX9ycoeBp',
    dest_path=str(datasetPath),
    unzip=True,
)

Downloading 1UlOk6EtiaXTHylRUx2mySgvJX9ycoeBp into data/mask.zip... Done.
Unzipping...Done.


In [0]:
# Delete the downloaded zip file once it has been extracted.
# Guarded so the cell can be re-run: unlink() raises if the archive is already
# gone, and a later cell rebinds datasetPath to the extracted directory, which
# must not be passed to unlink() either.
if datasetPath.is_file():
  datasetPath.unlink()

In [0]:
# Build the image-path / label DataFrame and pickle it.
# If the pickle already exists this cell does not need to be run again; load
# the pickle with pd.read_pickle instead.
datasetPath = Path('./data/self-built-masked-face-recognition-dataset')
maskPath = datasetPath/'AFDB_masked_face_dataset'
nonMaskPath = datasetPath/'AFDB_face_dataset'

# Collect plain dict records and build the DataFrame once at the end.
# Appending to a DataFrame row by row copies the whole frame on every call
# (quadratic in the number of images), and DataFrame.append no longer exists
# in current pandas releases.
records = []
for labelPath, label, desc in ((maskPath, 1, 'mask photos'),
                               (nonMaskPath, 0, 'no mask photos')):
  # Each label directory contains one sub-directory per subject.
  for subject in tqdm(list(labelPath.iterdir()), desc=desc):
    for imgPath in subject.iterdir():
      records.append({'image': str(imgPath), 'mask': label})

# Explicit columns keep the schema stable even when no images were found.
# 'mask' is stored as an integer label: 1 = masked face, 0 = unmasked face.
maskDF = pd.DataFrame(records, columns=['image', 'mask'])

dfName = './data/mask_df.pickle'
print(f'saving DataFrame to {dfName}')
maskDF.to_pickle(dfName) # serialize to disk; read it back with pd.read_pickle

mask photos: 100%|██████████| 525/525 [00:06<00:00, 81.97it/s] 
no mask photos: 100%|██████████| 460/460 [05:52<00:00,  1.31it/s]

saving DataFrame to ./data/mask_df.pickle





In [0]:
# If the DataFrame has already been pickled, run just this cell to load it
# instead of rebuilding it from the image directories.
import pandas as pd
maskDF = pd.read_pickle('./data/mask_df.pickle')

In [0]:
from keras import layers
from keras import models
from keras.optimizers import Adam

Using TensorFlow backend.


In [0]:
# Small CNN for binary mask / no-mask classification on 100x100 RGB images.
model = models.Sequential()

# Only the first layer declares the input shape; later layers infer theirs
# from the previous layer's output, so repeating input_shape there would be
# misleading (their real inputs are 50x50x32 and 25x25x64).
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3), padding='same'))
model.add(layers.MaxPooling2D((2, 2)))   # 100x100 -> 50x50

model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(layers.MaxPooling2D((2, 2)))   # 50x50 -> 25x25

# Stride 3 with 'same' padding shrinks 25x25 to 9x9 before the last pooling.
model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same', strides=(3, 3)))
model.add(layers.MaxPooling2D((2, 2)))   # 9x9 -> 4x4

model.add(layers.Flatten())              # 4 * 4 * 128 = 2048 features
model.add(layers.Dense(1024, activation='relu'))
# Single sigmoid unit: output is the probability of the positive class.
model.add(layers.Dense(1, activation='sigmoid'))

# NOTE(review): newer Keras releases spell this argument `learning_rate`;
# confirm the installed version before renaming `lr`.
model.compile(optimizer=Adam(lr=0.0001),
       loss='binary_crossentropy',
       metrics=['accuracy'])

In [0]:
# Print the layer-by-layer output shapes and parameter counts of the model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 100, 100, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 50, 50, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 50, 50, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 25, 25, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 9, 9, 128)         73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 4, 4, 128)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2048)             

In [0]:
# Keras' ImageDataGenerator can only handle the label column when it holds
# strings, so convert the 'mask' column to str.
# Note: pandas' object dtype corresponds to Python's str type.
# ref: https://www.cnblogs.com/yu121/p/12878369.html
#
# .assign returns a new frame and leaves maskDF untouched. pd.DataFrame(maskDF)
# does not copy by default, so assigning into that wrapper could write through
# to maskDF as well.
# The labels are cast to int first so they are always '0' / '1'; depending on
# how the pickle was built the column may hold floats (which would otherwise
# become '0.0' / '1.0').
maskDF_t = maskDF.assign(mask=maskDF['mask'].astype(int).astype(str))

In [0]:
from keras_preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Stratified 70/30 train/validation split with a fixed seed.
train, validate = train_test_split(
    maskDF_t,
    test_size=0.3,
    random_state=0,
    stratify=maskDF['mask'],
)

# Settings shared by the training and validation generators: image paths come
# from the 'image' column (used as-is, no base directory), labels from 'mask'.
flowOptions = dict(
    directory=None,
    x_col="image",
    y_col="mask",
    class_mode="binary",
    target_size=(100,100),
    batch_size=256,
)

# Both generators only rescale pixel values from [0, 255] to [0, 1].
datagen = ImageDataGenerator(rescale=1./255)
train_generator = datagen.flow_from_dataframe(dataframe=train, **flowOptions)

tdatagen = ImageDataGenerator(rescale=1./255)
valid_generator = tdatagen.flow_from_dataframe(dataframe=validate, **flowOptions)

Found 64869 validated image filenames belonging to 2 classes.
Found 27802 validated image filenames belonging to 2 classes.


In [0]:
# steps_per_epoch / validation_steps count *batches*, not samples.
# len(generator) is the number of batches needed for one full pass over the
# data (including the final partial batch). Using generator.n (the sample
# count) would make a single "epoch" iterate over the data batch_size times.
STEP_SIZE_TRAIN = len(train_generator)
STEP_SIZE_VALID = len(valid_generator)

# verbose=1 draws a progress bar that emits a huge amount of output and can
# exhaust Colab's memory, so it is avoided here. verbose=2 prints one summary
# line per epoch, and the returned History object keeps the per-epoch
# loss/accuracy in history.history for later inspection.
history = model.fit_generator(generator=train_generator,
          steps_per_epoch=STEP_SIZE_TRAIN,
          epochs=1,
          validation_data=valid_generator,
          validation_steps=STEP_SIZE_VALID,
          verbose=2)

Epoch 1/1


In [0]:
# Shell out to nvidia-smi to show the GPU attached to this runtime and its current usage.
!nvidia-smi