<a href="https://colab.research.google.com/github/linlih/CovidFaceMaskDetector/blob/master/Covid_Face_Mask_Detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 下载数据

数据来源：https://github.com/X-zhangyang/Real-World-Masked-Face-Dataset


In [0]:
from pathlib import Path

import pandas as pd
from google_drive_downloader import GoogleDriveDownloader as gdd
from tqdm import tqdm

In [3]:
# Fetch the training archive from the shared Google Drive file; unzip=True
# asks the downloader to extract it after the download finishes.
datasetPath = Path('data') / 'mask.zip'
gdd.download_file_from_google_drive(
    file_id='1UlOk6EtiaXTHylRUx2mySgvJX9ycoeBp',
    dest_path=str(datasetPath),
    unzip=True,
)

Downloading 1UlOk6EtiaXTHylRUx2mySgvJX9ycoeBp into data/mask.zip... Done.
Unzipping...Done.


In [0]:
# Delete the downloaded zip archive (it was already extracted by unzip=True).
# The is_file() guard keeps this cell safe to re-run: it does nothing when the
# archive is already gone, or when datasetPath has since been rebound to the
# extracted dataset directory by the DataFrame-building cell.
if datasetPath.is_file():
  datasetPath.unlink()

In [0]:
# Build the (image path, mask label) DataFrame and pickle it. If the pickle
# already exists this cell can be skipped and the cached file loaded instead.
#
# Rows are collected in a plain list and the DataFrame is created once at the
# end. The previous `maskDF = maskDF.append(...)` pattern copied the whole
# frame on every call (quadratic in the number of images) and DataFrame.append
# was removed in pandas 2.0. As a side benefit the 'mask' column is now stored
# as integers (0/1) rather than floats.
datasetPath = Path('./data/self-built-masked-face-recognition-dataset')
maskPath = datasetPath/'AFDB_masked_face_dataset'
nonMaskPath = datasetPath/'AFDB_face_dataset'

records = []

# Each root directory is walked two levels deep: one sub-directory per
# subject, each containing that subject's image files.
# Label 1 = wearing a mask, label 0 = no mask.
for label, rootPath, desc in ((1, maskPath, 'mask photos'),
                              (0, nonMaskPath, 'no mask photos')):
  for subject in tqdm(list(rootPath.iterdir()), desc=desc):
    for imgPath in subject.iterdir():
      records.append({'image': str(imgPath), 'mask': label})

# Explicit columns keep the schema stable even if no images were found.
maskDF = pd.DataFrame(records, columns=['image', 'mask'])

dfName = './data/mask_df.pickle'
print(f'saving DataFrame to {dfName}')
maskDF.to_pickle(dfName) # save the pickled frame; load it back with pd.read_pickle

In [0]:
# If the DataFrame has already been pickled, run just this cell to load it
# instead of rebuilding it from the image directories.
# NOTE: unpickling can execute arbitrary code; only load a pickle file that
# this notebook produced itself.
maskDF = pd.read_pickle('./data/mask_df.pickle')

In [18]:
# The counts show 2203 face images with a mask and 90468 normal face images.
# This differs somewhat from the 5 thousand masked / 90 thousand normal faces
# stated in the dataset description on GitHub.
maskDF['mask'].value_counts()

0.0    90468
1.0     2203
Name: mask, dtype: int64

In [0]:
# 构建Dataset，这里是为了能够让PyTorch进行读取
import cv2
from torch import long, tensor
from torch.utils.data.dataset import Dataset
from torchvision.transforms import Compose, Resize, ToPILImage, ToTensor

In [0]:
class MaskDataset(Dataset):
  """PyTorch dataset that serves face images with a mask / no-mask label.

  Images are read from disk on every access, resized to 100x100 and
  converted to a tensor.

  Args:
    dataFrame: table with an 'image' column (path of the image file) and a
      'mask' column (class label); rows are addressed by position.
  """

  def __init__(self, dataFrame):
    self.dataFrame = dataFrame
    self.transformations = Compose([
        ToPILImage(),
        Resize((100, 100)), # every face is resized to 100x100
        ToTensor(),
    ])

  def __getitem__(self, key):
    """Return the sample stored at positional index ``key``.

    Returns:
      dict with
        'image': the transformed image tensor (100x100 after the resize),
        'mask': a one-element tensor of dtype ``long`` holding the label.

    Raises:
      NotImplementedError: if ``key`` is a slice.
      OSError: if the image file cannot be read by OpenCV.
    """
    if isinstance(key, slice):
      raise NotImplementedError('slicing is not supported')
    row = self.dataFrame.iloc[key]
    # cv2.imread does not raise on a missing or undecodable file, it returns
    # None; fail here with the offending path instead of deep inside the
    # transform pipeline.
    image = cv2.imread(row['image'])
    if image is None:
      raise OSError(f"could not read image file: {row['image']}")
    # NOTE(review): cv2.imread yields BGR channel order while ToPILImage treats
    # a 3-channel array as RGB, so channels reach the model swapped. Left
    # unchanged because training and inference must agree - confirm before
    # adding a cv2.cvtColor conversion.
    return {
        'image': self.transformations(image),
        'mask': tensor([row['mask']], dtype=long)
    }

  def __len__(self):
    """Return the number of rows (samples) in the underlying DataFrame."""
    return len(self.dataFrame.index)

In [0]:
# Install PyTorch Lightning quietly (-q); the next cell imports pytorch_lightning.
# NOTE(review): the version is not pinned, so a fresh run may pull a release
# with a different API - consider pinning to the version this notebook was written against.
!pip install pytorch-lightning -q

In [0]:
# 构建模型
from pathlib import Path
from typing import Dict, List, Union

import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn.init as init

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch import Tensor
from torch.nn import (Conv2d, CrossEntropyLoss, Linear, MaxPool2d, ReLU, Sequential)
from torch.optim import Adam
from torch.optim.optimizer import Optimizer
from torch.utils.data import DataLoader