In [None]:
from google.colab import files
# Upload the Kaggle API token; the commands below expect it to arrive as
# ./kaggle.json in the current working directory.
files.upload() # expire any previous token(s) and upload recreated token

# Replace any existing ~/.kaggle directory with a fresh one holding the new
# token. NOTE: `rm -r` prints a "No such file or directory" message when the
# directory does not exist yet (e.g. on a fresh runtime).
!rm -r ~/.kaggle
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json
# Download the dataset archive and extract it into ./face-mask-detection.
!kaggle datasets download andrewmvd/face-mask-detection
!unzip -q face-mask-detection.zip -d face-mask-detection # outputs suppressed

In [None]:
import os
import torch
from PIL import Image
import torchvision.transforms as transforms
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler
import xml.etree.ElementTree as ET

In [None]:
class GestureDataset:
    """
    Map-style dataset of images with bounding-box annotations.

    Expects `dataset_path` to contain two sub-directories:
        images/       one image file per sample
        annotations/  one XML file per image, same base name, `.xml` extension

    Each sample is a tuple `(img, target)` where `target` is a dict with keys
    'boxes' (float32, one [xmin, ymin, xmax, ymax] row per object),
    'label' (int64 class index per object, see CLASSES) and
    'image_id' (1-element tensor holding the sample index).
    """

    # Annotation class name -> integer class index.
    CLASSES = {"with_mask": 0,
               "without_mask": 1,
               "mask_weared_incorrect": 2}

    def __init__(self, dataset_path, transform=None):
        """
        Input:
            dataset_path: root directory holding `images/` and `annotations/`
            transform: optional callable applied to the RGB PIL image in place
                       of the default `transforms.ToTensor()`
        """
        self.image_dir = os.path.join(dataset_path, "images")
        self.xml_dir = os.path.join(dataset_path, "annotations")
        # os.listdir returns entries in arbitrary order; sort so that a given
        # index always refers to the same file (keeps seeded splits stable).
        self.image_list = sorted(os.listdir(self.image_dir))
        self.transform = transform

    def __getitem__(self, idx):
        """
        Load an image and an annotation
        Input:
            idx: position of the sample in `self.image_list`
        Return:
            img: transformed image (a tensor when the default transform is used)
            target: dict with 'boxes', 'label' and 'image_id' tensors
        """
        img_name = self.image_list[idx]
        img_path = os.path.join(self.image_dir, img_name)
        # convert() loads the pixel data and returns a new image, so the
        # underlying file can be closed as soon as the `with` block exits.
        with Image.open(img_path) as img_file:
            img = img_file.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)
        else:
            img = transforms.ToTensor()(img)

        bbox, labels = self.read_xml(img_name, self.xml_dir)
        # reshape keeps an (N, 4) layout even when the image has no objects,
        # where as_tensor alone would produce a shape-(0,) tensor.
        boxes = torch.as_tensor(bbox, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        # NOTE(review): torchvision detection models read the class indices
        # from a 'labels' key; the key is kept as 'label' here so existing
        # consumers keep working -- confirm what downstream code expects.
        target = {
            'boxes': boxes,
            'label': labels,
            'image_id': torch.tensor([idx]),
        }

        return img, target

    def __len__(self):
        """Number of files found in the image directory."""
        return len(self.image_list)

    def read_xml(self, file_name, xml_dir):
        """
        Function used to get the bounding boxes and labels from the xml file
        Input:
            file_name: image file name
            xml_dir: directory of xml file
        Return:
            bbox : list of bounding boxes, each [xmin, ymin, xmax, ymax]
            labels: list of labels (integer class indices from CLASSES)
        Raises:
            KeyError: if an object's name is not a key of CLASSES
        """
        bboxes = []
        labels = []

        # Swap the image extension for '.xml' regardless of its length
        # (slicing off 3 characters breaks for names such as 'x.jpeg').
        base_name = os.path.splitext(file_name)[0]
        annot_path = os.path.join(xml_dir, base_name + '.xml')
        root = ET.parse(annot_path).getroot()

        for obj in root.iter('object'):
            xmin = int(obj.find("bndbox/xmin").text)
            ymin = int(obj.find("bndbox/ymin").text)
            xmax = int(obj.find("bndbox/xmax").text)
            ymax = int(obj.find("bndbox/ymax").text)

            bboxes.append([xmin, ymin, xmax, ymax])
            labels.append(GestureDataset.CLASSES[obj.find('name').text])

        return bboxes, labels

In [None]:
# Dataloader
def data_loader(batch_size, dataset_path='/content/face-mask-detection/',
                seed=2343, num_workers=1, collate_fn=None):
  """
  Build train / validation / test DataLoaders over one GestureDataset.

  The sample indices are shuffled with a fixed seed and then cut into three
  disjoint subsets: the last ~10% for testing, and the remaining ~90% split
  80/20 into training and validation.

  Input:
      batch_size: number of samples per batch for all three loaders
      dataset_path: dataset root passed to GestureDataset
      seed: seed for the NumPy shuffle that decides the split
      num_workers: worker processes used by each DataLoader
      collate_fn: optional batch-collation callable forwarded to DataLoader.
                  With None, PyTorch's default collation stacks samples, which
                  requires every image and every target tensor in a batch to
                  share a shape; for variable-size samples with batch_size > 1
                  pass e.g. `lambda batch: tuple(zip(*batch))`.
  Return:
      train_set, validation_set, test_set: the three DataLoaders
  """
  gesture_dataset = GestureDataset(dataset_path)

  # Divide dataset into test, train and validation subsets
  image_num = len(gesture_dataset)
  index_list = list(range(image_num))
  test_split = int(image_num * 0.9) # 10% of dataset for testing
  val_split = int(test_split * 0.8) # 20% of remaining for validation

  # Shuffle to make the allocation random.
  # NOTE: this reseeds NumPy's global RNG, which also affects any later
  # np.random calls made by the caller.
  np.random.seed(seed)
  np.random.shuffle(index_list)
  train_indices = index_list[:val_split]
  validation_indices = index_list[val_split:test_split]
  test_indices = index_list[test_split:]

  def _make_loader(indices):
    # One loader per subset; each draws its indices in random order.
    return torch.utils.data.DataLoader(gesture_dataset,
                                       batch_size=batch_size,
                                       num_workers=num_workers,
                                       sampler=SubsetRandomSampler(indices),
                                       collate_fn=collate_fn)

  train_set = _make_loader(train_indices)
  validation_set = _make_loader(validation_indices)
  test_set = _make_loader(test_indices)

  return train_set, validation_set, test_set
