<a href="https://colab.research.google.com/github/skj092/Computer_Vision_Lab/blob/main/Mask_Detection_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install pycocotools --quiet
!git clone https://github.com/pytorch/vision.git
!git checkout v0.3.0

!cp vision/references/detection/utils.py ./
!cp vision/references/detection/transforms.py ./
!cp vision/references/detection/coco_eval.py ./
!cp vision/references/detection/engine.py ./
!cp vision/references/detection/coco_utils.py ./

Cloning into 'vision'...
remote: Enumerating objects: 175725, done.[K
remote: Counting objects: 100% (8088/8088), done.[K
remote: Compressing objects: 100% (572/572), done.[K
remote: Total 175725 (delta 7594), reused 7944 (delta 7499), pack-reused 167637[K
Receiving objects: 100% (175725/175725), 346.36 MiB | 38.49 MiB/s, done.
Resolving deltas: 100% (157214/157214), done.
fatal: not a git repository (or any of the parent directories): .git


In [2]:
!pip install --upgrade albumentations

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting albumentations
  Downloading albumentations-1.2.1-py3-none-any.whl (116 kB)
[K     |████████████████████████████████| 116 kB 29.1 MB/s 
[?25hCollecting qudida>=0.0.4
  Downloading qudida-0.0.4-py3-none-any.whl (3.5 kB)
Installing collected packages: qudida, albumentations
  Attempting uninstall: albumentations
    Found existing installation: albumentations 0.1.12
    Uninstalling albumentations-0.1.12:
      Successfully uninstalled albumentations-0.1.12
Successfully installed albumentations-1.2.1 qudida-0.0.4


In [5]:
!pip uninstall opencv-python-headless -y



In [6]:
!pip install opencv-python-headless==4.1.2.30

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting opencv-python-headless==4.1.2.30
  Downloading opencv_python_headless-4.1.2.30-cp37-cp37m-manylinux1_x86_64.whl (21.8 MB)
[K     |████████████████████████████████| 21.8 MB 1.4 MB/s 
Installing collected packages: opencv-python-headless
Successfully installed opencv-python-headless-4.1.2.30


In [1]:
# Importing necessary libraries 

import os 
from pathlib import Path 
import xml.etree.ElementTree as ET 
from torch.utils.data import Dataset
from PIL import Image
import torch 
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import numpy as np
import cv2 
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision

# these are the helper libraries imported.
from engine import train_one_epoch, evaluate
import utils
import transforms as T

# Downloading the dataset

In [None]:
 %%bash

wget "https://public.roboflow.com/ds/kQQWZNGxGA?key=65kkvLv3xW" -O data.zip 

unzip -q data.zip

rm data.zip 

ls

# Define Transform 

In [3]:
import torchvision.transforms as T

def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

In [4]:
class MaskDetectionDataset(Dataset):
    def __init__(self, root, transforms=None):
        self.root = Path(root)
        self.transforms = transforms
        self.image_paths = list(self.root.glob('*.jpg'))
        self.image_paths.sort()
        self.mask_paths = [p.parent / (p.stem + '.xml') for p in self.image_paths]

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        mask_path = self.mask_paths[idx]
        image = cv2.imread(str(image_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # Parsing xml to get the labels and the bounding boxes
        annotation = ET.parse(mask_path)
        root = annotation.getroot()
        objects = root.findall('object')
        boxes = []
        labels = []
        for obj in objects:
            name = obj.find('name').text
            bbox = obj.find('bndbox')
            xmin = int(bbox.find('xmin').text)
            ymin = int(bbox.find('ymin').text)
            xmax = int(bbox.find('xmax').text)
            ymax = int(bbox.find('ymax').text)
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(name)

        # Converting the labels and the bounding boxes to tensors
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = list(map(lambda x: 1 if x == 'mask' else 0, labels))
        labels = torch.as_tensor(labels)
        image_id = torch.tensor([idx])

        # Other parameters
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((len(labels),), dtype=torch.int64)

        # Creating the tensor for the dataset
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['image_id'] = image_id
        target['area'] = area
        target['iscrowd'] = iscrowd

        # Applying the transforms
        if self.transforms is not None:
          image = self.transforms(image)

        return image, target


    def __len__(self):
        return len(self.image_paths)

In [5]:
def get_object_detection_model(num_classes):

    # load a model pre-trained pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) 

    return model

In [6]:
train_ds = MaskDetectionDataset(root='train/', transforms=get_transform(train=True))
valid_ds = MaskDetectionDataset(root='valid/', transforms=get_transform(train=False))

train_dl = DataLoader(train_ds, batch_size=2, shuffle=True, num_workers=0, collate_fn=utils.collate_fn)
valid_dl = DataLoader(valid_ds, batch_size=2, shuffle=True, num_workers=0, collate_fn=utils.collate_fn)


# to train on gpu if selected.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


num_classes = 2

# get the model using our helper function
model = get_object_detection_model(num_classes)

# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                            step_size=3,
                                            gamma=0.1)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

In [7]:
# training for 10 epochs
num_epochs = 10

for epoch in range(num_epochs):
    # training for one epoch
    train_one_epoch(model, optimizer, train_dl, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, valid_dl, device=device)

Epoch: [0]  [ 0/52]  eta: 0:06:56  lr: 0.000103  loss: 1.3373 (1.3373)  loss_classifier: 0.7559 (0.7559)  loss_box_reg: 0.2927 (0.2927)  loss_objectness: 0.2608 (0.2608)  loss_rpn_box_reg: 0.0279 (0.0279)  time: 8.0022  data: 0.0560  max mem: 2573
Epoch: [0]  [10/52]  eta: 0:00:53  lr: 0.001082  loss: 0.9786 (0.9326)  loss_classifier: 0.4628 (0.4857)  loss_box_reg: 0.1836 (0.2323)  loss_objectness: 0.1764 (0.1890)  loss_rpn_box_reg: 0.0206 (0.0256)  time: 1.2706  data: 0.0457  max mem: 3631
Epoch: [0]  [20/52]  eta: 0:00:30  lr: 0.002062  loss: 0.6915 (0.8609)  loss_classifier: 0.3356 (0.4224)  loss_box_reg: 0.2410 (0.2758)  loss_objectness: 0.0807 (0.1405)  loss_rpn_box_reg: 0.0195 (0.0222)  time: 0.6141  data: 0.0406  max mem: 4051
Epoch: [0]  [30/52]  eta: 0:00:18  lr: 0.003041  loss: 0.6063 (0.7815)  loss_classifier: 0.2407 (0.3515)  loss_box_reg: 0.2574 (0.2639)  loss_objectness: 0.0585 (0.1438)  loss_rpn_box_reg: 0.0155 (0.0224)  time: 0.6102  data: 0.0369  max mem: 4051
Epoch: [