<a href="https://colab.research.google.com/github/kyrajeep/DL_Projects/blob/master/detect_mask.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers



In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from bs4 import BeautifulSoup
import torchvision
from torchvision import transforms, datasets, models
import torch
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import Image
import matplotlib.pyplot as plt
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import matplotlib.patches as patches


In [3]:
from transformers import AutoModelForImageClassification, ViTImageProcessor
import os

In [4]:
!pip install kaggle
from google.colab import files
files.upload()



Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"kjpark5","key":"<REDACTED>"}'}

In [5]:
# use the Kaggle API to directly download data
!mkdir ~/.kaggle
#files.upload()
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
#!kaggle datasets list


In [None]:
!kaggle datasets download -d andrewmvd/face-mask-detection
!unzip face-mask-detection.zip


In [7]:
# List the contents of the working directory.
!ls

annotations  face-mask-detection.zip  images  sample_data


In [8]:
# Sorted file names found in the image and annotation directories.
images = sorted(os.listdir("images/"))
labels = sorted(os.listdir("annotations/"))

In [9]:
def generate_box(obj):
    """Read an object's bounding-box corners as integers.

    Args:
        obj: Node exposing ``find(tag)``; for each of ``xmin``, ``ymin``,
            ``xmax`` and ``ymax`` the returned node's ``.text`` is parsed
            with ``int``.

    Returns:
        list[int]: ``[xmin, ymin, xmax, ymax]``.
    """
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    return [int(obj.find(tag).text) for tag in corner_tags]

def generate_label(obj):
    """Translate an object's ``name`` text into an integer class id.

    Returns:
        int: 1 for ``"with_mask"``, 2 for ``"mask_weared_incorrect"``,
        and 0 for any other name.
    """
    class_ids = {"with_mask": 1, "mask_weared_incorrect": 2}
    return class_ids.get(obj.find('name').text, 0)

def generate_target(image_id, file):
    """Build the detection target dict for one XML annotation file.

    Args:
        image_id: Integer id stored in the target under ``"image_id"``.
        file: Path to the XML annotation file. Every ``<object>`` element
            contributes one box (``generate_box``) and one label
            (``generate_label``).

    Returns:
        dict with three tensors:
            ``"boxes"``: float32, shape ``(num_objects, 4)``, each row
                ``[xmin, ymin, xmax, ymax]``;
            ``"labels"``: int64, shape ``(num_objects,)``;
            ``"image_id"``: one-element tensor holding ``image_id``.
    """
    with open(file) as f:
        data = f.read()
    # The handle is only needed for the read; parse after it is closed.
    soup = BeautifulSoup(data, 'xml')
    objects = soup.find_all('object')

    # COCO stores boxes as [xmin, ymin, width, height]; for PyTorch the input
    # should be [xmin, ymin, xmax, ymax], which is what generate_box returns.
    boxes = [generate_box(obj) for obj in objects]
    # Class ids as produced by generate_label (0, 1 or 2).
    labels = [generate_label(obj) for obj in objects]

    # Annotation is in dictionary format
    target = {}
    # Explicit (N, 4) shape so an annotation with no objects gives an empty
    # (0, 4) tensor instead of a flat (0,) one.
    target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32).reshape(len(boxes), 4)
    target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
    target["image_id"] = torch.tensor([image_id])

    return target

In [10]:
class MaskDataset(object):
    """Map-style dataset pairing each face-mask image with its detection target.

    Sample ``idx`` is read from ``images/maksssksksss{idx}.png`` and
    ``annotations/maksssksksss{idx}.xml``. The directory listing taken in
    ``__init__`` is used only to determine the dataset length.
    """

    def __init__(self, transforms):
        """Store the image transform and list the image directory.

        Args:
            transforms: Callable applied to the RGB PIL image in
                ``__getitem__``, or ``None`` to return the PIL image as is.
        """
        self.transforms = transforms
        # Sorted listing of all image files; its length defines len(self).
        self.imgs = list(sorted(os.listdir("images/")))

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample number ``idx``.

        Raises:
            IndexError: if ``idx`` is outside ``[0, len(self))``. Raising
                IndexError (rather than failing later on a missing file)
                lets plain ``for sample in dataset`` iteration terminate.
        """
        if not 0 <= idx < len(self):
            raise IndexError(
                f"index {idx} is out of range for dataset of size {len(self)}"
            )

        # File names are derived from the index, not from self.imgs.
        file_image = 'maksssksksss' + str(idx) + '.png'
        file_label = 'maksssksksss' + str(idx) + '.xml'
        img_path = os.path.join("images/", file_image)
        label_path = os.path.join("annotations/", file_label)

        img = Image.open(img_path).convert("RGB")
        # Boxes, labels and image id parsed from the matching XML annotation.
        target = generate_target(idx, label_path)

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Return the number of files listed in the image directory."""
        return len(self.imgs)



In [11]:
# Preprocessing pipeline applied to each image: a single ToTensor step.
data_transform = transforms.Compose([transforms.ToTensor()])


In [12]:
# Batch samples of different sizes without manual padding.
# https://stackoverflow.com/questions/65279115/how-to-use-collate-fn-with-dataloaders
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``;
    nothing is stacked, so items may differ in size.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Full dataset; every image goes through data_transform when it is loaded.
dataset = MaskDataset(transforms=data_transform)

In [14]:
# 70% of the samples go to training; the test set takes whatever remains.
train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size
print(
    'Length of dataset is', len(dataset),
    '\nLength of training set is :', train_size,
    '\nLength of test set is :', test_size,
)


Length of dataset is 853 
Length of training set is : 597 
Length of test set is : 256


In [15]:
# Random train/test split. The fixed-seed generator makes the partition
# identical on every run, so results are reproducible after a kernel restart.
trainset, testset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

# Training batches of 4 samples; collate_fn returns tuples of images and
# targets instead of stacking them.
data_loader = torch.utils.data.DataLoader(
    trainset, batch_size=4, collate_fn=collate_fn)


In [18]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Take only the first batch, move its tensors to `device`, and print the targets.
for imgs, annotations in data_loader:
    imgs = [img.to(device) for img in imgs]
    img_size = imgs[0].size()
    annotations = [
        {key: tensor.to(device) for key, tensor in target.items()}
        for target in annotations
    ]
    print(annotations)
    break


[{'boxes': tensor([[102.,  54., 239., 210.]], device='cuda:0'), 'labels': tensor([1], device='cuda:0'), 'image_id': tensor([170], device='cuda:0')}, {'boxes': tensor([[ 42.,  88.,  68., 112.],
        [ 46.,  59.,  69.,  78.],
        [ 77.,  56.,  97.,  72.],
        [101.,  69., 120.,  87.],
        [114.,  51., 132.,  70.],
        [ 84.,  34., 105.,  56.],
        [ 42., 125.,  62., 158.],
        [ 63., 138.,  93., 171.],
        [ 91., 162., 121., 197.],
        [129., 176., 168., 218.],
        [136., 102., 162., 131.],
        [136.,  69., 161.,  89.],
        [140.,  33., 155.,  48.],
        [177.,  32., 194.,  54.],
        [172.,  46., 184.,  65.],
        [196.,  33., 213.,  52.],
        [221.,  22., 237.,  40.],
        [236.,  35., 261.,  60.],
        [175.,  90., 197., 114.],
        [170., 130., 196., 153.],
        [213., 101., 238., 130.],
        [185., 169., 202., 193.],
        [165., 172., 182., 207.],
        [229., 186., 265., 223.],
        [244., 113., 272.

In [19]:
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image
import requests

# Alternative input from the model card (a single COCO sample image):
#url = "http://images.cocodataset.org/val2017/000000039769.jpg"
#image = Image.open(requests.get(url, stream=True).raw)

# you can specify the revision tag if you don't want the timm dependency
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")

# imgs is the batch of image tensors loaded above. do_rescale=False skips the
# processor's own rescaling -- presumably because ToTensor already scaled the
# pixel values to [0, 1]; confirm if the transform changes.
inputs = processor(images=imgs, return_tensors="pt", do_rescale=False)

# Inference only: no_grad avoids building the autograd graph, which would
# otherwise hold activation memory for a backward pass that never happens.
with torch.no_grad():
    outputs = model(**inputs)




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/401 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/6.60k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/167M [00:00<?, ?B/s]

In [20]:
# Turn the raw outputs (bounding boxes and class logits) into detections,
# keeping only those with score > 0.9.
# One (height, width) entry per image in the batch.
target_sizes = [img.shape[-2:] for img in imgs]
# [0] selects the detections of the first image in the batch only.
results = processor.post_process_object_detection(
    outputs,
    target_sizes=target_sizes,
    threshold=0.9,
)[0]

for score, label, box in zip(*(results[key] for key in ("scores", "labels", "boxes"))):
    # Box coordinates rounded to two decimals for printing.
    box = [round(coord, 2) for coord in box.tolist()]
    print(
            f"Detected {model.config.id2label[label.item()]} with confidence "
            f"{round(score.item(), 3)} at location {box}"
    )

Detected person with confidence 0.92 at location [0.03, 0.1, 332.56, 225.09]
