# IMPORT

In [1]:
import os
import bs4
import torch
from PIL import Image
from torchvision import transforms, datasets, models
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import matplotlib.patches as patches
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


# LOAD DATA

In [2]:
def generate_box(obj):
    
    xmin = int(obj.find('xmin').text)
    ymin = int(obj.find('ymin').text)
    xmax = int(obj.find('xmax').text)
    ymax = int(obj.find('ymax').text)
    
    return [xmin, ymin, xmax, ymax]

def generate_label(obj):
    if obj.find('name').text == "with_mask":
        return 1
    elif obj.find('name').text == "mask_weared_incorrect":
        return 2
    return 0

def generate_target(image_id, file): 
    with open(file) as f:
        data = f.read()
        soup = bs4.BeautifulSoup(data, 'xml')
        objects = soup.find_all('object')

        num_objs = len(objects)

        # Bounding boxes for objects
        # In coco format, bbox = [xmin, ymin, width, height]
        # In pytorch, the input should be [xmin, ymin, xmax, ymax]
        boxes = []
        labels = []
        for i in objects:
            boxes.append(generate_box(i))
            labels.append(generate_label(i))
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # Labels (In my case, I only one class: target class or background)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        # Tensorise img_id
        img_id = torch.tensor([image_id])
        # Annotation is in dictionary format
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = img_id
        
        return target

In [3]:
# get images list
imgs= list(sorted(os.listdir("C:/Users/90761/Desktop/kaggle/face-mask-detection/dataset/archive/images/")))
# get labels list
labels=list(sorted(os.listdir("C:/Users/90761/Desktop/kaggle/face-mask-detection/dataset/archive/annotations/")))

# DATA PREPROCESSING

In [4]:
class MaskDataset(object):
    def __init__(self,transforms):
        self.transforms=transforms
        self.imgs=list(sorted(os.listdir("C:/Users/90761/Desktop/kaggle/face-mask-detection/dataset/archive/images/")))

    def __getitem__(self,idx):
        file_img='maksssksksss'+str(idx)+'.png'
        file_label='maksssksksss'+str(idx)+'.xml'
        img_path=os.path.join("C:/Users/90761/Desktop/kaggle/face-mask-detection/dataset/archive/images/", file_img)
        label_path=os.path.join("C:/Users/90761/Desktop/kaggle/face-mask-detection/dataset/archive/annotations/",file_label)
        img=Image.open(img_path).convert("RGB")
        target=generate_target(idx, label_path)
        if self.transforms is not None:
            img = self.transforms(img)
        return img,target
    def __len__(self):
        return len(self.imgs)

In [5]:
data_transform= transforms.Compose([
        transforms.ToTensor(), 
    ])

In [6]:
def collate_fn(batch):
    return tuple(zip(*batch))
dataset=MaskDataset(data_transform)
dataloader=torch.utils.data.DataLoader(dataset,batch_size=4,collate_fn=collate_fn)
torch.cuda.is_available()

True

In [7]:
def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.COCO_V1)
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

model = get_model_instance_segmentation(3)

In [8]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
for imgs, annotations in dataloader:
    imgs = list(img.to(device) for img in imgs)
    annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
    print(annotations)
    break

[{'boxes': tensor([[ 79., 105., 109., 142.],
        [185., 100., 226., 144.],
        [325.,  90., 360., 141.]], device='cuda:0'), 'labels': tensor([0, 1, 0], device='cuda:0'), 'image_id': tensor([0], device='cuda:0')}, {'boxes': tensor([[321.,  34., 354.,  69.],
        [224.,  38., 261.,  73.],
        [299.,  58., 315.,  81.],
        [143.,  74., 174., 115.],
        [ 74.,  69.,  95.,  99.],
        [191.,  67., 221.,  93.],
        [ 21.,  73.,  44.,  93.],
        [369.,  70., 398.,  99.],
        [ 83.,  56., 111.,  89.]], device='cuda:0'), 'labels': tensor([1, 1, 1, 1, 1, 1, 1, 1, 0], device='cuda:0'), 'image_id': tensor([1], device='cuda:0')}, {'boxes': tensor([[ 68.,  42., 105.,  69.],
        [154.,  47., 178.,  74.],
        [238.,  34., 262.,  69.],
        [333.,  31., 366.,  65.]], device='cuda:0'), 'labels': tensor([1, 1, 1, 2], device='cuda:0'), 'image_id': tensor([2], device='cuda:0')}, {'boxes': tensor([[ 52.,  53.,  73.,  76.],
        [ 72.,  53.,  92.,  75.],
  

In [9]:
num_epochs = 20
model.to(device)
    
# parameters
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

len_dataloader = len(dataloader)

for epoch in range(num_epochs):
    model.train()
    i = 0    
    epoch_loss = 0
    for imgs, annotations in dataloader:
        i += 1
        imgs = list(img.to(device) for img in imgs)
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
        loss_dict = model([imgs[0]], [annotations[0]])
        losses = sum(loss for loss in loss_dict.values())        

        optimizer.zero_grad()
        losses.backward()
        optimizer.step() 
        print(f'Iteration: {i}/{len_dataloader}, Loss: {losses}')
        epoch_loss += losses
    print(epoch_loss)
torch.save(model.state_dict(),'model.pt')

Iteration: 1/214, Loss: 1.1745675802230835
Iteration: 2/214, Loss: 0.6301990747451782
Iteration: 3/214, Loss: 0.3290580213069916
Iteration: 4/214, Loss: 0.9844484329223633
Iteration: 5/214, Loss: 0.30488312244415283
Iteration: 6/214, Loss: 0.37946823239326477
Iteration: 7/214, Loss: 0.20081521570682526
Iteration: 8/214, Loss: 0.44382888078689575
Iteration: 9/214, Loss: 0.6705703735351562
Iteration: 10/214, Loss: 1.7708250284194946
Iteration: 11/214, Loss: 0.6534496545791626
Iteration: 12/214, Loss: 0.8332648277282715
Iteration: 13/214, Loss: 0.11863160878419876
Iteration: 14/214, Loss: 2.0190484523773193
Iteration: 15/214, Loss: 0.43926000595092773
Iteration: 16/214, Loss: 0.3698033094406128
Iteration: 17/214, Loss: 2.194249153137207
Iteration: 18/214, Loss: 0.09814711660146713
Iteration: 19/214, Loss: 0.08205889910459518
Iteration: 20/214, Loss: 1.0562340021133423
Iteration: 21/214, Loss: 0.18489094078540802
Iteration: 22/214, Loss: 0.4247574806213379
Iteration: 23/214, Loss: 0.269438

In [10]:
import numpy as np
def plot_image(img_tensor, annotation):
    
    fig,ax = plt.subplots(1)
    img = img_tensor.cpu().data

    # Display the image
    ax.imshow(img.permute(1, 2, 0))
    for box in annotation["boxes"]:
        box=box.cpu().detach().numpy()
        xmin, ymin, xmax, ymax = box

        # Create a Rectangle patch
        rect = patches.Rectangle((xmin,ymin),(xmax-xmin),(ymax-ymin),linewidth=1,edgecolor='r',facecolor='none')
        # Add the patch to the Axes
        ax.add_patch(rect)

    plt.show()
for imgs, annotations in dataloader:
        imgs = list(img.to(device) for img in imgs)
        annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]
        break
model2 = get_model_instance_segmentation(3)
model2.load_state_dict(torch.load("C:/Users/90761/Desktop/kaggle/face-mask-detection/model.pt"))
model2.eval()
model2.to(device)
preds = model2(imgs)
plot_image(imgs[0],preds[0])

: 

: 