# Load the Dataset

In [1]:
!unzip -u "./dataset.zip" -d "./dataset"

Archive:  ./dataset.zip
   creating: ./dataset/dataset/
  inflating: ./dataset/__MACOSX/._dataset  
  inflating: ./dataset/dataset/.DS_Store  
  inflating: ./dataset/__MACOSX/dataset/._.DS_Store  
   creating: ./dataset/dataset/train/
  inflating: ./dataset/__MACOSX/dataset/._train  
  inflating: ./dataset/dataset/val_set_coco.json  
  inflating: ./dataset/__MACOSX/dataset/._val_set_coco.json  
  inflating: ./dataset/dataset/training_set_coco.json  
  inflating: ./dataset/__MACOSX/dataset/._training_set_coco.json  
   creating: ./dataset/dataset/val/
  inflating: ./dataset/__MACOSX/dataset/._val  
  inflating: ./dataset/dataset/train/10.jpeg  
  inflating: ./dataset/__MACOSX/dataset/train/._10.jpeg  
  inflating: ./dataset/dataset/train/26.jpeg  
  inflating: ./dataset/__MACOSX/dataset/train/._26.jpeg  
  inflating: ./dataset/dataset/train/30.jpeg  
  inflating: ./dataset/__MACOSX/dataset/train/._30.jpeg  
  inflating: ./dataset/dataset/train/.DS_Store  
  inflating: ./dataset/__MACOSX

Let us create a class for our dataset

In [2]:
import os
import torch
import torch.utils.data
import torchvision
from PIL import Image
from pycocotools.coco import COCO

class myOwnDataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by a COCO-format annotation file.

    Each item is an ``(image, target)`` pair where ``target`` is a dict with the
    keys ``boxes``, ``labels``, ``image_id``, ``area`` and ``iscrowd``.

    Args:
        root: Directory that contains the image files.
        annotation: Path to the COCO-format JSON annotation file.
        transforms: Optional callable applied to the loaded image only
            (the target is returned unchanged).
    """

    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        # Sorted so that a given index always maps to the same image id.
        self.ids = sorted(self.coco.imgs.keys())

    def __getitem__(self, index):
        """Load the image at ``index`` together with its detection target.

        Returns:
            tuple: ``(img, target)``; ``img`` is the RGB image (after
            ``self.transforms`` when one was given) and ``target`` is the
            annotation dict described on the class.
        """
        coco = self.coco
        img_id = self.ids[index]
        # All annotation records attached to this image.
        coco_annotation = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        path = coco.loadImgs(img_id)[0]['file_name']
        # Force three channels so grayscale / RGBA files produce the same
        # tensor layout as ordinary RGB images.
        img = Image.open(os.path.join(self.root, path)).convert("RGB")

        num_objs = len(coco_annotation)

        # COCO stores boxes as [xmin, ymin, width, height]; the model expects
        # [xmin, ymin, xmax, ymax].
        boxes = []
        for ann in coco_annotation:
            xmin, ymin, width, height = ann['bbox'][:4]
            boxes.append([xmin, ymin, xmin + width, ymin + height])
        # reshape keeps the (N, 4) layout even when the image has no
        # annotations (an empty list would otherwise become shape (0,)).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)

        # NOTE(review): every object is labelled as class 1 regardless of the
        # annotation's category; confirm this is intended if the annotation
        # file contains more than one category.
        labels = torch.ones((num_objs,), dtype=torch.int64)

        # Object areas exactly as recorded in the annotation file.
        areas = torch.as_tensor(
            [ann['area'] for ann in coco_annotation], dtype=torch.float32
        )
        # All instances are marked as non-crowd.
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        my_annotation = {
            "boxes": boxes,
            "labels": labels,
            # Kept as the plain id from the annotation file (not a tensor).
            "image_id": img_id,
            "area": areas,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img = self.transforms(img)

        return img, my_annotation

    def __len__(self):
        """Number of images listed in the annotation file."""
        return len(self.ids)

# PyTorch models take tensors as input, so every image is converted on load.
def get_transform():
    """Return the image transform pipeline: a ``Compose`` with one ``ToTensor`` step."""
    steps = [torchvision.transforms.ToTensor()]
    return torchvision.transforms.Compose(steps)

Now we will set up the DataLoaders: one for the training set and one for the validation set.

In [3]:
# Image folders and the COCO annotation file that belongs to each split
train_data_dir = 'dataset/dataset/train'
train_coco = 'dataset/dataset/training_set_coco.json'

val_data_dir = 'dataset/dataset/val'
val_coco = 'dataset/dataset/val_set_coco.json'

# One dataset per split; each one gets its own transform pipeline instance
tr_dataset = myOwnDataset(train_data_dir, train_coco, get_transform())
val_dataset = myOwnDataset(val_data_dir, val_coco, get_transform())


# Custom collate function used by the DataLoaders to assemble a batch
def collate_fn(batch):
    """Regroup a list of samples into one tuple per sample field.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; an empty batch becomes ``()``.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Batch sizes for the two splits
train_batch_size = 1
val_batch_size = 1

# Training loader: reshuffled every epoch
data_loader_train = torch.utils.data.DataLoader(tr_dataset,
                                          batch_size=train_batch_size,
                                          shuffle=True,
                                          num_workers=4,
                                          collate_fn=collate_fn)

# Validation loader: fixed order, so evaluation runs visit the images in the
# same sequence every time (shuffling adds nothing to evaluation).
data_loader_val = torch.utils.data.DataLoader(val_dataset,
                                          batch_size=val_batch_size,
                                          shuffle=False,
                                          num_workers=4,
                                          collate_fn=collate_fn)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!




# Run the model

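Download the helper modules (`engine.py`, `utils.py`, `coco_utils.py`, `coco_eval.py`, `transforms.py`) from the torchvision detection references; the training cell below imports `train_one_epoch` and `evaluate` from `engine.py`.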

In [4]:
# Fetch the torchvision detection reference helpers; the training cell imports
# `train_one_epoch` and `evaluate` from engine.py.
# NOTE(review): the URLs track the `main` branch, so the helpers can drift from
# the installed torchvision version; consider pinning a release tag.
REFERENCE_BASE_URL = "https://raw.githubusercontent.com/pytorch/vision/main/references/detection"
REFERENCE_HELPERS = ("engine.py", "utils.py", "coco_utils.py", "coco_eval.py", "transforms.py")

for helper_name in REFERENCE_HELPERS:
    # Skip files that are already present so re-running the cell does not leave
    # numbered duplicates (engine.py.1, ...) next to a stale original.
    if os.path.exists(helper_name):
        continue
    exit_status = os.system(f'wget -q "{REFERENCE_BASE_URL}/{helper_name}" -O "{helper_name}"')
    if exit_status != 0:
        # -O creates the target file before the transfer starts; remove the
        # partial file so the next run retries instead of skipping it.
        if os.path.exists(helper_name):
            os.remove(helper_name)
        raise RuntimeError(f"Failed to download {helper_name} (wget exit status {exit_status})")

0

In [5]:
from torchvision.models.detection.faster_rcnn import *
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from engine import train_one_epoch, evaluate

def get_model_instance_segmentation(num_classes):
    """Build a Faster R-CNN detector whose box head predicts ``num_classes`` classes.

    Despite the function name, the model built here is
    ``fasterrcnn_resnet50_fpn`` (an object detector) loaded with its
    ``'DEFAULT'`` weights; only the box predictor is replaced.

    Args:
        num_classes: Number of output classes passed to ``FastRCNNPredictor``.

    Returns:
        The model with a freshly created box predictor.
    """
    # Other torchvision.models.detection builders (fasterrcnn_resnet50_fpn_v2,
    # fasterrcnn_mobilenet_v3_large_320_fpn, fasterrcnn_mobilenet_v3_large_fpn)
    # were tried here as alternatives.
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights='DEFAULT')

    # The new head must accept the same feature width as the one it replaces.
    roi_heads = detector.roi_heads
    feature_width = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(feature_width, num_classes)

    return detector

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 5 classes: 4 target classes plus the background
# NOTE(review): myOwnDataset labels every object as class 1; confirm that the
# class count here matches what the dataset actually emits.
num_classes = 5
num_epochs = 5
model = get_model_instance_segmentation(num_classes)

# The model has to live on the same device as the batches
model.to(device)

# Optimise only the parameters that require gradients
params = list(filter(lambda param: param.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by 0.1 every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
len_dataloader = len(data_loader_train)

for epoch in range(num_epochs):
    # One pass over the training data, logging every 10 iterations
    train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=10)
    # Advance the learning-rate schedule once per epoch
    lr_scheduler.step()
    # Score the current weights on the validation loader
    evaluate(model, data_loader_val, device=device)


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth
100%|██████████| 167M/167M [00:01<00:00, 143MB/s]


Epoch: [0]  [ 0/32]  eta: 0:21:28  lr: 0.000166  loss: 2.0186 (2.0186)  loss_classifier: 1.8821 (1.8821)  loss_box_reg: 0.0863 (0.0863)  loss_objectness: 0.0213 (0.0213)  loss_rpn_box_reg: 0.0290 (0.0290)  time: 40.2762  data: 0.2009
Epoch: [0]  [10/32]  eta: 0:12:08  lr: 0.001777  loss: 1.3880 (1.3838)  loss_classifier: 1.2762 (1.1658)  loss_box_reg: 0.1084 (0.1868)  loss_objectness: 0.0153 (0.0165)  loss_rpn_box_reg: 0.0103 (0.0147)  time: 33.1052  data: 0.0197
Epoch: [0]  [20/32]  eta: 0:06:33  lr: 0.003389  loss: 0.3742 (0.9404)  loss_classifier: 0.1945 (0.7329)  loss_box_reg: 0.0993 (0.1611)  loss_objectness: 0.0135 (0.0292)  loss_rpn_box_reg: 0.0114 (0.0171)  time: 32.4034  data: 0.0025
Epoch: [0]  [30/32]  eta: 0:01:04  lr: 0.005000  loss: 0.2286 (0.8388)  loss_classifier: 0.1039 (0.6077)  loss_box_reg: 0.0963 (0.1796)  loss_objectness: 0.0161 (0.0340)  loss_rpn_box_reg: 0.0190 (0.0174)  time: 31.6545  data: 0.0034
Epoch: [0]  [31/32]  eta: 0:00:32  lr: 0.005000  loss: 0.2286 (0