In [1]:
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-j9ad3819
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-j9ad3819
  Resolved https://github.com/facebookresearch/detectron2.git to commit 0ae803b1449cd2d3f8fa1b7c0f59356db10b3083
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting yacs>=0.1.8 (from detectron2==0.6)
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6)
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting iopath<0.1.10,>=0.1.7 (from detectron2==0.6)
  Downloading iopath-0.1.9-py3-none-any.whl (27 kB)
Collecting omegaconf<2.4,>=2.1 (from detectron2==

In [2]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, random
import cv2
import torch

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.engine import DefaultTrainer
from detectron2.data.datasets import register_coco_instances

Need to mount to the Google Drive to access the folder containing the data

In [3]:
from google.colab import drive
from google.colab.patches import cv2_imshow

drive.mount('/content/drive')


Mounted at /content/drive


Registering the dataset with detectron2 so the model can access it

In [4]:
register_coco_instances("BasketballCourt_Sat", {}, "/content/drive/My Drive/detectronDataset/imageLabels.json", "/content/drive/My Drive/detectronDataset/")
registered_datasets = DatasetCatalog.list()
print(registered_datasets)

['coco_2014_train', 'coco_2014_val', 'coco_2014_minival', 'coco_2014_valminusminival', 'coco_2017_train', 'coco_2017_val', 'coco_2017_test', 'coco_2017_test-dev', 'coco_2017_val_100', 'keypoints_coco_2014_train', 'keypoints_coco_2014_val', 'keypoints_coco_2014_minival', 'keypoints_coco_2014_valminusminival', 'keypoints_coco_2017_train', 'keypoints_coco_2017_val', 'keypoints_coco_2017_val_100', 'coco_2017_train_panoptic_separated', 'coco_2017_train_panoptic_stuffonly', 'coco_2017_train_panoptic', 'coco_2017_val_panoptic_separated', 'coco_2017_val_panoptic_stuffonly', 'coco_2017_val_panoptic', 'coco_2017_val_100_panoptic_separated', 'coco_2017_val_100_panoptic_stuffonly', 'coco_2017_val_100_panoptic', 'lvis_v1_train', 'lvis_v1_val', 'lvis_v1_test_dev', 'lvis_v1_test_challenge', 'lvis_v0.5_train', 'lvis_v0.5_val', 'lvis_v0.5_val_rand_100', 'lvis_v0.5_test', 'lvis_v0.5_train_cocofied', 'lvis_v0.5_val_cocofied', 'cityscapes_fine_instance_seg_train', 'cityscapes_fine_sem_seg_train', 'citysca

In [5]:
# Get cfg var for setup
cfg = get_cfg()
# This imports a pre-trained model config (acts as the model's backbone)
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
# Specify the dataset to train on
cfg.DATASETS.TRAIN = ("BasketballCourt_Sat",)
# Did not implement custom test dataset, predictions were done in a different file
cfg.DATASETS.TEST = ()
# Where to save the trained model
cfg.OUTPUT_DIR = "/content/drive/My Drive/trained_models"
# Default value suggested by tutorial
cfg.DATALOADER.NUM_WORKERS = 2
# Import pre-trained model weights from same model in model zoo
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
# Do not filter out empty annotations to avoid filtering out images that do not contain basetkball courts
cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = False
# This is the batch size
cfg.SOLVER.IMS_PER_BATCH = 8
# This is the learning rate
cfg.SOLVER.BASE_LR = 0.0002
# Max number of iterations
cfg.SOLVER.MAX_ITER = 650
# Do not decay the learning rate
cfg.SOLVER.STEPS = []
# Default is 512, but using smaller values because of the smaller dataset
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
# Set number of classes in dataset - since we are only detecting basketball courts, this is 1
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

In [6]:
# Instantiate model
model = DefaultTrainer.build_model(cfg)

# Replace the last layer with new layer for 1 class
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor.cls_score = torch.nn.Linear(in_features,
                                                          cfg.MODEL.ROI_HEADS.NUM_CLASSES)

# Freeze the weights of the backbone network
for param in model.backbone.parameters():
    param.requires_grad = False

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

print("## Model has finished training")

[05/11 20:54:44 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

model_final_f10217.pkl: 178MB [00:00, 264MB/s]                           
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[05/11 20:54:46 d2.engine.train_loop]: Starting training from iteration 0


  self.pid = os.fork()
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[05/11 20:57:28 d2.utils.events]:  eta: 0:55:38  iter: 19  total_loss: 1.609  loss_cls: 0.6393  loss_box_reg: 0.1491  loss_mask: 0.691  loss_rpn_cls: 0.1172  loss_rpn_loc: 0.009595    time: 7.8145  last_time: 3.7092  data_time: 4.6001  last_data_time: 0.2314   lr: 2.3331e-06  max_mem: 13706M
[05/11 20:58:58 d2.utils.events]:  eta: 0:56:02  iter: 39  total_loss: 1.6  loss_cls: 0.6139  loss_box_reg: 0.1646  loss_mask: 0.6873  loss_rpn_cls: 0.1241  loss_rpn_loc: 0.01035    time: 5.9473  last_time: 4.1429  data_time: 0.6937  last_data_time: 0.6040   lr: 4.6836e-06  max_mem: 13706M
[05/11 21:00:23 d2.utils.events]:  eta: 0:54:46  iter: 59  total_loss: 1.564  loss_cls: 0.5798  loss_box_reg: 0.1782  loss_mask: 0.679  loss_rpn_cls: 0.1169  loss_rpn_loc: 0.0111    time: 5.3727  last_time: 4.1577  data_time: 0.7206  last_data_time: 0.5884   lr: 7.0342e-06  max_mem: 13706M
[05/11 21:01:50 d2.utils.events]:  eta: 0:53:52  iter: 79  total_loss: 1.505  loss_cls: 0.5297  loss_box_reg: 0.1726  loss_ma

KeyboardInterrupt: 