In [1]:
cd /content/drive/MyDrive/Thesis_Organized

/content/drive/MyDrive/Thesis_Organized


In [2]:
import os
main_dir = os.getcwd()

In [3]:
# importing
from Data.data_dirs import *
from Detectron2.detectron_params import *

In [4]:
# check for GPU availability

!nvidia-smi

Mon Jul 24 12:28:29 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [6]:
# Setups
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
torch:  2.0 ; cuda:  cu118
detectron2: 0.6


In [7]:
import detectron2
# from detectron2.utils.logger import setup_logger
# setup_logger()

# import some common libraries
import numpy as np
import cv2
import random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
import os

Data should be registered for detectron2, in the cell down below we will register our datasets. Remember that we have our data in COCO format therefore we can directly use the `register_coco_instances` function.

In [8]:
from detectron2.data.datasets import register_coco_instances
register_coco_instances("my_dataset_train", {}, COCO_LABELS_TRAIN_DIR, os.path.join(main_dir, CROPPED_IMAGES_DIR, "Train"))
register_coco_instances("my_dataset_test", {}, COCO_LABELS_TEST_DIR, os.path.join(main_dir, CROPPED_IMAGES_DIR, "Test"))
register_coco_instances("my_dataset_val", {}, COCO_LABELS_TAL_DIR, os.path.join(main_dir, CROPPED_IMAGES_DIR, "Val"))

In [9]:
from detectron2.engine.hooks import HookBase
from detectron2.evaluation import inference_context
from detectron2.utils.logger import log_every_n_seconds
from detectron2.data import DatasetMapper, build_detection_test_loader
import detectron2.utils.comm as comm
import torch
import time
import datetime
import logging

class LossEvalHook(HookBase):
    def __init__(self, eval_period, model, data_loader):
        self._model = model
        self._period = eval_period
        self._data_loader = data_loader

    def _do_loss_eval(self):
        # Copying inference_on_dataset from evaluator.py
        total = len(self._data_loader)
        num_warmup = min(5, total - 1)

        start_time = time.perf_counter()
        total_compute_time = 0
        losses = []
        for idx, inputs in enumerate(self._data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0
            start_compute_time = time.perf_counter()
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time
            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    "Loss on Validation  done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)
                    ),
                    n=5,
                )
            loss_batch = self._get_loss(inputs)
            losses.append(loss_batch)
        mean_loss = np.mean(losses)
        self.trainer.storage.put_scalar('validation_loss', mean_loss)
        comm.synchronize()

        return losses

    def _get_loss(self, data):
        # How loss is calculated on train_loop
        metrics_dict = self._model(data)
        metrics_dict = {
            k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
            for k, v in metrics_dict.items()
        }
        total_losses_reduced = sum(loss for loss in metrics_dict.values())
        return total_losses_reduced


    def after_step(self):
        next_iter = self.trainer.iter + 1
        is_final = next_iter == self.trainer.max_iter
        if is_final or (self._period > 0 and next_iter % self._period == 0):
            self._do_loss_eval()
        self.trainer.storage.put_scalars(timetest=12)

In [10]:
from detectron2.data import DatasetMapper, build_detection_test_loader
from detectron2.engine import DefaultTrainer


class CustomTrainer(DefaultTrainer):
    """
    Custom Trainer deriving from the "DefaultTrainer"

    Overloads build_hooks to add a hook to calculate loss on the test set during training.
    """

    def build_hooks(self):
        hooks = super().build_hooks()
        hooks.insert(-1, LossEvalHook(
            19, # Frequency of calculation - every 19 iterations here
            self.model,
            build_detection_test_loader(
                self.cfg,
                self.cfg.DATASETS.TEST[0],
                DatasetMapper(self.cfg, True)
            )
        ))

        return hooks

In [16]:
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader


cfg = get_cfg()
cfg.OUTPUT_DIR = "./Detectron2/output"
cfg.merge_from_file(model_zoo.get_config_file(MODELS_LIST[1]))
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ("my_dataset_test",)
# cfg.TEST.EVAL_PERIOD = 19
cfg.DATALOADER.NUM_WORKERS = NUM_WORKERS
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODELS_LIST[1])  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = IMS_PER_BATCH  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = BASE_LR  # pick a good LR
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = BATCH_SIZE_PER_IMAGE  # The "RoIHead batch size". 128 is faster, and good enough for this dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = NUM_CLASSES  # only has one class (Track). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)


for i in range(MAX_ITER%40):

  if i == 0:
    cfg.SOLVER.MAX_ITER = 40
    trainer = CustomTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = SEG_THRESHOLD   # set a custom testing threshold
    predictor = DefaultPredictor(cfg)
    evaluator = COCOEvaluator("my_dataset_test", output_dir="./output")
    val_loader = build_detection_test_loader(cfg, "my_dataset_test")
    print(inference_on_dataset(predictor.model, val_loader, evaluator))

  else:
    cfg.SOLVER.MAX_ITER = 40 * (i+1)
    trainer = CustomTrainer(cfg)
    trainer.resume_or_load(resume=True)
    trainer.train()
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = SEG_THRESHOLD   # set a custom testing threshold
    predictor = DefaultPredictor(cfg)
    evaluator = COCOEvaluator("my_dataset_test", output_dir="./Detectron2/output")
    val_loader = build_detection_test_loader(cfg, "my_dataset_test")
    print(inference_on_dataset(predictor.model, val_loader, evaluator))

[07/24 13:02:22 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[07/24 13:02:22 d2.engine.train_loop]: Starting training from iteration 0
[07/24 13:03:14 detectron2]: Loss on Validation  done 11/98. 0.0005 s / img. ETA=0:00:10
[07/24 13:03:19 detectron2]: Loss on Validation  done 49/98. 0.0002 s / img. ETA=0:00:06
[07/24 13:03:24 detectron2]: Loss on Validation  done 91/98. 0.0001 s / img. ETA=0:00:00
[07/24 13:03:28 d2.utils.events]:  eta: 0:00:53  iter: 19  total_loss: 2.319  loss_cls: 0.7614  loss_box_reg: 0.5  loss_mask: 0.6857  loss_rpn_cls: 0.3473  loss_rpn_loc: 0.07044  validation_loss: 2    time: 2.6462  last_time: 2.6446  data_time: 0.1779  last_data_time: 0.1324   lr: 7.6084e-05  max_mem: 12291M
[07/24 13:04:18 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:08
[07/24 13:04:23 detectron2]: Loss on Validation  done 51/98. 0.0001 s / img. ETA=0:00:05
[07/24 13:04:28 detectron2]: Loss on Validation  done 84/98. 0.0001 s / img. ETA=0:00:01
[07/24 13:04:39 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0

KeyboardInterrupt: ignored

In [18]:
inference_on_dataset(predictor.model, val_loader, evaluator)

[07/24 13:17:43 d2.evaluation.evaluator]: Start inference on 98 batches
[07/24 13:17:46 d2.evaluation.evaluator]: Inference done 11/98. Dataloading: 0.0101 s/iter. Inference: 0.1260 s/iter. Eval: 0.0028 s/iter. Total: 0.1389 s/iter. ETA=0:00:12
[07/24 13:17:51 d2.evaluation.evaluator]: Inference done 49/98. Dataloading: 0.0050 s/iter. Inference: 0.1278 s/iter. Eval: 0.0026 s/iter. Total: 0.1355 s/iter. ETA=0:00:06
[07/24 13:17:56 d2.evaluation.evaluator]: Inference done 87/98. Dataloading: 0.0037 s/iter. Inference: 0.1284 s/iter. Eval: 0.0026 s/iter. Total: 0.1349 s/iter. ETA=0:00:01
[07/24 13:17:58 d2.evaluation.evaluator]: Total inference time: 0:00:12.645523 (0.135973 s / iter per device, on 1 devices)
[07/24 13:17:58 d2.evaluation.evaluator]: Total inference pure compute time: 0:00:11 (0.128719 s / iter per device, on 1 devices)
[07/24 13:17:58 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
[07/24 13:17:58 d2.evaluation.coco_evaluation]: Saving results to ./o

OrderedDict([('bbox',
              {'AP': 33.60209946293746,
               'AP50': 62.575356373410905,
               'AP75': 32.12285343578618,
               'APs': 7.128712871287128,
               'APm': 42.9900738045088,
               'APl': 11.962178997616197}),
             ('segm',
              {'AP': 21.970765361682698,
               'AP50': 58.87130831690545,
               'AP75': 7.050049930917507,
               'APs': 1.1692597831211693,
               'APm': 22.862224941839415,
               'APl': 29.824742740323774})])

In [42]:
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MODELS_LIST[1]))
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ("my_dataset_test",)
# cfg.TEST.EVAL_PERIOD = 19
cfg.DATALOADER.NUM_WORKERS = NUM_WORKERS
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODELS_LIST[1])  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = IMS_PER_BATCH  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = BASE_LR  # pick a good LR
cfg.SOLVER.MAX_ITER = MAX_ITER    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = BATCH_SIZE_PER_IMAGE  # The "RoIHead batch size". 128 is faster, and good enough for this dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = NUM_CLASSES  # only has one class (Track). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
# trainer = DefaultTrainer(cfg)
trainer = CustomTrainer(cfg)
trainer.resume_or_load(resume=TRAIN_FROM_LAST_CHECKPOINT)
trainer.train()

[07/23 21:29:29 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[07/23 21:29:30 d2.engine.train_loop]: Starting training from iteration 0




[07/23 21:30:24 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:10
[07/23 21:30:29 detectron2]: Loss on Validation  done 51/98. 0.0001 s / img. ETA=0:00:05
[07/23 21:30:34 detectron2]: Loss on Validation  done 92/98. 0.0001 s / img. ETA=0:00:00
[07/23 21:30:38 d2.utils.events]:  eta: 0:21:41  iter: 19  total_loss: 2.436  loss_cls: 0.6656  loss_box_reg: 0.5295  loss_mask: 0.6937  loss_rpn_cls: 0.4995  loss_rpn_loc: 0.08658  validation_loss: 2.287    time: 2.6824  last_time: 2.9780  data_time: 0.1272  last_data_time: 0.2244   lr: 6.2339e-06  max_mem: 12573M




[07/23 21:31:32 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:08
[07/23 21:31:37 detectron2]: Loss on Validation  done 51/98. 0.0001 s / img. ETA=0:00:05
[07/23 21:31:42 detectron2]: Loss on Validation  done 85/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:31:50 d2.utils.events]:  eta: 0:21:23  iter: 39  total_loss: 2.24  loss_cls: 0.5669  loss_box_reg: 0.4998  loss_mask: 0.6849  loss_rpn_cls: 0.3803  loss_rpn_loc: 0.0861  validation_loss: 2.214    time: 2.7826  last_time: 2.7418  data_time: 0.0920  last_data_time: 0.1346   lr: 1.2628e-05  max_mem: 12573M




[07/23 21:32:40 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:08
[07/23 21:32:45 detectron2]: Loss on Validation  done 50/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:32:50 detectron2]: Loss on Validation  done 86/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:33:01 d2.utils.events]:  eta: 0:20:37  iter: 59  total_loss: 2.015  loss_cls: 0.4582  loss_box_reg: 0.5179  loss_mask: 0.6688  loss_rpn_cls: 0.2745  loss_rpn_loc: 0.07869  validation_loss: 2.082    time: 2.8088  last_time: 2.8970  data_time: 0.0936  last_data_time: 0.0924   lr: 1.9021e-05  max_mem: 12573M




[07/23 21:33:49 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:10
[07/23 21:33:54 detectron2]: Loss on Validation  done 49/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:33:59 detectron2]: Loss on Validation  done 89/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:34:12 d2.utils.events]:  eta: 0:19:53  iter: 79  total_loss: 1.987  loss_cls: 0.4498  loss_box_reg: 0.6357  loss_mask: 0.6522  loss_rpn_cls: 0.1803  loss_rpn_loc: 0.07205  validation_loss: 1.964    time: 2.8172  last_time: 2.9002  data_time: 0.0931  last_data_time: 0.1031   lr: 2.5415e-05  max_mem: 12573M




[07/23 21:34:59 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:09
[07/23 21:35:04 detectron2]: Loss on Validation  done 52/98. 0.0001 s / img. ETA=0:00:05
[07/23 21:35:09 detectron2]: Loss on Validation  done 92/98. 0.0001 s / img. ETA=0:00:00
[07/23 21:35:24 d2.utils.events]:  eta: 0:19:04  iter: 99  total_loss: 1.937  loss_cls: 0.4422  loss_box_reg: 0.6979  loss_mask: 0.6388  loss_rpn_cls: 0.08958  loss_rpn_loc: 0.06013  validation_loss: 1.882    time: 2.8300  last_time: 2.7733  data_time: 0.1018  last_data_time: 0.1443   lr: 3.1808e-05  max_mem: 12573M




[07/23 21:36:06 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:09
[07/23 21:36:11 detectron2]: Loss on Validation  done 50/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:36:16 detectron2]: Loss on Validation  done 85/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:36:36 d2.utils.events]:  eta: 0:18:08  iter: 119  total_loss: 1.875  loss_cls: 0.4402  loss_box_reg: 0.7232  loss_mask: 0.6268  loss_rpn_cls: 0.05007  loss_rpn_loc: 0.06206  validation_loss: 1.879    time: 2.8370  last_time: 2.9856  data_time: 0.0819  last_data_time: 0.1067   lr: 3.8202e-05  max_mem: 12573M




[07/23 21:37:15 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:09
[07/23 21:37:20 detectron2]: Loss on Validation  done 46/98. 0.0001 s / img. ETA=0:00:07
[07/23 21:37:25 detectron2]: Loss on Validation  done 84/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:37:48 d2.utils.events]:  eta: 0:17:11  iter: 139  total_loss: 1.867  loss_cls: 0.4346  loss_box_reg: 0.7407  loss_mask: 0.5992  loss_rpn_cls: 0.03569  loss_rpn_loc: 0.05571  validation_loss: 1.89    time: 2.8421  last_time: 2.8505  data_time: 0.0936  last_data_time: 0.0322   lr: 4.4596e-05  max_mem: 12573M




[07/23 21:38:25 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:09
[07/23 21:38:30 detectron2]: Loss on Validation  done 50/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:38:36 detectron2]: Loss on Validation  done 88/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:39:01 d2.utils.events]:  eta: 0:16:16  iter: 159  total_loss: 1.892  loss_cls: 0.4353  loss_box_reg: 0.7847  loss_mask: 0.5804  loss_rpn_cls: 0.0258  loss_rpn_loc: 0.04994  validation_loss: 1.878    time: 2.8559  last_time: 3.0975  data_time: 0.1094  last_data_time: 0.2306   lr: 5.0989e-05  max_mem: 12573M




[07/23 21:39:36 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:09
[07/23 21:39:41 detectron2]: Loss on Validation  done 50/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:39:46 detectron2]: Loss on Validation  done 84/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:40:14 d2.utils.events]:  eta: 0:15:21  iter: 179  total_loss: 1.898  loss_cls: 0.4322  loss_box_reg: 0.856  loss_mask: 0.5355  loss_rpn_cls: 0.0296  loss_rpn_loc: 0.04409  validation_loss: 1.87    time: 2.8619  last_time: 2.7136  data_time: 0.0900  last_data_time: 0.0537   lr: 5.7383e-05  max_mem: 12573M




[07/23 21:40:46 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:10
[07/23 21:40:51 detectron2]: Loss on Validation  done 42/98. 0.0001 s / img. ETA=0:00:08
[07/23 21:40:56 detectron2]: Loss on Validation  done 81/98. 0.0001 s / img. ETA=0:00:02
[07/23 21:41:27 d2.utils.events]:  eta: 0:14:23  iter: 199  total_loss: 1.877  loss_cls: 0.414  loss_box_reg: 0.89  loss_mask: 0.4813  loss_rpn_cls: 0.02676  loss_rpn_loc: 0.05031  validation_loss: 1.848    time: 2.8610  last_time: 2.7351  data_time: 0.0729  last_data_time: 0.0690   lr: 6.3776e-05  max_mem: 12573M




[07/23 21:41:56 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:10
[07/23 21:42:01 detectron2]: Loss on Validation  done 50/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:42:06 detectron2]: Loss on Validation  done 86/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:42:40 d2.utils.events]:  eta: 0:13:28  iter: 219  total_loss: 1.755  loss_cls: 0.3742  loss_box_reg: 0.8385  loss_mask: 0.4516  loss_rpn_cls: 0.01817  loss_rpn_loc: 0.03291  validation_loss: 1.804    time: 2.8677  last_time: 3.0583  data_time: 0.1052  last_data_time: 0.1782   lr: 7.017e-05  max_mem: 12573M




[07/23 21:43:05 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:09
[07/23 21:43:10 detectron2]: Loss on Validation  done 50/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:43:15 detectron2]: Loss on Validation  done 84/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:43:52 d2.utils.events]:  eta: 0:12:30  iter: 239  total_loss: 1.698  loss_cls: 0.3563  loss_box_reg: 0.8601  loss_mask: 0.4153  loss_rpn_cls: 0.0198  loss_rpn_loc: 0.03678  validation_loss: 1.746    time: 2.8676  last_time: 2.8938  data_time: 0.0956  last_data_time: 0.0480   lr: 7.6564e-05  max_mem: 12573M




[07/23 21:44:15 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:11
[07/23 21:44:20 detectron2]: Loss on Validation  done 48/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:44:25 detectron2]: Loss on Validation  done 88/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:45:04 d2.utils.events]:  eta: 0:11:32  iter: 259  total_loss: 1.628  loss_cls: 0.322  loss_box_reg: 0.8539  loss_mask: 0.3781  loss_rpn_cls: 0.01833  loss_rpn_loc: 0.03626  validation_loss: 1.69    time: 2.8677  last_time: 2.9230  data_time: 0.0866  last_data_time: 0.0506   lr: 8.2957e-05  max_mem: 12573M




[07/23 21:45:24 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:08
[07/23 21:45:29 detectron2]: Loss on Validation  done 50/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:45:34 detectron2]: Loss on Validation  done 89/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:46:16 d2.utils.events]:  eta: 0:10:35  iter: 279  total_loss: 1.583  loss_cls: 0.3087  loss_box_reg: 0.8384  loss_mask: 0.3784  loss_rpn_cls: 0.02225  loss_rpn_loc: 0.03464  validation_loss: 1.622    time: 2.8699  last_time: 2.7221  data_time: 0.0934  last_data_time: 0.0461   lr: 8.9351e-05  max_mem: 12573M




[07/23 21:46:33 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:09
[07/23 21:46:38 detectron2]: Loss on Validation  done 45/98. 0.0001 s / img. ETA=0:00:07
[07/23 21:46:43 detectron2]: Loss on Validation  done 82/98. 0.0001 s / img. ETA=0:00:02
[07/23 21:47:27 d2.utils.events]:  eta: 0:09:37  iter: 299  total_loss: 1.474  loss_cls: 0.2627  loss_box_reg: 0.7862  loss_mask: 0.3577  loss_rpn_cls: 0.01643  loss_rpn_loc: 0.03554  validation_loss: 1.549    time: 2.8691  last_time: 3.0541  data_time: 0.0845  last_data_time: 0.1651   lr: 9.5744e-05  max_mem: 12573M




[07/23 21:47:42 detectron2]: Loss on Validation  done 11/98. 0.0002 s / img. ETA=0:00:11
[07/23 21:47:47 detectron2]: Loss on Validation  done 50/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:47:52 detectron2]: Loss on Validation  done 91/98. 0.0001 s / img. ETA=0:00:00
[07/23 21:48:40 d2.utils.events]:  eta: 0:08:40  iter: 319  total_loss: 1.376  loss_cls: 0.2472  loss_box_reg: 0.7346  loss_mask: 0.3187  loss_rpn_cls: 0.01173  loss_rpn_loc: 0.03442  validation_loss: 1.48    time: 2.8705  last_time: 2.9178  data_time: 0.0908  last_data_time: 0.0589   lr: 0.00010214  max_mem: 12573M




[07/23 21:48:51 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:08
[07/23 21:48:56 detectron2]: Loss on Validation  done 49/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:49:01 detectron2]: Loss on Validation  done 85/98. 0.0002 s / img. ETA=0:00:01
[07/23 21:49:51 d2.utils.events]:  eta: 0:07:42  iter: 339  total_loss: 1.294  loss_cls: 0.2333  loss_box_reg: 0.6947  loss_mask: 0.3076  loss_rpn_cls: 0.0151  loss_rpn_loc: 0.03238  validation_loss: 1.392    time: 2.8711  last_time: 2.9155  data_time: 0.1071  last_data_time: 0.0745   lr: 0.00010853  max_mem: 12573M




[07/23 21:50:00 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:10
[07/23 21:50:05 detectron2]: Loss on Validation  done 46/98. 0.0003 s / img. ETA=0:00:07
[07/23 21:50:10 detectron2]: Loss on Validation  done 86/98. 0.0002 s / img. ETA=0:00:01
[07/23 21:51:04 d2.utils.events]:  eta: 0:06:44  iter: 359  total_loss: 1.156  loss_cls: 0.2196  loss_box_reg: 0.624  loss_mask: 0.2816  loss_rpn_cls: 0.01193  loss_rpn_loc: 0.0308  validation_loss: 1.291    time: 2.8720  last_time: 2.9626  data_time: 0.0962  last_data_time: 0.0841   lr: 0.00011493  max_mem: 12573M




[07/23 21:51:09 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:09
[07/23 21:51:14 detectron2]: Loss on Validation  done 49/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:51:19 detectron2]: Loss on Validation  done 82/98. 0.0001 s / img. ETA=0:00:02




[07/23 21:52:18 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:08
[07/23 21:52:24 detectron2]: Loss on Validation  done 47/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:52:29 detectron2]: Loss on Validation  done 84/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:52:31 d2.utils.events]:  eta: 0:05:47  iter: 379  total_loss: 1.053  loss_cls: 0.208  loss_box_reg: 0.57  loss_mask: 0.2385  loss_rpn_cls: 0.01872  loss_rpn_loc: 0.03421  validation_loss: 1.089    time: 2.8728  last_time: 2.9424  data_time: 0.0826  last_data_time: 0.0936   lr: 0.00012132  max_mem: 12573M




[07/23 21:53:28 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:08
[07/23 21:53:33 detectron2]: Loss on Validation  done 50/98. 0.0001 s / img. ETA=0:00:05
[07/23 21:53:38 detectron2]: Loss on Validation  done 89/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:53:43 d2.utils.events]:  eta: 0:04:49  iter: 399  total_loss: 0.9701  loss_cls: 0.1969  loss_box_reg: 0.4991  loss_mask: 0.2159  loss_rpn_cls: 0.01628  loss_rpn_loc: 0.02787  validation_loss: 1.021    time: 2.8728  last_time: 3.0224  data_time: 0.0844  last_data_time: 0.1225   lr: 0.00012771  max_mem: 12573M




[07/23 21:54:38 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:10
[07/23 21:54:43 detectron2]: Loss on Validation  done 49/98. 0.0001 s / img. ETA=0:00:06
[07/23 21:54:48 detectron2]: Loss on Validation  done 89/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:54:56 d2.utils.events]:  eta: 0:03:51  iter: 419  total_loss: 0.9635  loss_cls: 0.1919  loss_box_reg: 0.4779  loss_mask: 0.2081  loss_rpn_cls: 0.01444  loss_rpn_loc: 0.03061  validation_loss: 0.9758    time: 2.8753  last_time: 2.9826  data_time: 0.0960  last_data_time: 0.0963   lr: 0.00013411  max_mem: 12573M




[07/23 21:55:48 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:09
[07/23 21:55:53 detectron2]: Loss on Validation  done 46/98. 0.0001 s / img. ETA=0:00:07
[07/23 21:55:58 detectron2]: Loss on Validation  done 83/98. 0.0001 s / img. ETA=0:00:02
[07/23 21:56:08 d2.utils.events]:  eta: 0:02:53  iter: 439  total_loss: 0.8727  loss_cls: 0.176  loss_box_reg: 0.4555  loss_mask: 0.1975  loss_rpn_cls: 0.0109  loss_rpn_loc: 0.02587  validation_loss: 0.9515    time: 2.8759  last_time: 2.4215  data_time: 0.0926  last_data_time: 0.0722   lr: 0.0001405  max_mem: 12573M




[07/23 21:56:57 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:09
[07/23 21:57:02 detectron2]: Loss on Validation  done 51/98. 0.0001 s / img. ETA=0:00:05
[07/23 21:57:07 detectron2]: Loss on Validation  done 86/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:57:21 d2.utils.events]:  eta: 0:01:55  iter: 459  total_loss: 0.858  loss_cls: 0.1715  loss_box_reg: 0.4409  loss_mask: 0.1923  loss_rpn_cls: 0.009599  loss_rpn_loc: 0.02953  validation_loss: 0.9298    time: 2.8768  last_time: 2.8566  data_time: 0.1156  last_data_time: 0.0187   lr: 0.00014689  max_mem: 12573M




[07/23 21:58:07 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:08
[07/23 21:58:17 detectron2]: Loss on Validation  done 90/98. 0.0001 s / img. ETA=0:00:01
[07/23 21:58:33 d2.utils.events]:  eta: 0:00:57  iter: 479  total_loss: 0.8185  loss_cls: 0.1725  loss_box_reg: 0.429  loss_mask: 0.1861  loss_rpn_cls: 0.00721  loss_rpn_loc: 0.02449  validation_loss: 0.9014    time: 2.8773  last_time: 2.9972  data_time: 0.0785  last_data_time: 0.0990   lr: 0.00015329  max_mem: 12573M




[07/23 21:59:15 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:10
[07/23 21:59:21 detectron2]: Loss on Validation  done 47/98. 0.0001 s / img. ETA=0:00:07
[07/23 21:59:26 detectron2]: Loss on Validation  done 87/98. 0.0001 s / img. ETA=0:00:01




[07/23 21:59:50 detectron2]: Loss on Validation  done 11/98. 0.0001 s / img. ETA=0:00:08
[07/23 21:59:55 detectron2]: Loss on Validation  done 51/98. 0.0001 s / img. ETA=0:00:05
[07/23 22:00:00 detectron2]: Loss on Validation  done 85/98. 0.0001 s / img. ETA=0:00:01
[07/23 22:00:02 d2.utils.events]:  eta: 0:00:00  iter: 499  total_loss: 0.9024  loss_cls: 0.18  loss_box_reg: 0.4863  loss_mask: 0.1919  loss_rpn_cls: 0.01173  loss_rpn_loc: 0.02919  validation_loss: 0.8865    time: 2.8773  last_time: 2.9514  data_time: 0.0720  last_data_time: 0.1026   lr: 0.00015968  max_mem: 12573M
[07/23 22:00:02 d2.engine.hooks]: Overall training speed: 498 iterations in 0:23:52 (2.8773 s / it)
[07/23 22:00:02 d2.engine.hooks]: Total training time: 0:30:26 (0:06:33 on hooks)
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[07/23 22:00:02 d2.data.datasets.coco]: Loaded 98 images in COCO format from Data/COCO format/coco_test_data.json
[07/23 22:00:02 d2.data.datas



In [21]:
%load_ext tensorboard
%tensorboard --logdir output

In [20]:
# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = SEG_THRESHOLD   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

[07/23 21:11:02 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from ./output/model_final.pth ...


In [21]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
evaluator = COCOEvaluator("my_dataset_test", output_dir="./output")
val_loader = build_detection_test_loader(cfg, "my_dataset_test")
print(inference_on_dataset(predictor.model, val_loader, evaluator))
# another equivalent way to evaluate the model is to use `trainer.test`

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[07/23 21:11:05 d2.data.datasets.coco]: Loaded 98 images in COCO format from Data/COCO format/coco_test_data.json
[07/23 21:11:05 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[07/23 21:11:05 d2.data.common]: Serializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[07/23 21:11:05 d2.data.common]: Serializing 98 elements to byte tensors and concatenating them all ...
[07/23 21:11:05 d2.data.common]: Serialized dataset takes 0.52 MiB
[07/23 21:11:05 d2.evaluation.evaluator]: Start inference on 98 batches
[07/23 21:11:07 d2.evaluation.evaluator]: Inference done 11/98. Dataloading: 0.0017 s/iter. Inference: 0.1308 s/iter. Eval: 0.0001 s/iter. Total: 0.1326 s/iter. ETA=0:00:11
[07/23 21:11:12 d2.evaluation.evaluator]: Inference done 49/98. Dataloading: 0.0023 

In [24]:
inference_on_dataset

<function detectron2.evaluation.evaluator.inference_on_dataset(model, data_loader, evaluator: Union[detectron2.evaluation.evaluator.DatasetEvaluator, List[detectron2.evaluation.evaluator.DatasetEvaluator], NoneType])>

In [35]:
model =  CustomTrainer.build_model(cfg)
trainer.test(cfg,model, evaluators = [evaluator])

[07/23 21:20:22 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

OrderedDict([('bbox',
              {'AP': nan,
               'AP50': nan,
               'AP75': nan,
               'APs': nan,
               'APm': nan,
               'APl': nan})])