In [1]:
#!python module_preprocessing.py --default=True --train_ann='/input/train_annotations_equal.json' --dataset_type=1 --test_ann='/input/test_annotations_equal.json' --train_split=0.05

Detectron2 using cocolike structure training mask rcnn 50 layer.

data: module_preprocessing.py
data augmentations: https://jss367.github.io/Data-Augmentation-with-Detectron2.html / https://detectron2.readthedocs.io/modules/data_transforms.html
- flip horisontal 50% prob
- flip vertical 50% prob
- random rotation -20 to 20%. 
- random lightning 0.05 standard deviations. 

3 Stages:
- 1st stage  256x256 images
- 2nd stage  512x512 images
- 3rd stage  756x756 images
- hopefully with variable optimized learning rate. 

Configurations have been changed from the default. Current notebook contains only the mask training part. Documentation for detectron2 documentation: https://detectron2.readthedocs.io/modules/config.html


Metrics: tensorboard (http://127.0.0.1:6006) -> call from another script/notebook.

Submission: module_submittion.py

In [3]:
import torch, torchvision
print(torch.__version__, torch.cuda.is_available(), torch.version.cuda)

1.6.0 True 10.1


In [4]:
import os
import copy
import json
import pycocotools
import random 

import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path
from collections import defaultdict

assert torch.__version__.startswith("1.6")

import detectron2
import detectron2.data.transforms as T
import detectron2.utils.comm as comm

from detectron2.utils.logger import setup_logger
setup_logger()

from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg

from detectron2.data import MetadataCatalog,DatasetMapper,build_detection_train_loader,build_detection_test_loader
from detectron2.data import detection_utils as utils
from detectron2.data.catalog import DatasetCatalog
from detectron2.data.datasets import register_coco_instances 

from detectron2.evaluation import COCOEvaluator, inference_on_dataset

from detectron2.projects.deeplab import add_deeplab_config, build_lr_scheduler

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True #Truncated image -> https://github.com/keras-team/keras/issues/5475

In [5]:
PATH = os.path.abspath(os.getcwd())

register_coco_instances("my_dataset_train_v2",{},PATH + "/input/train_annotations_equal.json",PATH + "/input/train_v2/")
register_coco_instances("my_dataset_val_v2",{},PATH + "/input/test_annotations_equal.json",PATH + "/input/train_v2/")

my_dataset_train_metadata = MetadataCatalog.get("my_dataset_train_v2")
dataset_dicts = DatasetCatalog.get("my_dataset_train_v2")

[32m[12/09 07:17:21 d2.data.datasets.coco]: [0mLoading /application/input/train_annotations_equal.json takes 1.21 seconds.
[32m[12/09 07:17:21 d2.data.datasets.coco]: [0mLoaded 100233 images in COCO format from /application/input/train_annotations_equal.json


In [6]:
def custom_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # List of transforms https://detectron2.readthedocs.io/modules/data_transforms.html
    # Add saturation, add shear orsmth.
    transform_list = [
                      T.RandomFlip(prob=0.5, horizontal=False, vertical=True),
                      T.RandomFlip(prob=0.5, horizontal=True, vertical=False),
                      T.RandomLighting(0.1),
                      T.RandomRotation((-0.2,0.2))
                     ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict

In [7]:
class CocoTrainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
            os.makedirs("coco_eval", exist_ok=True)
            output_folder = "coco_eval"
        return COCOEvaluator(dataset_name, cfg, False, output_folder)
    
    @classmethod
    def build_train_loader(cls, cfg):
        return build_detection_train_loader(cfg, mapper=custom_mapper)
    
    @classmethod
    def build_lr_scheduler(cls, cfg, optimizer):
        return build_lr_scheduler(cfg, optimizer)


In [7]:
#Call from anywhere else. 
#!tensorboard --logdir=run_equal --host=0.0.0.0
#http://0.0.0.0:6006/#scalars

In [6]:
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))

cfg.DATASETS.TRAIN = ("my_dataset_train_v2",) 
cfg.DATASETS.TEST = ("my_dataset_val_v2",)
cfg.TEST.EVAL_PERIOD = 5000
cfg.DATALOADER.NUM_WORKERS = 4 ## 4 per gpu
cfg.SOLVER.IMS_PER_BATCH = 16
cfg.SOLVER.BASE_LR = 0.001  # pick a good LR
cfg.SOLVER.MAX_ITER = 20000
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128 
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # Only has one class (ship)
cfg.MAX_SIZE_TRAIN = 256 #Max image size 
cfg.OUTPUT_DIR = "./runs/run_50_anchortest"
cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True # Teach only ships & background
cfg.LR_SCHEDULER_NAME = "WarmupCosineLR" #avoid getting stuck in local minima.
cfg.CUDNN_BENCHMARK = True
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[16, 32, 64, 128, 256, 512]]
cfg.SOLVER.AMP.ENABLED = True  # Automatic Mixed Precision

In [7]:
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CocoTrainer(cfg) 
trainer.resume_or_load(resume=True) #True takes last checkpoint file which is saved below.
trainer.train() #Trainer will throw out non-annotated pictures. 

[32m[12/08 08:26:26 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

KeyboardInterrupt: 

In [10]:
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))

cfg.DATASETS.TRAIN = ("my_dataset_train_v2",) 
cfg.DATASETS.TEST = ("my_dataset_val_v2",)
cfg.TEST.EVAL_PERIOD = 5000
cfg.DATALOADER.NUM_WORKERS = 4 ## 4 per gpu
cfg.SOLVER.IMS_PER_BATCH = 12
cfg.SOLVER.BASE_LR = 0.001  
cfg.SOLVER.MAX_ITER = 30000
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256 
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (ship)
cfg.MAX_SIZE_TRAIN = 512 #Max image size 
cfg.LR_SCHEDULER_NAME = "WarmupCosineLR" #avoid getting stuck in local minima. 
cfg.OUTPUT_DIR = "./runs/run_50_anchortest"
cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[16, 32, 64, 128, 256, 512]]
cfg.SOLVER.AMP.ENABLED = True  # Automatic Mixed Precision

In [11]:
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CocoTrainer(cfg) 
trainer.resume_or_load(resume=True) #True takes last checkpoint file which is saved below.
trainer.train() #Trainer will throw out non-annotated pictures. 

[32m[12/07 21:33:06 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[12/07 21:33:07 d2.data.datasets.coco]: [0mLoading /application/input/train_annotations_equal.json takes 1.28 seconds.
[32m[12/07 21:33:07 d2.data.datasets.coco]: [0mLoaded 100233 images in COCO format from /application/input/train_annotations_equal.json
[32m[12/07 21:33:08 d2.data.build]: [0mRemoved 59762 images with no usable annotations. 40471 images left.
[32m[12/07 21:33:11 d2.data.build]: [0mUsing training sampler TrainingSampler
[32m[12/07 21:33:11 d2.data.common]: [0mSerializing 40471 elements to byte tensors and concatenating them all ...
[32m[12/07 21:33:11 d2.data.common]: [0mSerialized dataset takes 20.74 MiB
[32m[12/07 21:33:12 d2.engine.train_loop]: [0mStarting training from iteration 20000
[32m[12/07 21:33:28 d2.utils.events]: [0m eta: 2:13:21  iter: 20019  total_loss: 0.5137  loss_cls: 0.07458  loss_box_reg: 0.1945  loss_mask: 0.1942  loss_rpn_cls: 0.004639  loss_rpn_loc: 0.02328  time: 0.8024  data_time: 0.0317  lr: 0.001  max_mem: 8226M
[32m[12/07

[32m[12/07 21:41:41 d2.utils.events]: [0m eta: 2:08:01  iter: 20619  total_loss: 0.5976  loss_cls: 0.08694  loss_box_reg: 0.2292  loss_mask: 0.2134  loss_rpn_cls: 0.005447  loss_rpn_loc: 0.03252  time: 0.8199  data_time: 0.0168  lr: 0.001  max_mem: 8226M
[32m[12/07 21:41:57 d2.utils.events]: [0m eta: 2:07:44  iter: 20639  total_loss: 0.5378  loss_cls: 0.08431  loss_box_reg: 0.2197  loss_mask: 0.2026  loss_rpn_cls: 0.005503  loss_rpn_loc: 0.03019  time: 0.8200  data_time: 0.0168  lr: 0.001  max_mem: 8226M
[32m[12/07 21:42:13 d2.utils.events]: [0m eta: 2:07:26  iter: 20659  total_loss: 0.5596  loss_cls: 0.07744  loss_box_reg: 0.2103  loss_mask: 0.2127  loss_rpn_cls: 0.005834  loss_rpn_loc: 0.02922  time: 0.8199  data_time: 0.0166  lr: 0.001  max_mem: 8226M
[32m[12/07 21:42:30 d2.utils.events]: [0m eta: 2:07:09  iter: 20679  total_loss: 0.5745  loss_cls: 0.0895  loss_box_reg: 0.2158  loss_mask: 0.1973  loss_rpn_cls: 0.006794  loss_rpn_loc: 0.02558  time: 0.8200  data_time: 0.0172 

[32m[12/07 21:50:26 d2.utils.events]: [0m eta: 1:59:38  iter: 21259  total_loss: 0.5308  loss_cls: 0.09089  loss_box_reg: 0.2111  loss_mask: 0.1998  loss_rpn_cls: 0.006514  loss_rpn_loc: 0.02838  time: 0.8203  data_time: 0.0167  lr: 0.001  max_mem: 8226M
[32m[12/07 21:50:43 d2.utils.events]: [0m eta: 1:59:20  iter: 21279  total_loss: 0.4961  loss_cls: 0.08211  loss_box_reg: 0.194  loss_mask: 0.2016  loss_rpn_cls: 0.007016  loss_rpn_loc: 0.02956  time: 0.8202  data_time: 0.0167  lr: 0.001  max_mem: 8226M
[32m[12/07 21:50:59 d2.utils.events]: [0m eta: 1:59:05  iter: 21299  total_loss: 0.5236  loss_cls: 0.06933  loss_box_reg: 0.2061  loss_mask: 0.1932  loss_rpn_cls: 0.005243  loss_rpn_loc: 0.03066  time: 0.8203  data_time: 0.0168  lr: 0.001  max_mem: 8226M
[32m[12/07 21:51:16 d2.utils.events]: [0m eta: 1:58:54  iter: 21319  total_loss: 0.5522  loss_cls: 0.08257  loss_box_reg: 0.2101  loss_mask: 0.2018  loss_rpn_cls: 0.007344  loss_rpn_loc: 0.03093  time: 0.8204  data_time: 0.0166 

[32m[12/07 21:59:13 d2.utils.events]: [0m eta: 1:50:51  iter: 21899  total_loss: 0.5694  loss_cls: 0.0878  loss_box_reg: 0.215  loss_mask: 0.2174  loss_rpn_cls: 0.004609  loss_rpn_loc: 0.02995  time: 0.8207  data_time: 0.0169  lr: 0.001  max_mem: 8226M
[32m[12/07 21:59:29 d2.utils.events]: [0m eta: 1:50:35  iter: 21919  total_loss: 0.5347  loss_cls: 0.08404  loss_box_reg: 0.2166  loss_mask: 0.1948  loss_rpn_cls: 0.005597  loss_rpn_loc: 0.03175  time: 0.8207  data_time: 0.0173  lr: 0.001  max_mem: 8226M
[32m[12/07 21:59:46 d2.utils.events]: [0m eta: 1:50:16  iter: 21939  total_loss: 0.5116  loss_cls: 0.0803  loss_box_reg: 0.2017  loss_mask: 0.1979  loss_rpn_cls: 0.004582  loss_rpn_loc: 0.02991  time: 0.8207  data_time: 0.0168  lr: 0.001  max_mem: 8226M
[32m[12/07 22:00:02 d2.utils.events]: [0m eta: 1:50:02  iter: 21959  total_loss: 0.5578  loss_cls: 0.09439  loss_box_reg: 0.2034  loss_mask: 0.1954  loss_rpn_cls: 0.007657  loss_rpn_loc: 0.0293  time: 0.8207  data_time: 0.0171  lr

[32m[12/07 22:07:58 d2.utils.events]: [0m eta: 1:41:50  iter: 22539  total_loss: 0.5262  loss_cls: 0.07876  loss_box_reg: 0.2043  loss_mask: 0.1959  loss_rpn_cls: 0.006204  loss_rpn_loc: 0.0328  time: 0.8205  data_time: 0.0172  lr: 0.001  max_mem: 8226M
[32m[12/07 22:08:15 d2.utils.events]: [0m eta: 1:41:36  iter: 22559  total_loss: 0.4863  loss_cls: 0.07811  loss_box_reg: 0.1863  loss_mask: 0.1759  loss_rpn_cls: 0.00765  loss_rpn_loc: 0.02615  time: 0.8206  data_time: 0.0171  lr: 0.001  max_mem: 8226M
[32m[12/07 22:08:31 d2.utils.events]: [0m eta: 1:41:21  iter: 22579  total_loss: 0.5275  loss_cls: 0.07995  loss_box_reg: 0.2273  loss_mask: 0.1977  loss_rpn_cls: 0.006175  loss_rpn_loc: 0.02669  time: 0.8206  data_time: 0.0171  lr: 0.001  max_mem: 8226M
[32m[12/07 22:08:48 d2.utils.events]: [0m eta: 1:41:12  iter: 22599  total_loss: 0.4589  loss_cls: 0.06445  loss_box_reg: 0.1875  loss_mask: 0.1583  loss_rpn_cls: 0.005598  loss_rpn_loc: 0.0279  time: 0.8207  data_time: 0.0172  l

[32m[12/07 22:16:43 d2.utils.events]: [0m eta: 1:33:08  iter: 23179  total_loss: 0.481  loss_cls: 0.07666  loss_box_reg: 0.2  loss_mask: 0.1772  loss_rpn_cls: 0.007112  loss_rpn_loc: 0.02678  time: 0.8202  data_time: 0.0177  lr: 0.001  max_mem: 8226M
[32m[12/07 22:16:59 d2.utils.events]: [0m eta: 1:32:51  iter: 23199  total_loss: 0.5077  loss_cls: 0.07802  loss_box_reg: 0.1893  loss_mask: 0.2008  loss_rpn_cls: 0.005302  loss_rpn_loc: 0.02786  time: 0.8201  data_time: 0.0173  lr: 0.001  max_mem: 8226M
[32m[12/07 22:17:15 d2.utils.events]: [0m eta: 1:32:36  iter: 23219  total_loss: 0.5643  loss_cls: 0.08573  loss_box_reg: 0.1887  loss_mask: 0.2126  loss_rpn_cls: 0.006154  loss_rpn_loc: 0.02654  time: 0.8201  data_time: 0.0172  lr: 0.001  max_mem: 8226M
[32m[12/07 22:17:32 d2.utils.events]: [0m eta: 1:32:20  iter: 23239  total_loss: 0.5682  loss_cls: 0.09225  loss_box_reg: 0.2214  loss_mask: 0.195  loss_rpn_cls: 0.004587  loss_rpn_loc: 0.02665  time: 0.8200  data_time: 0.0171  lr:

[32m[12/07 22:25:29 d2.utils.events]: [0m eta: 1:24:26  iter: 23819  total_loss: 0.5125  loss_cls: 0.09174  loss_box_reg: 0.1952  loss_mask: 0.1766  loss_rpn_cls: 0.007746  loss_rpn_loc: 0.02933  time: 0.8203  data_time: 0.0162  lr: 0.001  max_mem: 8226M
[32m[12/07 22:25:45 d2.utils.events]: [0m eta: 1:24:10  iter: 23839  total_loss: 0.5306  loss_cls: 0.08492  loss_box_reg: 0.2096  loss_mask: 0.1882  loss_rpn_cls: 0.005677  loss_rpn_loc: 0.02389  time: 0.8203  data_time: 0.0170  lr: 0.001  max_mem: 8226M
[32m[12/07 22:26:02 d2.utils.events]: [0m eta: 1:23:53  iter: 23859  total_loss: 0.4665  loss_cls: 0.07248  loss_box_reg: 0.1831  loss_mask: 0.1715  loss_rpn_cls: 0.006073  loss_rpn_loc: 0.02548  time: 0.8203  data_time: 0.0168  lr: 0.001  max_mem: 8226M
[32m[12/07 22:26:18 d2.utils.events]: [0m eta: 1:23:34  iter: 23879  total_loss: 0.5079  loss_cls: 0.07976  loss_box_reg: 0.1942  loss_mask: 0.2036  loss_rpn_cls: 0.004838  loss_rpn_loc: 0.02241  time: 0.8202  data_time: 0.0170

[32m[12/07 22:34:13 d2.utils.events]: [0m eta: 1:15:33  iter: 24459  total_loss: 0.4979  loss_cls: 0.08027  loss_box_reg: 0.1878  loss_mask: 0.1877  loss_rpn_cls: 0.004866  loss_rpn_loc: 0.0245  time: 0.8199  data_time: 0.0170  lr: 0.001  max_mem: 8226M
[32m[12/07 22:34:29 d2.utils.events]: [0m eta: 1:15:18  iter: 24479  total_loss: 0.4299  loss_cls: 0.06675  loss_box_reg: 0.1905  loss_mask: 0.1721  loss_rpn_cls: 0.00621  loss_rpn_loc: 0.02596  time: 0.8199  data_time: 0.0164  lr: 0.001  max_mem: 8226M
[32m[12/07 22:34:46 d2.utils.events]: [0m eta: 1:15:03  iter: 24499  total_loss: 0.5686  loss_cls: 0.0934  loss_box_reg: 0.244  loss_mask: 0.2008  loss_rpn_cls: 0.003829  loss_rpn_loc: 0.02694  time: 0.8199  data_time: 0.0173  lr: 0.001  max_mem: 8226M
[32m[12/07 22:35:02 d2.utils.events]: [0m eta: 1:14:53  iter: 24519  total_loss: 0.5134  loss_cls: 0.08546  loss_box_reg: 0.2053  loss_mask: 0.2087  loss_rpn_cls: 0.00622  loss_rpn_loc: 0.02412  time: 0.8199  data_time: 0.0173  lr:

[32m[12/07 22:42:05 d2.evaluation.evaluator]: [0mInference done 417/5275. 0.0566 s / img. ETA=0:05:00
[32m[12/07 22:42:10 d2.evaluation.evaluator]: [0mInference done 500/5275. 0.0565 s / img. ETA=0:04:54
[32m[12/07 22:42:15 d2.evaluation.evaluator]: [0mInference done 582/5275. 0.0566 s / img. ETA=0:04:49
[32m[12/07 22:42:20 d2.evaluation.evaluator]: [0mInference done 665/5275. 0.0566 s / img. ETA=0:04:43
[32m[12/07 22:42:25 d2.evaluation.evaluator]: [0mInference done 747/5275. 0.0566 s / img. ETA=0:04:38
[32m[12/07 22:42:30 d2.evaluation.evaluator]: [0mInference done 830/5275. 0.0565 s / img. ETA=0:04:32
[32m[12/07 22:42:35 d2.evaluation.evaluator]: [0mInference done 912/5275. 0.0565 s / img. ETA=0:04:27
[32m[12/07 22:42:40 d2.evaluation.evaluator]: [0mInference done 994/5275. 0.0565 s / img. ETA=0:04:22
[32m[12/07 22:42:45 d2.evaluation.evaluator]: [0mInference done 1078/5275. 0.0565 s / img. ETA=0:04:17
[32m[12/07 22:42:50 d2.evaluation.evaluator]: [0mInference do

Loading and preparing results...
DONE (t=0.25s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *segm*
COCOeval_opt.evaluate() finished in 0.56 seconds.
Accumulating evaluation results...
COCOeval_opt.accumulate() finished in 0.06 seconds.
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.395
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.743
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.395
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.241
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.628
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.711
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.274
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.452
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.46

[32m[12/07 22:53:40 d2.utils.events]: [0m eta: 1:01:33  iter: 25479  total_loss: 0.503  loss_cls: 0.07897  loss_box_reg: 0.2103  loss_mask: 0.188  loss_rpn_cls: 0.005061  loss_rpn_loc: 0.02165  time: 0.8194  data_time: 0.0170  lr: 0.001  max_mem: 8226M
[32m[12/07 22:53:57 d2.utils.events]: [0m eta: 1:01:17  iter: 25499  total_loss: 0.5517  loss_cls: 0.09094  loss_box_reg: 0.2025  loss_mask: 0.205  loss_rpn_cls: 0.006211  loss_rpn_loc: 0.02493  time: 0.8194  data_time: 0.0169  lr: 0.001  max_mem: 8226M
[32m[12/07 22:54:13 d2.utils.events]: [0m eta: 1:01:01  iter: 25519  total_loss: 0.5709  loss_cls: 0.0855  loss_box_reg: 0.2322  loss_mask: 0.2006  loss_rpn_cls: 0.006118  loss_rpn_loc: 0.02664  time: 0.8194  data_time: 0.0171  lr: 0.001  max_mem: 8226M
[32m[12/07 22:54:29 d2.utils.events]: [0m eta: 1:00:43  iter: 25539  total_loss: 0.5239  loss_cls: 0.08131  loss_box_reg: 0.2054  loss_mask: 0.2207  loss_rpn_cls: 0.004779  loss_rpn_loc: 0.02005  time: 0.8194  data_time: 0.0171  lr

[32m[12/07 23:02:24 d2.utils.events]: [0m eta: 0:52:53  iter: 26119  total_loss: 0.5068  loss_cls: 0.0828  loss_box_reg: 0.1964  loss_mask: 0.1758  loss_rpn_cls: 0.005553  loss_rpn_loc: 0.02287  time: 0.8192  data_time: 0.0171  lr: 0.001  max_mem: 8226M
[32m[12/07 23:02:40 d2.utils.events]: [0m eta: 0:52:36  iter: 26139  total_loss: 0.5206  loss_cls: 0.08254  loss_box_reg: 0.2089  loss_mask: 0.1882  loss_rpn_cls: 0.0052  loss_rpn_loc: 0.0278  time: 0.8192  data_time: 0.0169  lr: 0.001  max_mem: 8226M
[32m[12/07 23:02:57 d2.utils.events]: [0m eta: 0:52:20  iter: 26159  total_loss: 0.4783  loss_cls: 0.07218  loss_box_reg: 0.1946  loss_mask: 0.1816  loss_rpn_cls: 0.006338  loss_rpn_loc: 0.02882  time: 0.8192  data_time: 0.0172  lr: 0.001  max_mem: 8226M
[32m[12/07 23:03:13 d2.utils.events]: [0m eta: 0:52:03  iter: 26179  total_loss: 0.5486  loss_cls: 0.09665  loss_box_reg: 0.2337  loss_mask: 0.1923  loss_rpn_cls: 0.006817  loss_rpn_loc: 0.02919  time: 0.8193  data_time: 0.0169  lr

[32m[12/07 23:11:08 d2.utils.events]: [0m eta: 0:44:05  iter: 26759  total_loss: 0.5088  loss_cls: 0.07855  loss_box_reg: 0.2026  loss_mask: 0.1885  loss_rpn_cls: 0.004077  loss_rpn_loc: 0.02478  time: 0.8190  data_time: 0.0167  lr: 0.001  max_mem: 8226M
[32m[12/07 23:11:24 d2.utils.events]: [0m eta: 0:43:49  iter: 26779  total_loss: 0.4996  loss_cls: 0.07868  loss_box_reg: 0.2021  loss_mask: 0.1753  loss_rpn_cls: 0.00661  loss_rpn_loc: 0.02885  time: 0.8190  data_time: 0.0174  lr: 0.001  max_mem: 8226M
[32m[12/07 23:11:41 d2.utils.events]: [0m eta: 0:43:35  iter: 26799  total_loss: 0.5859  loss_cls: 0.09078  loss_box_reg: 0.2316  loss_mask: 0.2073  loss_rpn_cls: 0.005258  loss_rpn_loc: 0.02399  time: 0.8191  data_time: 0.0168  lr: 0.001  max_mem: 8226M
[32m[12/07 23:11:57 d2.utils.events]: [0m eta: 0:43:17  iter: 26819  total_loss: 0.4899  loss_cls: 0.06862  loss_box_reg: 0.2104  loss_mask: 0.1951  loss_rpn_cls: 0.005386  loss_rpn_loc: 0.02794  time: 0.8190  data_time: 0.0167 

[32m[12/07 23:19:52 d2.utils.events]: [0m eta: 0:35:26  iter: 27399  total_loss: 0.5282  loss_cls: 0.08092  loss_box_reg: 0.2088  loss_mask: 0.1994  loss_rpn_cls: 0.006036  loss_rpn_loc: 0.03344  time: 0.8189  data_time: 0.0169  lr: 0.001  max_mem: 8226M
[32m[12/07 23:20:08 d2.utils.events]: [0m eta: 0:35:10  iter: 27419  total_loss: 0.5107  loss_cls: 0.08106  loss_box_reg: 0.2099  loss_mask: 0.1987  loss_rpn_cls: 0.005618  loss_rpn_loc: 0.02131  time: 0.8189  data_time: 0.0164  lr: 0.001  max_mem: 8226M
[32m[12/07 23:20:24 d2.utils.events]: [0m eta: 0:34:54  iter: 27439  total_loss: 0.4717  loss_cls: 0.07151  loss_box_reg: 0.1891  loss_mask: 0.1942  loss_rpn_cls: 0.003717  loss_rpn_loc: 0.02058  time: 0.8189  data_time: 0.0169  lr: 0.001  max_mem: 8226M
[32m[12/07 23:20:40 d2.utils.events]: [0m eta: 0:34:37  iter: 27459  total_loss: 0.4882  loss_cls: 0.07153  loss_box_reg: 0.1922  loss_mask: 0.1857  loss_rpn_cls: 0.006629  loss_rpn_loc: 0.02207  time: 0.8188  data_time: 0.0170

[32m[12/07 23:28:36 d2.utils.events]: [0m eta: 0:26:46  iter: 28039  total_loss: 0.5656  loss_cls: 0.09786  loss_box_reg: 0.2213  loss_mask: 0.2128  loss_rpn_cls: 0.004532  loss_rpn_loc: 0.02259  time: 0.8188  data_time: 0.0170  lr: 0.001  max_mem: 8226M
[32m[12/07 23:28:52 d2.utils.events]: [0m eta: 0:26:30  iter: 28059  total_loss: 0.5405  loss_cls: 0.08123  loss_box_reg: 0.2119  loss_mask: 0.1829  loss_rpn_cls: 0.004829  loss_rpn_loc: 0.02995  time: 0.8189  data_time: 0.0167  lr: 0.001  max_mem: 8226M
[32m[12/07 23:29:09 d2.utils.events]: [0m eta: 0:26:15  iter: 28079  total_loss: 0.5557  loss_cls: 0.08543  loss_box_reg: 0.2227  loss_mask: 0.1923  loss_rpn_cls: 0.005483  loss_rpn_loc: 0.02629  time: 0.8189  data_time: 0.0166  lr: 0.001  max_mem: 8226M
[32m[12/07 23:29:25 d2.utils.events]: [0m eta: 0:25:57  iter: 28099  total_loss: 0.5797  loss_cls: 0.09354  loss_box_reg: 0.2239  loss_mask: 0.2206  loss_rpn_cls: 0.00374  loss_rpn_loc: 0.02657  time: 0.8189  data_time: 0.0172 

[32m[12/07 23:37:21 d2.utils.events]: [0m eta: 0:18:02  iter: 28679  total_loss: 0.5006  loss_cls: 0.07378  loss_box_reg: 0.1977  loss_mask: 0.1799  loss_rpn_cls: 0.003718  loss_rpn_loc: 0.02288  time: 0.8189  data_time: 0.0171  lr: 0.001  max_mem: 8226M
[32m[12/07 23:37:38 d2.utils.events]: [0m eta: 0:17:46  iter: 28699  total_loss: 0.5068  loss_cls: 0.08161  loss_box_reg: 0.2084  loss_mask: 0.1781  loss_rpn_cls: 0.005217  loss_rpn_loc: 0.02693  time: 0.8190  data_time: 0.0170  lr: 0.001  max_mem: 8226M
[32m[12/07 23:37:54 d2.utils.events]: [0m eta: 0:17:30  iter: 28719  total_loss: 0.4481  loss_cls: 0.0617  loss_box_reg: 0.1753  loss_mask: 0.1785  loss_rpn_cls: 0.006339  loss_rpn_loc: 0.02555  time: 0.8189  data_time: 0.0170  lr: 0.001  max_mem: 8226M
[32m[12/07 23:38:11 d2.utils.events]: [0m eta: 0:17:13  iter: 28739  total_loss: 0.5744  loss_cls: 0.08681  loss_box_reg: 0.2178  loss_mask: 0.2004  loss_rpn_cls: 0.006167  loss_rpn_loc: 0.02883  time: 0.8189  data_time: 0.0165 

[32m[12/07 23:46:07 d2.utils.events]: [0m eta: 0:09:18  iter: 29319  total_loss: 0.5362  loss_cls: 0.08057  loss_box_reg: 0.2219  loss_mask: 0.1811  loss_rpn_cls: 0.004613  loss_rpn_loc: 0.02282  time: 0.8191  data_time: 0.0171  lr: 0.001  max_mem: 8226M
[32m[12/07 23:46:24 d2.utils.events]: [0m eta: 0:09:01  iter: 29339  total_loss: 0.4932  loss_cls: 0.07986  loss_box_reg: 0.1992  loss_mask: 0.1792  loss_rpn_cls: 0.005051  loss_rpn_loc: 0.02275  time: 0.8191  data_time: 0.0170  lr: 0.001  max_mem: 8226M
[32m[12/07 23:46:40 d2.utils.events]: [0m eta: 0:08:45  iter: 29359  total_loss: 0.5059  loss_cls: 0.08218  loss_box_reg: 0.2173  loss_mask: 0.1853  loss_rpn_cls: 0.005868  loss_rpn_loc: 0.0272  time: 0.8191  data_time: 0.0173  lr: 0.001  max_mem: 8226M
[32m[12/07 23:46:57 d2.utils.events]: [0m eta: 0:08:29  iter: 29379  total_loss: 0.5312  loss_cls: 0.08424  loss_box_reg: 0.2163  loss_mask: 0.2058  loss_rpn_cls: 0.004675  loss_rpn_loc: 0.02333  time: 0.8191  data_time: 0.0171 

[32m[12/07 23:54:52 d2.utils.events]: [0m eta: 0:00:32  iter: 29959  total_loss: 0.5332  loss_cls: 0.08542  loss_box_reg: 0.2056  loss_mask: 0.2141  loss_rpn_cls: 0.004488  loss_rpn_loc: 0.02102  time: 0.8191  data_time: 0.0167  lr: 0.001  max_mem: 8226M
[32m[12/07 23:55:09 d2.utils.events]: [0m eta: 0:00:16  iter: 29979  total_loss: 0.4951  loss_cls: 0.08194  loss_box_reg: 0.2135  loss_mask: 0.1737  loss_rpn_cls: 0.006409  loss_rpn_loc: 0.02713  time: 0.8191  data_time: 0.0173  lr: 0.001  max_mem: 8226M
[32m[12/07 23:55:31 d2.data.datasets.coco]: [0mLoaded 5275 images in COCO format from /application/input/test_annotations_equal.json
[32m[12/07 23:55:31 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[12/07 23:55:31 d2.data.common]: [0mSerializing 5275 elements to byte tensors and concatenating them all ...
[32m[12/07 23:55:31 d2.data.common]: [0mSerial

[32m[12/08 00:00:59 d2.evaluation.evaluator]: [0mTotal inference pure compute time: 0:04:59 (0.056803 s / img per device, on 1 devices)
[32m[12/08 00:00:59 d2.evaluation.coco_evaluation]: [0mPreparing results for COCO format ...
[32m[12/08 00:00:59 d2.evaluation.coco_evaluation]: [0mSaving results to coco_eval/coco_instances_results.json
[32m[12/08 00:00:59 d2.evaluation.coco_evaluation]: [0mEvaluating predictions with unofficial COCO API...
Loading and preparing results...
DONE (t=0.03s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
COCOeval_opt.evaluate() finished in 0.62 seconds.
Accumulating evaluation results...
COCOeval_opt.accumulate() finished in 0.06 seconds.
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.512
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.822
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.535
 Average Precision  (AP)

[32m[12/08 00:03:23 d2.evaluation.evaluator]: [0mInference done 2293/5275. 0.0568 s / img. ETA=0:03:03
[32m[12/08 00:03:28 d2.evaluation.evaluator]: [0mInference done 2372/5275. 0.0568 s / img. ETA=0:02:59
[32m[12/08 00:03:33 d2.evaluation.evaluator]: [0mInference done 2453/5275. 0.0568 s / img. ETA=0:02:54
[32m[12/08 00:03:38 d2.evaluation.evaluator]: [0mInference done 2533/5275. 0.0568 s / img. ETA=0:02:49
[32m[12/08 00:03:43 d2.evaluation.evaluator]: [0mInference done 2615/5275. 0.0568 s / img. ETA=0:02:44
[32m[12/08 00:03:48 d2.evaluation.evaluator]: [0mInference done 2696/5275. 0.0568 s / img. ETA=0:02:39
[32m[12/08 00:03:53 d2.evaluation.evaluator]: [0mInference done 2779/5275. 0.0568 s / img. ETA=0:02:34
[32m[12/08 00:03:58 d2.evaluation.evaluator]: [0mInference done 2860/5275. 0.0568 s / img. ETA=0:02:29
[32m[12/08 00:04:04 d2.evaluation.evaluator]: [0mInference done 2943/5275. 0.0568 s / img. ETA=0:02:23
[32m[12/08 00:04:09 d2.evaluation.evaluator]: [0mInfe

In [10]:
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))

cfg.DATASETS.TRAIN = ("my_dataset_train_v2",) 
cfg.DATASETS.TEST = ("my_dataset_val_v2",)
cfg.TEST.EVAL_PERIOD = 5000
cfg.DATALOADER.NUM_WORKERS = 4 ## 4 per gpu
cfg.SOLVER.IMS_PER_BATCH = 10
cfg.SOLVER.BASE_LR = 0.001  # pick a good LR
cfg.SOLVER.MAX_ITER = 105000
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (ship)
cfg.MAX_SIZE_TRAIN = 756 #Max image size 
cfg.SOLVER.STEPS=(70000, 105000) #reduce gradually lr until 52500. 
cfg.OUTPUT_DIR = "./runs/run_50_anchortest"
cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[16, 32, 64, 128, 256, 512]]
cfg.SOLVER.AMP.ENABLED = True  # Automatic Mixed Precision

In [None]:
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CocoTrainer(cfg) 
trainer.resume_or_load(resume=True) #True takes last checkpoint file which is saved below.
trainer.train() #Trainer will throw out non-annotated pictures. 

[32m[12/09 11:59:53 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[12/09 11:59:54 d2.data.datasets.coco]: [0mLoading /application/input/train_annotations_equal.json takes 1.40 seconds.
[32m[12/09 11:59:55 d2.data.datasets.coco]: [0mLoaded 100233 images in COCO format from /application/input/train_annotations_equal.json
[32m[12/09 11:59:56 d2.data.build]: [0mRemoved 59762 images with no usable annotations. 40471 images left.
[32m[12/09 11:59:58 d2.data.build]: [0mUsing training sampler TrainingSampler
[32m[12/09 11:59:58 d2.data.common]: [0mSerializing 40471 elements to byte tensors and concatenating them all ...
[32m[12/09 11:59:58 d2.data.common]: [0mSerialized dataset takes 20.74 MiB
[32m[12/09 11:59:59 d2.engine.train_loop]: [0mStarting training from iteration 90000
[32m[12/09 12:00:29 d2.utils.events]: [0m eta: 6:12:18  iter: 90019  total_loss: 0.2678  loss_cls: 0.03559  loss_box_reg: 0.08752  loss_mask: 0.1152  loss_rpn_cls: 0.002394  loss_rpn_loc: 0.01846  time: 1.4973  data_time: 0.1530  lr: 0.001  max_mem: 7821M
[32m[12/0

[32m[12/09 12:16:11 d2.utils.events]: [0m eta: 6:13:58  iter: 90619  total_loss: 0.2711  loss_cls: 0.03426  loss_box_reg: 0.08927  loss_mask: 0.1224  loss_rpn_cls: 0.002596  loss_rpn_loc: 0.02163  time: 1.5663  data_time: 0.1343  lr: 0.001  max_mem: 7821M
[32m[12/09 12:16:43 d2.utils.events]: [0m eta: 6:14:02  iter: 90639  total_loss: 0.2931  loss_cls: 0.03634  loss_box_reg: 0.09527  loss_mask: 0.1304  loss_rpn_cls: 0.002045  loss_rpn_loc: 0.01572  time: 1.5677  data_time: 0.1282  lr: 0.001  max_mem: 7821M
[32m[12/09 12:17:15 d2.utils.events]: [0m eta: 6:13:31  iter: 90659  total_loss: 0.2786  loss_cls: 0.03831  loss_box_reg: 0.08791  loss_mask: 0.1325  loss_rpn_cls: 0.00206  loss_rpn_loc: 0.01391  time: 1.5679  data_time: 0.1203  lr: 0.001  max_mem: 7821M
[32m[12/09 12:17:46 d2.utils.events]: [0m eta: 6:13:06  iter: 90679  total_loss: 0.2874  loss_cls: 0.03698  loss_box_reg: 0.08798  loss_mask: 0.1368  loss_rpn_cls: 0.002485  loss_rpn_loc: 0.01818  time: 1.5684  data_time: 0.1

In [1]:
# Create a submission for kaggle. 

# There will be an overload error: https://github.com/pytorch/vision/pull/2705
# With classifier predictions included
!python module_submit.py --model_path="runs/run_50_anchortest" \\
--submit_csv="submit_50_anchortest4.csv" \\
--score_thres=0.8 \\
--ship_proba_csv="test_ship_proba.csv" \\
--anchor_sizes="small"

# By itself
!python module_submit.py --model_path="runs/run_50_anchortest" \\ 
--submit_csv="submit_50_anchortest_5.csv" \\ 
--score_thres=0.8 \\ 
--anchor_sizes="small"


1.6.0 True
Start creating predictions, dataset from: input/test_v2/
Combining classifier result: test_ship_proba.csv
{'file_name': 'input/test_v2/000367c13.jpg', 'image_id': '000367c13'}
	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /opt/conda/conda-bld/pytorch_1595629403081/work/torch/csrc/utils/python_arg_parser.cpp:766.)
  filter_inds = filter_mask.nonzero()
0 3433
1000 3433
2000 3433
3000 3433
0 3433
1000 3433
2000 3433
3000 3433
Detectron2:  3861 instances,  2637 images
Detectron2:  3861 instances,  2637 images
Done!
1.6.0 True
Start creating predictions, dataset from: input/test_v2/
{'file_name': 'input/test_v2/00002bd58.jpg', 'image_id': '00002bd58'}
	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /opt/conda/conda-bld/pytorch_1595629403081/work/torch/csrc/utils/python_arg_parser.cpp:766.)
  filter_inds = filter_mask.nonzero()
0 15606
1