In [13]:
# Mount Google Drive; the project directory used by later cells lives under /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
!pip install pyyaml==5.1
#!pip uninstall torch
!pip uninstall detectron2
!pip install torch==1.9.0+cu102 torchvision==0.10.0+cu102 -f https://download.pytorch.org/whl/torch_stable.html
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html

In [None]:
!pip install python-dotenv
!pip install attrdict

### Setup paths

In [14]:
import os
import sys

from dotenv import find_dotenv, load_dotenv

# Switch to the project checkout on Drive before looking for .env
# (presumably so find_dotenv() picks up the project's own file - confirm).
os.chdir('/content/drive/MyDrive/PyPSA_Africa_images/detect-energy')
load_dotenv(find_dotenv())

# Make the project's modules importable. os.environ.get returns None when the
# variable is unset, and re-running the cell would append the same entry again,
# so only add a real path that is not on sys.path yet.
project_root = os.environ.get('PROJECT_ROOT')
if project_root and project_root not in sys.path:
    sys.path.append(project_root)

data_path = os.environ.get('PROJECT_DATASETS')

### Register datasets

In [21]:
from itertools import product
from detectron2.data import DatasetCatalog
from detectron2.data import MetadataCatalog
from detectron2.data.datasets import register_coco_instances 
from dotenv import find_dotenv, load_dotenv

# Load .env again and read the dataset root; register_all() joins dataset names onto it.
# os.environ.get returns None if PROJECT_DATASETS is not defined.
load_dotenv(find_dotenv())
DATASETS_PATH = os.environ.get('PROJECT_DATASETS')

def register_all():
    """Register every COCO-format dataset used in this notebook.

    Covers the train/val splits of 'fake_maxar', 'duke' and 'duke_512', followed
    by 'manual_maxar_val'. Each dataset is expected at
    ``DATASETS_PATH/<name>/labels.json`` with its images in
    ``DATASETS_PATH/<name>/data/``. A name that is already present in the
    catalogs is removed first and then registered again.
    """
    bases = ['fake_maxar', 'duke', 'duke_512']
    splits = ['train', 'val']

    # Same order as before: all base/split combinations, then the manual set.
    all_names = [f'{base}_{split}' for base, split in product(bases, splits)]
    all_names.append('manual_maxar_val')

    for ds_name in all_names:
        labels_file = os.path.join(DATASETS_PATH, f'{ds_name}/labels.json')
        image_dir = os.path.join(DATASETS_PATH, f'{ds_name}/data/')

        # Drop any previous entry under this name before registering again.
        already_registered = ds_name in DatasetCatalog.list()
        if already_registered:
            DatasetCatalog.remove(ds_name)
            MetadataCatalog.remove(ds_name)

        register_coco_instances(ds_name, {}, labels_file, image_dir)

register_all()

### Define Trainer

In [25]:
import json
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader, DatasetMapper


class EvalTrainer(DefaultTrainer):
    """DefaultTrainer that also evaluates on a list of datasets at a fixed interval.

    Config keys read by this class:
        cfg.DATASETS.EVAL: a dataset name or a sequence of dataset names to evaluate on.
        cfg.TEST.INTERVAL: evaluate whenever ``(iter + 1)`` is a multiple of this value.
        cfg.OUTPUT_DIR: directory that receives one JSON result file per dataset
            and evaluation point.
    """

    def __init__(self, cfg):
        super().__init__(cfg)

        # Accept a single dataset name as well as a sequence of names.
        if isinstance(cfg.DATASETS.EVAL, str):
            self.eval_datasets = [cfg.DATASETS.EVAL]
        else:
            self.eval_datasets = cfg.DATASETS.EVAL

        # Build one test loader and one COCO evaluator per dataset up front,
        # kept in the same order as self.eval_datasets.
        self.eval_loaders = []
        self.evaluators = []
        for dataset in self.eval_datasets:

            loader = build_detection_test_loader(DatasetCatalog.get(dataset),
                                                 mapper=DatasetMapper(cfg, is_train=False))

            self.eval_loaders.append(loader)
            self.evaluators.append(COCOEvaluator(dataset))

    def after_step(self):
        """Run the regular after-step logic, then evaluate if this iteration is due.

        Results for each dataset are written to
        ``<OUTPUT_DIR>/eval_<dataset>_iter_<iter>.json``.
        """
        super().after_step()

        if (self.iter + 1) % self.cfg.TEST.INTERVAL != 0:
            return

        # Iterate over self.eval_datasets (set in __init__); the trainer has no
        # `DATASETS` attribute, so the previous `self.DATASETS.EVAL` raised
        # AttributeError at the first evaluation point.
        for dataset, loader, evaluator in zip(self.eval_datasets,
                                              self.eval_loaders,
                                              self.evaluators):

            results = inference_on_dataset(self.model, loader, evaluator)

            out_path = os.path.join(self.cfg.OUTPUT_DIR,
                                    f'eval_{dataset}_iter_{self.iter}.json')
            with open(out_path, 'w') as out:
                json.dump(results, out)

### Function to test parameters

In [26]:
from detectron2.config import get_cfg
from detectron2 import model_zoo
from attrdict import AttrDict


def run_parameters(params):
    """Train one model for a single hyper-parameter combination.

    Args:
        params: mapping with the keys 'model_type' (a config file name under
            "COCO-Detection/" in the detectron2 model zoo),
            'SOLVER.IMS_PER_BATCH', 'SOLVER.BASE_LR', 'SOLVER.MOMENTUM' and
            'SOLVER.WEIGHT_DECAY'.

    Side effects:
        Creates a per-run output directory under
        /content/drive/MyDrive/PyPSA_Africa_images/models/ and runs training
        with EvalTrainer, which writes its evaluation results there.
    """
    print(f'Starting run for parameters: {params}')

    cfg = get_cfg()

    # From Detectron2 Model Zoo
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/" + params['model_type']))

    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

    # The trailing comma makes this a real one-element tuple; ('name') is just a string.
    # NOTE(review): the original assigned 'duke_512_train' and then immediately
    # overwrote it with 'manual_maxar_val'. The effective value is kept here, but
    # that dataset is also in DATASETS.EVAL below - confirm which one is intended.
    cfg.DATASETS.TRAIN = ('manual_maxar_val',)
    cfg.DATASETS.EVAL = ['manual_maxar_val', 'duke_512_val', 'duke_512_train']

    cfg.TEST.INTERVAL = 5_000
    cfg.SOLVER.MAX_ITER = 100_000
    cfg.SOLVER.STEPS = (70_000, 85_000)

    # setup current parameters
    cfg.SOLVER.IMS_PER_BATCH = params['SOLVER.IMS_PER_BATCH']
    cfg.SOLVER.BASE_LR = params['SOLVER.BASE_LR']
    cfg.SOLVER.MOMENTUM = params['SOLVER.MOMENTUM']
    cfg.SOLVER.WEIGHT_DECAY = params['SOLVER.WEIGHT_DECAY']

    # Build the run name from parts instead of a backslash-continued f-string,
    # which embedded the source indentation (long runs of spaces) in the name.
    # The model type is part of the name so that runs which differ only in the
    # backbone do not share, and overwrite, the same output directory.
    model_stem = os.path.splitext(params['model_type'])[0]
    model_name = '_'.join([
        model_stem,
        f'LR_{cfg.SOLVER.BASE_LR}',
        f'IMSPERBATCH_{cfg.SOLVER.IMS_PER_BATCH}',
        f'MOM_{cfg.SOLVER.MOMENTUM}',
        f'WEIGHTDECAY_{cfg.SOLVER.WEIGHT_DECAY}',
    ])
    cfg.OUTPUT_DIR = '/content/drive/MyDrive/PyPSA_Africa_images/models/' + model_name

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = EvalTrainer(cfg)
    trainer.resume_or_load(resume=False)

    trainer.train()

### Define parameter grid and run

In [None]:
from sklearn.model_selection import ParameterGrid

# Search space: backbone config and base learning rate vary,
# the remaining solver settings are held at a single value each.
parameters = {
    'model_type': [
        'faster_rcnn_R_50_FPN_3x.yaml',
        'faster_rcnn_R_101_FPN_3x.yaml',
    ],
    'SOLVER.BASE_LR': [1e-4, 1e-3, 1e-2],
    'SOLVER.MOMENTUM': [0.9],           # default
    'SOLVER.IMS_PER_BATCH': [16],       # default
    'SOLVER.WEIGHT_DECAY': [0.0001],    # default
}

# Expand the grid into a list of concrete parameter dicts.
parameter_sweep = [*ParameterGrid(parameters)]

# Train one model per combination, sequentially.
for params in parameter_sweep:
    run_parameters(params)

Starting run for parameters: {'SOLVER.BASE_LR': 0.0001, 'SOLVER.IMS_PER_BATCH': 16, 'SOLVER.MOMENTUM': 0.9, 'SOLVER.WEIGHT_DECAY': 0.0001, 'model_type': 'faster_rcnn_R_50_FPN_3x.yaml'}
[32m[03/19 15:14:34 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
  

R-50.pkl: 102MB [00:03, 32.5MB/s]                           

[32m[03/19 15:14:39 d2.checkpoint.c2_model_loading]: [0mRenaming Caffe2 weights ......





[32m[03/19 15:14:39 d2.checkpoint.c2_model_loading]: [0mFollowing weights matched with submodule backbone.bottom_up:
| Names in Model    | Names in Checkpoint      | Shapes                                          |
|:------------------|:-------------------------|:------------------------------------------------|
| res2.0.conv1.*    | res2_0_branch2a_{bn_*,w} | (64,) (64,) (64,) (64,) (64,64,1,1)             |
| res2.0.conv2.*    | res2_0_branch2b_{bn_*,w} | (64,) (64,) (64,) (64,) (64,64,3,3)             |
| res2.0.conv3.*    | res2_0_branch2c_{bn_*,w} | (256,) (256,) (256,) (256,) (256,64,1,1)        |
| res2.0.shortcut.* | res2_0_branch1_{bn_*,w}  | (256,) (256,) (256,) (256,) (256,64,1,1)        |
| res2.1.conv1.*    | res2_1_branch2a_{bn_*,w} | (64,) (64,) (64,) (64,) (64,256,1,1)            |
| res2.1.conv2.*    | res2_1_branch2b_{bn_*,w} | (64,) (64,) (64,) (64,) (64,64,3,3)             |
| res2.1.conv3.*    | res2_1_branch2c_{bn_*,w} | (256,) (256,) (256,) (256,) (256,64,1,1)

Some model parameters or buffers are not found in the checkpoint:
[34mbackbone.fpn_lateral2.{bias, weight}[0m
[34mbackbone.fpn_lateral3.{bias, weight}[0m
[34mbackbone.fpn_lateral4.{bias, weight}[0m
[34mbackbone.fpn_lateral5.{bias, weight}[0m
[34mbackbone.fpn_output2.{bias, weight}[0m
[34mbackbone.fpn_output3.{bias, weight}[0m
[34mbackbone.fpn_output4.{bias, weight}[0m
[34mbackbone.fpn_output5.{bias, weight}[0m
[34mproposal_generator.rpn_head.anchor_deltas.{bias, weight}[0m
[34mproposal_generator.rpn_head.conv.{bias, weight}[0m
[34mproposal_generator.rpn_head.objectness_logits.{bias, weight}[0m
[34mroi_heads.box_head.fc1.{bias, weight}[0m
[34mroi_heads.box_head.fc2.{bias, weight}[0m
[34mroi_heads.box_predictor.bbox_pred.{bias, weight}[0m
[34mroi_heads.box_predictor.cls_score.{bias, weight}[0m
The checkpoint state_dict contains keys that are not used by the model:
  [35mfc1000.{bias, weight}[0m
  [35mstem.conv1.bias[0m


[32m[03/19 15:14:39 d2.engine.train_loop]: [0mStarting training from iteration 0


To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[32m[03/19 15:15:26 d2.utils.events]: [0m eta: 2 days, 14:51:15  iter: 19  total_loss: 1.554  loss_cls: 0.8526  loss_box_reg: 0.005869  loss_rpn_cls: 0.6823  loss_rpn_loc: 0.01072  time: 2.2973  data_time: 0.9024  lr: 1.9981e-06  max_mem: 11286M
[32m[03/19 15:16:08 d2.utils.events]: [0m eta: 2 days, 13:43:58  iter: 39  total_loss: 1.265  loss_cls: 0.5603  loss_box_reg: 0.004868  loss_rpn_cls: 0.6798  loss_rpn_loc: 0.01094  time: 2.2171  data_time: 0.7132  lr: 3.9961e-06  max_mem: 11286M
[32m[03/19 15:16:39 d2.utils.events]: [0m eta: 2 days, 11:12:57  iter: 59  total_loss: 0.9619  loss_cls: 0.2711  loss_box_reg: 0.004275  loss_rpn_cls: 0.6748  loss_rpn_loc: 0.01147  time: 1.9760  data_time: 0.0810  lr: 5.9941e-06  max_mem: 11286M
[32m[03/19 15:17:09 d2.utils.events]: [0m eta: 1 day, 18:43:32  iter: 79  total_loss: 0.8175  loss_cls: 0.1355  loss_box_reg: 0.00479  loss_rpn_cls: 0.6657  loss_rpn_loc: 0.01078  time: 1.8603  data_time: 0.0776  lr: 7.9921e-06  max_mem: 11286M
