# Training Script for MMDetection

In [3]:
from mmcv import Config, mkdir_or_exist
from mmdet.apis import set_random_seed
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
import os

In [7]:
# Read the base HTC configuration file into `cfg`; later cells modify it in place.
base_config_path = '/home/liujqian/Documents/repos/mmdetection/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py'
cfg = Config.fromfile(base_config_path)

In [8]:
# Customise the base configuration for the single-class web page segmentation
# dataset, following the MMDetection instance-segmentation tutorial:
# https://github.com/open-mmlab/mmdetection/blob/master/demo/MMDet_InstanceSeg_Tutorial.ipynb

# NOTE(review): this only rewrites the top-level variable; the per-split dataset
# configs under cfg.data presumably carry their own `type` entry -- confirm.
cfg.dataset_type = 'COCODataset'

# Per-split dataset settings. All three splits share the same images and the
# same single class; only the annotation file differs. Driving this from one
# table keeps the splits consistent (previously `val.seg_prefix` was never set
# because the `test` line was duplicated in its place).
split_ann_files = {
    'test': 'coco-formatted-info-val.json',
    'train': 'coco-formatted-info-train.json',
    'val': 'coco-formatted-info-val.json',
}
for split_name, ann_file in split_ann_files.items():
    split_cfg = getattr(cfg.data, split_name)
    split_cfg.ann_file = ann_file
    split_cfg.img_prefix = 'webis-webseg-20-screenshots/'
    # Trailing comma matters: this must be a 1-tuple, not a plain string.
    split_cfg.classes = ('webpage-segmentation',)
    # NOTE(review): the dumped train pipeline still uses
    # LoadAnnotations(with_seg=True) and collects 'gt_semantic_seg'; with no
    # seg_prefix the semantic-segmentation loading likely needs to be disabled
    # as well -- confirm.
    split_cfg.seg_prefix = None

# Modify the number of classes of the model in every box head and mask head
# (roi_head.bbox_head / roi_head.mask_head are iterated as lists of head configs).
for head_cfg in cfg.model.roi_head.bbox_head:
    head_cfg.num_classes = 1
for head_cfg in cfg.model.roi_head.mask_head:
    head_cfg.num_classes = 1

# Initialise from the COCO-pretrained HTC checkpoint to obtain a higher performance.
cfg.load_from = '/home/liujqian/Documents/projects/page-segmentation/checkpoints/htc_x101_64x4d_fpn_16x1_20e_coco_20200318-b181fd7a.pth'

# Set up working dir to save files and logs.
cfg.work_dir = '/home/liujqian/Documents/projects/page-segmentation/work_dir'

# The base learning rate (0.02) is presumably tuned for the 16-GPU x 1-image
# setup named in the config file ("16x1") -- TODO confirm.
# We divide it by 16 since we only use one GPU.
cfg.optimizer.lr = 0.02 / 16
cfg.lr_config.warmup = None
cfg.log_config.interval = 10

# We can set the evaluation interval to reduce the evaluation times
cfg.evaluation.interval = 12
# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 12

# Set seed thus the results are more reproducible; reuse cfg.seed so the value
# stored in the config and the one actually applied cannot drift apart.
cfg.seed = 0
set_random_seed(cfg.seed, deterministic=False)
cfg.gpu_ids = range(1)

cfg.device = 'cuda'

# We can also use tensorboard to log the training process
cfg.log_config.hooks = [
    dict(type='TextLoggerHook'),
    dict(type='TensorboardLoggerHook')]

Config:
dataset_type = 'COCODataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=32),
    dict(type='SegRescale', scale_factor=0.125),
    dict(type='DefaultFormatBundle'),
    dict(
        type='Collect',
        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_r

In [9]:
# Build the training dataset from the customised config
datasets = [build_dataset(cfg.data.train)]

# Fail fast on an empty training set. Otherwise training aborts much later with
# the opaque "ValueError: need at least one array to concatenate".
if len(datasets[0]) == 0:
    raise ValueError(
        f'Training dataset built from {cfg.data.train.ann_file!r} is empty. '
        'Check that cfg.data.train.classes matches the category names in the '
        'annotation file and that cfg.data.train.img_prefix points at the images.')

# Build the detector
model = build_detector(cfg.model)

# Add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES

# Create work_dir (no-op if it already exists), then start single-process
# training with validation enabled.
mkdir_or_exist(os.path.abspath(cfg.work_dir))
train_detector(model, datasets, cfg, distributed=False, validate=True)

loading annotations into memory...
Done (t=0.37s)
creating index...
index created!




KeyboardInterrupt: 

2023-01-21 22:48:40,584 - mmdet - INFO - Automatic scaling of learning rate (LR) has been disabled.
2023-01-21 22:48:40,615 - mmdet - INFO - load checkpoint from local path: /home/liujqian/Documents/projects/page-segmentation/checkpoints/htc_x101_64x4d_fpn_16x1_20e_coco_20200318-b181fd7a.pth


loading annotations into memory...
Done (t=0.03s)
creating index...
index created!



size mismatch for roi_head.bbox_head.0.fc_cls.weight: copying a param with shape torch.Size([81, 1024]) from checkpoint, the shape in current model is torch.Size([2, 1024]).
size mismatch for roi_head.bbox_head.0.fc_cls.bias: copying a param with shape torch.Size([81]) from checkpoint, the shape in current model is torch.Size([2]).
size mismatch for roi_head.bbox_head.1.fc_cls.weight: copying a param with shape torch.Size([81, 1024]) from checkpoint, the shape in current model is torch.Size([2, 1024]).
size mismatch for roi_head.bbox_head.1.fc_cls.bias: copying a param with shape torch.Size([81]) from checkpoint, the shape in current model is torch.Size([2]).
size mismatch for roi_head.bbox_head.2.fc_cls.weight: copying a param with shape torch.Size([81, 1024]) from checkpoint, the shape in current model is torch.Size([2, 1024]).
size mismatch for roi_head.bbox_head.2.fc_cls.bias: copying a param with shape torch.Size([81]) from checkpoint, the shape in current model is torch.Size([2]

ValueError: need at least one array to concatenate