# Faster R-CNN

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Flags.py" data-toc-modified-id="Flags.py-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Flags.py</a></span></li><li><span><a href="#checkpoint.py" data-toc-modified-id="checkpoint.py-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>checkpoint.py</a></span></li><li><span><a href="#utils.py" data-toc-modified-id="utils.py-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>utils.py</a></span><ul class="toc-item"><li><span><a href="#get_network" data-toc-modified-id="get_network-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>get_network</a></span></li><li><span><a href="#get_optimizer" data-toc-modified-id="get_optimizer-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>get_optimizer</a></span></li><li><span><a href="#transform_bbox" data-toc-modified-id="transform_bbox-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>transform_bbox</a></span></li><li><span><a href="#SmoothedValue" data-toc-modified-id="SmoothedValue-3.4"><span class="toc-item-num">3.4&nbsp;&nbsp;</span>SmoothedValue</a></span></li><li><span><a href="#all_gather" data-toc-modified-id="all_gather-3.5"><span class="toc-item-num">3.5&nbsp;&nbsp;</span>all_gather</a></span></li><li><span><a href="#reduce_dict" data-toc-modified-id="reduce_dict-3.6"><span class="toc-item-num">3.6&nbsp;&nbsp;</span>reduce_dict</a></span></li><li><span><a href="#MetricLogger" data-toc-modified-id="MetricLogger-3.7"><span class="toc-item-num">3.7&nbsp;&nbsp;</span>MetricLogger</a></span></li><li><span><a href="#collate_fn" data-toc-modified-id="collate_fn-3.8"><span class="toc-item-num">3.8&nbsp;&nbsp;</span>collate_fn</a></span></li><li><span><a href="#warmup_lr_scheduler" data-toc-modified-id="warmup_lr_scheduler-3.9"><span class="toc-item-num">3.9&nbsp;&nbsp;</span>warmup_lr_scheduler</a></span></li><li><span><a href="#mkdir" data-toc-modified-id="mkdir-3.10"><span class="toc-item-num">3.10&nbsp;&nbsp;</span>mkdir</a></span></li><li><span><a href="#setup_for_distributed" data-toc-modified-id="setup_for_distributed-3.11"><span class="toc-item-num">3.11&nbsp;&nbsp;</span>setup_for_distributed</a></span></li><li><span><a href="#is_dist_avail_and_initialized" data-toc-modified-id="is_dist_avail_and_initialized-3.12"><span class="toc-item-num">3.12&nbsp;&nbsp;</span>is_dist_avail_and_initialized</a></span></li><li><span><a href="#get_world_size" data-toc-modified-id="get_world_size-3.13"><span class="toc-item-num">3.13&nbsp;&nbsp;</span>get_world_size</a></span></li><li><span><a href="#get_rank" data-toc-modified-id="get_rank-3.14"><span class="toc-item-num">3.14&nbsp;&nbsp;</span>get_rank</a></span></li><li><span><a href="#is_main_process" data-toc-modified-id="is_main_process-3.15"><span class="toc-item-num">3.15&nbsp;&nbsp;</span>is_main_process</a></span></li><li><span><a href="#save_on_master" data-toc-modified-id="save_on_master-3.16"><span class="toc-item-num">3.16&nbsp;&nbsp;</span>save_on_master</a></span></li><li><span><a href="#init_distributed_mode" data-toc-modified-id="init_distributed_mode-3.17"><span class="toc-item-num">3.17&nbsp;&nbsp;</span>init_distributed_mode</a></span></li></ul></li><li><span><a href="#dataset.py" data-toc-modified-id="dataset.py-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>dataset.py</a></span></li><li><span><a href="#engine.py" data-toc-modified-id="engine.py-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>engine.py</a></span></li><li><span><a href="#train.py" data-toc-modified-id="train.py-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>train.py</a></span></li><li><span><a href="#test.py" data-toc-modified-id="test.py-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>test.py</a></span></li><li><span><a href="#Reference" data-toc-modified-id="Reference-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>Reference</a></span></li></ul></div>

In [1]:
import os
import sys
import fire
import gzip
import yaml
import glob
import errno
import random
import shutil
import platform
import warnings
import collections
from psutil import virtual_memory
from tqdm import tqdm, tqdm_notebook
from collections import defaultdict, deque

In [2]:
import re
import requests
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from PIL import Image

In [3]:
import cv2
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.distributed as dist
import torchvision.transforms as T
from torchvision import transforms, utils
from torch.utils.data import Sampler, Dataset, DataLoader 
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from tensorboardX import SummaryWriter
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

In [4]:
# 현재 OS 및 라이브러리 버전 체크 체크
current_os = platform.system()
print(f"Current OS: {current_os}")
print(f"CUDA: {torch.cuda.is_available()}")
print(f"Python Version: {platform.python_version()}")
print(f"torch Version: {torch.__version__}")
print(f"torchvision Version: {torchvision.__version__}")

# 중요하지 않은 에러 무시
warnings.filterwarnings(action='ignore')

# 유니코드 깨짐현상 해결
mpl.rcParams['axes.unicode_minus'] = False

# 나눔고딕 폰트 적용
plt.rcParams["font.family"] = 'NanumGothic'

Current OS: Linux
CUDA: True
Python Version: 3.7.10
torch Version: 1.8.2
torchvision Version: 0.9.2


## Flags.py
---

In [5]:
def dict_to_namedtuple(d):
    """
    Convert dictionary to named tuple.
    """
    FLAGSTuple = collections.namedtuple('FLAGS', sorted(d.keys()))

    for k, v in d.items():
        if k == 'PREFIX' or k == 'prefix':
            v = os.path.join('./', v)
        if type(v) is dict:
            d[k] = dict_to_namedtuple(v)
        elif type(v) is str:
            try:
                d[k] = eval(v)
            except:
                d[k] = v
    nt = FLAGSTuple(**d)

    return nt

In [6]:
class Flags:
    """
    Flags object.
    """

    def __init__(self, config_file):
        try:
            with open(config_file, 'r') as f:
                d = yaml.safe_load(f)
        except:
            d = config_file

        self.flags = dict_to_namedtuple(d)

    def get(self):
        return self.flags

In [7]:
config_file = "configs/faster-rcnn.yaml"
cfg = Flags(config_file).get()
cfg

FLAGS(BATCH_SIZE=1, CHECKPOINT='', DATA=FLAGS(TRAIN_ANN_PATH='/DATASET/COCO2017/annotations/instances_train2017.json', TRAIN_IMG_PATH='/DATASET/COCO2017/images/train2017', VALID_ANN_PATH='/DATASET/COCO2017/annotations/instances_val2017.json', VALID_IMG_PATH='/DATASET/COCO2017/images/val2017'), DEVICE='cuda', DROPOUT_RATE=0.1, EARLY_STOPPING_EPOCH=5, LR_SCHEDULER=FLAGS(GAMMA=0.1, STEP_SIZE=5), NETWORK=FLAGS(NAME='Faster R-CNN', PRETRAINED=False), NUM_EPOCHS=1, NUM_WORKERS=0, OPTIMIZER=FLAGS(LR=0.0001, TYPE='Adam', WEIGHT_DECAY_RATE=0.01), PREFIX='././log/FASTER-RCNN', PRINT_EPOCHS=1, PRINT_FREQ=100, SEED=42, SHUFFLE=False, experiment=FLAGS(vis_input='samples/fake.jpg', vis_output='./results'), test_checkpoint='./log/VGG_NP/checkpoints/0020.pth')

## checkpoint.py
---

In [8]:
use_cuda = torch.cuda.is_available()
use_cuda

True

In [9]:
default_checkpoint = {
    "epoch": 0,

    # train
    "train_loss": [],
    "train_loss_classifier": [],
    "train_loss_box_reg": [],
    "train_loss_objectness": [],
    "train_loss_rpn_box_reg": [],

    # valid
    "valid_loss": [],
    "valid_loss_classifier": [],
    "valid_loss_box_reg": [],
    "valid_loss_objectness": [],
    "valid_loss_rpn_box_reg": [],

    "lr": [], 
    "model": {},
    "configs":{},
}

In [10]:
def save_checkpoint(checkpoint, dir="./checkpoints", prefix=""):
    # Padded to 4 digits because of lexical sorting of numbers.
    # e.g. 0009.pth
    filename = "{num:0>4}.pth".format(num=checkpoint["epoch"])
    if not os.path.exists(os.path.join(prefix, dir)):
        os.makedirs(os.path.join(prefix, dir))
    torch.save(checkpoint, os.path.join(prefix, dir, filename))

In [11]:
def load_checkpoint(path, cuda=use_cuda):
    if cuda:
        return torch.load(path)
    else:
        # Load GPU model on CPU
        return torch.load(path, map_location=lambda storage, loc: storage)

In [12]:
def init_tensorboard(name="", base_dir="./tensorboard"):
    return SummaryWriter(os.path.join(name, base_dir))

In [13]:
def write_tensorboard(
    writer,
    epoch,
    train_loss,
    train_accuracy,
    train_precision,
    train_recall,
    valid_loss,
    valid_accuracy,
    valid_precision,
    valid_recall,
    model,
):
    writer.add_scalar("train_loss", train_loss, epoch)
    writer.add_scalar("train_accuracy", train_accuracy, epoch)
    writer.add_scalar("train_precision", train_precision, epoch)
    writer.add_scalar("train_recall", train_recall, epoch)
    writer.add_scalar("valid_loss", valid_loss, epoch)
    writer.add_scalar("valid_accuracy", valid_accuracy, epoch)
    writer.add_scalar("valid_precision", valid_precision, epoch)
    writer.add_scalar("valid_recall", valid_recall, epoch)

    for name, param in model.named_parameters():
        writer.add_histogram(
            "{}".format(name), param.detach().cpu().numpy(), epoch
        )
        if param.grad is not None:
            writer.add_histogram(
                "{}/grad".format(name), param.grad.detach().cpu().numpy(), epoch
            )

In [14]:
checkpoint = (
    load_checkpoint(cfg.CHECKPOINT, cuda=is_cuda)
    if cfg.CHECKPOINT != ""
    else default_checkpoint
)

In [15]:
checkpoint

{'epoch': 0,
 'train_loss': [],
 'train_loss_classifier': [],
 'train_loss_box_reg': [],
 'train_loss_objectness': [],
 'train_loss_rpn_box_reg': [],
 'valid_loss': [],
 'valid_loss_classifier': [],
 'valid_loss_box_reg': [],
 'valid_loss_objectness': [],
 'valid_loss_rpn_box_reg': [],
 'lr': [],
 'model': {},
 'configs': {}}

## utils.py
---

### get_network

In [16]:
def get_network(
    cfg
):
    model = None
    if cfg.NETWORK.NAME == 'Faster R-CNN':
        model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
            pretrained=cfg.NETWORK.PRETRAINED
        )
    else:
        raise NotImplementedError

    return model.to(cfg.DEVICE)

In [17]:
model = get_network(cfg)
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu

### get_optimizer

In [18]:
def get_optimizer(
    params,
    cfg
):
    if cfg.OPTIMIZER.TYPE == "Adam":
        optimizer = optim.Adam(
            params,
             lr=cfg.OPTIMIZER.LR, 
             weight_decay=cfg.OPTIMIZER.WEIGHT_DECAY_RATE
        )
    else:
        raise NotImplementedError

    return optimizer

In [54]:
optimizer = get_optimizer(params_to_optimise, cfg)
optimizer_state = checkpoint.get("optimizer")
if optimizer_state:
    optimizer.load_state_dict(optimizer_state)
for param_group in optimizer.param_groups:
    param_group["initial_lr"] = cfg.OPTIMIZER.LR

lr_scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=cfg.LR_SCHEDULER.STEP_SIZE,
    gamma=cfg.LR_SCHEDULER.GAMMA
)

In [19]:
model_state = checkpoint.get("model")

if model_state:
    model.load_state_dict(model_state)
    print(
        "\033[31m[+] Checkpoint\033[00m\n",
        "Resuming from epoch : {}\n".format(checkpoint["epoch"]),
        "Train Accuracy : {:.5f}\n".format(
            checkpoint["train_accuracy"][-1]),
        "Train Loss : {:.5f}\n".format(checkpoint["train_losses"][-1]),
        "Valid Accuracy : {:.5f}\n".format(
            checkpoint["valid_accuracy"][-1]),
        "Valid Loss : {:.5f}\n".format(checkpoint["valid_losses"][-1]),
    )

params_to_optimise = [
    param for param in model.parameters() if param.requires_grad
]

### transform_bbox

In [20]:
def transform_bbox(bbox):
    """
    COCO Bounding box: (x-top left, y-top left, width, height)
    Pascal VOC Bounding box :(x-top left, y-top left,x-bottom right, y-bottom right)
    """
    xmin = bbox[0]
    ymin = bbox[1]
    xmax = bbox[0]+bbox[2]
    ymax = bbox[1]+bbox[3]
    
    return xmin, ymin, xmax, ymax

### SmoothedValue

In [21]:
class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=20, fmt=None):
        if fmt is None:
            fmt = "{median:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.count, self.total],
                         dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)

### all_gather

In [22]:
def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]
    data_list = [None] * world_size
    dist.all_gather_object(data_list, data)
    return data_list

### reduce_dict

In [23]:
def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict

### MetricLogger

In [24]:
class MetricLogger(object):
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "\033[32m{}\033[00m: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        i = 0
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        if torch.cuda.is_available():
            log_msg = self.delimiter.join([
                '\033[33m[{current_time}]\033[00m ',
                header,
                '({0' + space_fmt + '}/{1})',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}',
                'max mem: {memory:.0f}'
            ])
        else:
            log_msg = self.delimiter.join([
                '\033[33m[{current_time}]\033[00m ',
                header,
                '({0' + space_fmt + '}/{1})',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}'
            ])
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable) - 1:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if torch.cuda.is_available():
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        current_time = str(datetime.datetime.now())[:-7],
                        meters=str(self),
                        time=str(iter_time), data=str(data_time),
                        memory=torch.cuda.max_memory_allocated() / MB))
                else:
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        current_time = str(datetime.datetime.now())[:-7],
                        meters=str(self),
                        time=str(iter_time), data=str(data_time)))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / len(iterable)))

In [52]:
MetricLogger(delimiter="  ").meters

defaultdict(__main__.SmoothedValue, {})

### collate_fn

In [25]:
def collate_fn(batch):
    return tuple(zip(*batch))

### warmup_lr_scheduler

In [26]:
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):

    def f(x):
        if x >= warmup_iters:
            return 1
        alpha = float(x) / warmup_iters
        return warmup_factor * (1 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, f)

### mkdir

In [27]:
def mkdir(path):
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

### setup_for_distributed

In [28]:
def setup_for_distributed(is_master):
    """
    This function disables printing when not in master process
    """
    import builtins as __builtin__
    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop('force', False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print

### is_dist_avail_and_initialized

In [29]:
def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True

### get_world_size

In [30]:
def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()

### get_rank

In [31]:
def get_rank():
    if not is_dist_avail_and_initialized():
        return 0
    return dist.get_rank()

### is_main_process

In [32]:
def is_main_process():
    return get_rank() == 0

### save_on_master

In [33]:
def save_on_master(*args, **kwargs):
    if is_main_process():
        torch.save(*args, **kwargs)

### init_distributed_mode

In [34]:
def init_distributed_mode(args):
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)

## dataset.py
---

In [36]:
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

In [93]:
class CustomCOCODataset(Dataset):
    def __init__(self, img_path=None, ann_path=None, transforms=None):

        if not img_path or not ann_path:
            raise Exception('You must check your image or annotations path')

        self.img_path = img_path
        self.ann_path = ann_path
        self.transforms = transforms

        self.imgs = sorted(glob.glob(os.path.join(self.img_path, '*.jpg')))
        self.anns = COCO(self.ann_path)
        self.anns_ids = self.anns.getCatIds()
        self.anns_iscrowd = False

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx: int):

        #########
        # Image #
        #########
        imgId = int(self.imgs[idx].split('/')[-1].split('.')[0])
        img = Image.open(self.imgs[idx]).convert("RGB")

        # This is RBG data
        img_origin = cv2.imread(self.imgs[idx])
        img_origin = cv2.cvtColor(img_origin, cv2.COLOR_BGR2RGB)

        ################################################################################
        # Target                                                                       #
        # ['segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id', 'id'] #
        ################################################################################
        annsId = self.anns.getAnnIds(
            imgIds=imgId,
            catIds=self.anns_ids,
            iscrowd=self.anns_iscrowd
        )

        anns = self.anns.loadAnns(annsId)
        print(anns)

        targets = {}
        targets["boxes"] = torch.tensor(
            [utils.transform_bbox(ann['bbox']) for ann in anns])
        targets["labels"] = torch.tensor([ann['category_id'] for ann in anns])
        targets["image_id"] = torch.tensor(anns[0]['image_id'])
        targets["area"] = torch.tensor([ann['area'] for ann in anns])
        targets["iscrowd"] = torch.tensor([ann['iscrowd'] for ann in anns])

        #############
        # Transform #
        #############
        if self.transforms is not None:
            img = self.transforms(img)

        return img, targets

In [94]:
class CustomCOCOSampler(Sampler):
    def __init__(self, data_source):
        self.data_source = data_source

        self.existing_ann = {}
        self.missing_ann = {}
        
        imgIds = {idx:int(path.split('/')[-1].split('.')[0])
                  for idx, path in enumerate(self.data_source.imgs)}

        for idx, imgId in imgIds.items():
            annsId = self.data_source.anns.getAnnIds(
                imgIds=imgId,
                catIds=self.data_source.anns.getCatIds(),
                iscrowd=False,
            )

            anns = self.data_source.anns.loadAnns(annsId)

            if len(anns) <= 0:
                self.missing_ann[idx] = imgId
            else:
                self.existing_ann[idx] = imgId

    def __iter__(self):
        return iter(list(self.existing_ann.keys()))

    def __len__(self):
        return len(self.data_source)

In [95]:
def get_coco_dataset_dataloader(cfg):

    # DATASET
    dataset_train = CustomCOCODataset(
        img_path=cfg.DATA.TRAIN_IMG_PATH,
        ann_path=cfg.DATA.TRAIN_ANN_PATH,
        transforms=T.Compose([
            T.ToTensor(),
            ])
    )

    dataset_valid = CustomCOCODataset(
        img_path=cfg.DATA.VALID_IMG_PATH,
        ann_path=cfg.DATA.VALID_ANN_PATH,
        transforms=T.Compose([
            T.ToTensor(),
            ])
    )

    # SAMPLER
    sampler_train = CustomCOCOSampler(
        data_source=dataset_train
    )

    sampler_valid = CustomCOCOSampler(
        data_source=dataset_valid
    )

    # DATALOADER
    dataloader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=cfg.BATCH_SIZE,
        shuffle=cfg.SHUFFLE, 
        sampler=sampler_train,
        num_workers=cfg.NUM_WORKERS,  # default: 0
        collate_fn=collate_fn
    )

    dataloader_valid = torch.utils.data.DataLoader(
        dataset_valid,
        batch_size=1,  # default: 1 for validation
        shuffle=cfg.SHUFFLE, 
        sampler=sampler_valid,
        num_workers=cfg.NUM_WORKERS,  # default: 0
        collate_fn=collate_fn
    )

    return dataset_train, dataset_valid, dataloader_train, dataloader_valid

In [96]:
dataset_train, dataset_valid, dataloader_train, dataloader_valid = get_coco_dataset_dataloader(cfg)

loading annotations into memory...
Done (t=17.21s)
creating index...
index created!
loading annotations into memory...
Done (t=0.44s)
creating index...
index created!


In [97]:
dataset_train[0]

[{'segmentation': [[500.49, 473.53, 599.73, 419.6, 612.67, 375.37, 608.36, 354.88, 528.54, 269.66, 457.35, 201.71, 420.67, 187.69, 389.39, 192.0, 19.42, 360.27, 1.08, 389.39, 2.16, 427.15, 20.49, 473.53]], 'area': 120057.13925, 'iscrowd': 0, 'image_id': 9, 'bbox': [1.08, 187.69, 611.59, 285.84], 'category_id': 51, 'id': 1038967}, {'segmentation': [[357.03, 69.03, 311.73, 15.1, 550.11, 4.31, 631.01, 62.56, 629.93, 88.45, 595.42, 185.53, 513.44, 230.83, 488.63, 232.99, 437.93, 190.92, 429.3, 189.84, 434.7, 148.85, 410.97, 121.89, 359.19, 74.43, 358.11, 65.8]], 'area': 44434.751099999994, 'iscrowd': 0, 'image_id': 9, 'bbox': [311.73, 4.31, 319.28, 228.68], 'category_id': 51, 'id': 1039564}, {'segmentation': [[249.6, 348.99, 267.67, 311.72, 291.39, 294.78, 304.94, 294.78, 326.4, 283.48, 345.6, 273.32, 368.19, 269.93, 385.13, 268.8, 388.52, 257.51, 393.04, 250.73, 407.72, 240.56, 425.79, 230.4, 441.6, 229.27, 447.25, 237.18, 447.25, 256.38, 456.28, 254.12, 475.48, 263.15, 486.78, 271.06, 49

(tensor([[[0.0078, 0.0039, 0.0000,  ..., 0.5255, 0.5255, 0.5137],
          [0.0196, 0.0118, 0.0039,  ..., 0.5373, 0.5294, 0.5137],
          [0.0039, 0.0000, 0.0000,  ..., 0.5412, 0.5333, 0.5176],
          ...,
          [0.0118, 0.0235, 0.0275,  ..., 0.0000, 0.0157, 0.0392],
          [0.0196, 0.0196, 0.0275,  ..., 0.0510, 0.0235, 0.0118],
          [0.0157, 0.0196, 0.0275,  ..., 0.0039, 0.0078, 0.0078]],
 
         [[0.0941, 0.0902, 0.0863,  ..., 0.6706, 0.6706, 0.6588],
          [0.0980, 0.0902, 0.0863,  ..., 0.6824, 0.6745, 0.6588],
          [0.0824, 0.0784, 0.0745,  ..., 0.6863, 0.6784, 0.6627],
          ...,
          [0.0039, 0.0039, 0.0039,  ..., 0.0431, 0.0392, 0.0157],
          [0.0000, 0.0000, 0.0118,  ..., 0.0314, 0.0078, 0.0078],
          [0.0000, 0.0000, 0.0118,  ..., 0.0118, 0.0118, 0.0196]],
 
         [[0.4275, 0.4275, 0.4235,  ..., 0.7725, 0.7725, 0.7647],
          [0.4392, 0.4392, 0.4353,  ..., 0.7843, 0.7765, 0.7608],
          [0.4314, 0.4275, 0.4353,  ...,

## engine.py
---

In [46]:
import utils

In [61]:
def train_one_epoch(cfg, model, optimizer, data_loader, epoch):

    # now = str(re.sub('[^0-9]', '', str(datetime.datetime.now())))

    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = '\033[31mEpoch: {}\033[00m'.format(epoch)

    lr_scheduler = None

    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, cfg.PRINT_FREQ, header):
        images = list(image.to(cfg.DEVICE) for image in images)
        targets = [{k: v.to(cfg.DEVICE) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
        break

    return metric_logger

In [62]:
def _get_iou_types(model):
    model_without_ddp = model
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model_without_ddp = model.module
    iou_types = ["bbox"]
    if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
        iou_types.append("segm")
    if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
        iou_types.append("keypoints")
    return iou_types

In [63]:
@torch.no_grad()
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator

In [64]:
import math

In [66]:
for epoch in range(cfg.NUM_EPOCHS):
    metric_logger = train_one_epoch(cfg, model, optimizer, dataloader_train, epoch)
    lr_scheduler.step()
#     evaluate(model, dataloader_valid, device=cfg.DEVICE)

In [72]:
metric_logger.meters

defaultdict(utils.SmoothedValue,
            {'lr': <utils.SmoothedValue at 0x7f8e120626d0>,
             'loss': <utils.SmoothedValue at 0x7f8e11fbcb90>,
             'loss_classifier': <utils.SmoothedValue at 0x7f8e11fbc550>,
             'loss_box_reg': <utils.SmoothedValue at 0x7f8e11fbcfd0>,
             'loss_objectness': <utils.SmoothedValue at 0x7f8e11fbcc10>,
             'loss_rpn_box_reg': <utils.SmoothedValue at 0x7f8e11fbc450>})

In [73]:
metric_logger.meters['lr'].value

1.999e-07

In [75]:
metric_logger.meters['loss'].value

2.0176215171813965

## train.py
---

## test.py

## Reference
---