In [3]:
!python setup.py build_ext --inplace

import os
import random
import math

import numpy as np
import pandas as pd
from PIL import Image, ImageDraw


import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

import torchvision
from torchvision import datasets, models, transforms
from torchvision.models.detection.retinanet import RetinaNet
from torchvision.models.detection.faster_rcnn import FasterRCNN

import  torchvision.transforms.functional as F

from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import xml.etree.ElementTree as ET
import collections
from torchvision.datasets.voc import VisionDataset

from metrics import *

running build_ext
skipping 'compute_overlap.c' Cython extension (up-to-date)
copying build/lib.linux-x86_64-3.8/compute_overlap.cpython-38-x86_64-linux-gnu.so -> 


In [4]:
from collections import defaultdict, deque
import time
import datetime

import torch.distributed as dist
import  torchvision.transforms.functional as F

class SmoothedValue(object):
    """Running statistics over a stream of scalar values.

    The last ``window_size`` values are kept for the windowed statistics
    (``median``, ``avg``, ``max``, ``value``), while a running sum and count
    back the average over the whole series (``global_avg``).
    """

    def __init__(self, window_size=20, fmt=None):
        # Default rendering: windowed median followed by the global average.
        self.fmt = "{median:.4f} ({global_avg:.4f})" if fmt is None else fmt
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def update(self, value, n=1):
        """Record ``value``; it counts ``n`` times towards the global totals."""
        self.deque.append(value)
        self.total += value * n
        self.count += n

    def synchronize_between_processes(self):
        """
        Sum ``count`` and ``total`` over all distributed processes.

        Warning: does not synchronize the deque!
        """
        if is_dist_avail_and_initialized():
            # Pack both numbers into one tensor so a single all_reduce suffices.
            packed = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
            dist.barrier()
            dist.all_reduce(packed)
            synced_count, synced_total = packed.tolist()
            self.count = int(synced_count)
            self.total = synced_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        window = torch.tensor(list(self.deque))
        return window.median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window."""
        window = torch.tensor(list(self.deque), dtype=torch.float32)
        return window.mean().item()

    @property
    def global_avg(self):
        """Weighted mean over every value seen so far."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        # Every statistic is offered to the template; ``fmt`` picks what it needs.
        stats = dict(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value,
        )
        return self.fmt.format(**stats)

class MetricLogger(object):
    """Collection of named ``SmoothedValue`` meters with progress printing.

    Meters are created on first use through a ``defaultdict`` and can be read
    back as attributes (``logger.loss``).
    """

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one scalar per keyword into the meter of the same name."""
        for name, raw in kwargs.items():
            # Tensors are unwrapped to plain Python numbers first.
            scalar = raw.item() if isinstance(raw, torch.Tensor) else raw
            assert isinstance(scalar, (float, int))
            self.meters[name].update(scalar)

    def __getattr__(self, attr):
        # Only reached when normal lookup fails: meters first, then the
        # instance dict, otherwise the usual AttributeError.
        for namespace in (self.meters, self.__dict__):
            if attr in namespace:
                return namespace[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        return self.delimiter.join(
            "{}: {}".format(name, str(meter))
            for name, meter in self.meters.items()
        )

    def synchronize_between_processes(self):
        """Synchronize every meter across distributed processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register (or replace) the meter stored under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable``, printing progress as it goes.

        A line is printed every ``print_freq`` items and for the last item;
        a total-time summary is printed once the iterable is exhausted.
        ``iterable`` must support ``len()``.
        """
        header = header or ''
        n_items = len(iterable)
        has_cuda = torch.cuda.is_available()
        bytes_per_mb = 1024.0 * 1024.0

        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')

        # The item counter is padded to the width of the total count.
        fields = [
            header,
            '[{0:' + str(len(str(n_items))) + 'd}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
        ]
        if has_cuda:
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)

        for i, obj in enumerate(iterable):
            # Time spent waiting for the item vs. the full iteration time
            # (which includes whatever the consumer does after the yield).
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == n_items - 1:
                eta_seconds = iter_time.global_avg * (n_items - i)
                report = dict(
                    eta=str(datetime.timedelta(seconds=int(eta_seconds))),
                    meters=str(self),
                    time=str(iter_time),
                    data=str(data_time),
                )
                if has_cuda:
                    report['memory'] = torch.cuda.max_memory_allocated() / bytes_per_mb
                print(log_msg.format(i, n_items, **report))
            end = time.time()

        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / n_items))
        
        

def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Build a ``LambdaLR`` that linearly warms the learning rate up.

    The multiplier starts at ``warmup_factor`` for step 0, grows linearly and
    is exactly 1 from step ``warmup_iters`` onwards.
    """

    def lr_multiplier(step):
        if step < warmup_iters:
            progress = float(step) / warmup_iters
            return warmup_factor * (1 - progress) + progress
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_multiplier)

def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is available *and* initialized."""
    # Short-circuit keeps is_initialized() from being called when the
    # distributed package is not available at all.
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Number of distributed processes, or 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1

def reduce_dict(input_dict, average=True):
    """
    Reduce the values of a dictionary across all processes.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum

    Returns:
        A dict with the same keys as ``input_dict`` holding the reduced values.
        With fewer than two processes ``input_dict`` itself is returned
        unchanged.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict

    with torch.no_grad():
        # sort the keys so that they are consistent across processes
        names = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[name] for name in names], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= world_size
        reduced_dict = dict(zip(names, stacked))
    return reduced_dict

def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: detection model; called as ``model(images, targets)`` it must
            return a dict of loss tensors.
        optimizer: optimizer stepping the model parameters.
        data_loader: yields ``(images, targets)`` batches; ``targets`` are
            dicts of tensors.
        device: device the images and target tensors are moved to.
        epoch (int): epoch index; during epoch 0 a linear warm-up schedule is
            applied on top of the optimizer's learning rate.
        print_freq (int): progress is printed every ``print_freq`` iterations.

    Returns:
        MetricLogger: the logger holding the loss and learning-rate meters.

    Raises:
        SystemExit: when the (reduced) total loss is not finite.
    """
    # ``sys`` is not imported anywhere at notebook level; without this import
    # the non-finite-loss branch below failed with NameError instead of
    # stopping the run as intended.
    import sys

    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    # Per-iteration warm-up, first epoch only.
    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        # Total loss used for the backward pass.
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        # Abort on NaN / inf before the bad gradients reach the weights.
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger

@torch.no_grad()
def evaluate(model, data_loader, device='cuda'):
    """Run ``model`` in eval mode over ``data_loader`` and collect the results.

    Args:
        model: detection model; called as ``model(images)`` it must return one
            dict of tensors per image.
        data_loader: yields ``(images, targets)`` batches.
        device: device the images are moved to before inference.

    Returns:
        list: one ``(targets, outputs)`` tuple per batch, where the tensors in
        ``outputs`` have been moved to the CPU and ``targets`` are passed
        through untouched.
    """
    cpu_device = torch.device("cpu")
    inference_res = []

    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)  # Is it necessary? Who knows...
    try:
        model.eval()

        for images, targets in data_loader:
            images = list(img.to(device) for img in images)

            if torch.cuda.is_available():
                torch.cuda.synchronize()
            outputs = model(images)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            inference_res.append((targets, outputs))
    finally:
        # Restore the thread count even if inference fails (e.g. CUDA out of
        # memory); otherwise the kernel would stay limited to one thread.
        torch.set_num_threads(n_threads)

    return inference_res


def collate_fn(batch):
    """
    Transpose a batch of samples into a tuple of per-field tuples.

    Samples are kept as plain tuples rather than stacked, which helps when
    the items of a batch hold a different number of object instances.
    """
    per_field = zip(*batch)
    return tuple(per_field)

class ToTensor(object):
    """Paired transform: converts the image with ``F.to_tensor`` and passes
    the target through unchanged."""

    def __call__(self, image, target):
        return F.to_tensor(image), target

class Compose(object):
    """Chain of paired transforms, each called as ``t(image, target)`` and
    returning the updated ``(image, target)`` pair."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        pair = image, target
        for transform in self.transforms:
            # Each step must hand back exactly two values.
            new_image, new_target = transform(*pair)
            pair = new_image, new_target
        return pair
    
class RandomHorizontalFlip(object):
    """Mirror the image (and its boxes) left-right with probability ``prob``."""

    def __init__(self, prob):
        self.prob = prob

    def __call__(self, image, target):
        # Most of the time (for prob < 1) nothing happens: bail out early.
        if not (random.random() < self.prob):
            return image, target

        _, img_width = image.shape[-2:]
        mirrored = image.flip(-1)

        boxes = target["boxes"]
        if boxes.shape[0] > 0:
            # Mirror the x coordinates and swap them so that xmin <= xmax
            # still holds; the box tensor is updated in place.
            boxes[:, [0, 2]] = img_width - boxes[:, [2, 0]]
            target["boxes"] = boxes
        return mirrored, target
    
class Resize(object):
    """Resize the image to ``target_size`` with bilinear interpolation and
    rescale the target boxes by the same factors."""

    def __init__(self, target_size):
        self.target_size = target_size

    def __call__(self, image, target):
        src_height, src_width = image.shape[-2:]
        resized = F.resize(image, self.target_size, interpolation=F.InterpolationMode.BILINEAR)
        dst_height, dst_width = resized.shape[-2:]

        boxes = target["boxes"]
        if boxes.shape[0] > 0:
            # Columns 0/2 are scaled by the width ratio, columns 1/3 by the
            # height ratio; the box tensor is updated in place.
            boxes[:, [0, 2]] = boxes[:, [0, 2]] * dst_width / src_width
            boxes[:, [1, 3]] = boxes[:, [1, 3]] * dst_height / src_height
            target["boxes"] = boxes

        return resized, target
    
def get_transform(train, target_size):
    """Build the paired (image, target) transform pipeline.

    The image is first converted to a tensor and resized to ``target_size``;
    when ``train`` is truthy a random horizontal flip (probability 0.5) is
    added for data augmentation.
    """
    # converts the image, a PIL image, into a PyTorch Tensor, then resizes it
    pipeline = [ToTensor(), Resize(target_size)]
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        pipeline.append(RandomHorizontalFlip(0.5))
    return Compose(pipeline)

In [11]:
# Run configuration shared by the cells below.
params = {
    # Passed as-is to the Resize transform and split into the detector's
    # min_size / max_size when the model is built.
    'target_size': (1500, 2000),
    # Batch size of the training data loader.
    'batch_size': 1,
    # Base learning rate of the SGD optimizer.
    'lr': 0.001,
}

# Root of the VOC-style dataset (JPEGImages / Annotations / ImageSets).
voc_root = '../data/TrainingData'

In [12]:
# Reworked class from pytorch (see https://pytorch.org/vision/0.8/_modules/torchvision/datasets/voc.html#VOCDetection)

class LADDDataSET(torchvision.datasets.VisionDataset):
    """Pascal-VOC-style detection dataset with a single object class.

    Layout read from ``root``::

        JPEGImages/<name>.jpg
        Annotations/<name>.xml
        ImageSets/Main/<image_set>.txt    (one <name> per line)

    Args:
        root: dataset root directory.
        image_set: name of the split file in ``ImageSets/Main`` (without the
            ``.txt`` extension).
        transforms: optional callable ``(image, target) -> (image, target)``.

    Raises:
        RuntimeError: if ``root`` is not an existing directory.
    """

    def __init__(
            self,
            root: str,
            image_set: str,
            transforms: Optional[Callable] = None):
        super(LADDDataSET, self).__init__(root, transforms=transforms)
        self.image_set = image_set

        if not os.path.isdir(root):
            raise RuntimeError('Dataset not found or corrupted.')

        image_dir = os.path.join(root, 'JPEGImages')
        annotation_dir = os.path.join(root, 'Annotations')
        splits_dir = os.path.join(root, 'ImageSets/Main')
        split_f = os.path.join(splits_dir, image_set.rstrip('\n') + '.txt')

        with open(split_f, "r") as f:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # would otherwise become bogus ".jpg" / ".xml" entries.
            file_names = [line.strip() for line in f if line.strip()]

        self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
        self.annotations = [os.path.join(annotation_dir, x + ".xml") for x in file_names]
        assert (len(self.images) == len(self.annotations))

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is a dict with
            ``"boxes"`` (float32 tensor of shape ``[N, 4]`` holding
            xmin, ymin, xmax, ymax) and ``"labels"`` (int64 tensor of ``N``
            ones), both possibly changed by ``transforms``.
        """
        # Close the file handle as soon as the pixels are decoded.
        with Image.open(self.images[index]) as raw:
            img = raw.convert('RGB')
        description = LADDDataSET.parse_voc_xml(
            ET.parse(self.annotations[index]).getroot())

        # get bounding box coordinates
        objects = description['annotation']['object']
        boxes = [
            [int(obj['bndbox'][key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')]
            for obj in objects
        ]

        target = {
            # reshape keeps the [N, 4] layout even when there are no objects;
            # a bare empty list would give shape (0,), which torchvision
            # detection models reject as a target.
            "boxes": torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            # there is only one class
            "labels": torch.ones((len(objects),), dtype=torch.int64),
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self) -> int:
        return len(self.images)

    @staticmethod
    def parse_voc_xml(node: ET.Element) -> Dict[str, Any]:
        """Recursively convert an XML element into nested dicts.

        A node with children maps its tag to a dict of its children, where a
        child tag that occurs several times becomes a list. For an
        ``annotation`` node the ``object`` entry is always a list (empty when
        the annotation has no objects). A leaf node with text maps its tag to
        the stripped text; a leaf without text yields an empty dict.
        """
        voc_dict: Dict[str, Any] = {}
        children = list(node)
        if children:
            def_dic: Dict[str, Any] = collections.defaultdict(list)
            for dc in map(LADDDataSET.parse_voc_xml, children):
                for ind, v in dc.items():
                    def_dic[ind].append(v)
            if node.tag == 'annotation':
                # Wrap once more so the single-element unwrapping below hands
                # back the full list of objects, whatever its length.
                def_dic['object'] = [def_dic['object']]
            voc_dict = {
                node.tag:
                    {ind: v[0] if len(v) == 1 else v
                     for ind, v in def_dic.items()}
            }
        if node.text and not children:
            voc_dict[node.tag] = node.text.strip()
        return voc_dict

In [13]:
# The PyTorch implementation of RetinaNet doesn't support training on images without any objects (which probably needs to be fixed),
# see https://github.com/pytorch/vision/blob/master/torchvision/models/detection/retinanet.py#L475
# As a temporary solution, we just filter out the empty images for now.

splits_dir = os.path.join(voc_root, 'ImageSets/Main')
annotation_dir = os.path.join(voc_root, 'Annotations')

# Image names of the original training split, one per line.
with open(os.path.join(splits_dir, 'train.txt'), "r") as f:
    file_names = [line.strip() for line in f.readlines()]

# Keep only the names whose annotation declares at least one object.
non_empty = []
for a in file_names:
    xml_path = os.path.join(annotation_dir, a + ".xml")
    description = LADDDataSET.parse_voc_xml(ET.parse(xml_path).getroot())
    num_objs = len(description['annotation']['object'])
    if num_objs > 0:
        non_empty.append(a + '\n')

# The filtered split is written next to train.txt.
with open(os.path.join(splits_dir, 'train_non_empty.txt'), "w") as f:
    f.writelines(non_empty)

print('Total images ' + str(len(file_names)), ' non empty: ' + str(len(non_empty)))

Total images 1020  non empty: 987


In [14]:
# Visual sanity check of the dataset: load one sample through the training
# transforms and draw its ground-truth boxes in red.
im_idx = 99

dataset = LADDDataSET(
    voc_root,
    'test',
    get_transform(train=True, target_size=params['target_size']),
)
image, target = dataset[im_idx]
im = F.to_pil_image(image)
draw = ImageDraw.Draw(im)

for bb in target['boxes']:
    x0, y0, x1, y1 = bb[0], bb[1], bb[2], bb[3]
    # Closed polyline around the box corners.
    outline = [(x0, y0), (x0, y1), (x1, y1), (x1, y0), (x0, y0)]
    draw.line(outline, width=4, fill=(255, 0, 0))

im.show()

In [15]:
# Training uses the filtered split (images with at least one object) plus
# augmentation; validation uses the plain pipeline.
train_transforms = get_transform(train=True, target_size=params['target_size'])
val_transforms = get_transform(train=False, target_size=params['target_size'])

dataset_train = LADDDataSET(voc_root, 'train_non_empty', train_transforms)
dataset_val = LADDDataSET(voc_root, 'val', val_transforms)

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=params['batch_size'],
    shuffle=True,
    num_workers=4,
    collate_fn=collate_fn,
)

data_loader_val = torch.utils.data.DataLoader(
    dataset_val,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn,
)

In [16]:
# Faster R-CNN with a ResNet-50 FPN backbone: only the backbone starts from
# pretrained weights, and none of its layers is trainable.
# num_classes=2 is presumably background + the single object class (TODO confirm).
min_side, max_side = params['target_size']
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
    pretrained=False,
    pretrained_backbone=True,
    num_classes=2,
    trainable_backbone_layers=0,
    min_size=min_side,
    max_size=max_side,
)

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /home/gosha20777/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth
24.0%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

60.4%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

95.1%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp

In [21]:
device = torch.device('cuda')
model = model.to(device)

# Plain SGD with momentum and weight decay over all model parameters.
sgd_options = dict(lr=params['lr'], momentum=0.9, weight_decay=0.0005)
optimizer = torch.optim.SGD(model.parameters(), **sgd_options)

# Learning rate is multiplied by 0.1 every 3 scheduler steps.
# NOTE: this rebinds the name ``lr_scheduler`` imported from torch.optim
# in the first cell.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [22]:
# Create the checkpoint directory *before* training: torch.save() does not
# create missing directories, and failing only after the last epoch would
# throw the whole run away.
weights_path = 'weights/resnet50_FRCNN_baseline.pth'
os.makedirs(os.path.dirname(weights_path), exist_ok=True)

for epoch in range(2):
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=100)
    print ("Train done, evaluating.")
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the validation dataset, on the same device as training
    # (evaluate() would otherwise fall back to its hard-coded 'cuda' default)
    inference_res = evaluate(model, data_loader_val, device=device)
    print('Inference done, computing mAp : ')
    print(evaluate_res(inference_res, iou_threshold = 0.5, score_threshold = 0.05))    
    print(evaluate_res(inference_res, iou_threshold = 0.6, score_threshold = 0.05))
    print('Epoch Done')
    
torch.save(model.state_dict(), weights_path)

RuntimeError: CUDA out of memory. Tried to allocate 186.00 MiB (GPU 0; 3.82 GiB total capacity; 1.90 GiB already allocated; 197.06 MiB free; 2.10 GiB reserved in total by PyTorch)

In [23]:
# Run the model on the first batch of the test split and draw the
# predictions (red, line width grows with the score) over the ground truth
# (green).
dataset_test = LADDDataSET(voc_root,'test',get_transform(train=False,target_size=params['target_size'])) 
# The loader must wrap dataset_test; it used to wrap dataset_val, which left
# the test split unused.
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=1
     ,collate_fn=collate_fn
)

image_idx = 0

cpu_device = torch.device("cpu")
model.eval()
# Inference only: no autograd graph is needed, which also saves GPU memory.
with torch.no_grad():
    for images, targets in data_loader_test:
        g_images = list(img.to(device) for img in images)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        outputs = model(g_images)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        res = targets, outputs
        break  # only the first batch is visualized


im = F.to_pil_image(images[image_idx])
draw = ImageDraw.Draw(im)

# Scores and boxes are both taken from the same image's detections
# (boxes used to be read from outputs[0] regardless of image_idx).
detections = outputs[image_idx]
for idx in range(len(detections['boxes'])):
    # Line width 1..10 proportional to the score; never 0.
    width = max(1, math.ceil(detections['scores'][idx].item() * 10))
    bb = detections['boxes'][idx]
    draw.line([(bb[0], bb[1]), (bb[0], bb[3]), (bb[2], bb[3]),
               (bb[2], bb[1]), (bb[0], bb[1])], width=width, fill=(255, 0, 0))

# Ground truth: at most the first 10 boxes.
for bb in targets[image_idx]['boxes'][:10]:
    draw.line([(bb[0], bb[1]), (bb[0], bb[3]), (bb[2], bb[3]),
               (bb[2], bb[1]), (bb[0], bb[1])], width=4, fill=(0,255, 0))
im.show()