In [19]:
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

In [32]:
import os
import numpy as np
import torch
from PIL import Image


class AirplaneDataset(object):
    def __init__(self, root, transforms):
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(os.path.join("..", "rcnn-keras", "Airplane_images"))))
        self.annot = list(sorted(os.listdir(os.path.join("..", "rcnn-keras", "Airplanes_Annotations"))))

    def __getitem__(self, idx):
        # load images ad masks
        img_path = os.path.join("..", "rcnn-keras", "Airplane_images", self.imgs[idx])
        annot_path = os.path.join("..", "rcnn-keras", "Airplanes_Annotations", self.annot[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        boxes = pd.read_csv(annot_path)
        box_values=[]
        num = 0
        for row in boxes.iterrows():
            x1 = int(row[1][0].split(" ")[0])
            y1 = int(row[1][0].split(" ")[1])
            x2 = int(row[1][0].split(" ")[2])
            y2 = int(row[1][0].split(" ")[3])
            box_values.append([x1, y1, x2, y2])
            num += 1

        # convert everything into a torch.Tensor
        box_values = torch.as_tensor(box_values, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num,), dtype=torch.int64)

        image_id = torch.tensor([idx])
        try:
            area = (box_values[:, 3] - box_values[:, 1]) * (box_values[:, 2] - box_values[:, 0])
        except IndexError as ie:
            box_values = torch.tensor([[0.,0.,0.,0.]])
            area = torch.tensor([[0.,0.,0.,0.]]) * torch.tensor([[0]])
            labels = torch.zeros((1,), dtype=torch.int64)
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num,), dtype=torch.int64)

        target = {}
        target["boxes"] = box_values
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.imgs)

In [21]:
from torchvision import transforms as T

def get_transform(train):
    transforms = []
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    transforms.append(T.ToTensor())
    return T.Compose(transforms)

In [22]:
from collections import defaultdict, deque
import time
import math
import datetime

def collate_fn(batch):
    return tuple(zip(*batch))

class SmoothedValue(object):
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

    def __init__(self, window_size=20, fmt=None):
        if fmt is None:
            fmt = "{median:.4f} ({global_avg:.4f})"
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        self.fmt = fmt

    def update(self, value, n=1):
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """
        Warning: does not synchronize the deque!
        """
        if not is_dist_avail_and_initialized():
            return
        t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(t)
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

    @property
    def median(self):
        d = torch.tensor(list(self.deque))
        return d.median().item()

    @property
    def avg(self):
        d = torch.tensor(list(self.deque), dtype=torch.float32)
        return d.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count

    @property
    def max(self):
        return max(self.deque)

    @property
    def value(self):
        return self.deque[-1]

    def __str__(self):
        return self.fmt.format(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value)

class MetricLogger(object):
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        if attr in self.meters:
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        i = 0
        if not header:
            header = ''
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
        if torch.cuda.is_available():
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}',
                'max mem: {memory:.0f}'
            ])
        else:
            log_msg = self.delimiter.join([
                header,
                '[{0' + space_fmt + '}/{1}]',
                'eta: {eta}',
                '{meters}',
                'time: {time}',
                'data: {data}'
            ])
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
            yield obj
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == len(iterable) - 1:
                eta_seconds = iter_time.global_avg * (len(iterable) - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                if torch.cuda.is_available():
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time),
                        memory=torch.cuda.max_memory_allocated() / MB))
                else:
                    print(log_msg.format(
                        i, len(iterable), eta=eta_string,
                        meters=str(self),
                        time=str(iter_time), data=str(data_time)))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / len(iterable)))

def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):

    def f(x):
        if x >= warmup_iters:
            return 1
        alpha = float(x) / warmup_iters
        return warmup_factor * (1 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, f)

def reduce_dict(input_dict, average=True):
    """
    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Reduce the values in the dictionary from all processes so that all processes
    have the averaged results. Returns a dict with the same fields as
    input_dict, after reduction.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        names = []
        values = []
        # sort the keys so that they are consistent across processes
        for k in sorted(input_dict.keys()):
            names.append(k)
            values.append(input_dict[k])
        values = torch.stack(values, dim=0)
        dist.all_reduce(values)
        if average:
            values /= world_size
        reduced_dict = {k: v for k, v in zip(names, values)}
    return reduced_dict


def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)

        lr_scheduler = warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger, losses_reduced

In [23]:
import torch.distributed as dist
import pandas as pd

def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()

In [24]:
def main():
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # our dataset has two classes only - background and person
    num_classes = 2
    # use our dataset and defined transformations
    dataset = AirplaneDataset('Airplane', get_transform(train=True))
    dataset_test = AirplaneDataset('Airplane', get_transform(train=False))

    # split the dataset in train and test set
    indices = torch.randperm(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True, num_workers=4,
        collate_fn=collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False, num_workers=4,
        collate_fn=collate_fn)

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    model.train()

    # move model to the right device
    model.to(device)


    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


    num_epochs = 10

    for epoch in range(num_epochs):
        print(epoch)
        if os.path.exists('faster_rcnn_checkpoint.tar'):
            checkpoint = torch.load('faster_rcnn_checkpoint.tar')
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            epoch = checkpoint['epoch']

        _, loss = train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            }, 'faster_rcnn_checkpoint.tar')
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)
        

    print("That's it!")

[positional arguments error](https://discuss.pytorch.org/t/t-compose-typeerror-call-takes-2-positional-arguments-but-3-were-given/62529) 

In [34]:
main()

0
Epoch: [0]  [  0/342]  eta: 3:03:28  lr: 0.000020  loss: 0.9814 (0.9814)  loss_classifier: 0.8234 (0.8234)  loss_box_reg: 0.1425 (0.1425)  loss_objectness: 0.0071 (0.0071)  loss_rpn_box_reg: 0.0085 (0.0085)  time: 32.1896  data: 0.2668
Loss is inf, stopping training
{'loss_classifier': tensor(0.4268, grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(0.0819, grad_fn=<DivBackward0>), 'loss_objectness': tensor(5.9240, grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(inf, grad_fn=<DivBackward0>)}


NameError: name 'sys' is not defined

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-7-263240bbee7e> in <module>
----> 1 main()

<ipython-input-4-fbb153b4d482> in main()
     50             loss = checkpoint['loss']
     51         # train for one epoch, printing every 10 iterations
---> 52         _, loss = train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
     53         # update the learning rate
     54         lr_scheduler.step()

<ipython-input-5-165e96873bc2> in train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq)
    211         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    212 
--> 213         loss_dict = model(images, targets)
    214 
    215         losses = sum(loss for loss in loss_dict.values())

~/python/virtualenvs/rcnn-pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

~/python/virtualenvs/rcnn-pytorch/lib/python3.7/site-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
     64             original_image_sizes.append((val[0], val[1]))
     65 
---> 66         images, targets = self.transform(images, targets)
     67         features = self.backbone(images.tensors)
     68         if isinstance(features, torch.Tensor):

~/python/virtualenvs/rcnn-pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    548             result = self._slow_forward(*input, **kwargs)
    549         else:
--> 550             result = self.forward(*input, **kwargs)
    551         for hook in self._forward_hooks.values():
    552             hook_result = hook(self, input, result)

~/python/virtualenvs/rcnn-pytorch/lib/python3.7/site-packages/torchvision/models/detection/transform.py in forward(self, images, targets)
     43                                  "of shape [C, H, W], got {}".format(image.shape))
     44             image = self.normalize(image)
---> 45             image, target_index = self.resize(image, target_index)
     46             images[i] = image
     47             if targets is not None and target_index is not None:

~/python/virtualenvs/rcnn-pytorch/lib/python3.7/site-packages/torchvision/models/detection/transform.py in resize(self, image, target)
     96 
     97         bbox = target["boxes"]
---> 98         bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
     99         target["boxes"] = bbox
    100 

~/python/virtualenvs/rcnn-pytorch/lib/python3.7/site-packages/torchvision/models/detection/transform.py in resize_boxes(boxes, original_size, new_size)
    218     ]
    219     ratio_height, ratio_width = ratios
--> 220     xmin, ymin, xmax, ymax = boxes.unbind(1)
    221 
    222     xmin = xmin * ratio_width

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [None]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
dataset = AirplaneDataset('Airplane', get_transform(train=True))
data_loader = torch.utils.data.DataLoader(
 dataset, batch_size=2, shuffle=True, num_workers=4,
 collate_fn=collate_fn)
# For Training
images,targets = next(iter(data_loader))
images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]
output = model(images,targets)   # Returns losses and detections
# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x) 