# Import Cool Stuff

In [1]:
from __future__ import print_function

from collections import defaultdict, deque
import datetime
import pickle
import time
import torch.distributed as dist
import errno

import collections
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image, ImageFile
import pandas as pd
from tqdm import tqdm
from torchvision import transforms
import torchvision
import random
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

ImageFile.LOAD_TRUNCATED_IMAGES = True

# Utility Functions (hidden)

In [2]:
class SmoothedValue(object):
    """Running statistics for a stream of scalar values.

    The most recent ``window_size`` values are kept in a bounded deque and
    back the windowed statistics (``median``, ``avg``, ``max``, ``value``).
    A separate running ``count``/``total`` backs ``global_avg``, the average
    over everything ever passed to :meth:`update`.
    """

    def __init__(self, window_size=20, fmt=None):
        """Create an empty meter.

        Args:
            window_size: maximum number of recent values retained.
            fmt: ``str.format`` template used by ``__str__``; it may refer to
                ``median``, ``avg``, ``global_avg``, ``max`` and ``value``.
        """
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        # Default rendering: windowed median followed by the global average.
        self.fmt = "{median:.4f} ({global_avg:.4f})" if fmt is None else fmt

    def update(self, value, n=1):
        """Record ``value``; it enters the window once but is weighted by
        ``n`` in the global count and total."""
        self.deque.append(value)
        self.count += n
        self.total += value * n

    def synchronize_between_processes(self):
        """Sum ``count`` and ``total`` across all distributed processes.

        Warning: the window (deque) itself is NOT synchronized. The exchange
        tensor is created on the CUDA device.
        """
        if not is_dist_avail_and_initialized():
            return
        packed = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
        dist.barrier()
        dist.all_reduce(packed)
        summed_count, summed_total = packed.tolist()
        self.count = int(summed_count)
        self.total = summed_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window (float32)."""
        return torch.tensor(list(self.deque), dtype=torch.float32).mean().item()

    @property
    def global_avg(self):
        """Weighted mean over every update seen so far."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        # Every statistic is computed so that any custom template can use it.
        stats = {
            "median": self.median,
            "avg": self.avg,
            "global_avg": self.global_avg,
            "max": self.max,
            "value": self.value,
        }
        return self.fmt.format(**stats)


def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors).

    The object is pickled into a byte tensor on the CUDA device, padded to
    the largest payload size among the ranks, exchanged with
    ``dist.all_gather`` and unpickled again on the receiving side.

    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank; just ``[data]``
        when the world size is 1 (no communication happens in that case).
    """
    world_size = get_world_size()
    if world_size == 1:
        return [data]

    # serialize the object into a byte tensor and move it to the GPU
    buffer = pickle.dumps(data)
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to("cuda")

    # obtain Tensor size of each rank
    # (local_size is a 1-element tensor, not a Python int)
    local_size = torch.tensor([tensor.numel()], device="cuda")
    size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
    dist.all_gather(size_list, local_size)
    size_list = [int(size.item()) for size in size_list]
    max_size = max(size_list)

    # receiving Tensor from all ranks
    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    tensor_list = []
    for _ in size_list:
        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
    # local_size is a 1-element tensor, so this comparison relies on the
    # truthiness of a 1-element boolean tensor.
    if local_size != max_size:
        # Padding content is uninitialised; it is cut off again below using
        # the true per-rank sizes.
        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
        tensor = torch.cat((tensor, padding), dim=0)
    dist.all_gather(tensor_list, tensor)

    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        # drop the padding before deserialising
        buffer = tensor.cpu().numpy().tobytes()[:size]
        # NOTE(review): pickle.loads executes arbitrary code from the payload;
        # this is only acceptable because the bytes come from peer ranks of
        # the same job — confirm that assumption holds for your deployment.
        data_list.append(pickle.loads(buffer))

    return data_list


def reduce_dict(input_dict, average=True):
    """
    Reduce the values of a dictionary across all processes so that every
    process ends up with the same (summed or averaged) results.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum
    Returns:
        A dict with the same fields as input_dict, after reduction. When
        fewer than two processes take part, input_dict itself is returned
        unchanged.
    """
    world_size = get_world_size()
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        # Sorted key order keeps the stacked layout identical on every rank.
        keys = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[key] for key in keys], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= world_size
        reduced = dict(zip(keys, stacked))
    return reduced


class MetricLogger(object):
    """Collect named ``SmoothedValue`` meters and print periodic progress.

    Meters are created lazily on first :meth:`update` and can be read back as
    attributes (``logger.loss``). :meth:`log_every` wraps an iterable and
    prints timing, ETA, meter values and peak CUDA memory every
    ``print_freq`` iterations.
    """

    def __init__(self, delimiter="\t"):
        """
        Args:
            delimiter: string placed between the fields of a log line.
        """
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one scalar per keyword into the meter of the same name.

        Tensors are converted with ``.item()``; every value must end up as a
        ``float`` or ``int``.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        # __getattr__ only runs after normal lookup failed. Read `meters`
        # through __dict__ so that an instance without it (e.g. one being
        # rebuilt by copy/pickle before its state is restored) raises
        # AttributeError instead of recursing into __getattr__ forever.
        meters = self.__dict__.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        """Render every meter as ``name: value`` joined by the delimiter."""
        return self.delimiter.join(
            "{}: {}".format(name, str(meter))
            for name, meter in self.meters.items()
        )

    def synchronize_between_processes(self):
        """Synchronize the count/total of every meter across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register (or replace) the meter stored under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` while printing progress lines.

        Args:
            iterable: sized iterable (``len()`` must work), e.g. a DataLoader.
            print_freq: print every ``print_freq`` iterations and on the
                last one.
            header: optional prefix for every printed line.

        Yields:
            The items of ``iterable``, unchanged.
        """
        i = 0
        if not header:
            header = ''
        num_items = len(iterable)
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        # Pad the iteration counter to the width of the total count.
        space_fmt = ':' + str(len(str(num_items))) + 'd'
        log_msg = self.delimiter.join([
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}',
            'max mem: {memory:.0f}'
        ])
        MB = 1024.0 * 1024.0
        for obj in iterable:
            # Time spent waiting for the next item (data loading).
            data_time.update(time.time() - end)
            yield obj
            # Full iteration time, including the consumer's work on `obj`.
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == num_items - 1:
                eta_seconds = iter_time.global_avg * (num_items - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                print(log_msg.format(
                    i, num_items, eta=eta_string,
                    meters=str(self),
                    time=str(iter_time), data=str(data_time),
                    memory=torch.cuda.max_memory_allocated() / MB))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        # Guard the per-iteration average against an empty iterable.
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / max(num_items, 1)))


def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field.

    A list such as ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Build a ``LambdaLR`` that linearly warms the learning rate up.

    The multiplier starts at ``warmup_factor`` for step 0, grows linearly and
    is exactly 1 from step ``warmup_iters`` onwards.

    Args:
        optimizer: optimizer whose learning rate is scaled.
        warmup_iters: number of scheduler steps the warm-up lasts.
        warmup_factor: multiplier applied at step 0.

    Returns:
        torch.optim.lr_scheduler.LambdaLR wrapping ``optimizer``.
    """

    def lr_multiplier(step):
        if step < warmup_iters:
            # Linear interpolation between warmup_factor and 1.
            progress = float(step) / warmup_iters
            return warmup_factor * (1 - progress) + progress
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_multiplier)


def mkdir(path):
    """Create ``path`` (including parents), ignoring "already exists".

    Any ``OSError`` other than ``EEXIST`` is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            return
        raise


def setup_for_distributed(is_master):
    """
    Replace the builtin ``print`` so that only the master process prints.

    On non-master processes a call still prints when it passes
    ``force=True``; the ``force`` keyword is always removed before the call
    is handed to the real ``print``.
    """
    import builtins

    original_print = builtins.print

    def print(*args, **kwargs):
        forced = kwargs.pop('force', False)
        if not (is_master or forced):
            return
        original_print(*args, **kwargs)

    builtins.print = print


def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is both available and has an
    initialized process group."""
    # Short-circuits: is_initialized() is not queried when unavailable.
    return dist.is_available() and dist.is_initialized()


def get_world_size():
    """Number of participating processes; 1 when not running distributed."""
    if is_dist_avail_and_initialized():
        return dist.get_world_size()
    return 1


def get_rank():
    """Rank of the current process; 0 when not running distributed."""
    if is_dist_avail_and_initialized():
        return dist.get_rank()
    return 0


def is_main_process():
    """Whether the current process is rank 0."""
    rank = get_rank()
    return rank == 0


def save_on_master(*args, **kwargs):
    """Forward the arguments to ``torch.save``, but only on the main
    process; other processes do nothing."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)


def init_distributed_mode(args):
    """Configure ``args`` and torch.distributed from environment variables.

    * ``RANK`` and ``WORLD_SIZE`` present: rank, world size and GPU index are
      read from ``RANK``, ``WORLD_SIZE`` and ``LOCAL_RANK``.
    * otherwise ``SLURM_PROCID`` present: the rank is read from it and the
      GPU index is the rank modulo the number of CUDA devices
      (``args.world_size`` is not set in this branch and must already exist).
    * otherwise: prints a notice, sets ``args.distributed = False`` and
      returns without touching torch.distributed.

    In the distributed cases the NCCL process group is initialised from
    ``args.dist_url``, all ranks are synchronised with a barrier and printing
    is disabled on non-zero ranks.
    """
    env = os.environ
    if 'RANK' in env and 'WORLD_SIZE' in env:
        args.rank = int(env["RANK"])
        args.world_size = int(env['WORLD_SIZE'])
        args.gpu = int(env['LOCAL_RANK'])
    elif 'SLURM_PROCID' in env:
        args.rank = int(env['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    init_banner = '| distributed init (rank {}): {}'.format(args.rank, args.dist_url)
    print(init_banner, flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)

# Training Function

In [3]:
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Run one training epoch and log progress every ``print_freq`` steps.

    Args:
        model: detection model that returns a dict of losses when called
            with ``(images, targets)`` in training mode.
        optimizer: optimizer updating the model parameters.
        data_loader: yields ``(images, targets)`` batches; targets are dicts
            of tensors.
        device: device the images and target tensors are moved to.
        epoch: epoch index; during epoch 0 a linear learning-rate warm-up is
            stepped after every iteration.
        print_freq: logging interval, in iterations.
    """
    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    # Per-iteration warm-up is only used during the very first epoch.
    warmup = None
    if epoch == 0:
        warmup = warmup_lr_scheduler(
            optimizer,
            min(1000, len(data_loader) - 1),
            1. / 1000,
        )

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = [image.to(device) for image in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        loss_dict = model(images, targets)
        total_loss = sum(loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_dict(loss_dict)
        total_loss_reduced = sum(loss_dict_reduced.values())

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if warmup is not None:
            warmup.step()

        metric_logger.update(loss=total_loss_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

# RLE to Mask

In [4]:
def rle2mask(rle, width, height):
    """Decode a relative run-length encoding into a 0/1 mask.

    ``rle`` is a whitespace-separated list of ``offset length`` pairs. Each
    offset is counted from the end of the previous run (from position 0 for
    the first run).

    Args:
        rle: encoded string.
        width: first dimension of the returned array.
        height: second dimension of the returned array.

    Returns:
        Float numpy array of shape ``(width, height)`` holding 1 inside runs
        and 0 elsewhere.
    """
    values = np.asarray([int(token) for token in rle.split()])
    offsets, run_lengths = values[0::2], values[1::2]

    flat = np.zeros(width * height)
    cursor = 0
    for i in range(len(offsets)):
        begin = cursor + offsets[i]
        cursor = begin + run_lengths[i]
        flat[begin:cursor] = 1

    return flat.reshape(width, height)

# SIIM Dataset Class

In [5]:
class SIIMDataset(torch.utils.data.Dataset):
    """Detection/segmentation dataset built from an RLE annotation CSV.

    Every CSV row whose PNG exists in ``img_dir`` and whose
    `` EncodedPixels`` value is not ``"-1"`` becomes one sample. A sample is
    an image tensor plus a target dict (``boxes``, ``labels``, ``masks``,
    ``image_id``, ``area``, ``iscrowd``) holding a single instance.
    """

    def __init__(self, df_path, img_dir):
        """
        Args:
            df_path: path of the CSV with ``ImageId`` and `` EncodedPixels``
                columns (note the leading space in the second name).
            img_dir: directory containing ``<ImageId>.png`` files.
        """
        self.df = pd.read_csv(df_path)
        # Every image and mask is resized to this fixed resolution.
        self.height = 1024
        self.width = 1024
        self.image_dir = img_dir
        self.image_info = collections.defaultdict(dict)

        counter = 0
        for index, row in tqdm(self.df.iterrows(), total=len(self.df)):
            image_id = row['ImageId']
            image_path = os.path.join(self.image_dir, image_id)
            if os.path.exists(image_path + '.png') and row[" EncodedPixels"].strip() != "-1":
                self.image_info[counter]["image_id"] = image_id
                self.image_info[counter]["image_path"] = image_path
                self.image_info[counter]["annotations"] = row[" EncodedPixels"].strip()
                counter += 1

    def __getitem__(self, idx):
        """Return ``(image_tensor, target_dict)`` for sample ``idx``.

        Raises:
            IndexError: if ``idx`` is outside ``[0, len(self))``.
        """
        # image_info is a defaultdict: looking up an unknown index would
        # silently insert an empty record (growing len()) and then fail with
        # a KeyError. Reject bad indices up front instead.
        if not 0 <= idx < len(self.image_info):
            raise IndexError(
                "index {} is out of range for dataset of size {}".format(
                    idx, len(self.image_info)))

        info = self.image_info[idx]
        img = Image.open(info["image_path"] + '.png').convert("RGB")
        width, height = img.size
        img = img.resize((self.width, self.height), resample=Image.BILINEAR)

        # Decode at the original resolution, then bring the mask to the same
        # size as the resized image.
        mask = rle2mask(info['annotations'], width, height)
        mask = Image.fromarray(mask.T)
        # NOTE(review): bilinear resizing presumably yields fractional values
        # at the mask border; the uint8 cast below would truncate those to 0
        # while the bounding box still includes them — confirm this is
        # intended.
        mask = mask.resize((self.width, self.height), resample=Image.BILINEAR)
        mask = np.expand_dims(mask, axis=0)

        # Tight bounding box around every non-zero mask pixel.
        pos = np.where(np.array(mask)[0, :, :])
        xmin = np.min(pos[1])
        xmax = np.max(pos[1])
        ymin = np.min(pos[0])
        ymax = np.max(pos[0])

        boxes = torch.as_tensor([[xmin, ymin, xmax, ymax]], dtype=torch.float32)
        labels = torch.ones((1,), dtype=torch.int64)
        masks = torch.as_tensor(mask, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((1,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        img = transforms.ToTensor()(img)

        # Random horizontal flip, applied with probability 0.8.
        # NOTE(review): 0.8 is unusually high for a flip augmentation —
        # confirm it is deliberate.
        if random.random() < 0.8:
            height, width = img.shape[-2:]
            img = img.flip(-1)
            bbox = target["boxes"]
            # Mirror the x coordinates; swapping keeps xmin <= xmax.
            bbox[:, [0, 2]] = width - bbox[:, [2, 0]]
            target["boxes"] = bbox
            target["masks"] = target["masks"].flip(-1)

        return img, target

    def __len__(self):
        """Number of usable (annotated, existing) samples."""
        return len(self.image_info)

# Create Dataset

In [6]:
# Index the training CSV. SIIMDataset keeps only the rows whose PNG exists in
# the image directory and whose EncodedPixels value is not "-1".
dataset_train = SIIMDataset("../data/train-rle.csv", "../data/train_png/")

100%|██████████| 11582/11582 [00:00<00:00, 14675.66it/s]


In [7]:
# Number of samples that survived SIIMDataset's filtering.
len(dataset_train)

3286

# Create Mask-RCNN Model

In [8]:
# Build a Mask R-CNN from the pretrained torchvision checkpoint and swap in
# freshly initialised prediction heads sized for this task.
device = torch.device('cuda:0')
num_classes = 2  # presumably background + one foreground class (targets use label 1)
hidden_layer = 256

model_ft = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

# Read the input widths of both heads before they are replaced.
in_features = model_ft.roi_heads.box_predictor.cls_score.in_features
in_features_mask = model_ft.roi_heads.mask_predictor.conv5_mask.in_channels

model_ft.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(
    in_features_mask, hidden_layer, num_classes)
model_ft.to(device)

# Make every parameter trainable.
for param in model_ft.parameters():
    param.requires_grad = True

# Create Data Loader

In [9]:
# Each sample is an (image, target-dict) pair, so batches are transposed into
# tuples instead of being stacked. The module-level collate_fn is used rather
# than an inline lambda because a lambda cannot be pickled when the worker
# processes are started with the "spawn" method, which would break
# num_workers > 0 on such platforms.
data_loader = torch.utils.data.DataLoader(
    dataset_train, batch_size=2, shuffle=True, num_workers=8,
    collate_fn=collate_fn)

# Define Training Parameters

In [10]:
# Optimise only the parameters that are marked as trainable.
params = [
    weight
    for weight in model_ft.parameters()
    if weight.requires_grad
]
optimizer = torch.optim.SGD(
    params,
    lr=0.001,
    momentum=0.9,
    weight_decay=0.0005,
)
# Epoch-level schedule: multiply the learning rate by 0.1 every 5 steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# Train Model

In [11]:
num_epochs = 6
for epoch in range(num_epochs):
    # train_one_epoch additionally applies its own per-iteration warm-up
    # schedule while epoch == 0.
    train_one_epoch(model_ft, optimizer, data_loader, device, epoch, print_freq=100)
    # Advance the epoch-level StepLR schedule once per epoch.
    lr_scheduler.step()

Epoch: [0]  [   0/1643]  eta: 0:42:32  lr: 0.000002  loss: 1.9609 (1.9609)  loss_classifier: 0.4290 (0.4290)  loss_box_reg: 0.0027 (0.0027)  loss_mask: 1.3446 (1.3446)  loss_objectness: 0.1689 (0.1689)  loss_rpn_box_reg: 0.0156 (0.0156)  time: 1.5533  data: 0.6868  max mem: 3183
Epoch: [0]  [ 100/1643]  eta: 0:08:09  lr: 0.000102  loss: 0.7390 (1.1599)  loss_classifier: 0.0920 (0.1788)  loss_box_reg: 0.0446 (0.0343)  loss_mask: 0.5236 (0.8230)  loss_objectness: 0.0423 (0.1113)  loss_rpn_box_reg: 0.0066 (0.0126)  time: 0.3067  data: 0.0056  max mem: 4327
Epoch: [0]  [ 200/1643]  eta: 0:07:31  lr: 0.000202  loss: 0.6877 (0.9398)  loss_classifier: 0.0983 (0.1360)  loss_box_reg: 0.0478 (0.0418)  loss_mask: 0.5145 (0.6738)  loss_objectness: 0.0245 (0.0769)  loss_rpn_box_reg: 0.0084 (0.0112)  time: 0.3107  data: 0.0052  max mem: 4331
Epoch: [0]  [ 300/1643]  eta: 0:06:59  lr: 0.000302  loss: 0.6451 (0.8513)  loss_classifier: 0.0911 (0.1244)  loss_box_reg: 0.0602 (0.0485)  loss_mask: 0.4406 (

Epoch: [1]  [1200/1643]  eta: 0:02:19  lr: 0.001000  loss: 0.5156 (0.5141)  loss_classifier: 0.0852 (0.0844)  loss_box_reg: 0.0482 (0.0441)  loss_mask: 0.3415 (0.3574)  loss_objectness: 0.0192 (0.0201)  loss_rpn_box_reg: 0.0064 (0.0082)  time: 0.3128  data: 0.0049  max mem: 4368
Epoch: [1]  [1300/1643]  eta: 0:01:47  lr: 0.001000  loss: 0.4854 (0.5143)  loss_classifier: 0.0810 (0.0846)  loss_box_reg: 0.0433 (0.0442)  loss_mask: 0.3381 (0.3570)  loss_objectness: 0.0133 (0.0204)  loss_rpn_box_reg: 0.0051 (0.0081)  time: 0.3138  data: 0.0051  max mem: 4372
Epoch: [1]  [1400/1643]  eta: 0:01:16  lr: 0.001000  loss: 0.4602 (0.5127)  loss_classifier: 0.0867 (0.0847)  loss_box_reg: 0.0428 (0.0442)  loss_mask: 0.2882 (0.3549)  loss_objectness: 0.0140 (0.0206)  loss_rpn_box_reg: 0.0066 (0.0083)  time: 0.3134  data: 0.0049  max mem: 4372
Epoch: [1]  [1500/1643]  eta: 0:00:44  lr: 0.001000  loss: 0.4744 (0.5129)  loss_classifier: 0.0931 (0.0845)  loss_box_reg: 0.0444 (0.0442)  loss_mask: 0.2913 (

Epoch: [3]  [ 500/1643]  eta: 0:06:00  lr: 0.001000  loss: 0.3972 (0.4562)  loss_classifier: 0.0662 (0.0787)  loss_box_reg: 0.0326 (0.0426)  loss_mask: 0.2637 (0.3104)  loss_objectness: 0.0165 (0.0165)  loss_rpn_box_reg: 0.0043 (0.0081)  time: 0.3130  data: 0.0049  max mem: 4372
Epoch: [3]  [ 600/1643]  eta: 0:05:28  lr: 0.001000  loss: 0.4159 (0.4584)  loss_classifier: 0.0717 (0.0785)  loss_box_reg: 0.0302 (0.0419)  loss_mask: 0.3024 (0.3133)  loss_objectness: 0.0097 (0.0166)  loss_rpn_box_reg: 0.0051 (0.0080)  time: 0.3131  data: 0.0049  max mem: 4372
Epoch: [3]  [ 700/1643]  eta: 0:04:56  lr: 0.001000  loss: 0.3756 (0.4526)  loss_classifier: 0.0638 (0.0779)  loss_box_reg: 0.0361 (0.0415)  loss_mask: 0.2570 (0.3094)  loss_objectness: 0.0105 (0.0160)  loss_rpn_box_reg: 0.0064 (0.0078)  time: 0.3143  data: 0.0049  max mem: 4372
Epoch: [3]  [ 800/1643]  eta: 0:04:25  lr: 0.001000  loss: 0.4670 (0.4538)  loss_classifier: 0.0759 (0.0780)  loss_box_reg: 0.0377 (0.0413)  loss_mask: 0.2969 (

KeyboardInterrupt: 

# Mask to RLE helper

In [None]:
def mask_to_rle(img, width, height):
    """Encode a binary mask as a relative run-length string.

    The mask is scanned as ``img[x][y]`` with ``x`` in ``range(width)`` as
    the outer loop and ``y`` in ``range(height)`` as the inner loop. Each run
    of 1s is emitted as ``offset length``, where the offset is counted from
    the end of the previous run (from the first pixel for the first run).

    Args:
        img: 2-D indexable of 0/1 values with shape ``(width, height)``.
        width: size of the first dimension.
        height: size of the second dimension.

    Returns:
        The encoded string with a leading space; just ``" "`` when the mask
        contains no 1s.
    """
    rle = []
    last_color = 0
    current_pixel = 0
    run_start = -1
    run_length = 0

    for x in range(width):
        for y in range(height):
            current_color = img[x][y]
            if current_color != last_color:
                if current_color == 1:
                    # A new run begins at the current relative offset.
                    run_start = current_pixel
                    run_length = 1
                else:
                    # The run just ended: emit it and restart the offset
                    # counter so the next offset is relative to this point.
                    rle.append(str(run_start))
                    rle.append(str(run_length))
                    run_start = -1
                    run_length = 0
                    current_pixel = 0
            elif run_start > -1:
                run_length += 1
            last_color = current_color
            current_pixel += 1

    # A run that reaches the very last pixel never sees a 1 -> 0 transition,
    # so it has to be flushed here; otherwise it would be lost.
    if run_start > -1:
        rle.append(str(run_start))
        rle.append(str(run_length))

    return " " + " ".join(rle)

# Convert Model to Evaluation Mode

In [None]:
# Freeze every parameter: gradients are not needed for inference.
for param in model_ft.parameters():
    param.requires_grad = False

# Switch the model to evaluation mode (the trailing semicolon presumably
# suppresses the notebook's echo of the returned module).
model_ft.eval();

# Get Test Data

In [None]:
sample_df = pd.read_csv("../data/sample_submission.csv")

# this part was taken from @raddar's kernel: https://www.kaggle.com/raddar/better-sample-submission
# ImageIds that occur on more than one row of the sample submission.
masks_ = sample_df.groupby('ImageId')['ImageId'].count().reset_index(name='N')
masks_ = masks_.loc[masks_['N'] > 1, 'ImageId'].values
###
# Keep one row per image (the last occurrence) and renumber the index.
sample_df = sample_df.drop_duplicates(subset='ImageId', keep='last')
sample_df = sample_df.reset_index(drop=True)

In [None]:
# Count of distinct image ids in the sample submission.
sample_df['ImageId'].nunique()

In [None]:
# (rows, columns) of the sample submission frame.
sample_df.shape

In [None]:
# Run the model over every test image and build the submission rows.
tt = transforms.ToTensor()
to_pil = transforms.ToPILImage()  # built once instead of once per mask
sublist = []
counter = 0  # images for which the model returned at least one mask
threshold = 0.3  # minimum detection score for a mask to be submitted
NO_MASK_RLE = " -1"

# Inference only: do not record autograd state.
with torch.no_grad():
    for index, row in tqdm(sample_df.iterrows(), total=len(sample_df)):
        image_id = row['ImageId']
        img_path = os.path.join('../data/test_png', image_id + '.png')
        img = Image.open(img_path).convert("RGB")
        # Remember the original size so masks can be resized back to it.
        width, height = img.size
        img = img.resize((1024, 1024), resample=Image.BILINEAR)
        img = tt(img)
        result = model_ft([img.to(device)])[0]

        if len(result["masks"]) > 0:
            counter += 1

        mask_added = 0
        for mask, score in zip(result["masks"], result["scores"]):
            if score >= threshold:
                res = to_pil(mask.permute(1, 2, 0).cpu().numpy())
                res = np.asarray(res.resize((width, height), resample=Image.BILINEAR))
                # Binarise and transpose to the (width, height) layout that
                # mask_to_rle scans.
                res = (res[:, :] * 255. > 127).astype(np.uint8).T
                if not res.any():
                    # Nothing survives binarisation: encoding it would give
                    # a blank RLE row, so the mask is skipped.
                    continue
                rle = mask_to_rle(res, width, height)
                sublist.append([image_id, rle])
                mask_added += 1

        # Every image needs at least one row; " -1" marks "no mask".
        if mask_added == 0:
            sublist.append([image_id, NO_MASK_RLE])


submission_df = pd.DataFrame(sublist, columns=sample_df.columns.values)
submission_df.to_csv("submission.csv", index=False)
print(counter)