# Maskrcnn

This notebook uses a Mask R-CNN model based on the torchvision instance-segmentation fine-tuning tutorial:  
https://colab.research.google.com/github/pytorch/vision/blob/temp-tutorial/tutorials/torchvision_finetuning_instance_segmentation.ipynb#scrollTo=at-h4OWK0aoc

In [1]:
import os
import numpy as np, pandas as pd
from matplotlib import colors
import torch, torchvision
import torch.utils.data
from PIL import Image

import sys
sys.path.append('/home/jupyter/fastai_dev/dev')
from local.test import *
from local.basics import *
from local.callback.all import *
from local.vision.all import *

### Data

In [2]:
! ls data/

sample_submission.csv  train.csv     train_images.zip
test_images.zip        train_images  understanding_cloud_organization.zip


In [3]:
# Gather the training image paths found under data/train_images/ and display them.
items = get_image_files('data/train_images/')
items

(#5546) [data/train_images/0a7a247.jpg,data/train_images/2f52d76.jpg,data/train_images/6b272fe.jpg,data/train_images/01eecc1.jpg,data/train_images/f3dad96.jpg,data/train_images/93aafb4.jpg,data/train_images/f157992.jpg,data/train_images/4fa9d86.jpg,data/train_images/c71b0dc.jpg,data/train_images/547ad87.jpg...]

In [4]:
def load_train_annotation(fpath):
    """Load the training annotations and split `Image_Label` into two columns.

    :param fpath: path (or file-like object) of the CSV holding the
        `Image_Label` and `EncodedPixels` columns
    Returns a DataFrame with the columns `Image` (text before the first '.'),
    `Label` (lower-cased text after the first '_') and `EncodedPixels`.
    """
    raw = pd.read_csv(fpath)
    combined = raw.Image_Label
    parsed = raw.assign(
        Image=combined.apply(lambda value: value.split('.')[0]),
        Label=combined.apply(lambda value: value.split('_')[1].lower()),
    )
    return parsed[['Image', 'Label', 'EncodedPixels']]

# Annotation table (Image, Label, EncodedPixels) parsed from the training CSV.
annots = load_train_annotation('data/train.csv')

In [5]:
def rle_decode(mask_rle: str = '', shape: tuple = (1400, 2100)):
    '''
    Turn a run-length encoded string into a binary mask.

    :param mask_rle: whitespace-separated "start length" pairs; starts are
        1-based positions in the column-major flattened mask
    :param shape: (height, width) of the mask to build
    Returns a uint8 numpy array of `shape`, 1 inside a run and 0 elsewhere.

    Copied from https://www.kaggle.com/artgor/segmentation-in-pytorch-using-convenient-tools
    '''
    tokens = mask_rle.split()
    # Even positions hold the 1-based starts, odd positions the run lengths.
    first = np.asarray(tokens[::2], dtype=int) - 1
    run = np.asarray(tokens[1::2], dtype=int)
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(first, first + run):
        flat[begin:stop] = 1
    # The encoding runs down columns first, hence Fortran order.
    return flat.reshape(shape, order='F')


class CloudTypesImage(Tuple):
    """An `(image id, image, masks)` triple that can draw itself.

    `masks` maps a cloud label to a mask array; each label's colour is looked
    up in the module-level `COLORS` mapping.
    """
    def show(self, ax=None, figsize=None):
        """Draw the image with every non-empty mask overlaid and a title listing the labels present."""
        imgid, img, masks = self
        if ax is None:
            ax = plt.subplots(figsize=figsize)[1]
        present_clouds = [name for name, mask in masks.items() if mask.sum() > 0]
        ax.imshow(img)
        for name, mask in masks.items():
            if mask.sum() == 0:
                continue
            # Hide the background so only the mask region gets tinted.
            overlay = np.ma.masked_where(mask < 1, mask)
            single_colour = colors.ListedColormap([COLORS[name]])
            ax.imshow(overlay, alpha=.7, cmap=single_colour)
        ax.set_title(f"{imgid}:{','.join(present_clouds)}")
        ax.axis('off')
        
            
class CloudTypesTfm(Transform):
    """Transform turning an item index into `(image id, image, masks)`.

    :param items: indexable collection of image paths (each needs `.stem`)
    :param annots: annotation table with `Image`, `Label` and
        `EncodedPixels` columns
    """
    def __init__(self, items, annots):
        # Keep the annotation table on the instance so `encodes` does not
        # depend on a module-level variable of the same name.
        self.items, self.annots = items, annots

    def encodes(self, i):
        """Load item `i` and decode one mask per label annotated for that image.

        Returns `(imgid, img, masks)` where `masks` maps each label to the
        array produced by `rle_decode`; a missing encoding gives an all-zero mask.
        """
        fn = self.items[i]
        img = PILImage.create(fn)

        imgid = fn.stem
        rows = self.annots[self.annots.Image == imgid]
        # `fillna` returns a new Series, so the shared table is never mutated
        # and no assignment is made on a filtered slice.
        encodings = rows.EncodedPixels.fillna('')
        masks = {}
        for label, rle in zip(rows.Label, encodings):
            # Keep the first row of a label if the table repeats it.
            if label not in masks:
                masks[label] = rle_decode(rle, shape=img.shape)
        return imgid, img, masks

    def decodes(self, o): return CloudTypesImage(*o)

In [6]:
# Category vocabulary for the four cloud types (created with add_na=True).
CATS = Category.create(['fish', 'flower', 'gravel', 'sugar'], add_na=True)
# Matplotlib colour code used when overlaying each cloud type's mask.
COLORS = {'fish': 'b', 'flower': 'r', 'gravel': 'y', 'sugar': 'c'}

def get_random_cmap(length):
    """Return a ListedColormap holding `length` randomly drawn RGB colours."""
    palette = []
    for _ in range(length):
        palette.append(np.random.rand(3,))
    return colors.ListedColormap(palette)

In [42]:
# Look up the integer code assigned to the 'flower' category.
CATS('flower')

tensor(2)

In [56]:
class MaskRTargetTfm(Transform):
    """Transform turning an item index into a detection/segmentation target dict.

    :param items: indexable collection of image paths (each needs `.stem`)
    :param annots: annotation table with `Image`, `Label` and
        `EncodedPixels` columns
    :param cats: callable mapping a label string to its integer code
    """
    def __init__(self, items, annots, cats):
        self.items, self.annots, self.cats = items, annots, cats

    def encodes(self, i):
        """Build the target for item `i`.

        Only rows with a non-null `EncodedPixels` count as objects. The
        returned dict holds `image_id`, `labels` (int64), `boxes`
        (float32, N x 4 as xmin, ymin, xmax, ymax), `masks` (uint8, N x H x W),
        `area` and `is_crowd`. An image without any object yields empty
        tensors of matching rank instead of raising.
        """
        fn = self.items[i]
        img = PILImage.create(fn)
        shape = img.shape

        imgid = fn.stem
        # Read the table stored on the instance rather than a module-level global.
        annots = self.annots
        df = annots[(annots.Image == imgid) & (annots.EncodedPixels.notnull())]
        num_objs = len(df)

        boxes, masks, labels = [], [], []
        for label, rle in zip(df.Label, df.EncodedPixels):
            mask = rle_decode(rle, shape=shape)

            # The bounding box is the extent of the non-zero mask pixels.
            pos = np.where(mask)
            xmin, xmax = pos[1].min(), pos[1].max()
            ymin, ymax = pos[0].min(), pos[0].max()

            labels.append(self.cats(label))
            boxes.append([xmin, ymin, xmax, ymax])
            masks.append(mask)

        labels = torch.as_tensor(labels, dtype=torch.int64)
        # reshape keeps the (N, 4) layout even when N == 0.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        if masks:
            # Stack first: converting one ndarray is far cheaper than a list of them.
            masks = torch.as_tensor(np.stack(masks), dtype=torch.uint8)
        else:
            masks = torch.zeros((0, *shape), dtype=torch.uint8)

        area = (boxes[:,3] - boxes[:,1]) * (boxes[:,2] - boxes[:,0])

        target = {}
        target['image_id'] = torch.tensor([i])
        target['labels'] = labels
        target['boxes'] = boxes
        target['masks'] = masks
        target['area'] = area
        # NOTE(review): torchvision's reference COCO utilities read 'iscrowd';
        # confirm that the 'is_crowd' spelling is what downstream code expects.
        target['is_crowd'] = torch.zeros((num_objs,), dtype=torch.int64)
        return target

    def decodes(self, o): return o

In [57]:
# Target transform over the training items, their annotations and the category vocabulary.
maskr = MaskRTargetTfm(items, annots, CATS)

In [59]:
# Build the target dict for item index 11 and display it after decoding.
maskr.decode(maskr(11))

{'image_id': tensor([11]),
 'labels': tensor([1, 2, 3, 4]),
 'boxes': tensor([[  32.,  253.,  637.,  608.],
         [1120.,  740., 1870., 1309.],
         [1637., 1123., 2098., 1398.],
         [1211.,   24., 2098.,  470.]]),
 'masks': tensor([[[0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          ...,
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0]],
 
         [[0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          ...,
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0]],
 
         [[0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 0, 0, 0],
          ...,
          [0, 0, 0,  ..., 1, 1, 0],
          [0, 0, 0,  ..., 1, 1, 0],
          [0, 0, 0,  ..., 0, 0, 0]],
 
         [[0, 0, 0,  ..., 0, 0, 0],
          [0, 0, 0,  ..., 

### Model

In [6]:
# Mask R-CNN with a ResNet-50 FPN backbone, loaded with pretrained weights.
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

In [7]:
# Inspect the ROI heads; the pretrained box predictor printed below has 91 class outputs.
model.roi_heads

RoIHeads(
  (box_roi_pool): MultiScaleRoIAlign()
  (box_head): TwoMLPHead(
    (fc6): Linear(in_features=12544, out_features=1024, bias=True)
    (fc7): Linear(in_features=1024, out_features=1024, bias=True)
  )
  (box_predictor): FastRCNNPredictor(
    (cls_score): Linear(in_features=1024, out_features=91, bias=True)
    (bbox_pred): Linear(in_features=1024, out_features=364, bias=True)
  )
  (mask_roi_pool): MultiScaleRoIAlign()
  (mask_head): MaskRCNNHeads(
    (mask_fcn1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (mask_fcn2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (mask_fcn3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace=True)
    (mask_fcn4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu4): ReLU(inplace=True)
  )
  (mask_predictor): MaskRCNNPredictor(
    (conv5_mask): ConvTr

In [9]:
# Smoke test: switch to eval mode and run one random 3x100x120 image through the model.
# NOTE(review): the printed tensors carry grad_fn, so autograd is still recording here;
# wrap the call in torch.no_grad() if only inference is intended.
model.eval()
model([torch.randn(3, 100, 120)])

[{'boxes': tensor([[  7.7471,  80.5660,  19.2970,  89.3608],
          [ 16.5200,  65.4092,  22.5903,  71.5422],
          [101.9586,  87.7781, 112.9115,  98.8642],
          [ 77.5966,  49.5098,  84.0162,  58.8973],
          [  0.0000,   0.0000,   4.2290,   3.9732],
          [ 34.5164,  16.7644,  36.3853,  18.6858],
          [ 18.0366,  66.1654,  21.6986,  69.6559],
          [  9.9811,  81.3504,  19.2893,  89.4333]], grad_fn=<StackBackward>),
  'labels': tensor([ 3, 10,  3, 10, 10, 10, 10, 10]),
  'scores': tensor([0.1208, 0.1037, 0.0863, 0.0812, 0.0770, 0.0696, 0.0662, 0.0656],
         grad_fn=<IndexBackward>),
  'masks': tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            ...,
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0., 0., 0.]]],
  
  
          [[[0., 0., 0.,  ..., 0., 0., 0.],
            [0., 0., 0.,  ..., 0