In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Copy the mmdetection source tree from the input dataset to ../mmdetection;
# later cells `cd ../mmdetection` to run its tools and read its configs.
!rsync -a ../input/mmdetection-v280/mmdetection ../
# Install mmdet and its dependencies from files shipped in the input dataset
# (local paths are given to pip rather than package-index names).
!pip install ../input/mmdetection-v280/src/mmdet-2.8.0/mmdet-2.8.0/
!pip install ../input/mmdetection-v280/src/mmpycocotools-12.0.3/mmpycocotools-12.0.3/
!pip install ../input/mmdetection-v280/src/addict-2.4.0-py3-none-any.whl
!pip install ../input/mmdetection-v280/src/yapf-0.30.0-py2.py3-none-any.whl
# NOTE(review): this wheel is tagged cp37 / manylinux1_x86_64, so it presumably
# requires a CPython 3.7 Linux kernel — confirm against the runtime image.
!pip install ../input/mmdetection-v280/src/mmcv_full-1.2.6-cp37-cp37m-manylinux1_x86_64.whl

In [None]:
import pandas as pd
import numpy as np
import os
import cv2
from tqdm import tqdm
import pickle
from itertools import groupby
from pycocotools import mask as mutils
from pycocotools import _mask as coco_mask
import matplotlib.pyplot as plt
import os
import base64
import typing as t
import zlib
import random
# Seed Python's `random` module so any use of it is repeatable.
random.seed(0)

# Experiment / model identifiers used to build config, checkpoint and output paths.
exp_name = "v4"
conf_name = "mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco"
model_name = 'mask_rcnn_resnest101_v5_ep9'

# Competition data root and the split whose images are read below.
ROOT = '../input/hpa-single-cell-image-classification/'
train_or_test = 'test'

df = pd.read_csv(os.path.join(ROOT, 'sample_submission.csv'))

# A sample submission with exactly 559 rows switches the notebook into debug
# mode, where only the first three rows are processed.
# NOTE(review): 559 is presumably the size of the public test set — confirm.
debug = len(df) == 559
if debug:
    df = df[:3]

In [None]:
def encode_binary_mask(mask: np.ndarray) -> t.Text:
  """Converts a binary mask into OID challenge encoding ascii text.

  The mask is run-length encoded with the COCO mask API, the resulting
  counts are zlib-compressed, and the compressed bytes are base64-encoded.

  Args:
    mask: boolean array; after `np.squeeze` it must be 2-dimensional.

  Returns:
    The base64 text of the compressed RLE counts.

  Raises:
    ValueError: if `mask` is not of boolean dtype, or is not 2-dimensional
      once singleton axes are squeezed out.
  """

  # check input mask --
  # `np.bool_` is used because the `np.bool` alias no longer exists in
  # current NumPy releases.
  if mask.dtype != np.bool_:
    raise ValueError(
        "encode_binary_mask expects a binary mask, received dtype == %s" %
        mask.dtype)

  mask = np.squeeze(mask)
  if len(mask.shape) != 2:
    # Wrap the shape in a 1-tuple: passing the shape tuple itself to `%`
    # spreads its elements over the format arguments and raises TypeError.
    raise ValueError(
        "encode_binary_mask expects a 2d mask, received shape == %s" %
        (mask.shape,))

  # convert input mask to expected COCO API input --
  # (height, width, 1) uint8 array in Fortran (column-major) order.
  mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
  mask_to_encode = mask_to_encode.astype(np.uint8)
  mask_to_encode = np.asfortranarray(mask_to_encode)

  # RLE encode mask --
  encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

  # compress and base64 encoding --
  binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
  base64_str = base64.b64encode(binary_str)
  return base64_str.decode()

def read_img(image_id, color, train_or_test='train', image_size=None):
    """Read one colour-channel PNG of an image.

    The file is looked up at `{ROOT}/{train_or_test}/{image_id}_{color}.png`
    and read with `cv2.IMREAD_UNCHANGED`.

    Args:
        image_id: identifier forming the first part of the file name.
        color: channel suffix of the file name (e.g. "red").
        train_or_test: sub-directory of ROOT to read from.
        image_size: if given, the image is resized to a square of this side.

    Returns:
        The image array; 16-bit images are divided by 256 and cast to uint8.

    Raises:
        AssertionError: if the file does not exist.
    """
    path = f'{ROOT}/{train_or_test}/{image_id}_{color}.png'
    assert os.path.exists(path), f'not found {path}'

    channel = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if image_size is not None:
        channel = cv2.resize(channel, (image_size, image_size))

    # Only 16-bit data needs scaling down to the 8-bit range.
    return (channel / 256).astype('uint8') if channel.dtype == 'uint16' else channel

def load_RGBY_image(image_id, train_or_test='train', image_size=None):
    """Load the red, green and blue channel files of an image and stack them.

    Args:
        image_id: identifier passed through to `read_img`.
        train_or_test: sub-directory passed through to `read_img`.
        image_size: optional square resize passed through to `read_img`.

    Returns:
        An array with the three channels moved to the last axis, in
        red, green, blue order.
    """
    # using rgb only here: the yellow channel is deliberately not loaded
    channels = [
        read_img(image_id, color, train_or_test, image_size)
        for color in ("red", "green", "blue")
    ]
    return np.array(channels).transpose(1, 2, 0)

def print_masked_img(image_id, mask):
    """Show an image, a mask, and the mask overlaid on the image side by side.

    The image is loaded with `load_RGBY_image` from the split named by the
    module-level `train_or_test`.

    Args:
        image_id: identifier of the image to load.
        mask: array displayed as the mask (drawn with alpha 0.6 in the overlay).
    """
    picture = load_RGBY_image(image_id, train_or_test)

    # Each panel: (title, layers), where a layer is (array, imshow kwargs)
    # and layers are drawn in order on the same axes.
    panels = [
        ('Image', [(picture, {})]),
        ('Mask', [(mask, {})]),
        ('Image + Mask', [(picture, {}), (mask, {'alpha': 0.6})]),
    ]

    plt.figure(figsize=(15, 15))
    for position, (caption, layers) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        for layer, imshow_kwargs in layers:
            plt.imshow(layer, **imshow_kwargs)
        plt.title(caption)
        plt.axis('off')
    plt.show()

In [None]:
# Export every image listed in `df` as a JPEG and build the annotation list
# that is pickled for the detector's dataset loader.
out_image_dir = f'../work/mmdet_{exp_name}_{train_or_test}/'
# Create the output directory (and parents) from Python instead of shelling out.
os.makedirs(out_image_dir, exist_ok=True)

annos = []
for image_id in tqdm(df['ID']):
    img = load_RGBY_image(image_id, train_or_test)

    out_path = f'{out_image_dir}/{image_id}.jpg'
    # cv2.imwrite reports failure through its boolean return value rather than
    # an exception; fail here instead of continuing with a missing image.
    if not cv2.imwrite(out_path, img):
        raise IOError(f'failed to write {out_path}')

    # No ground truth is recorded for these images, so the annotation
    # fields are left as None.
    annos.append({
        'filename': image_id+'.jpg',
        'width': img.shape[1],
        'height': img.shape[0],
        'ann': {
            'bboxes': None,
            'labels': None,
            'masks': None
        }
    })

with open(f'../work/mmdet_{exp_name}_tst.pkl', 'wb') as f:
    pickle.dump(annos, f)

In [None]:
# The config files listed below were created from the default Mask R-CNN config.
# The main changes are the use of CustomDataset, num_classes, the data paths, etc.
# Everything else is used as-is from mmdetection.
# NOTE(review): this lists configs/hpa/, while the inference cell builds its
# config path as configs/hpa_{exp_name}/... — confirm which directory exists.
!ls -l ../mmdetection/configs/hpa/

In [None]:
config = f'configs/hpa_{exp_name}/{conf_name}.py'
model_file = f'../input/hpa-models/{model_name}.pth'
result_pkl = f'../work/{model_name}.pkl'
additional_conf = '--cfg-options'
additional_conf += ' test_cfg.rcnn.score_thr=0.001'
cmd = f'python tools/test.py {config} {model_file} --out {result_pkl} {additional_conf}'
!cd ../mmdetection; {cmd}
result = pickle.load(open('../mmdetection/'+result_pkl, 'rb'))

In [None]:
# Preview: for up to the first three images, display every predicted mask whose
# confidence is above 0.3. The range is bounded by len(annos) so the cell also
# works when fewer than three images were processed.
for ii in range(min(3, len(annos))):
    image_id = annos[ii]['filename'].replace('.jpg','').replace('.png','')
    for class_id in range(19):
        # result[ii][0] / result[ii][1] are indexed per class and iterated in
        # parallel: one box row and one encoded mask per detection.
        bbs = result[ii][0][class_id]
        sgs = result[ii][1][class_id]
        for bb, sg in zip(bbs,sgs):
            cnf = bb[4]  # fifth element of the box row is used as the confidence
            if cnf > 0.3:
                print(f'class_id:{class_id}, image_id:{image_id}, confidence:{cnf}')
                mask = mutils.decode(sg).astype(bool)
                print_masked_img(image_id, mask)

In [None]:
# Write one submission row per image: ID, width, height and a space-separated
# list of "<class_id> <confidence> <encoded mask>" triples.
with open('submission.csv', 'w') as outf:
    print('ID,ImageWidth,ImageHeight,PredictionString', file=outf)
    for ii in range(len(annos)):
        image_id = annos[ii]['filename'].replace('.jpg','').replace('.png','')
        # Start from the dimensions recorded when the image was exported, so a
        # row for an image with no detections does not reuse the size left over
        # from a previous image (or fail because w/h were never assigned).
        w = annos[ii]['width']
        h = annos[ii]['height']
        pred_strs = []
        for class_id in range(19):
            bbs = result[ii][0][class_id]
            sgs = result[ii][1][class_id]
            for bb, sg in zip(bbs,sgs):
                cnf = bb[4]  # fifth element of the box row is used as the confidence
                # When detections exist, the size stored with the mask is
                # reported: sg['size'] is read as (height, width).
                h = sg['size'][0]
                w = sg['size'][1]
                #convert coco format to kaggle format
                mask = mutils.decode(sg).astype(bool)
                rle = encode_binary_mask(mask)
                pred_strs.append(f'{class_id} {cnf} {rle}')
        print(f'{image_id},{w},{h},{" ".join(pred_strs)}', file=outf)

In [None]:
# List the current directory (presumably to confirm submission.csv was written).
!ls -l 