This notebook provides a function decodeToBinaryMask to reconstruct the mask from an encoded string

Two motivations for this notebook:

1. Wanting to check that the encoded masks really contain the mask information I expect - the proof is being able to reconstruct the mask out of the RLE encoded string.

2. There are some masks in the provided "sample_submission.csv" and I was curious what was in them. They turn out to be empty (i.e. all zero) masks in the size of the images

Acknowledgment:
The frame of this notebook comes from here (Thanks Tito !) https://www.kaggle.com/its7171/mmdetection-for-segmentation-inference

In [None]:

# Install pycocotools from the wheel file under /kaggle/input (-q keeps pip quiet)
!pip install -q "/kaggle/input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl"


In [None]:
import pandas as pd
import numpy as np
import os
import cv2 
from tqdm import tqdm
import pickle
from itertools import groupby
from pycocotools import mask as mutils
from pycocotools import _mask as coco_mask
import matplotlib.pyplot as plt
import os
import base64
import typing as t
import zlib
import random
# Fix the RNG seed so any random choices are repeatable between runs
random.seed(0)

exp_name = "v4"

ROOT = '../input/hpa-single-cell-image-classification/'
train_or_test = 'test'
df = pd.read_csv(os.path.join(ROOT, 'sample_submission.csv'))

# A 559-row sample submission switches on debug mode, which keeps only
# the first five rows
debug = len(df) == 559
if debug:
    df = df[:5]


# helper funcs

In [None]:
def encode_binary_mask(mask: np.ndarray) -> t.Text:
  """Converts a binary mask into OID challenge encoding ascii text.

  The mask is RLE-encoded with the COCO mask API, the RLE counts are
  zlib-compressed, and the compressed bytes are base64-encoded.

  Args:
    mask: boolean array that is 2-D once singleton axes are squeezed out.

  Returns:
    The base64 text of the compressed RLE counts.

  Raises:
    ValueError: if `mask` does not have a boolean dtype, or is not 2-D
      after squeezing.
  """

  # check input mask --
  # Compare against the builtin `bool`: the `np.bool` alias was deprecated in
  # NumPy 1.20 and removed in 1.24, where touching it raises AttributeError.
  if mask.dtype != bool:
    raise ValueError(
        "encode_binary_mask expects a binary mask, received dtype == %s" %
        mask.dtype)

  mask = np.squeeze(mask)
  if len(mask.shape) != 2:
    # Wrap the shape in a 1-tuple so "%s" formats the whole shape; a bare
    # tuple is unpacked as separate format arguments and raises TypeError.
    raise ValueError(
        "encode_binary_mask expects a 2d mask, received shape == %s" %
        (mask.shape,))

  # convert input mask to expected COCO API input --
  # (height, width, 1) uint8 array in Fortran (column-major) order
  mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
  mask_to_encode = mask_to_encode.astype(np.uint8)
  mask_to_encode = np.asfortranarray(mask_to_encode)

  # RLE encode mask --
  encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

  # compress and base64 encoding --
  binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
  base64_str = base64.b64encode(binary_str)
  return base64_str.decode()

def read_img(image_id, color, train_or_test='train', image_size=None):
    """Load one colour channel of an image as an 8-bit array.

    The file is looked up at ``{ROOT}/{train_or_test}/{image_id}_{color}.png``.

    Args:
        image_id: identifier forming the first part of the file name.
        color: channel suffix of the file name (e.g. "red").
        train_or_test: sub-directory of ``ROOT`` to read from.
        image_size: if given, the image is resized to
            ``(image_size, image_size)``.

    Returns:
        The image array as read by OpenCV; uint16 data is divided by 256
        and cast to uint8.

    Raises:
        AssertionError: if the file does not exist.
        OSError: if OpenCV could not read the file.
    """
    filename = f'{ROOT}/{train_or_test}/{image_id}_{color}.png'
    assert os.path.exists(filename), f'not found {filename}'
    img = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with the file name rather than later inside resize / dtype access.
    if img is None:
        raise OSError(f'could not read image {filename}')
    if image_size is not None:
        img = cv2.resize(img, (image_size, image_size))
    if img.dtype == 'uint16':
        # scale the 16-bit range down to 8 bits
        img = (img/256).astype('uint8')
    return img

def load_RGBY_image(image_id, train_or_test='train', image_size=None):
    """Combine the red, green and blue channel files into one array.

    Despite the name, the yellow channel is not loaded. The arguments are
    forwarded unchanged to ``read_img`` for each channel.
    """
    # using rgb only here
    channels = [
        read_img(image_id, channel, train_or_test, image_size)
        for channel in ("red", "green", "blue")
    ]
    # np.array puts the channels on axis 0; the transpose moves that axis last
    return np.transpose(np.array(channels), (1, 2, 0))

def print_masked_img(image_id, mask):
    """Plot the image, the mask, and the mask overlaid on the image in one row.

    The image is loaded with ``load_RGBY_image`` using the module-level
    ``train_or_test`` value.
    """
    img = load_RGBY_image(image_id, train_or_test)

    # One entry per panel: (title, [(array, imshow keyword arguments), ...])
    panels = (
        ('Image', [(img, {})]),
        ('Mask', [(mask, {})]),
        ('Image + Mask', [(img, {}), (mask, {'alpha': 0.6})]),
    )

    plt.figure(figsize=(15, 15))
    for position, (title, layers) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        for layer, options in layers:
            plt.imshow(layer, **options)
        plt.title(title)
        plt.axis('off')
    plt.show()

The following function decodeToBinaryMask regenerates the mask from the RLE encoded string:

In [None]:
def decodeToBinaryMask(rleCodedStr, imWidth, imHeight):
    """Rebuild a boolean mask from a base64 + zlib + RLE encoded string.

    Args:
        rleCodedStr: base64 text holding the zlib-compressed RLE counts.
        imWidth: first entry of the RLE ``size`` field.
        imHeight: second entry of the RLE ``size`` field.

    Returns:
        The array produced by ``coco_mask.decode``, cast to bool.
    """
    # NOTE(review): the COCO mask API documents ``size`` as [height, width],
    # so the first argument appears to act as the row count -- confirm
    # before relying on this for non-square images.
    compressed = base64.b64decode(rleCodedStr)
    rle_counts = zlib.decompress(compressed, wbits=zlib.MAX_WBITS)
    # coco_mask.decode is given a list of RLE dicts, so wrap the single one
    rle_objects = [{'size': [imWidth, imHeight], 'counts': rle_counts}]
    return coco_mask.decode(rle_objects).astype('bool')
   

So let's use the function and decode those masks from the sample_submission.
Note that the first three are the same and the fourth is different because the fourth image has a different size.
In showing the masks on the images however, you can see they are all empty.

In [None]:
out_image_dir = f'../work/mmdet_{exp_name}_{train_or_test}/'
# os.makedirs with exist_ok=True does what `mkdir -p` did, without relying
# on IPython shell magic
os.makedirs(out_image_dir, exist_ok=True)

for idx in range(len(df)):
    # Fetch the row once instead of once per column
    row = df.iloc[idx]
    image_id = row.ID
    image_width = row.ImageWidth
    image_height = row.ImageHeight
    PredString = row.PredictionString

    # The PredictionString starts with label and confidence fields,
    # so split the field contents on space and take the third field,
    # which is the RLE string
    chunks = PredString.split(' ')
    print(idx)
    print(chunks)
    codedStr = chunks[2]

    # Now we can decode it
    binary_mask = decodeToBinaryMask(codedStr, image_width, image_height)

    # showing it has a plausible form
    print(binary_mask.shape)
    print(binary_mask.dtype)

    # And just to prove we have an inverse function
    # let's convert it back to a coded string
    recodedStr = encode_binary_mask(binary_mask)
    print(recodedStr)

    # and let's display the mask now against the image
    print_masked_img(image_id, binary_mask)

    # When we display the masks they look empty
    # as a further check to prove the mask is really as empty as it looks
    # findNonZero on the mask returns "None", so there are no non-zero pixels
    intmask = binary_mask.astype('uint8')
    nz = cv2.findNonZero(intmask)
    print(nz)


But that's not very convincing with empty masks (how do we know it's not simply incorrectly displaying the mask ?), 
so now let's try it with a non-trivial mask, a single circle

In [None]:
# set up a black greyscale image
testmask = np.zeros((2000,3000), np.uint8)

# draw a filled circle (cv2.circle draws onto testmask and returns it)
grayimg = cv2.circle(testmask, (500,500), 200,255, cv2.FILLED )

# check its shape and type
print(grayimg.shape)
print(grayimg.dtype)

# get a binary mask of the same size
# (np.bool_ rather than np.bool8: the bool8 alias was deprecated in
# NumPy 1.24 and removed in NumPy 2.0)
binmask = np.zeros((2000,3000), np.bool_)

# and transfer the circle to the binary mask
binmask[grayimg>0]= 1

# check the shape and type
print(binmask.shape)
print(binmask.dtype)

# display the binary mask
# (plt.figure needs the call parentheses; without them the line does nothing)
plt.figure()
plt.imshow(binmask, cmap='gray', vmin=0, vmax=1)
plt.show()


> Now we encode the circle mask:

In [None]:
# Encode the circle mask (RLE, then zlib, then base64) and print the resulting text
rle = encode_binary_mask(binmask)
print(rle)



and now we decode it again and show that it is still a circle:

In [None]:
# Decode the mask from the RLE string
# (2000, 3000 are the row and column counts of the mask that was encoded)
decodedMask = decodeToBinaryMask(rle, 2000,3000)

# check it has a plausible form
print(decodedMask.shape)
print(decodedMask.dtype)

# And show it as an image to prove it's the circle we started with
# (size the display array from the decoded mask instead of hard-coding it)
showmask = np.zeros(decodedMask.shape, np.uint8)
showmask[decodedMask>0]= 1

# plt.figure needs the call parentheses; without them the line does nothing
plt.figure()
# Drop a trailing single-channel axis, if present, so imshow receives a
# plain 2-D array; older matplotlib rejects (rows, cols, 1) image data
if showmask.ndim == 3 and showmask.shape[-1] == 1:
    showmask = showmask[:, :, 0]
plt.imshow(showmask, cmap='gray', vmin=0, vmax=1)
plt.show()
