In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io

In [None]:
# Root directory of the competition dataset; the cells below read
# train.csv and train/<id>.tiff from it.
DIR_INPUT = '/kaggle/input/hubmap-kidney-segmentation'

In [None]:
# https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode
def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the encoding as a space-separated string of "start length"
    pairs; starts are 1-based positions in the transposed-then-flattened
    (column-by-column) pixel order.
    '''
    # Pad with one background pixel on each side so that runs touching the
    # first or last pixel still produce a value change to detect.
    column_major = img.T.flatten()
    padded = np.concatenate(([0], column_major, [0]))
    # 1-based positions where the pixel value changes: even slots are run
    # starts, odd slots are the position just past the end of that run.
    change_idx, = np.where(padded[:-1] != padded[1:])
    boundaries = change_idx + 1
    # Convert every "end" position into a run length.
    boundaries[1::2] = boundaries[1::2] - boundaries[0::2]
    return ' '.join(map(str, boundaries))
 
def rle2mask(mask_rle, shape=(1600,256)):
    '''
    Decode a run-length encoded mask.

    mask_rle: run-length as a string of space-separated "start length"
              pairs (starts are 1-based). None or NaN (what pandas yields
              for an empty CSV cell) is treated as "no mask".
    shape: (width,height) of array to return
    Returns numpy uint8 array of shape (height, width), 1 - mask,
    0 - background.
    Raises ValueError if the string has an odd number of tokens, i.e. a
    start without a matching length.
    '''
    # A missing annotation decodes to an all-background mask instead of
    # failing on `.split()`.
    if mask_rle is None or (isinstance(mask_rle, float) and np.isnan(mask_rle)):
        tokens = []
    else:
        tokens = mask_rle.split()

    # Without this check NumPy broadcasting silently accepts 1 or 3 tokens
    # (e.g. "1 3 10") and paints a wrong mask instead of failing.
    if len(tokens) % 2:
        raise ValueError(
            f"RLE string must contain start/length pairs, got {len(tokens)} tokens"
        )

    # Explicit int64 so pixel offsets of very large images do not depend on
    # the platform's default integer width.
    starts = np.asarray(tokens[0::2], dtype=np.int64) - 1  # 1-based -> 0-based
    lengths = np.asarray(tokens[1::2], dtype=np.int64)
    ends = starts + lengths

    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    # Runs are laid out column by column, so reshape as (width, height) and
    # transpose to get the usual (height, width) orientation.
    return img.reshape(shape).T


In [None]:
# Load the training table. The cells below read column 0 as the image id
# (used to build the TIFF filename) and column 1 as the RLE-encoded mask.
train_df = pd.read_csv(f"{DIR_INPUT}/train.csv")
train_df.head()

In [None]:
# Read the TIFF of the first training row; its filename is the value in
# column 0 of that row. Displaying the shape shows the image dimensions.
image = skimage.io.imread(f"{DIR_INPUT}/train/{train_df.iloc[0,0]}.tiff")
image.shape

In [None]:
# Decode the first row's RLE string. rle2mask takes `shape` as
# (width, height), hence the swapped image axes; it transposes its result,
# so mask.shape comes out as (image.shape[0], image.shape[1]).
mask = rle2mask(train_df.iloc[0, 1], (image.shape[1], image.shape[0]))
mask.shape

In [None]:
# Crop window (row slice, column slice) applied to both the image and the
# mask, so the overlay cannot drift out of pixel alignment.
crop = (slice(20000, 25000), slice(12000, 17000))

fig, ax = plt.subplots(figsize=(20, 10))
ax.imshow(image[crop])
# Semi-transparent decoded mask drawn on top of the image crop.
ax.imshow(mask[crop], cmap='jet', alpha=0.5)
ax.set_title("Image crop with decoded mask overlay");

In [None]:
# Round-trip check: re-encoding the decoded mask must reproduce the
# original RLE string from the training table exactly.
assert mask2rle(mask) == train_df.iloc[0, 1]