In [None]:
import pandas as pd
import imagesize
import numpy as np

In [None]:
def mask2rle_old(img):
    '''
    Original (memory hungry) run-length encoder, kept for comparison.
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formatted (start length start length ...),
    with 1-based starts counted over the transposed, flattened array
    Source: https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode
    '''
    # Flatten column by column (same element order as img.T.flatten()).
    column_major = img.flatten(order='F')
    # Zero sentinels on both ends so runs touching the borders still produce
    # a transition. Concatenating with Python lists is the memory-hungry step.
    padded = np.concatenate([[0], column_major, [0]])
    # 1-based positions where the value changes: run start, run end, start, ...
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every "end" position into a run length (end - start).
    edges[1::2] -= edges[::2]
    return ' '.join([str(value) for value in edges])
 
def rle2mask(mask_rle, shape=(1600,256)):
    '''
    Decode a run-length encoded string into a binary mask.
    mask_rle: run-length as string formatted (start length start length ...),
              starts are 1-based and counted column by column.
              None, NaN and empty/blank strings decode to an all-background mask.
    shape: (width,height) of array to return
    Returns numpy uint8 array of shape (height, width), 1 - mask, 0 - background
    Raises ValueError if the string does not consist of complete (start, length) pairs
    Source: https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode
    '''
    # A missing encoding read through pandas arrives as NaN (a float), not as
    # a string; treat it (and None) as "no mask" instead of crashing on .split().
    if mask_rle is None or (isinstance(mask_rle, float) and mask_rle != mask_rle):
        mask_rle = ''
    tokens = mask_rle.split()
    # An odd number of tokens would otherwise be broadcast silently
    # (e.g. one length applied to two starts) and produce a wrong mask.
    if len(tokens) % 2:
        raise ValueError(
            'RLE string must contain (start, length) pairs, got %d values' % len(tokens)
        )
    starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    # The flat buffer is laid out as (width, height); transpose to (height, width).
    return img.reshape(shape).T

def mask2rle(img):
    '''
    Memory-friendly run-length encoder, adapted from @paulorzp's version
    --
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formatted (start length start length ...),
    with 1-based starts counted over the transposed, flattened array
    Source: https://www.kaggle.com/xhlulu/efficient-mask2rle
    '''
    # Column-by-column flattening, identical element order to img.T.flatten().
    flat = img.flatten(order='F')
    # np.pad keeps the input dtype while adding one zero sentinel on each side.
    flat = np.pad(flat, 1)
    # 1-based positions of every value change: start, end, start, end, ...
    changes = np.flatnonzero(flat[1:] != flat[:-1]) + 1
    # Replace each end position by the run length.
    changes[1::2] -= changes[::2]
    return ' '.join(map(str, changes))
 

First, let's get the RLE string of one training image from `train.csv`, along with that image's dimensions:

In [None]:
# Load the training table that holds the RLE encodings.
train = pd.read_csv('/kaggle/input/hubmap-kidney-segmentation/train.csv')

# Unpack the row labelled 3 into the image id and its RLE string.
# NOTE(review): assumes train.csv has exactly two columns, in that order - confirm.
file, enc = train.loc[3]

path = f"/kaggle/input/hubmap-kidney-segmentation/train/{file}.tiff"

# The result is unpacked as (width, height); these feed rle2mask's `shape` below.
width, height = imagesize.get(path)
print(width, height)

Now, let's convert it to a mask:

In [None]:
# rle2mask takes the shape as (width, height) and returns the transposed
# array, so mask.shape is displayed as (height, width).
mask = rle2mask(enc, (width, height))
mask.shape

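The classical way (from this [awesome kernel](https://www.kaggle.com/paulorzp/run-length-encode-and-decode)) uses too much memory and might crash the kernel: concatenating the `uint8` pixels with plain Python lists upcasts the whole flattened mask to NumPy's default integer type (usually 64-bit), which takes roughly eight times the memory.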
```python
re_enc = mask2rle_old(mask)
```

Instead, this is the efficient implementation:

In [None]:
# Re-encode the decoded mask with the np.pad-based encoder.
re_enc = mask2rle(mask)

In [None]:
# Round-trip check: True when the re-encoded string equals the original RLE string.
re_enc == enc