In [2]:
import os
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import cv2

In [3]:
# Directory that receives one multiclass mask PNG per training image.
SAVE_DIR = '../data/hubmap/train_masks_multiclass'
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race
# of a separate os.path.isdir() test.
os.makedirs(SAVE_DIR, exist_ok=True)

In [4]:
# Class id assigned to the foreground pixels of each organ's mask
# (ids start at 1, so 0 stays free for pixels outside any run).
mask_map = {
    'kidney': 1,
    'prostate': 2,
    'largeintestine': 3,
    'spleen': 4,
    'lung': 5,
}

In [5]:
def enc2mask(mask_rle, shape):
    """Decode a run-length-encoded string into a binary uint8 mask.

    Parameters
    ----------
    mask_rle : str
        Whitespace-separated integers laid out as ``start length start length ...``.
        Starts are 1-based offsets into the flattened mask.
    shape : tuple of int
        Two-element shape the flat buffer is reshaped to *before* it is
        transposed.

    Returns
    -------
    numpy.ndarray
        uint8 array of shape ``(shape[1], shape[0])`` holding 1 inside every
        run and 0 elsewhere.
    """
    tokens = mask_rle.split()
    # Even positions are run starts (1-based, hence the shift), odd positions are run lengths.
    run_begin = np.asarray(tokens[0::2], dtype=int) - 1
    run_length = np.asarray(tokens[1::2], dtype=int)
    run_end = run_begin + run_length

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_begin, run_end):
        flat[begin:end] = 1

    # Runs index the buffer in row-major order of `shape`; the transpose flips
    # the axes, so the result has shape (shape[1], shape[0]).
    return flat.reshape(shape).T

In [6]:
# Keep only the columns needed to build the masks, indexed by image id.
# The column order is significant: rows are unpacked positionally further down.
df = (
    pd.read_csv('../data/hubmap/train.csv')
    .loc[:, ['id', 'organ', 'rle', 'img_height', 'img_width']]
    .set_index('id')
)
df.head()

Unnamed: 0_level_0,organ,rle,img_height,img_width
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10044,prostate,1459676 77 1462675 82 1465674 87 1468673 92 14...,3000,3000
10274,prostate,715707 2 718705 8 721703 11 724701 18 727692 3...,3000,3000
10392,spleen,1228631 20 1231629 24 1234624 40 1237623 47 12...,3000,3000
10488,lung,3446519 15 3449517 17 3452514 20 3455510 24 34...,3000,3000
10610,spleen,478925 68 481909 87 484893 105 487863 154 4908...,3000,3000


### Save

In [7]:
# Decode every RLE annotation, paint it with the organ's class id and store it
# as <id>.png inside SAVE_DIR.
for index, (organ, encs, h, w) in tqdm(df.iterrows(), total=len(df)):
    # enc2mask reshapes the flat buffer to the given shape and then transposes
    # it, so (width, height) must be passed to obtain a (height, width) mask.
    # For square images this is identical to passing (h, w).
    mask = enc2mask(encs, (w, h))
    # Binary {0, 1} mask -> {0, class id}.
    mask = mask * mask_map[organ]
    assert mask.shape == (h, w)

    out_path = os.path.join(SAVE_DIR, str(index) + '.png')
    # cv2.imwrite reports failure through its return value instead of raising.
    if not cv2.imwrite(out_path, mask):
        raise IOError(f"could not write mask to {out_path}")

  0%|          | 0/351 [00:00<?, ?it/s]

### Check data

In [8]:
from PIL import Image
import matplotlib.pyplot as plt

# Set to True to additionally load each source image and overlay its mask
# for a visual check (slow: reads every TIFF).
SHOW_OVERLAY = False

# Re-read every saved mask and verify it against the metadata row it came from.
for index, (organ, encs, h, w) in tqdm(df.iterrows(), total=len(df)):
    # The context manager releases the file handle once the pixels are copied.
    with Image.open(os.path.join(SAVE_DIR, f"{index}.png")) as mask_file:
        mask = np.array(mask_file)

    labels = np.unique(mask)
    # A mask may only contain 0 and the class id of its own organ.
    assert mask.shape == (h, w), f"mask {index} has shape {mask.shape}, expected {(h, w)}"
    assert set(labels.tolist()) <= {0, mask_map[organ]}, f"unexpected labels {labels} in mask {index}"
    print(organ, labels)

    if SHOW_OVERLAY:
        with Image.open(os.path.join('../data/hubmap/train_images', f"{index}.tiff")) as image_file:
            img = np.array(image_file)
        plt.imshow(img)
        plt.imshow(mask, alpha=0.2)
        plt.show()

  0%|          | 0/351 [00:00<?, ?it/s]

prostate [0 2]
prostate [0 2]
spleen [0 4]
lung [0 5]
spleen [0 4]
kidney [0 1]
largeintestine [0 3]
prostate [0 2]
spleen [0 4]
largeintestine [0 3]
prostate [0 2]
prostate [0 2]
spleen [0 4]
lung [0 5]
spleen [0 4]
spleen [0 4]
kidney [0 1]
kidney [0 1]
lung [0 5]
spleen [0 4]
largeintestine [0 3]
largeintestine [0 3]
prostate [0 2]
largeintestine [0 3]
spleen [0 4]
largeintestine [0 3]
lung [0 5]
prostate [0 2]
prostate [0 2]
prostate [0 2]
lung [0 5]
spleen [0 4]
largeintestine [0 3]
lung [0 5]
spleen [0 4]
lung [0 5]
kidney [0 1]
lung [0 5]
kidney [0 1]
lung [0 5]
kidney [0 1]
largeintestine [0 3]
prostate [0 2]
largeintestine [0 3]
prostate [0 2]
largeintestine [0 3]
lung [0 5]
prostate [0 2]
spleen [0 4]
prostate [0 2]
largeintestine [0 3]
kidney [0 1]
prostate [0 2]
kidney [0 1]
lung [0 5]
lung [0 5]
kidney [0 1]
lung [0 5]
largeintestine [0 3]
spleen [0 4]
prostate [0 2]
prostate [0 2]
kidney [0 1]
largeintestine [0 3]
kidney [0 1]
prostate [0 2]
kidney [0 1]
kidney [0 1]
kidn