In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import cv2
import tifffile

In [None]:
# Dataset root, relative to the notebook's working directory.
BASE_PATH = "../input/hubmap-kidney-segmentation/"
# Sub-directory holding the training data under the dataset root.
TRAIN_PATH = os.path.join(BASE_PATH, "train")

# List the dataset root to see which files and folders are available.
print(os.listdir(BASE_PATH))


In [None]:
# Training table; read_image looks up each image's "encoding" by "id" in it.
df_train = pd.read_csv(os.path.join(BASE_PATH, "train.csv"))
df_train

In [None]:
# Sample submission table; its row count is reported as the test-image count.
df_sub = pd.read_csv(os.path.join(BASE_PATH, "sample_submission.csv"))
df_sub

In [None]:
# Row counts of the two tables, reported here as the number of images.
print(f"Number of train images: {df_train.shape[0]}")
print(f"Number of test images: {df_sub.shape[0]}")

In [None]:
# Dataset information CSV shipped with the data; displayed for inspection.
df_info = pd.read_csv(os.path.join(BASE_PATH, "HuBMAP-20-dataset_information.csv"))
df_info

In [None]:
def rle2mask(mask_rle, shape):
    '''
    Decode a run-length encoded mask into a 2-D binary array.

    mask_rle: run-length string formatted as "start length start length ...",
        where each start is a 1-based index into the flattened mask.
        None, NaN or an empty string means "no mask" and yields all zeros.
    shape: (width, height) of the image the mask belongs to.

    Returns a uint8 numpy array of shape (height, width):
    1 - mask, 0 - background.

    Raises ValueError if the string holds an odd number of tokens.
    '''
    # pandas represents an empty CSV cell as float NaN, which has no .split().
    missing = mask_rle is None or (
        isinstance(mask_rle, float) and np.isnan(mask_rle)
    )
    tokens = [] if missing else mask_rle.split()
    if len(tokens) % 2:
        # Without this check a dangling start would be broadcast against the
        # shorter lengths array and silently produce wrong runs.
        raise ValueError(
            f"RLE string must contain start/length pairs, got {len(tokens)} tokens"
        )

    starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    # The flat buffer is filled as (width, height); transposing gives the
    # (height, width) orientation of the returned mask.
    return img.reshape(shape).T

def read_image(image_id, scale=None, verbose=1):
    """Load one training image together with its decoded mask.

    image_id: id of the image; used both for the TIFF file name under
        BASE_PATH/train and for the row lookup in df_train.
    scale: optional integer downscale factor. When truthy, image and mask
        are both resized to (width // scale, height // scale).
    verbose: when truthy, print the array shapes.

    Returns (image, mask).

    Raises IndexError if image_id has no row in df_train.
    """
    image = tifffile.imread(
        os.path.join(BASE_PATH, f"train/{image_id}.tiff")
    )
    if len(image.shape) == 5:
        # Drop singleton axes and move the leading axis last
        # (presumably (1, 1, C, H, W) -> (H, W, C); confirm against the data).
        image = image.squeeze().transpose(1, 2, 0)

    encodings = df_train.loc[df_train["id"] == image_id, "encoding"].values
    if len(encodings) == 0:
        raise IndexError(f"No row with id {image_id!r} in df_train")
    # rle2mask takes (width, height), hence the swapped shape entries.
    mask = rle2mask(encodings[0], (image.shape[1], image.shape[0]))

    if verbose:
        print(f"[{image_id}] Image shape: {image.shape}")
        print(f"[{image_id}] Mask shape: {mask.shape}")

    if scale:
        # cv2.resize expects the target size as (width, height).
        new_size = (image.shape[1] // scale, image.shape[0] // scale)
        image = cv2.resize(image, new_size)
        # Nearest-neighbour keeps mask values as original labels instead of
        # blending neighbouring pixels the way the default interpolation does.
        mask = cv2.resize(mask, new_size, interpolation=cv2.INTER_NEAREST)

        if verbose:
            print(f"[{image_id}] Resized Image shape: {image.shape}")
            print(f"[{image_id}] Resized Mask shape: {mask.shape}")

    return image, mask
def read_test_image(image_id, scale=None, verbose=1):
    """Load one test image, optionally downscaled.

    image_id: id of the image; used for the TIFF file name under
        BASE_PATH/test.
    scale: optional integer downscale factor. When truthy, the image is
        resized to (width // scale, height // scale).
    verbose: when truthy, print the array shapes.

    Returns the image array.
    """
    tiff_path = os.path.join(BASE_PATH, f"test/{image_id}.tiff")
    image = tifffile.imread(tiff_path)
    if image.ndim == 5:
        # Drop singleton axes and move the leading axis last.
        image = image.squeeze().transpose(1, 2, 0)

    if verbose:
        print(f"[{image_id}] Image shape: {image.shape}")

    if not scale:
        return image

    height, width = image.shape[0], image.shape[1]
    # cv2.resize expects the target size as (width, height).
    image = cv2.resize(image, (width // scale, height // scale))
    if verbose:
        print(f"[{image_id}] Resized Image shape: {image.shape}")

    return image

In [None]:
def plot_image_and_mask(image, mask, image_id):
    """Show image, image with mask overlay, and mask alone in one row.

    image_id is only used in the panel titles.
    """
    # Each panel: (title, layers drawn bottom-to-top as (data, imshow kwargs)).
    panels = (
        (f"Image {image_id}", [(image, {})]),
        (
            f"Image {image_id} + mask",
            [(image, {}), (mask, {"cmap": "hot", "alpha": 0.5})],
        ),
        ("Mask", [(mask, {"cmap": "hot"})]),
    )

    plt.figure(figsize=(16, 10))
    for position, (title, layers) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        for data, imshow_kwargs in layers:
            plt.imshow(data, **imshow_kwargs)
        plt.title(title, fontsize=18)

    plt.show()
    
def plot_grid_image_with_mask(image, mask):
    """Overlay the mask on the image, both cropped to the centred square.

    The square's side is the shorter of the image's first two dimensions.
    """
    plt.figure(figsize=(16, 16))

    n_rows, n_cols = image.shape[0], image.shape[1]
    side = min(n_rows, n_cols)
    row_start = (n_rows - side) // 2
    col_start = (n_cols - side) // 2
    square = (
        slice(row_start, row_start + side),
        slice(col_start, col_start + side),
    )

    plt.imshow(image[square])
    plt.imshow(mask[square], cmap="hot", alpha=0.5)
    plt.axis("off")

    plt.show()
    
def plot_slice_image_and_mask(image, mask, start_h, end_h, start_w, end_w):
    """Plot a rectangular window of the image and mask in three panels.

    The window covers rows start_h:end_h and columns start_w:end_w.
    Panels, left to right: image crop, crop with mask overlay, mask crop.
    """
    plt.figure(figsize=(16, 5))

    rows, cols = slice(start_h, end_h), slice(start_w, end_w)
    sub_image = image[rows, cols, :]
    sub_mask = mask[rows, cols]

    # Layers per panel, drawn bottom-to-top as (data, imshow kwargs).
    panels = (
        [(sub_image, {})],
        [(sub_image, {}), (sub_mask, {"cmap": "hot", "alpha": 0.5})],
        [(sub_mask, {"cmap": "hot"})],
    )
    for position, layers in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        for data, imshow_kwargs in layers:
            plt.imshow(data, **imshow_kwargs)
        plt.axis("off")

    plt.show()

In [None]:
# Training images: load every image and mask downscaled by a factor of 20
# so they can all be shown in one overview figure.
small_ids = [
    "0486052bb", "095bf7a1f", "1e2425f28", "2f6ecfcdf",
    "54f2eec69", "aaa6a05cc", "cb2d976f4", "e79de561c",
]  # ids of the 8 training-set images
small_images = []
small_masks = []

for small_id in small_ids:
    tmp_image, tmp_mask = read_image(small_id, scale=20, verbose=0)
    small_images.append(tmp_image)
    small_masks.append(tmp_mask)

In [None]:
# Overview grid of the downscaled training images, one panel per id.
plt.figure(figsize=(16, 16))
for ind, (tmp_id, tmp_image) in enumerate(zip(small_ids, small_images)):
    plt.subplot(3, 3, ind + 1)
    plt.imshow(tmp_image)
    plt.title(tmp_id)  # label each panel so it can be matched to its id
    plt.axis("off")

In [None]:
# Training images with their masks overlaid, one panel per id.
plt.figure(figsize=(16, 16))
for ind, (tmp_id, tmp_image, tmp_mask) in enumerate(zip(small_ids, small_images, small_masks)):
    plt.subplot(3, 3, ind + 1)
    plt.imshow(tmp_image)
    plt.imshow(tmp_mask, cmap="hot", alpha=0.5)
    plt.title(tmp_id)  # label each panel so it can be matched to its id
    plt.axis("off")

In [None]:
# Test images: load every image downscaled by a factor of 20.
# NOTE: this rebinds small_ids and small_images, replacing the training
# lists built earlier; re-run the training load cell before re-plotting
# the training grids.
small_ids = [
    "26dc41664", "afa5e8098", "b2dc8411c", "b9a3865fc", "c68fe75ea",
]
small_images = []

for small_id in small_ids:
    tmp_image = read_test_image(small_id, scale=20, verbose=0)
    small_images.append(tmp_image)

In [None]:
# Overview grid of the downscaled test images, one panel per id.
plt.figure(figsize=(16, 11))
for ind, (tmp_id, tmp_image) in enumerate(zip(small_ids, small_images)):
    plt.subplot(2, 3, ind + 1)
    plt.imshow(tmp_image)
    plt.title(tmp_id)  # label each panel so it can be matched to its id
    plt.axis("off")

In [None]:
# Load at half resolution; slice coordinates used below refer to this size.
image_id = "0486052bb"
image, mask = read_image(image_id, scale=2)

In [None]:
plot_image_and_mask(image=image, mask=mask, image_id=image_id)

In [None]:
# Three progressively tighter windows, each nested inside the previous one.
plot_slice_image_and_mask(image, mask, start_h=5000, end_h=7500, start_w=2500, end_w=5000)
plot_slice_image_and_mask(image, mask, start_h=5250, end_h=5720, start_w=3500, end_w=4000)
plot_slice_image_and_mask(image, mask, start_h=5375, end_h=5575, start_w=3650, end_w=3850)

In [None]:
plot_grid_image_with_mask(image=image, mask=mask)

In [None]:
# Load at half resolution; slice coordinates used below refer to this size.
image_id = "095bf7a1f"
image, mask = read_image(image_id=image_id, scale=2)

In [None]:
plot_image_and_mask(image=image, mask=mask, image_id=image_id)


In [None]:
plot_slice_image_and_mask(image, mask, start_h=7500, end_h=10000, start_w=10000, end_w=12500)

In [None]:
plot_grid_image_with_mask(image=image, mask=mask)

In [None]:

# Load at half resolution.
image_id = "1e2425f28"
image, mask = read_image(image_id=image_id, scale=2)

In [None]:
plot_image_and_mask(image=image, mask=mask, image_id=image_id)


In [None]:
# Load at half resolution; slice coordinates used below refer to this size.
image_id = "2f6ecfcdf"
image, mask = read_image(image_id=image_id, scale=2)

In [None]:
plot_image_and_mask(image=image, mask=mask, image_id=image_id)


In [None]:
plot_slice_image_and_mask(image, mask, start_h=10000, end_h=12000, start_w=8000, end_w=10000)

In [None]:
# No scale given: loaded at full resolution, so the slice coordinates used
# below are in original pixels.
image_id = "aaa6a05cc"
image, mask = read_image(image_id=image_id)

In [None]:
plot_image_and_mask(image=image, mask=mask, image_id=image_id)

In [None]:
plot_slice_image_and_mask(image, mask, start_h=6500, end_h=8500, start_w=7000, end_w=9000)

In [None]:
# No scale given: loaded at full resolution, so the slice coordinates used
# below are in original pixels.
image_id = "e79de561c"
image, mask = read_image(image_id=image_id)

In [None]:
plot_image_and_mask(image=image, mask=mask, image_id=image_id)

In [None]:
plot_slice_image_and_mask(image, mask, start_h=4000, end_h=6000, start_w=2000, end_w=4000)