![](https://upload.wikimedia.org/wikipedia/commons/d/d6/Blausen_0592_KidneyAnatomy_01.png)
# **Hacking the kidney**

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pylab as pl
import seaborn as sns
import cv2
import json
import tifffile

#color
from colorama import Fore, Back, Style

In [None]:
# Root folder of the competition dataset and its "train" subfolder.
BASE_PATH = "../input/hubmap-kidney-segmentation"
TRAIN_PATH = os.path.join(BASE_PATH, "train")

# List the dataset root to see which files and folders are available.
print(os.listdir(BASE_PATH))

In [None]:
# Load train.csv; later cells select rows by the "id" column and read the
# run-length-encoded mask from the "encoding" column.
Datatrain = pd.read_csv(
    os.path.join(BASE_PATH, "train.csv")
)
Datatrain  # bare expression so the notebook renders the table

In [None]:
# Load the sample submission; its row count is used below as the number of
# test images.
DataSub = pd.read_csv(
    os.path.join(BASE_PATH, "sample_submission.csv"))
DataSub  # bare expression so the notebook renders the table

In [None]:
# One row per image: train.csv rows are the train images, sample-submission
# rows are the test images.
print(f"Number of train images: {Datatrain.shape[0]}")
print(f"Number of test images: {DataSub.shape[0]}")

In [None]:
# Load the per-image metadata table shipped with the dataset.
DataInfo = pd.read_csv(
    os.path.join(BASE_PATH, "HuBMAP-20-dataset_information.csv")
)
DataInfo.sample(3)  # show three random rows

In [None]:
# Count the missing values in every metadata column.
DataInfo.isnull().sum()

In [None]:
# Fill the missing values of these three columns with each column's mode
# (its most frequent value).
cols = ['weight_kilograms', 'height_centimeters', 'bmi_kg/m^2']

# mode() may return several rows when values tie; row 0 holds the first mode
# of every column. Compute it once and reuse it for all three columns.
column_modes = DataInfo.mode().iloc[0]

# fillna with a Series matches on column labels, so each column receives
# its own mode.
DataInfo[cols] = DataInfo[cols].fillna(column_modes)

In [None]:
# Re-count missing values per column after the mode fill in the previous cell.
DataInfo.isnull().sum()

In [None]:
# (rows, columns) of the metadata table.
DataInfo.shape

In [None]:
# Summary statistics of each feature, transposed so every feature is one row.
DataInfo.describe().T

In [None]:
# First three rows of the features whose dtype is object.
DataInfo.select_dtypes(include = ['object']).head(3)

In [None]:
# First three rows of the features whose dtype is int64 or float64.
DataInfo.select_dtypes(include = ['int64','float64']).head(3)

In [None]:
# Names of the non-numeric columns, kept in the DataFrame's own column order
# so the list is the same on every run (a set difference has no stable order).
# Uses the public select_dtypes API; bool is included alongside 'number' so
# that boolean columns keep counting as numeric here.
_numeric_column_names = set(DataInfo.select_dtypes(include=['number', 'bool']).columns)
categorical_feature_columns = [
    column for column in DataInfo.columns if column not in _numeric_column_names
]
categorical_feature_columns

In [None]:
# Names of the numeric columns, in DataFrame column order. Uses the public
# select_dtypes API; bool is included alongside 'number' so that boolean
# columns keep counting as numeric here.
numerical_feature_columns = list(
    DataInfo.select_dtypes(include=['number', 'bool']).columns
)
numerical_feature_columns

In [None]:
# Laterality: print how many rows fall in each laterality value, then draw
# the same counts as a bar chart.
print(DataInfo.laterality.value_counts())
sns.countplot(data=DataInfo, x='laterality', color ='Red')
plt.ylabel('Count')
plt.show()

In [None]:
# Laterality vs sex: one bar per (laterality, sex) pair, labelled with its count.
pair_counts = DataInfo.groupby(['laterality', 'sex']).size()
ax = pair_counts.plot(kind='bar', color='Green')
ax.set_title("Total Laterality vs Sex")
ax.set_xlabel("(laterality, sex)")
ax.set_ylabel("Count")
for p in ax.patches:
    bar_height = p.get_height()
    # Put the thousands-separated count just above the top of the bar.
    ax.annotate(f"{int(bar_height):,d}", (p.get_x(), bar_height * 1.01))

In [None]:
# Cortex percentage vs sex: one bar per (percent_cortex, sex) pair, labelled
# with its count.
pair_counts = DataInfo.groupby(['percent_cortex', 'sex']).size()
ax = pair_counts.plot(kind='bar', color='Yellow')
ax.set_title("Percentage Cortex Vs Sex")
ax.set_xlabel("(percent_cortex, sex)")
ax.set_ylabel("Count")
for p in ax.patches:
    bar_height = p.get_height()
    # Put the thousands-separated count just above the top of the bar.
    ax.annotate(f"{int(bar_height):,d}", (p.get_x(), bar_height * 1.01))

In [None]:
# Medulla percentage vs sex: one bar per (percent_medulla, sex) pair, labelled
# with its count.
pair_counts = DataInfo.groupby(['percent_medulla', 'sex']).size()
ax = pair_counts.plot(kind='bar', color='ORANGE')
ax.set_title("Percentage of Medulla Vs Sex")
ax.set_xlabel("(percent_medulla, sex)")
ax.set_ylabel("Count")
for p in ax.patches:
    bar_height = p.get_height()
    # Put the thousands-separated count just above the top of the bar.
    ax.annotate(f"{int(bar_height):,d}", (p.get_x(), bar_height * 1.01))

In [None]:
# Figure size and font sizes for the plots that follow.
pl.rcParams.update({
    'figure.figsize': (14, 10),
    'ytick.labelsize': 12,
    'xtick.labelsize': 11,
    'axes.labelsize': 23,
    'legend.fontsize': 20,
})
sns.set_style('ticks')

# Four colours from the "Set1" palette (not referenced again in the cells shown here).
c1, c2, c3, c4 = sns.color_palette("Set1", 4)

# One histogram per column that DataFrame.hist can plot, with trimmed, offset spines.
DataInfo.hist()
sns.despine(offset=10, trim=True)

In [None]:
# 2x2 grid of weight box plots: rows are the x-axis feature (cortex %, then
# medulla %), columns are the kidney side (Right, then Left).
fig, axs = plt.subplots(2, 2, figsize=(15, 15))

# Filter each laterality subset once and reuse it for both rows.
rows_by_side = {
    side: DataInfo.loc[(DataInfo.laterality == side), :]
    for side in ('Right', 'Left')
}

for row_idx, feature in enumerate(('percent_cortex', 'percent_medulla')):
    for col_idx, side in enumerate(('Right', 'Left')):
        panel = sns.boxplot(
            x=feature,
            y='weight_kilograms',
            data=rows_by_side[side],
            ax=axs[row_idx][col_idx],
        )
        # The title names the side only; the x-axis label shows the feature.
        panel.set_title(side + " Laterality")
plt.show()

In [None]:
# Folder the full-size training images are read from, folder the resized
# copies are written to, and the ids of the sample images to process.
input_dir = '../input/hubmap-kidney-segmentation/train'
output_dir = '../output'
image_list = ['0486052bb', '2f6ecfcdf', 'aaa6a05cc']

# exist_ok=True lets this cell be re-run without raising FileExistsError once
# the folder has been created.
os.makedirs(output_dir, exist_ok=True)

In [None]:
def resize_im(im_name, scale_percent):
    """Save a down-scaled copy of one training image.

    Reads ``<input_dir>/<im_name>.tiff``, scales both sides to
    ``scale_percent`` percent of their length, prints the size change and
    writes the result to ``<output_dir>/r_<im_name>.tiff``.

    Args:
        im_name: image id, i.e. the file name without the ``.tiff`` suffix.
        scale_percent: target size as a percentage of the original size.
    """
    source = tifffile.imread(os.path.join(input_dir, im_name + '.tiff'))
    # Assumes axis 0 is the image height and axis 1 its width.
    n_rows, n_cols = source.shape[0], source.shape[1]
    new_size = (int(n_cols * scale_percent / 100),
                int(n_rows * scale_percent / 100))

    report = 'File name: {}, original size: {}, resized to: {}'
    print(report.format(im_name, (n_rows, n_cols), new_size))

    # cv2.resize takes the target size as (width, height).
    shrunk = cv2.resize(source, new_size, interpolation=cv2.INTER_AREA)
    tifffile.imwrite(os.path.join(output_dir, 'r_' + im_name + '.tiff'), shrunk)

In [None]:
# Write a copy of every sample image scaled to 5% of its original size.
for im in image_list:
    resize_im(im, 5)

In [None]:
def rle2mask(mask_rle, shape):
    """Decode a run-length-encoded string into a binary mask.

    Args:
        mask_rle: whitespace-separated "start length start length ..."
            string; starts are 1-based positions in the flattened mask,
            which is numbered down each column first.
        shape: (width, height) of the mask.

    Returns:
        uint8 array of shape (height, width) with 1 inside the encoded runs
        and 0 everywhere else.
    """
    tokens = mask_rle.split()
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat[begin:end] = 1

    # Reshape as (width, height) and transpose to get (height, width).
    return flat.reshape(shape).T

In [None]:
def resize_mask(im_name, scale_percent):
    """Save a down-scaled copy of the mask belonging to one training image.

    Decodes the image's run-length encoding from ``Datatrain``, scales the
    resulting 0/255 mask to ``scale_percent`` percent of the image size,
    prints the size change and writes the result to
    ``<output_dir>/r_<im_name>_m.tiff``.

    Args:
        im_name: image id, i.e. the file name without the ``.tiff`` suffix;
            it must appear in the "id" column of ``Datatrain``.
        scale_percent: target size as a percentage of the original size.

    Raises:
        IndexError: if ``im_name`` has no row in ``Datatrain``.
    """
    # The image itself is only needed for its dimensions.
    im_read = tifffile.imread(os.path.join(input_dir, im_name+'.tiff'))

    # Single lookup of the RLE string for this image id.
    mask_rle = Datatrain[Datatrain["id"] == im_name]["encoding"].values[0]
    # rle2mask takes (width, height); multiply so mask pixels become 255.
    mask = rle2mask(mask_rle, (im_read.shape[1], im_read.shape[0]))*255

    width = int(im_read.shape[1] * scale_percent / 100)
    height = int(im_read.shape[0] * scale_percent / 100)
    dim = (width, height)
    print('File name: {}, original size: {}, resized to: {}'.format(im_name, 
                                                                (im_read.shape[0], im_read.shape[1]), 
                                                                (width, height)))
    resized = cv2.resize(mask, dim, interpolation=cv2.INTER_AREA)
    image_path = os.path.join(output_dir, ('r_' + im_name+'_m.tiff'))
    tifffile.imwrite(image_path, resized)

In [None]:
# Write a copy of every sample image's mask scaled to 5% of its original size.
for im in image_list:
    resize_mask(im, 5)

In [None]:
# List the resized files that were written to the output folder.
os.listdir(output_dir)

In [None]:
def show_image(image_id):
    """Plot a resized image and, below it, the same image with its mask added.

    Reads ``r_<image_id>.tiff`` and ``r_<image_id>_m.tiff`` from
    ``output_dir``. The lower panel shows the first channel of the image
    plus half of the mask values.

    Args:
        image_id: image id used in the resized file names.
    """
    fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(16, 32))

    image = tifffile.imread(os.path.join(output_dir, 'r_{}.tiff'.format(image_id)))
    mask = tifffile.imread(os.path.join(output_dir, 'r_{}_m.tiff'.format(image_id)))

    # Use the mask as is when it is 2-D, otherwise take its first channel.
    mask_plane = mask if mask.ndim == 2 else mask[:, :, 0]
    hybr = image[:, :, 0] + mask_plane / 2

    panels = zip(ax, (image, hybr), ('Real Image', 'Masks'))
    for axis, picture, title in panels:
        axis.imshow(picture)
        axis.axis('off')
        axis.set_title(title)
    plt.show()

In [None]:
# Render figures inline in the notebook, then show the first sample image.
%matplotlib inline
show_image(image_list[0])

In [None]:
# Render figures inline in the notebook, then show the second sample image.
%matplotlib inline
show_image(image_list[1])

In [None]:
# Render figures inline in the notebook, then show the third sample image.
%matplotlib inline
show_image(image_list[2])

In [None]:
def rle2mask(mask_rle, shape):
    # https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode
    # NOTE: identical redefinition of the rle2mask from an earlier cell.
    """Decode a run-length-encoded string into a binary mask.

    Args:
        mask_rle: whitespace-separated "start length start length ..."
            string; starts are 1-based positions in the flattened mask,
            which is numbered down each column first.
        shape: (width, height) of the mask.

    Returns:
        uint8 array of shape (height, width) with 1 inside the encoded runs
        and 0 everywhere else.
    """
    tokens = mask_rle.split()
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat[begin:end] = 1

    # Reshape as (width, height) and transpose to get (height, width).
    return flat.reshape(shape).T

def mask_to_contours(image, mask_layer, thickness):
    """Find the contours of a mask with OpenCV and draw them on the image.

    Args:
        image: array to draw on; cv2.drawContours draws into it in place.
        mask_layer: mask array; cast to uint8 before contour extraction.
        thickness: contour line width passed to cv2.drawContours (per the
            OpenCV docs a negative value fills the contour interiors).

    Returns:
        The image with all contours drawn in colour (255, 0, 0).
    """
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; the contours are always the
    # second-to-last element.
    contours = cv2.findContours(
        mask_layer.astype('uint8'), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    return cv2.drawContours(image, contours, -1, color=(255, 0, 0), thickness=thickness)

In [None]:
# Data paths
img_path = "../input/hubmap-kidney-segmentation/train/0486052bb.tiff"
train_path = "../input/hubmap-kidney-segmentation/train.csv"
img_id = img_path.split("/")[-1].split(".")[0]
struct_path = "../input/hubmap-kidney-segmentation/train/" + img_id + "-anatomical-structure.json"
glm_path = "../input/hubmap-kidney-segmentation/train/" + img_id + ".json"

# Read the image and generate the binary mask from the run-length-encoded data.
org_img = cv2.imread(img_path)
if org_img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise OSError("cv2.imread could not load " + img_path)
# OpenCV loads pixels in BGR order while plt.imshow (used to display `img`)
# expects RGB. Convert once so the tissue colours are shown correctly and the
# colour tuples below are plain RGB.
org_img = cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB)
img = np.copy(org_img)
df_train = pd.read_csv(train_path)
mask = rle2mask(
    df_train[df_train["id"] == img_id]["encoding"].values[0],
    (img.shape[1], img.shape[0])
)

# Merge the image with the glomeruli mask (thickness=-1 fills the regions, in red).
img = mask_to_contours(img, mask, thickness=-1)

# Extract the contours of the anatomical structures from the JSON file;
# the `with` block closes the file handle as soon as it has been parsed.
with open(struct_path) as file:
    structures_json = json.load(file)

# Merge the image with the contours of the anatomical structures.
for structure in structures_json:
    my_class = structure['properties']['classification']['name']
    # Medulla is delimited by a green contour; every other structure
    # (the Cortex) by a blue one.
    contour_color = (0, 255, 0) if my_class == 'Medulla' else (0, 0, 255)
    A = np.asarray(structure['geometry']['coordinates'])
    # Presumably coordinates are shaped (1, N, 2); swapping the first two axes
    # gives the (N, 1, 2) point layout OpenCV uses for a contour. TODO confirm.
    A = [np.swapaxes(A, 0, 1)]
    img = cv2.drawContours(img, A, -1, color=contour_color, thickness=50)

In [None]:
# Resize the current figure and display the annotated image without a grid.
fig = plt.gcf()
fig.set_size_inches(7,6)
plt.imshow(img)
plt.grid(False)
# Colour legend for the overlay; the colorama codes tint the key words and
# Style.RESET_ALL switches back to the default text style after each one.
print("Notice that the",Fore.GREEN, "Medulla",Style.RESET_ALL ,"is delimited by the", Fore.GREEN,"green",Style.RESET_ALL,"line, and the" ,Fore.BLUE, "Cortex",Style.RESET_ALL , "is delimited by a", Fore.BLUE,"blue",Style.RESET_ALL,"line")
print("The",Fore.RED, "red",Style.RESET_ALL ,"dots correspond to the",Fore.RED, "glomerulli",Style.RESET_ALL )
plt.show()

In [None]:
# Re-save the provided sample submission, unchanged, as sample_submission.csv
# in the working directory.
# NOTE(review): `train` holds the sample submission here, not training data.
train = pd.read_csv('../input/hubmap-kidney-segmentation/sample_submission.csv')
data_to_submit = pd.DataFrame(train)
data_to_submit.to_csv('sample_submission.csv', index = False)