# Kidney Segmentation Starter Notebook

# Anatomies

## Kidney Anatomy

<img align='left' width=600 height=800 src='https://image.noelshack.com/fichiers/2020/47/3/1605712133-cross-section-kidney-blood-ves.png'>

## Glomerulus anatomy

<img align='left' width=600 height=800 src='https://upload.wikimedia.org/wikipedia/commons/thumb/6/69/Renal_corpuscle-en.svg/600px-Renal_corpuscle-en.svg.png'>

For more medical background, see this excellent [notebook by Raphael Bourgade](https://www.kaggle.com/raphaelbourgade/all-the-medical-data-you-need-to-know).

In [None]:
import numpy as np
import pandas as pd

from tqdm import tqdm

import tifffile as tiff
import matplotlib.pyplot as plt

from numpy import savez_compressed
from numpy import load

import cv2

# Prepare Pandas Dataframes

In [None]:
# Root folder of the competition data; the training files live in 'train/'.
main_path = '../input/hubmap-kidney-segmentation/'
train_path = f'{main_path}train/'

In [None]:
# train data (image ids and their mask encodings)
df = pd.read_csv(main_path + 'train.csv')

# patient data
# main_path already ends with '/', so no extra slash before the file name
df_info = pd.read_csv(main_path + 'HuBMAP-20-dataset_information.csv')

# intersection of train data with patient data:
# keep the patient rows whose image file is '<train id>.tiff'
train_ds = df_info[df_info.image_file.isin(df.id + '.tiff')]

In [None]:
# Mask encodings from train.csv, indexed by image id.
df_masks = pd.read_csv(
    filepath_or_buffer='../input/hubmap-kidney-segmentation/train.csv',
).set_index(keys='id')

# Image Class

Images are split into 256x256 tiles, and so are the masks.<br>
For each patient, the kidney image tiles and mask tiles are saved to 'img_&lt;id&gt;.npz' and 'mask_&lt;id&gt;.npz'.<br>
That way the tiles do not have to stay in memory, which keeps Kaggle RAM usage under the limit.<br>
Thanks to the [iafoss notebook](https://www.kaggle.com/iafoss/256x256-images), a notebook that shows how to split the images into 256x256 tiles.

In [None]:
class Image():
    '''
        Tiled image and mask of one kidney sample.

        On construction the TIFF image is read, its mask is decoded from
        the run-length encoding in train.csv, both are padded, downscaled
        and cut into square tiles, and the tiles are written to
        'img_<id>.npz' and 'mask_<id>.npz' in the current working directory.
        Use get_images() / get_masks() to read the tiles back.
    '''
    def __init__(self, ds, df_mask, path):
        '''
            ds      : row-like object with 'image_file', 'width_pixels'
                      and 'height_pixels' entries
            df_mask : masks table; only stored on the instance, the
                      encodings actually used are re-read from train.csv
            path    : folder holding the .tiff files; train.csv is looked
                      up in the part of this path before 'train/'
        '''
        self.image_file_name = ds['image_file']
        self.width_pixels = ds['width_pixels']
        self.height_pixels = ds['height_pixels']
        self.path = path

        # image id = file name without the '.tiff' extension
        self.idx = self.image_file_name.split('.tiff')[0]
        self.df_mask = df_mask

        self.path_mask = path.split('train/')[0] + 'train.csv'
        self.df_masks = pd.read_csv(self.path_mask).set_index('id')

        # size of tiles
        self.sz = 256
        # reduce the original images by 4 times
        self.reduce = 4

        # save images and masks
        self.save_images_masks()

    def enc2mask(self, encs, shape):
        '''
            Decode run-length encodings into a label mask.

            encs  : iterable of encoding strings "start length start length ..."
                    (starts are 1-based); NaN entries are skipped
            shape : shape of the flat buffer before transposition

            Returns a uint8 array of shape (shape[1], shape[0]) where the
            pixels of the m-th encoding carry the label m + 1.
        '''
        img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
        for m, enc in enumerate(encs):
            # A missing encoding is a float NaN. The builtin `float` is used
            # because the `np.float` alias was removed in NumPy 1.24.
            if isinstance(enc, float) and np.isnan(enc):
                continue
            tokens = enc.split()
            # tokens alternate start, length; a trailing unpaired token is ignored
            for start_tok, length_tok in zip(tokens[0::2], tokens[1::2]):
                start = int(start_tok) - 1
                length = int(length_tok)
                img[start:start+length] = 1 + m

        return img.reshape(shape).T

    def mask2enc(self, mask, n=1):
        '''
            Encode a label mask as run-length strings.

            mask : 2-D label array
            n    : number of labels; labels 1..n are encoded

            Returns a list with one entry per label: the string
            "start length start length ..." (1-based starts over the
            transposed, flattened mask) or np.nan when the label is absent.
        '''
        pixels = mask.T.flatten()
        encs = []
        for label in range(1, n+1):
            p = (pixels == label).astype(np.int8)
            if p.sum() == 0:
                encs.append(np.nan)
                continue
            # zero sentinels on both ends so every run has a start and an end
            p = np.concatenate([[0], p, [0]])
            runs = np.where(p[1:] != p[:-1])[0] + 1
            # turn (start, end) pairs into (start, length) pairs
            runs[1::2] -= runs[::2]
            encs.append(' '.join(str(x) for x in runs))

        return encs

    def add_padding(self, img, mask):
        '''
            Zero-pad a 3-D image and its 2-D mask so that both spatial
            dimensions become multiples of reduce * sz.

            The padding is split between the two sides of each axis, the
            odd extra pixel going to the end. Returns (img, mask).
        '''
        block = self.reduce * self.sz
        rows, cols = img.shape[0], img.shape[1]
        pad0 = (block - rows % block) % block
        pad1 = (block - cols % block) % block
        spatial = [[pad0//2, pad0 - pad0//2], [pad1//2, pad1 - pad1//2]]
        img = np.pad(img, spatial + [[0, 0]], constant_values=0)
        mask = np.pad(mask, spatial, constant_values=0)

        return img, mask

    def split_into_tiles(self, img, mask):
        '''
            Downscale image and mask by `reduce` and cut them into
            sz x sz tiles.

            The downscaled dimensions must be multiples of sz (see
            add_padding). Returns (img_tiles, mask_tiles) with shapes
            (n_tiles, sz, sz, 3) and (n_tiles, sz, sz).
        '''
        reduce, sz = self.reduce, self.sz
        img = cv2.resize(img, (img.shape[1]//reduce, img.shape[0]//reduce),
                         interpolation=cv2.INTER_AREA)
        img = img.reshape(img.shape[0]//sz, sz, img.shape[1]//sz, sz, 3)
        img = img.transpose(0, 2, 1, 3, 4).reshape(-1, sz, sz, 3)

        # nearest-neighbour interpolation keeps the mask values discrete
        mask = cv2.resize(mask, (mask.shape[1]//reduce, mask.shape[0]//reduce),
                          interpolation=cv2.INTER_NEAREST)
        mask = mask.reshape(mask.shape[0]//sz, sz, mask.shape[1]//sz, sz)
        mask = mask.transpose(0, 2, 1, 3).reshape(-1, sz, sz)

        return img, mask

    def save_images_masks(self):
        '''
            Read the image, build its mask, tile both and write the tiles
            to 'img_<id>.npz' and 'mask_<id>.npz'.

            Raises IndexError when the image id has no row in train.csv.
        '''
        idx = self.idx
        # filter once and reuse the result for both the file name and the encodings
        rows = self.df_masks[self.df_masks.index == idx]
        image_path = self.path + rows.index[0] + '.tiff'
        encs = rows.encoding

        img = tiff.imread(image_path)
        # 5-D images: drop the singleton axes and move the first remaining axis last
        if len(img.shape) == 5:
            img = np.transpose(img.squeeze(), (1, 2, 0))
        # read image and generate the mask
        mask = self.enc2mask(encs, (img.shape[1], img.shape[0]))

        img, mask = self.add_padding(img, mask)
        img, mask = self.split_into_tiles(img, mask)

        savez_compressed('img_' + idx + '.npz', img)
        savez_compressed('mask_' + idx + '.npz', mask)

    def get_images(self):
        '''
            Return the image tiles stored in 'img_<id>.npz'.
        '''
        img_path = 'img_' + self.idx + '.npz'
        # the context manager closes the underlying file once the array is read
        with load(img_path) as dict_data:
            return dict_data['arr_0']

    def get_masks(self):
        '''
            Return the mask tiles stored in 'mask_<id>.npz'.
        '''
        msk_path = 'mask_' + self.idx + '.npz'
        with load(msk_path) as dict_data:
            return dict_data['arr_0']

# Patient Class

In [None]:
class Glomerulus():
    '''
        Glomerulus annotation record: copies the 'type', 'id',
        'geometry' and 'properties' entries of `ds` onto the instance.
    '''
    def __init__(self, ds):
        for field in ('type', 'id', 'geometry', 'properties'):
            setattr(self, field, ds[field])

class Anatomical():
    '''
        Anatomical structure annotation record: copies the 'type', 'id',
        'geometry' and 'properties' entries of `ds` onto the instance.
    '''
    def __init__(self, ds):
        for field in ('type', 'id', 'geometry', 'properties'):
            setattr(self, field, ds[field])

class Patient():
    '''
        One patient: demographic/clinical fields, the tiled kidney image
        and the annotation records read from the two segmentation files.
    '''
    def __init__(self, ds, ds_masks, path):
        '''
            ds       : row-like object with the patient fields, 'image_file'
                       and the names of the two segmentation JSON files
            ds_masks : masks table handed on to Image
            path     : folder holding the image and segmentation files
        '''
        self.patient_number = ds['patient_number']
        self.race = ds['race']
        self.ethnicity = ds['ethnicity']
        self.sex = ds['sex']
        self.age = ds['age']
        self.weight_kilograms = ds['weight_kilograms']
        self.height_centimeters = ds['height_centimeters']
        self.bmi_kg = ds['bmi_kg/m^2']
        self.laterality = ds['laterality']
        self.percent_cortex = ds['percent_cortex']
        self.percent_medulla = ds['percent_medulla']

        # pass on the caller's table instead of relying on a global `df_masks`
        self.image = Image(ds, ds_masks, path)

        # the key spelling 'segmention' is the one used by the dataset column
        self.anatomical_structures_segmention_file = ds['anatomical_structures_segmention_file']
        self.glomerulus_segmentation_file = ds['glomerulus_segmentation_file']

        # one Anatomical per row of the anatomical-structures JSON file
        ds_anatomicalies = pd.read_json(path + self.anatomical_structures_segmention_file)
        self.anatomicals = [Anatomical(row) for _, row in ds_anatomicalies.iterrows()]

        # one Glomerulus per row of the glomerulus JSON file
        ds_glomerulus = pd.read_json(path + self.glomerulus_segmentation_file)
        self.glomeruluses = [Glomerulus(row) for _, row in ds_glomerulus.iterrows()]

In [None]:
# Mask encodings from train.csv, indexed by image id.
df_masks = pd.read_csv(
    filepath_or_buffer='../input/hubmap-kidney-segmentation/train.csv',
).set_index(keys='id')

In [None]:
# Build one Patient per row of train_ds; each one writes its image/mask tiles to disk.
patients = [
    Patient(train_ds.iloc[row], df_masks, train_path)
    for row in tqdm(
        range(len(train_ds)),
        desc="Creating images/masks",
        ncols=100,
        bar_format='{l_bar}{bar}|',
    )
]

# Display a Patient Kidney Image/Mask tiles

In [None]:
# load one patient's image tiles (patient at index 1) from its .npz file
images = patients[1].image.get_images()
# cell output: shape of the loaded tile array
images.shape

In [None]:
# Show image tiles 400..409 side by side, one tile per axis.
f, axarr = plt.subplots(1, 10, figsize=(20, 20))
for offset, axis in enumerate(axarr):
    axis.imshow(images[400 + offset])

In [None]:
# load one patient's mask tiles (patient at index 1) from its .npz file
masks = patients[1].image.get_masks()
# cell output: shape of the loaded tile array
masks.shape

In [None]:
# Show mask tiles 400..409 side by side, one tile per axis.
f, axarr = plt.subplots(1, 10, figsize=(20, 20))
for offset, axis in enumerate(axarr):
    axis.imshow(masks[400 + offset])

# Conclusion

With the 256x256 tiles stored in temporary numpy .npz files, the image tiles can be used as features and the mask tiles as labels.<br>
One of the easiest machine-learning architectures to start with for this task is the U-Net model.