# Summary

The following notebook is used to separate the glomeruli tiles and visualize them in three different ways. 
* Firstly as raw tiles which can be used for further processing and analysis.
* Secondly as annotated tiles which show the glomeruli polygon on top of the raw data
* Finally as mosaics of all the glomeruli of an image ordered by their size and labelled by their number. 

Furthermore, a preliminary analysis of the glomeruli sizes and shapes is presented in the final section.


# Output dataset
The output of the notebook is stored in the dataset [HuBMAP: glomeruli tiles and mosaics](https://www.kaggle.com/anadelta/hubmap-glomeruli-tiles)


# Future work 

I am considering continuing with: 
* A more extended analysis of glomeruli geometry
* Some type of color analysis/clustering of the glomeruli

Suggestions are always welcome. 

# Utilities
The following sections contain utilities that are required for loading both the images and their annotations.

## Paths

In [None]:
import os

# Root folder of the input dataset and its train / test sub-folders.
input_path = '/kaggle/input/hubmap-kidney-segmentation'
train_folder = os.path.join(input_path, 'train')
test_folder = os.path.join(input_path, 'test')

# Relative folder under which the tiles, mosaics and analysis plots are written.
output_path = 'output'


class TrainingPaths:
    """Iterator over the training images that have an annotation file.

    Each step yields a ``(name, tiff_path, annotation_path)`` tuple, where
    ``name`` is the file name without its extension and ``annotation_path``
    is the ``.json`` file with the same stem next to the ``.tiff`` file.
    Entries for which either file is missing are skipped.
    """

    def __init__(self):
        # Only the '.tiff' entries of the training folder are candidates.
        self._tiff_names = [entry for entry in os.listdir(train_folder)
                            if entry.endswith('.tiff')]
        self._len = len(self._tiff_names)
        self._index = 0

    def __iter__(self):
        return self

    def __next__(self):
        # Consume candidates until one has both of its files on disk.
        while self._index < self._len:
            tiff_name = self._tiff_names[self._index]
            self._index += 1

            tiff_path = os.path.join(train_folder, tiff_name)
            annotation_path = os.path.splitext(tiff_path)[0] + '.json'

            if os.path.isfile(tiff_path) and os.path.isfile(annotation_path):
                return os.path.splitext(tiff_name)[0], tiff_path, annotation_path

        raise StopIteration

## Tiff loader

In [None]:
from numpy import squeeze, transpose
from tifffile import imread


def read_tiff(file_path):
    """Read a TIFF file and return it with the channels on the last axis.

    Singleton dimensions are dropped first. An array whose first axis then
    has length 3 is treated as channel-first and that axis is moved to the
    end; any other array is returned as it is.
    """
    image = squeeze(imread(file_path))

    channels_first = image.shape[0] == 3
    if not channels_first:
        return image

    return transpose(image, (1, 2, 0))

## Annotations iterator

In [None]:

from json import load

class Annotations:
    """Iterator over the polygons stored in an annotation JSON file.

    Each step yields ``(index, x, y)``: the position of the polygon in the
    file and the x / y coordinates of the first coordinate list found under
    ``['geometry']['coordinates']``.
    """

    def __init__(self, file_path):
        with open(file_path) as annotation_file:
            self._glomeruli_polygons = load(annotation_file)
        self._len = len(self._glomeruli_polygons)
        self._index = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self._index >= self._len:
            raise StopIteration

        position = self._index
        ring = self._glomeruli_polygons[position]['geometry']['coordinates'][0]

        # Split the (x, y) pairs into two parallel coordinate lists.
        xs, ys = [], []
        for point in ring:
            xs.append(point[0])
            ys.append(point[1])

        self._index = position + 1

        return position, xs, ys


## Geometry methods

In [None]:

import numpy as np


def get_polygon_area(x, y):
    """Return the area of the polygon with vertices ``(x[i], y[i])``.

    Uses the shoelace formula; taking the absolute value makes the result
    independent of the orientation of the vertices.
    """
    forward = np.dot(x, np.roll(y, 1))
    backward = np.dot(y, np.roll(x, 1))
    return 0.5 * np.abs(forward - backward)


def get_polygon_perimeter(x, y):
    """Return the perimeter of the polygon with vertices ``(x[i], y[i])``.

    The edge from the last vertex back to the first one is included, since
    every vertex is paired with its predecessor in a circular fashion.
    """
    step_x = x - np.roll(x, 1)
    step_y = y - np.roll(y, 1)
    edge_lengths = np.sqrt(np.power(step_x, 2) + np.power(step_y, 2))
    return np.sum(edge_lengths)


# Separate glomeruli tiles

Separate and store the raw and annotated glomeruli tiles

In [None]:

import os

from PIL import Image


class Tiles:
    """Store and retrieve the tiles of one image as numbered PNG files.

    The tiles live in ``<output_path>/<tiles folder>/<image_name>``. The tiles
    folder comes from ``_get_tiles_folder`` so that a subclass can redirect
    its tiles to a different location.
    """

    def __init__(self, image_name):
        """Remember ``image_name`` and make sure its tiles folder exists."""
        self._image_name = image_name
        self._tiles_folder = os.path.join(output_path,
                                          self._get_tiles_folder(),
                                          image_name)
        # exist_ok replaces the former exists()/makedirs() pair, which could
        # fail if the folder appeared between the check and the creation.
        os.makedirs(self._tiles_folder, exist_ok=True)

    def _get_tiles_folder(self):
        """Return the name of the folder that groups this kind of tiles."""
        return 'raw_tiles'

    def _get_tile_path(self, index):
        """Return the path of tile ``index`` (zero padded to 4 digits)."""
        return os.path.join(self._tiles_folder,
                            f'{self._image_name}_{index:04}.png')

    def save(self, tile_image, index):
        """Write ``tile_image`` as tile number ``index``."""
        tile_image.save(self._get_tile_path(index))

    def open(self, index):
        """Open tile number ``index`` and return the resulting image."""
        return Image.open(self._get_tile_path(index))


class AnnotatedTiles(Tiles):
    """Tiles variant whose files are kept in the 'annotated_tiles' folder."""

    def _get_tiles_folder(self):
        """Return the folder name used for the annotated tiles."""
        folder_name = 'annotated_tiles'
        return folder_name

In [None]:
import os

from PIL import Image, ImageDraw

def separate_glomeruli_tiles(padd=20):
    """Cut every annotated glomerulus out of the training images.

    For each annotation of each training image two PNG files are written:
    the raw tile (through ``Tiles``) and the same tile with the annotation
    polygon drawn on top of it (through ``AnnotatedTiles``).

    Args:
        padd: Additional pixels kept around the bounding box of the
            annotation polygon. The tile is clipped to the image borders.
    """
    print("Separating glomeruli tiles:")

    for name, tiff_path, annotation_path in TrainingPaths():
        print(name)

        kidney_array = read_tiff(tiff_path)
        image_height, image_width = kidney_array.shape[:2]

        tiles = Tiles(name)
        annotated_tiles = AnnotatedTiles(name)

        for index, x, y in Annotations(annotation_path):

            # Padded bounding box of the polygon, clipped to the image

            x_min = max(min(x) - padd, 0)
            x_max = min(max(x) + padd, image_width)
            y_min = max(min(y) - padd, 0)
            y_max = min(max(y) + padd, image_height)

            # Save the raw tile before anything is drawn on it

            tile_image = Image.fromarray(kidney_array[y_min:y_max, x_min:x_max, :])
            tiles.save(tile_image, index)

            # Draw the annotation polygon in tile coordinates

            outline = [(xd - x_min, yd - y_min) for xd, yd in zip(x, y)]
            draw = ImageDraw.Draw(tile_image)
            draw.line(outline, fill='#3dfe22', width=4)

            # Save the annotated tile

            annotated_tiles.save(tile_image, index)




In [None]:
# Write the raw and annotated tiles of every training image.
separate_glomeruli_tiles()

# Create glomeruli mosaics

Sort all the glomeruli of an image according to their size and combine them into one representative mosaic.

The output of this step can be used in order to quickly separate and delete mislabelled glomeruli.

For higher resolution images please check out the extracted [HuBMAP: glomeruli tiles and mosaics dataset](https://www.kaggle.com/anadelta/hubmap-glomeruli-tiles).


In [None]:
import os

import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont


class Mosaic:
    """Fixed-width image onto which tiles are pasted row by row.

    Tiles are placed from left to right, separated by ``margin`` pixels.
    When a tile does not fit in the remaining width, a new row is started
    below the lowest tile of the current row. The image grows downwards as
    tiles are added and is cropped to the used height when it is saved.
    """

    def __init__(self, width, margin):
        """Create an empty mosaic.

        Args:
            width: Width of the mosaic image in pixels.
            margin: Empty space left around and between the tiles.
        """

        # Initialize mosaic image (zero height, extended on demand)

        self._margin = margin
        self._mosaic_image = Image.new('RGB', (width, 0))

        # Initialize title font

        # self._font = ImageFont.truetype('Pillow/Tests/fonts/FreeMono.ttf', size=self._margin)

        # Initialize the row corners

        self._row_ul = [self._margin, self._margin]  # Row upper left
        self._row_lr = [self._margin, self._margin]  # Row lower right

    def _extend_mosaic(self, new_width, new_height):
        """Grow the mosaic so that it is at least ``new_width`` x ``new_height``."""

        width, height = self._mosaic_image.size

        if (new_width <= width) and (new_height <= height):
            return

        # Growing one dimension must never shrink the other one.
        draft_mosaic = Image.new('RGB', (max(width, new_width),
                                         max(height, new_height)))
        draft_mosaic.paste(self._mosaic_image, (0, 0))
        self._mosaic_image = draft_mosaic

    def _crop_mosaic(self, new_width, new_height):
        """Shrink the mosaic so that it is at most ``new_width`` x ``new_height``."""

        width, height = self._mosaic_image.size

        if (new_width >= width) and (new_height >= height):
            return

        # Cropping one dimension must never enlarge the other one.
        self._mosaic_image = self._mosaic_image.crop((0, 0,
                                                      min(width, new_width),
                                                      min(height, new_height)))

    def _get_mosaic_path(self, image_name):
        """Return the output path of the mosaic, creating its folder if needed."""

        mosaics_folder = os.path.join(output_path, 'annotated_mosaics')
        os.makedirs(mosaics_folder, exist_ok=True)

        return os.path.join(mosaics_folder, f'{image_name}.png')

    def add_tile(self, tile, title):
        """Paste ``tile`` at the next free position of the mosaic.

        Args:
            tile: Image to paste.
            title: Label of the tile. Currently unused, because the code
                that draws the title is commented out.
        """

        width, _ = self._mosaic_image.size
        tile_width, tile_height = tile.size

        # Change row when necessary

        if self._row_lr[0] + tile_width > width:
            next_row_top = self._row_lr[1] + self._margin
            self._row_ul = [self._margin, next_row_top]
            self._row_lr = [self._margin, next_row_top]

        # Paste tile into the mosaic. The mosaic has to reach the bottom of
        # the tile, i.e. row top + tile HEIGHT; using the tile width here
        # clipped every tile that is taller than it is wide.

        self._extend_mosaic(width, self._row_ul[1] + tile_height)
        self._mosaic_image.paste(tile, (self._row_lr[0], self._row_ul[1]))

        # Draw the title of the tile

        # draw = ImageDraw.Draw(self._mosaic_image)
        # draw.text((self._row_lr[0], self._row_ul[1]-self._margin), str(title),
        #           font=self._font, fill='#3dfe22')

        # Update the row corners

        self._row_lr = [self._row_lr[0] + tile_width + self._margin,
                        max(self._row_lr[1], self._row_ul[1] + tile_height)]

    def save(self, image_name):
        """Crop the unused area and write the mosaic as ``<image_name>.png``."""

        width, _ = self._mosaic_image.size

        # Crop unused area of the mosaic

        self._crop_mosaic(width, self._row_lr[1])

        # Save the mosaic

        self._mosaic_image.save(self._get_mosaic_path(image_name))

    def show(self):
        """Display the mosaic in a matplotlib figure without axes."""

        width, height = self._mosaic_image.size
        figure_width = 20
        # Keep the aspect ratio, but avoid a degenerate zero-height figure
        # when the mosaic is much wider than it is tall.
        figure_height = max(1, figure_width * height // width)

        _, ax = plt.subplots(1, 1, sharex=True, figsize=(figure_width, figure_height))
        ax.imshow(self._mosaic_image)
        ax.axis('off')
        plt.show()

In [None]:

import os


def create_glumeruli_mosaics(max_width=12000, margin=40):
    """Build, save and show one mosaic of annotated tiles per training image.

    The tiles of an image are ordered by increasing polygon area and pasted
    into a ``Mosaic``.

    Args:
        max_width: Maximum width of the mosaic in pixels.
        margin: Empty margin between tiles in the mosaic.
    """

    for name, _, annotation_path in TrainingPaths():

        annotated_tiles = AnnotatedTiles(name)

        # Only the areas are needed to decide the order, so the tile images
        # are opened later, one at a time, instead of all being kept open.

        areas = [(get_polygon_area(x, y), index)
                 for index, x, y in Annotations(annotation_path)]

        mosaic = Mosaic(max_width, margin)

        # sorted() is stable: tiles with equal area keep their annotation order.

        for _, index in sorted(areas, key=lambda item: item[0]):

            # The context manager closes the tile file once it is pasted.
            with annotated_tiles.open(index) as tile_image:
                mosaic.add_tile(tile_image, index)

        mosaic.save(name)

        mosaic.show()

In [None]:
# Build, save and display the mosaic of every training image.
create_glumeruli_mosaics()

# Study glomeruli

In [None]:

import os
from math import pi, log

import matplotlib.pyplot as plt


def get_glomeruli_properties():
    """Collect the area and perimeter of every annotated glomerulus.

    Returns:
        A list with one entry per training image; each entry is a list of
        dicts with the keys 'name', 'index', 'area' and 'perimeter', one
        dict per annotation of that image.
    """

    def describe(name, index, x, y):
        # Geometry of a single annotation polygon.
        return {'name': name,
                'index': index,
                'area': get_polygon_area(x, y),
                'perimeter': get_polygon_perimeter(x, y)}

    return [[describe(name, index, x, y)
             for index, x, y in Annotations(annotation_path)]
            for name, _, annotation_path in TrainingPaths()]


def hist_glomeruli_properties(properties, properties_function, x_label, output_filename):
    """Plot per-image and overall histograms of one glomeruli property.

    The left axis shows one histogram per image, the right axis a single
    histogram of all images together. The figure is saved in the 'analysis'
    folder under ``output_path``.

    Args:
        properties: List with one list of property dicts per image; each
            dict needs a 'name' key plus whatever ``properties_function``
            reads.
        properties_function: Callable mapping one property dict to the value
            to histogram.
        x_label: Label of the x axis of both plots.
        output_filename: File name of the saved figure.
    """

    fig, ax = plt.subplots(1, 2, sharex=True, figsize=(10, 4))

    all_hist_data = []
    # Iterate the argument, not a global that merely happens to be defined
    # elsewhere in the notebook.
    for props in properties:

        if not props:
            # No glomeruli for this image: nothing to plot and no entry to
            # take the legend label from.
            continue

        hist_data = [properties_function(p) for p in props]
        ax[0].hist(hist_data, histtype='step', label=props[0]['name'])

        all_hist_data.extend(hist_data)

    ax[0].legend()
    ax[0].set_xlabel(x_label)
    ax[0].set_ylabel('Frequency []')

    ax[1].hist(all_hist_data, color='black', histtype='step', label='all')
    ax[1].legend()
    ax[1].set_xlabel(x_label)
    ax[1].set_ylabel('Frequency []')

    # Save plot

    analysis_folder_path = os.path.join(output_path, 'analysis')
    os.makedirs(analysis_folder_path, exist_ok=True)

    fig.savefig(os.path.join(analysis_folder_path, output_filename))


def hist_glomeruli_area(all_props):
    """Plot and save the histograms of the glomeruli areas.

    The areas are divided by 10000 before they are plotted; the figure is
    saved as 'hist_glomeruli_area.png'.
    """

    def scaled_area(glomerulus):
        return glomerulus['area']/10000

    hist_glomeruli_properties(all_props,
                              scaled_area,
                              'Area [dpixel^2]',
                              'hist_glomeruli_area.png')


def hist_glomeruli_roundness(all_props):
    """Plot and save the histograms of the glomeruli roundness.

    Roundness is computed as 4*pi*area / perimeter^2, which equals 1 for a
    circle; the figure is saved as 'hist_glomeruli_roundness.png'.
    """

    def roundness(glomerulus):
        perimeter = glomerulus['perimeter']
        return 4*pi*glomerulus['area']/perimeter/perimeter

    hist_glomeruli_properties(all_props,
                              roundness,
                              'Roundness []',
                              'hist_glomeruli_roundness.png')

In [None]:
# Compute the glomeruli properties once and plot the area histograms.
all_props = get_glomeruli_properties()
hist_glomeruli_area(all_props)

In [None]:
# Plot the roundness histograms from the properties computed above.
hist_glomeruli_roundness(all_props)

# Clean up output

In [None]:
import shutil


def cleanup_output():
    """Delete the output folder and everything inside it.

    A failure to delete is reported on standard output instead of being
    raised.
    """
    try:
        shutil.rmtree(output_path)
    except OSError as error:
        failed_path, reason = error.filename, error.strerror
        print("Error: %s - %s." % (failed_path, reason))


# Remove the output folder created by the cells above.
cleanup_output()
