### Prepare Data

#### Download Dataset

In [None]:
! pip install -q kaggle
! mkdir -p ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle competitions download -c hubmap-organ-segmentation
! mkdir -p dataset
! unzip -q hubmap-organ-segmentation.zip -d dataset
! rm -rf hubmap-organ-segmentation.zip

Downloading hubmap-organ-segmentation.zip to /content
100% 5.76G/5.78G [00:25<00:00, 249MB/s]
100% 5.78G/5.78G [00:25<00:00, 239MB/s]


#### Install and Import

In [None]:
! pip install -q patchify
! pip install -q segmentation-models

[?25l[K     |█████▋                          | 10 kB 27.0 MB/s eta 0:00:01[K     |███████████▎                    | 20 kB 27.5 MB/s eta 0:00:01[K     |████████████████▉               | 30 kB 27.8 MB/s eta 0:00:01[K     |██████████████████████▌         | 40 kB 30.2 MB/s eta 0:00:01[K     |████████████████████████████    | 51 kB 32.6 MB/s eta 0:00:01[K     |████████████████████████████████| 58 kB 3.8 MB/s 
[?25hLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting segmentation-models
  Downloading segmentation_models-1.0.1-py3-none-any.whl (33 kB)
Collecting image-classifiers==1.0.0
  Downloading image_classifiers-1.0.0-py3-none-any.whl (19 kB)
Collecting keras-applications<=1.0.8,>=1.0.7
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 1.3 MB/s 
[?25hCollec

In [None]:
import json
import os
import pathlib
import warnings
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import patchify
import PIL
import tensorflow as tf
from matplotlib import animation
from PIL import Image
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tqdm import tqdm

from Manager import InputMode, Manager
from Augment import Augment
from DiceLoss import DiceLoss
from DiceMetric import DiceCoefficient
from UNet import UNet

# import segmentation_models as sm

# from segmentation_models.losses import DiceLoss
# from segmentation_models.metrics import IOUScore

# sm.set_framework('tf.keras')
# Export the TensorFlow/cuDNN determinism switches as environment variables.
# NOTE(review): these are written after `import tensorflow` has already run in
# this cell - confirm TensorFlow still reads them at this point.
os.environ.update({
    'TF_CUDNN_DETERMINISTIC': 'false',
    'TF_DETERMINISTIC_OPS': 'false',
    'TF_DISABLE_SEGMENT_REDUCTION_OP_DETERMINISM_EXCEPTIONS': 'true',
})


Segmentation Models: using `keras` framework.


#### Create Image Masks

In [None]:
# Load the training metadata, ordered by image id, with a fresh 0..n-1 index.
train_df = (
    pd.read_csv('dataset/train.csv')
    .sort_values('id')
    .reset_index(drop=True)
)

In [None]:
# Create the output directory for the decoded masks; like `mkdir -p`, this is a
# no-op when the directory already exists, but it does not depend on a shell.
os.makedirs('dataset/train_masks', exist_ok=True)

def rle2mask(mask_rle, shape=(1600,256)):
    '''
    Decode a run-length-encoded mask string into a 2-D uint8 array.

    mask_rle: whitespace-separated "start length start length ..." string;
              starts are 1-based positions in the flattened mask
    shape: 2-tuple; the flat buffer is reshaped to `shape` and then
           transposed, so the returned array has shape (shape[1], shape[0])
    Returns numpy array, 255 - mask, 0 - background
    '''
    tokens = np.asarray(mask_rle.split(), dtype=int)
    # Even positions hold the 1-based run starts, odd positions the run lengths.
    run_starts = tokens[0::2] - 1
    run_ends = run_starts + tokens[1::2]

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat[begin:end] = 255
    return flat.reshape(shape).T

# Decode every RLE annotation into a mask image and save it as <id>.png.
# The columns are walked in lockstep (one pass over the frame) instead of
# re-filtering train_df three times per row, and the loop variable no longer
# shadows the builtin `id`.
mask_rows = zip(
    train_df['id'],
    train_df['rle'],
    train_df['img_width'],
    train_df['img_height'],
)
for image_id, rle, width, height in tqdm(mask_rows, total=len(train_df)):
    # rle2mask takes (width, height) and returns the transposed array, i.e. a
    # (height, width) image. For square images this equals the previous
    # (height, width) call; for non-square ones it is the correct orientation.
    mask = rle2mask(rle, (int(width), int(height)))
    mask_path = f'dataset/train_masks/{image_id}.png'
    # cv2.imwrite signals failure via its return value, not an exception.
    if not cv2.imwrite(mask_path, mask):
        raise IOError(f'Could not write mask to {mask_path}')

100%|██████████| 351/351 [00:39<00:00,  8.97it/s]


#### Patchify Images and Masks

In [None]:
def get_correct_dimension(number, divisible):
    """Round `number` up to the nearest multiple of `divisible`.

    Returns `number` unchanged when it is already a multiple.
    """
    remainder = number % divisible
    if remainder:
        return number + divisible - remainder
    return number

def correct_image_size(img, size, interpolation=None):
    """Return `img` as a `size` x `size` image, resizing only when needed.

    img: array whose first two axes are (rows, columns)
    size: target edge length in pixels for both axes
    interpolation: optional cv2 interpolation flag forwarded to cv2.resize;
        None keeps cv2.resize's default (cv2.INTER_LINEAR). Pass
        cv2.INTER_NEAREST for label masks to avoid blended in-between values.

    Both axes are checked, so a non-square input whose height already equals
    `size` is still resized (previously only the height was compared).
    The input object itself is returned when no resize is necessary.
    """
    if img.shape[0] == size and img.shape[1] == size:
        return img
    if interpolation is None:
        # Resolved here rather than in the signature so the default matches
        # cv2.resize's own default.
        interpolation = cv2.INTER_LINEAR
    return cv2.resize(img, (size, size), interpolation=interpolation)

def display_overlay(image, mask):
    """Show `image` with `mask` drawn on top at 50% opacity in a new 7x7 inch figure."""
    plt.figure(figsize=(7, 7))
    # Draw the base image first (default opacity), then the translucent mask.
    for layer, opacity in ((image, None), (mask, 0.5)):
        plt.imshow(layer, alpha=opacity)

In [None]:
def _numeric_id(filename):
    """Return the integer id encoded before the first '.' of `filename`."""
    return int(filename.split('.')[0])


# List mask and image files, each ordered by numeric id, so that entries at the
# same position are expected to belong to the same sample.
train_masks_dir = 'dataset/train_masks'
train_masks = sorted(os.listdir(train_masks_dir), key=_numeric_id)

train_images_dir = 'dataset/train_images'
train_images = sorted(os.listdir(train_images_dir), key=_numeric_id)

assert len(train_masks) == train_df.shape[0], 'mask count differs from train.csv rows'
assert len(train_images) == len(train_masks), 'image count differs from mask count'
# Later cells pair images and masks by position, so the ids must line up
# one-to-one; equal counts alone do not guarantee that.
assert [_numeric_id(name) for name in train_images] == [_numeric_id(name) for name in train_masks], \
    'image ids and mask ids are not aligned'

In [None]:
# Equal number of elements in each sub-list
def subdivide_list(the_list, n_elems):
    """Split `the_list` into consecutive chunks of `n_elems` items.

    The last chunk is shorter when the length is not a multiple of `n_elems`.
    """
    chunks = []
    for start in range(0, len(the_list), n_elems):
        chunks.append(the_list[start:start + n_elems])
    return chunks

# Different number of elements for validation and test sub-list
def subdivide_list_2(the_list, test_size):
    """Split `the_list` into [train, validation, test] sub-lists, in that order.

    The last `test_size` items form the test group, the `test_size` items
    before them the validation group, and everything before that the train
    group. Exactly three groups are always returned.

    Raises:
        ValueError: if `test_size` is negative or the list is too short to
            hold both a validation and a test group of `test_size` items.
    """
    if test_size < 0:
        raise ValueError(f'test_size must be non-negative, got {test_size}')
    train_list_size = len(the_list) - 2 * test_size
    if train_list_size < 0:
        raise ValueError(
            f'list of length {len(the_list)} cannot provide validation and '
            f'test groups of {test_size} items each'
        )
    val_end = train_list_size + test_size
    return [
        the_list[:train_list_size],
        the_list[train_list_size:val_end],
        the_list[val_end:val_end + test_size],
    ]

PATCH_SIZE = 1024
NUMBER_OF_MEMBERS = 117
TEST_SIZE = 65

# Augmentation settings for the training group.
TRAIN_GROUP_INDEX = 0      # position of the train group in the split below
MIN_WHITE_RATIO = 0.05     # minimum foreground share for a patch to be augmented
N_AUGMENTED_COPIES = 3     # augmented duplicates written per qualifying patch
AUGMENT_SEED = 42
# Dedicated, seeded generator so the augmentation seeds (and therefore the
# generated files) are reproducible without touching the global `random` state.
augment_rng = random.Random(AUGMENT_SEED)

# Split the flat filename lists into [train, validation, test] groups.
# NOTE(review): this rebinds train_images/train_masks from flat lists to lists
# of groups, so the listing cell must be re-run before running this cell again.
train_images = subdivide_list_2(train_images, TEST_SIZE)
train_masks = subdivide_list_2(train_masks, TEST_SIZE)

print("Train Images")
for index, value in enumerate(train_images):
    group_path = f'dataset/train_images_{PATCH_SIZE}_{index}'
    os.makedirs(group_path, exist_ok=True)

    for train_image_filename in tqdm(value):
        image_id = int(train_image_filename.split('.')[0])
        train_image_file = train_images_dir + '/' + train_image_filename
        image = cv2.imread(train_image_file)
        if image is None:
            # cv2.imread returns None instead of raising on unreadable files.
            raise FileNotFoundError(f'Could not read image {train_image_file}')
        # Resize to the next multiple of PATCH_SIZE so the image tiles evenly.
        image = correct_image_size(image, get_correct_dimension(image.shape[0], PATCH_SIZE))

        # Non-overlapping PATCH_SIZE x PATCH_SIZE tiles (step == patch size).
        patched_image = patchify.patchify(image, (PATCH_SIZE, PATCH_SIZE, 3), step=PATCH_SIZE)
        for i in range(patched_image.shape[0]):
            for j in range(patched_image.shape[1]):
                cv2.imwrite(f'{group_path}/{image_id}_{i}_{j}.png', patched_image[i, j, 0, :, :, :])

print("Train Masks")
for index, value in enumerate(train_masks):
    group_path = f'dataset/train_masks_{PATCH_SIZE}_{index}'
    os.makedirs(group_path, exist_ok=True)

    for train_mask_filename in tqdm(value):
        image_id = int(train_mask_filename.split('.')[0])
        train_mask_file = train_masks_dir + '/' + train_mask_filename
        image = cv2.imread(train_mask_file, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise FileNotFoundError(f'Could not read mask {train_mask_file}')
        # NOTE(review): correct_image_size resizes with cv2.resize's default
        # interpolation, which can introduce in-between gray values into a
        # 0/255 mask - consider nearest-neighbour resizing for masks.
        image = correct_image_size(image, get_correct_dimension(image.shape[0], PATCH_SIZE))

        patched_mask = patchify.patchify(image, (PATCH_SIZE, PATCH_SIZE), step=PATCH_SIZE)
        for i in range(patched_mask.shape[0]):
            for j in range(patched_mask.shape[1]):
                mask = patched_mask[i, j, :, :]
                mask_location = f'{group_path}/{image_id}_{i}_{j}.png'
                cv2.imwrite(mask_location, mask)

                # Only the training group receives augmented duplicates.
                if index != TRAIN_GROUP_INDEX:
                    continue

                # Share of foreground in the patch, with pixel values scaled to 0..1.
                normalized_mask = mask / 255.0
                white_ratio = normalized_mask.sum() / normalized_mask.size
                if white_ratio < MIN_WHITE_RATIO:
                    continue

                # The image patch with the same name was written by the loop above.
                corresponding_image_file = f'dataset/train_images_{PATCH_SIZE}_{index}/{image_id}_{i}_{j}.png'
                corresponding_image = cv2.imread(corresponding_image_file)
                if corresponding_image is None:
                    raise FileNotFoundError(f'Could not read image patch {corresponding_image_file}')

                # Float mask back in the 0..255 range with a trailing channel axis.
                mask = np.expand_dims(normalized_mask * 255.0, 2)

                for copy_number in range(1, N_AUGMENTED_COPIES + 1):
                    # Augment is project-local; presumably it applies the same
                    # seeded transform to image and mask - confirm in Augment.py.
                    augment = Augment(seed=augment_rng.randint(0, 1000))
                    augmented_image, augmented_mask = augment(corresponding_image, mask)

                    copy_suffix = f'_copy_{copy_number}.png'
                    cv2.imwrite(corresponding_image_file.replace('.png', copy_suffix), augmented_image.numpy())
                    cv2.imwrite(mask_location.replace('.png', copy_suffix), augmented_mask.numpy())

Train Images


100%|██████████| 221/221 [02:31<00:00,  1.46it/s]
100%|██████████| 65/65 [00:47<00:00,  1.37it/s]
100%|██████████| 65/65 [00:46<00:00,  1.40it/s]


Train Masks


100%|██████████| 221/221 [05:54<00:00,  1.60s/it]
100%|██████████| 65/65 [00:05<00:00, 12.29it/s]
100%|██████████| 65/65 [00:05<00:00, 12.52it/s]
