# Intro
Hello! This rather quick and dirty kernel shows how to get started on segmenting nuclei using a neural network in Keras. 

The architecture used is the so-called [U-Net](https://arxiv.org/abs/1505.04597), which is very common for image segmentation problems such as this. I believe U-Nets also tend to work quite well even on small datasets.

Let's get started importing everything we need!

In [1]:
import os
import sys
import random
import warnings
import numpy as np
import pandas as pd
import glob

import matplotlib.pyplot as plt

from tqdm import tqdm
from itertools import chain
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
from scipy import ndimage as ndi

from keras.models import Model, load_model
from keras.layers import Input
from keras.layers.core import Dropout, Lambda
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K

import tensorflow as tf
# Ask TensorFlow to grow its GPU memory allocation as needed (allow_growth)
# rather than with the default settings, then register the session with Keras.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
K.set_session(session)

# Set some parameters
IMG_WIDTH = 256
IMG_HEIGHT = 256
IMG_CHANNELS = 3
# Root folder of the test set. The default is the original author's local
# path; set the DSB_TEST_PATH environment variable to point elsewhere without
# editing the notebook. Joining with '' guarantees a trailing separator, so
# plain string concatenation such as `TEST_PATH + id_` keeps working.
TEST_PATH = os.path.join(
    os.environ.get('DSB_TEST_PATH', '/media/peter/DATA/stage2_test_final/'), '')

# Silence UserWarnings raised from skimage modules.
warnings.filterwarnings('ignore', category=UserWarning, module='skimage')

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Get test IDs: every immediate sub-directory of TEST_PATH is one image ID.
if not os.path.isdir(TEST_PATH):
    # Without this check, os.walk() on a missing directory yields nothing and
    # next() fails with a bare StopIteration that does not name the path.
    raise IOError('Test directory not found: %r' % TEST_PATH)
# Sorted so the ID order (and therefore the submission row order) is the same
# on every run; the directory listing order is otherwise arbitrary.
test_ids = sorted(next(os.walk(TEST_PATH))[1])

In [3]:
# train_ids = train_ids[:70]

In [4]:
from skimage.draw import circle
from skimage.color import gray2rgb
import math

# Get and resize test images
X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
# Original (height, width) of every image, in the same order as test_ids.
sizes_test = []
print('Getting and resizing test images ... ')
sys.stdout.flush()
for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
    path = os.path.join(TEST_PATH, id_)
    # The grayscale-conversion keyword is intentionally not passed: the
    # original `as_grey=False` only restated the default, and that spelling
    # is not accepted by newer scikit-image releases.
    img = imread(os.path.join(path, 'images', id_ + '.png'))
    # Single-channel images are expanded to RGB so every image has a channel axis.
    if img.ndim == 2:
        img = gray2rgb(img)
    # Keep only the first IMG_CHANNELS channels (drops e.g. an alpha channel).
    img = img[:, :, :IMG_CHANNELS]
    sizes_test.append(img.shape[:2])
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    X_test[n] = img

print('Done!')

Getting and resizing test images ... 


100%|██████████| 3019/3019 [00:32<00:00, 92.84it/s]

Done!





In [5]:
from keras.layers import Lambda, Add, Activation, UpSampling2D, Dropout, Average
from keras.optimizers import Adam
from keras.losses import mean_squared_error
from keras.metrics import binary_accuracy

# Folder holding the trained *.h5 models to ensemble. The default is the
# original author's local path; set the DSB_MODELS_DIR environment variable
# to use another location without editing the notebook.
MODELS_DIR = os.environ.get('DSB_MODELS_DIR', '/media/peter/DATA/model_to_ensemble')

def ensemble(models):
    """Combine several models into one model that averages their outputs.

    The returned model has a single input of shape
    (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS). That input is fanned out to every
    member model and the members' outputs are merged with an ``Average``
    layer.

    Args:
        models: non-empty sequence of Keras models. Each one is renamed in
            place to ``'ensemble_t_output_<n>'``.

    Returns:
        An uncompiled Keras ``Model`` named ``'ensemble'``.

    Raises:
        ValueError: if ``models`` is empty.
    """
    if not models:
        raise ValueError('ensemble() needs at least one model, got none')

    # Height first, then width. The original used IMG_HEIGHT for both axes,
    # which only worked because the two constants happen to be equal.
    input_shape = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

    # Fan-out stage: one Dropout(0) branch per member, so the single input
    # becomes len(models) output tensors (a rate of 0 presumably makes each
    # branch a pass-through).
    model_input = Input(shape=input_shape)
    input_branchs = [Dropout(0)(model_input) for _ in range(len(models))]
    input_model = Model([model_input], input_branchs)

    # Averaging stage: one named input per member, each routed through its
    # own model, with all outputs averaged.
    model_outputs = []
    input_ = [Input(shape=input_shape, name='t_input_c_%d'%n) for n in range(len(models))]
    for n, (input_l, model) in enumerate(zip(input_, models)):
        # Rename the member in place; each gets a distinct name by index.
        model.name = 'ensemble_t_output_%d'%n
        model_outputs.append(model(input_l))
    ensemble_m_output = Average() (model_outputs)
    ensemble_t_model = Model(input_, [ensemble_m_output])

    # Chain the two stages behind the single shared input.
    ensemble_s_output = ensemble_t_model(input_model(model_input))
    return Model([model_input], [ensemble_s_output], name='ensemble')
            
# The saved models refer to these custom metric/loss names; built-in Keras
# functions are supplied under those names when loading each file.
custom_objects = {
    'mean_iou': binary_accuracy,
    'custom_loss': mean_squared_error,
    'mean_iou_marker': binary_accuracy,
}
model_paths = glob.glob(MODELS_DIR+'/*.h5')
models = [
    load_model(model_path, custom_objects=custom_objects)
    for model_path in model_paths
]

# Wrap all loaded models into one averaging model, then compile, print and
# save it.
model = ensemble(models)
model.compile(optimizer='sgd', loss='mean_squared_error') # need to compile
model.summary()
model.save('ensemble.h5')

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 256, 256, 3)  0                                            
__________________________________________________________________________________________________
model_1 (Model)                 [(None, 256, 256, 3) 0           input_1[0][0]                    
__________________________________________________________________________________________________
model_2 (Model)                 (None, 256, 256, 3)  31522694    model_1[1][0]                    
                                                                 model_1[1][1]                    
                                                                 model_1[1][2]                    
                                                                 model_1[1][3]                    
Total para

In [6]:
from keras.utils import Sequence
import cv2
from sklearn.utils import shuffle
from skimage.transform import AffineTransform, warp
import copy

# Batch size passed to model.predict().
BS = 1


In [7]:
from skimage.morphology import closing, square, remove_small_objects
from skimage.segmentation import clear_border
from skimage.filters import threshold_otsu

# Predict on testing set
preds_test = model.predict(X_test, verbose=1, batch_size=BS)
# Move the last (channel) axis to the front so the three output maps can be
# unpacked into separate per-image stacks.
preds_test, preds_test_marker, preds_test_dt = np.transpose(preds_test, (3,0,1,2))

# Create list of upsampled test masks: one (mask, marker, dt) tuple per image,
# each map resized back to that image's original (height, width).
preds_test_upsampled = [
    tuple(
        resize(np.squeeze(pred_maps[i]),
               (sizes_test[i][0], sizes_test[i][1]),
               mode='constant', preserve_range=True)
        for pred_maps in (preds_test, preds_test_marker, preds_test_dt)
    )
    for i in range(len(preds_test))
]



In [8]:
from skimage.morphology import watershed

# Run-length encoding stolen from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
def rle_encoding(x):
    """Run-length encode the entries of ``x`` that equal 1.

    ``x`` is transposed and flattened before encoding, and positions are
    1-based.

    Args:
        x: 2-D array; entries equal to 1 (or True) are encoded.

    Returns:
        Flat list ``[start_1, length_1, start_2, length_2, ...]``; empty when
        no entry equals 1.
    """
    flat = x.T.flatten()
    positions = np.where(flat == 1)[0]
    runs = []
    last = -2  # sentinel: the first hit is never adjacent to it
    for pos in positions:
        if pos - last > 1:
            # Gap since the previous hit: open a new run at this 1-based start.
            runs += [pos + 1, 0]
        runs[-1] = runs[-1] + 1
        last = pos
    return runs

def lb(image, marker, distance):
    """Label individual objects in a mask with a marker-seeded watershed.

    Args:
        image: mask passed to the watershed as ``mask`` (the caller passes a
            thresholded boolean array).
        marker: seed mask; its connected components become the watershed
            markers.
        distance: array whose negation is the surface the watershed runs on.

    Returns:
        The label array returned by ``watershed``. If it contains no labelled
        pixel, pixel ``[0, 0]`` is set to 1 so at least one object exists.
    """
    marker_outweighs_image = np.sum(image) < np.sum(marker)
    if marker_outweighs_image:
        # The seed mask covers more than the object mask: use it as the mask.
        image = marker
    else:
        # Keep only seed pixels that also lie inside the object mask.
        marker = np.array((marker == 1) & (image == 1))

    seeds, _ = ndi.label(marker)
    labels = watershed(-distance, seeds, mask=image)

    nothing_labelled = np.sum(labels) == 0
    if nothing_labelled:
        # NOTE(review): presumably so every image yields at least one RLE row
        # in the submission; confirm against the competition rules.
        labels[0, 0] = 1
    return labels

def prob_to_rles(x, marker, dt):
    """Yield one run-length encoding per object found in a prediction.

    Both ``x`` and ``marker`` are binarised with their own Otsu threshold,
    split into labelled objects by ``lb`` and each label is encoded with
    ``rle_encoding``.

    Args:
        x: predicted map thresholded into the object mask.
        marker: predicted map thresholded into the seed mask.
        dt: third predicted map, passed to ``lb`` as ``distance``.

    Yields:
        The RLE list for label 1, 2, ... up to the highest label.
    """
    object_mask = x > threshold_otsu(x)
    seed_mask = marker > threshold_otsu(marker)
    labelled = lb(object_mask, seed_mask, dt)
    highest_label = labelled.max()
    for object_id in range(1, highest_label + 1):
        yield rle_encoding(labelled == object_id)

In [9]:
# Flatten the per-image object encodings into two parallel lists: rles holds
# one entry per object, new_test_ids holds the owning image ID for each.
new_test_ids, rles = [], []
for id_, (pred_mask, pred_marker, pred_dt) in zip(test_ids, preds_test_upsampled):
    image_rles = list(prob_to_rles(pred_mask, pred_marker, pred_dt))
    rles += image_rles
    new_test_ids += [id_] * len(image_rles)

In [10]:
# Create submission DataFrame: one row per object, with its run-length
# encoding rendered as space-separated numbers.
encoded_pixels = pd.Series(rles).apply(lambda run: ' '.join(map(str, run)))
sub = pd.DataFrame()
sub['ImageId'] = new_test_ids
sub['EncodedPixels'] = encoded_pixels
sub.to_csv('sub-dsbowl2018-ensemble.csv', index=False)