Notebook obtained from https://github.com/abhinavsagar/Kaggle-Solutions/

MIT License

Copyright (c) 2019 Abhinav Sagar

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

modified by Tobias Rasse tobias.rasse@mpi-bn.mpg.de

### Importing the needed libraries

In [1]:
import os
import sys
import random
import warnings

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from tqdm import tqdm
from itertools import chain
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.morphology import label

from keras.models import Model, load_model
from keras.layers import Input
from keras.layers.core import Dropout, Lambda
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K

import tensorflow as tf

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
import tifffile as tif

In [3]:
import inspect
import pickle
from glob import glob
import os
from csbdeep.utils import Path, normalize
from stardist import fill_label_holes, random_label_cmap, calculate_extents, gputools_available,_draw_polygons
from stardist.models import Config2D, StarDist2D, StarDistData2D


In [4]:
main_folder = os.path.dirname(os.path.abspath(inspect.stack()[0][1])).replace("TrainUNet","Augment")
file_path = "{}/my_runs/augment_settings_xl.pkl".format(main_folder)
infile = open(file_path,'rb')
parameter = pickle.load(infile)
print("Loading processing pipeline from",file_path)
infile.close()
aug_sets,pre_defined_pipelines,data_main_GT,Datasets_Download = parameter

Loading processing pipeline from /home/trasse/github/OpSeF-IV/Train/Augment/my_runs/augment_settings_xl.pkl


In [5]:
# define training 1
trainModelSettings = {}
trainModelSettings["root"] = data_main_GT
trainModelSettings["data"] = "DSB2018_FL_Nuc_Subset_Basic_Nuc_512"
trainModelSettings["path"] = os.path.join(trainModelSettings["root"],trainModelSettings["data"])

In [6]:
print(trainModelSettings["path"])

/mnt/ag-microscopy/SampleDataML/OpSeF_XL_Data/GT/DSB2018_FL_Nuc_Subset_Basic_Nuc_512


In [7]:
X = sorted(glob('{}/train/images/*.tif'.format(trainModelSettings["path"])))
Y = sorted(glob('{}/train/masks/*.tif'.format(trainModelSettings["path"])))
assert all(Path(x).name==Path(y).name for x,y in zip(X,Y))

# load data
X = list(map(tif.imread,X))
Y = list(map(tif.imread,Y))
n_channel = 1 if X[0].ndim == 2 else X[0].shape[-1]
    
# Normalize images and fill small label holes
axis_norm = (0,1)   # normalize channels independently
# axis_norm = (0,1,2) # normalize channels jointly

if n_channel > 1:
    print("Normalizing image channels %s." % ('jointly' if axis_norm is None or 2 in axis_norm else 'independently'))
    sys.stdout.flush()
X = [normalize(x,1,99.8,axis=axis_norm) for x in tqdm(X)]
Y = [fill_label_holes(y) for y in tqdm(Y)]
    
# Split into train and validation datasets.
assert len(X) > 1, "not enough training data"
rng = np.random.RandomState(42)
ind = rng.permutation(len(X))
n_val = max(1, int(round(0.15 * len(ind))))
ind_train, ind_val = ind[:-n_val], ind[-n_val:]
X_val, Y_val = [X[i] for i in ind_val]  , [Y[i] for i in ind_val]
X_trn, Y_trn = [X[i] for i in ind_train], [Y[i] for i in ind_train] 
print('number of images: %3d' % len(X))
print('- training:       %3d' % len(X_trn))
print('- validation:     %3d' % len(X_val))
    

100%|██████████| 693/693 [00:01<00:00, 414.79it/s]
100%|██████████| 693/693 [00:05<00:00, 125.36it/s]

number of images: 693
- training:       589
- validation:     104





seed = 42

# Get and resize train images and masks
X_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
Y_train = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool)

print('Getting and resizing train images and masks ... ')
sys.stdout.flush()
for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
    path = TRAIN_PATH + id_
    img = imread(path + '/images/' + id_ + '.png')[:,:,:IMG_CHANNELS]
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    X_train[n] = img
    mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.bool)
    for mask_file in next(os.walk(path + '/masks/'))[2]:
        mask_ = imread(path + '/masks/' + mask_file)
        mask_ = np.expand_dims(resize(mask_, (IMG_HEIGHT, IMG_WIDTH), mode='constant', 
                                      preserve_range=True), axis=-1)
        mask = np.maximum(mask, mask_)
    Y_train[n] = mask

# Get and resize test images
X_test = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
sizes_test = []
print('Getting and resizing test images ... ')
sys.stdout.flush()
for n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
    path = TEST_PATH + id_
    img = imread(path + '/images/' + id_ + '.png')[:,:,:IMG_CHANNELS]
    sizes_test.append([img.shape[0], img.shape[1]])
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)
    X_test[n] = img

print('Done!')

In [8]:
print(X[0].shape)
if len(X[0].shape) == 2:
    IMG_HEIGHT, IMG_WIDTH = X[0].shape
    IMG_CHANNELS = 1
else:
    IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = X[0].shape

(512, 512)


In [9]:
jkjkjk

NameError: name 'jkjkjk' is not defined

###  3. Creating the U-net model

In [10]:
# Define IoU metric
def mean_iou(y_true, y_pred):
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        y_pred_ = tf.to_int32(y_pred > t)
        score, up_opt = tf.metrics.mean_iou(y_true, y_pred_, 2)
        K.get_session().run(tf.local_variables_initializer())
        with tf.control_dependencies([up_opt]):
            score = tf.identity(score)
        prec.append(score)
    return K.mean(K.stack(prec), axis=0)

In [11]:
import keras

In [12]:
from keras.layers import Input

In [13]:
# Build U-Net model
inputs = keras.layers.Input((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
s = Lambda(lambda x: x / 255) (inputs)

c1 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (s)
c1 = Dropout(0.1) (c1)
c1 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c1)
p1 = MaxPooling2D((2, 2)) (c1)

c2 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p1)
c2 = Dropout(0.1) (c2)
c2 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c2)
p2 = MaxPooling2D((2, 2)) (c2)

c3 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p2)
c3 = Dropout(0.2) (c3)
c3 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c3)
p3 = MaxPooling2D((2, 2)) (c3)

c4 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p3)
c4 = Dropout(0.2) (c4)
c4 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c4)
p4 = MaxPooling2D(pool_size=(2, 2)) (c4)

c5 = Conv2D(256, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (p4)
c5 = Dropout(0.3) (c5)
c5 = Conv2D(256, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c5)

u6 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same') (c5)
u6 = concatenate([u6, c4])
c6 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u6)
c6 = Dropout(0.2) (c6)
c6 = Conv2D(128, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c6)

u7 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same') (c6)
u7 = concatenate([u7, c3])
c7 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u7)
c7 = Dropout(0.2) (c7)
c7 = Conv2D(64, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c7)

u8 = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same') (c7)
u8 = concatenate([u8, c2])
c8 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u8)
c8 = Dropout(0.1) (c8)
c8 = Conv2D(32, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c8)

u9 = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same') (c8)
u9 = concatenate([u9, c1], axis=3)
c9 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (u9)
c9 = Dropout(0.1) (c9)
c9 = Conv2D(16, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same') (c9)

outputs = Conv2D(1, (1, 1), activation='sigmoid') (c9)

model = Model(inputs=[inputs], outputs=[outputs])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[mean_iou])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 512, 512, 1)  0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 512, 512, 1)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 512, 512, 16) 160         lambda_1[0][0]                   
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 512, 512, 16) 0           conv2d_1[0][0]                   
__________________________________________________________________________________________________
conv2d_2 (

###  4. Training

In [45]:
trainModelSettings["epochs"] = 2
trainModelSettings["steps_per_epoch"] = 100

In [46]:
def to_array(img_list,IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS,my_datatype,add_axis = False):
    # convers list to array
    IMG_arr = np.zeros((len(img_list),1, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=my_datatype)
    for n in range(len(X_val)):
        if add_axis:
            IMG_arr[n,0,:,:,0] = img_list[n]
        else:
            IMG_arr[n,0,:,:,:] = img_list[n]
    return IMG_arr

In [47]:
# go from lists to arrays
X_trn_arr = to_array(X_trn,IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS,np.uint8,True)
Y_trn_arr = to_array(Y_trn,IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS,np.uint8)
X_val_arr = to_array(X_val,IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS,np.uint8,True)
Y_val_arr = to_array(Y_val,IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS,np.uint8)

In [48]:
print(X_trn_arr.shape)

(589, 1, 512, 512, 1)


In [49]:
#creating a training and validation generator that generate masks and images
train_generator = zip(X_trn_arr, Y_trn_arr)
val_generator = zip(X_val_arr, Y_val_arr)

In [28]:
# go from list to arrays
X_val_arr = np.zeros((len(X_val), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=my_datatype)
for n in range(len(X_val)):
    X_val_arr[n,:,:,0] = X_val[n]
Y_val_arr = np.zeros((len(Y_val), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
for n in range(len(X_val)):
    Y_val_arr[n,:,:,0] = Y_val[n]  

X_val_arr = np.zeros((len(X_val), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
for n in range(len(X_val)):
    X_val_arr[n,:,:,0] = X_val[n]
Y_val_arr = np.zeros((len(Y_val), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)
for n in range(len(X_val)):
    Y_val_arr[n,:,:,0] = Y_val[n]   
    
    

In [24]:
#creating a training and validation generator that generate masks and images
train_generator = zip(X_trn, Y_trn)
val_generator = zip(X_val, Y_val)

In [50]:
# Fit model
results = model.fit_generator(train_generator, validation_data=val_generator, validation_steps=10, steps_per_epoch=100,
                              epochs=3)

Epoch 1/3


InvalidArgumentError: assertion failed: [`labels` out of bound] [Condition x < y did not hold element-wise:] [x (metrics/mean_iou/mean_iou_5/confusion_matrix/control_dependency:0) = ] [0 0 0...] [y (metrics/mean_iou/mean_iou_5/ToInt64_2:0) = ] [2]
	 [[{{node metrics/mean_iou/mean_iou_5/confusion_matrix/assert_less/Assert/AssertGuard/Assert}} = Assert[T=[DT_STRING, DT_STRING, DT_STRING, DT_INT64, DT_STRING, DT_INT64], summarize=3, _device="/job:localhost/replica:0/task:0/device:CPU:0"](metrics/mean_iou/mean_iou_5/confusion_matrix/assert_less/Assert/AssertGuard/Assert/Switch/_559, metrics/mean_iou/mean_iou_5/confusion_matrix/assert_less/Assert/AssertGuard/Assert/data_0, metrics/mean_iou/mean_iou_5/confusion_matrix/assert_less/Assert/AssertGuard/Assert/data_1, metrics/mean_iou/mean_iou_5/confusion_matrix/assert_less/Assert/AssertGuard/Assert/data_2, metrics/mean_iou/mean_iou_5/confusion_matrix/assert_less/Assert/AssertGuard/Assert/Switch_1/_561, metrics/mean_iou/mean_iou_5/confusion_matrix/assert_less/Assert/AssertGuard/Assert/data_4, metrics/mean_iou/mean_iou_5/confusion_matrix/assert_less/Assert/AssertGuard/Assert/Switch_2/_563)]]

In [None]:
# Fit model
earlystopper = EarlyStopping(patience=3, verbose=1)
checkpointer = ModelCheckpoint('model-dsbowl2018-1.h5', verbose=1, save_best_only=True)
results = model.fit_generator(train_generator, validation_data=val_generator, validation_steps=10, steps_per_epoch=250,
                              epochs=3, callbacks=[earlystopper, checkpointer])

In [18]:
model.fit(X_trn, Y_trn, validation_data=(X_val,Y_val),
            epochs=trainModelSettings["epochs"] , steps_per_epoch=trainModelSettings["steps_per_epoch"])

ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 array(s), but instead got the following list of 589 arrays: [array([[0.2       , 0.14117648, 0.15294118, ..., 0.05098039, 0.07058824,
        0.03529412],
       [0.09411765, 0.04313726, 0.07843138, ..., 0.06666667, 0.07058824,
        0.03529412],
       [0.0...

In [None]:
    # train model
    model = StarDist2D(conf, name=trainModelSettings["name"], basedir = trainModelSettings["basedir_StarDist_Train"])
    median_size = calculate_extents(list(Y), np.median)
    fov = np.array(model._axes_tile_overlap('YX'))
    if any(median_size > fov):
        print("WARNING: median object size larger than field of view of the neural network.")
    model.train(X_trn, Y_trn, validation_data=(X_val,Y_val), augmenter=augmenter,
                epochs=trainModelSettings["epochs"] , steps_per_epoch=trainModelSettings["steps_per_epoch"])
    model.optimize_thresholds(X_val, Y_val)

###  5. Prediction

In [None]:
# Predict on train, val and test
model = load_model('model-dsbowl2018-1.h5', custom_objects={'mean_iou': mean_iou})
preds_train = model.predict(X_train[:int(X_train.shape[0]*0.9)], verbose=1)
preds_val = model.predict(X_train[int(X_train.shape[0]*0.9):], verbose=1)
preds_test = model.predict(X_test, verbose=1)

# Threshold predictions
preds_train_t = (preds_train > 0.5).astype(np.uint8)
preds_val_t = (preds_val > 0.5).astype(np.uint8)
preds_test_t = (preds_test > 0.5).astype(np.uint8)

# Create list of upsampled test masks
preds_test_upsampled = []
for i in range(len(preds_test)):
    preds_test_upsampled.append(resize(np.squeeze(preds_test[i]), 
                                       (sizes_test[i][0], sizes_test[i][1]), 
                                       mode='constant', preserve_range=True))

In [None]:
# Perform a sanity check on some random training samples
ix = random.randint(0, len(preds_train_t))
imshow(X_train[ix])
plt.show()
imshow(np.squeeze(Y_train[ix]))
plt.show()
imshow(np.squeeze(preds_train_t[ix]))
plt.show()

In [None]:
# Perform a sanity check on some random validation samples
ix = random.randint(0, len(preds_val_t))
imshow(X_train[int(X_train.shape[0]*0.9):][ix])
plt.show()
imshow(np.squeeze(Y_train[int(Y_train.shape[0]*0.9):][ix]))
plt.show()
imshow(np.squeeze(preds_val_t[ix]))
plt.show()

In [None]:
# Run-length encoding stolen from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
def rle_encoding(x):
    dots = np.where(x.T.flatten() == 1)[0]
    run_lengths = []
    prev = -2
    for b in dots:
        if (b>prev+1): run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return run_lengths

def prob_to_rles(x, cutoff=0.5):
    lab_img = label(x > cutoff)
    for i in range(1, lab_img.max() + 1):
        yield rle_encoding(lab_img == i)

In [None]:
new_test_ids = []
rles = []
for n, id_ in enumerate(test_ids):
    rle = list(prob_to_rles(preds_test_upsampled[n]))
    rles.extend(rle)
    new_test_ids.extend([id_] * len(rle))