In [1]:
#################
# path config
#################
# Base directory from which every input / output / working folder below is built.
# NOTE: this value ends with '/' and the suffixes appended below start with '/',
# so the resulting paths contain a doubled slash (e.g. '.../Research//input/train').
ROOT_FOLDER = '/Users/keerat/Documents/Research/'

# Glob pattern used to list the original input images.
FILE_PATTERN = '*.jpg'

# File extension used when writing the cropped output images.
OUTPUT_FILE_EXT = '.png'

### Set to True if the test set being predicted is the stage-2 folder
### (switches the test input/output/resized folders to their '*_stg2*' variants).
is_stg2 = False

### How much extra margin we want to include when cropping the output images.
### The value is also embedded in the output folder names below.
margin = 0.15
#margin = 0.4   #0.4 seems to work best for my classifier

### Input folders
TRAINSET_INPUT_FOLDER = ROOT_FOLDER + '/input/train'
TESTSET_INPUT_FOLDER = ROOT_FOLDER + '/input/test_stg2' if is_stg2 else ROOT_FOLDER + '/input/test'
ADDSET_INPUT_FOLDER = ROOT_FOLDER + '/input/additional'

### Output folders (one per data set, suffixed with the margin value)
TESTSET_OUTPUT_FOLDER = ROOT_FOLDER + '/input/test_stg2_roi_{}'.format(margin) if is_stg2 else ROOT_FOLDER + '/input/test_roi_{}'.format(margin)
TRAINSET_OUTPUT_FOLDER = ROOT_FOLDER + '/input/train_roi_{}'.format(margin)
ADDSET_OUTPUT_FOLDER = ROOT_FOLDER + '/input/additional_roi_{}'.format(margin)


### Temp working folders
TRAINSET_RESIZED_FOLDER = ROOT_FOLDER + '/input/train_resized'
TESTSET_RESIZED_FOLDER = ROOT_FOLDER + '/input/test_stg2_resized' if is_stg2 else ROOT_FOLDER + '/input/test_resized'
ADDSET_RESIZED_FOLDER = ROOT_FOLDER + '/input/additional_resized'
# Folder the data generators write augmented samples to when run with debug=True.
VISUAL_RESIZED_FOLDER = ROOT_FOLDER + '/input/visual_resized'
TRAINSET_RESIZED_MASK_FOLDER = ROOT_FOLDER + '/input/train_resized_mask'

# Image / mask folders for the U-Net training split.
UNET_TRAIN_SPLIT_FOLDER = ROOT_FOLDER + '/input/split_unet/train_split/'
UNET_TRAINMASK_SPLIT_FOLDER = ROOT_FOLDER + '/input/split_unet/train_mask_split/'

# Image / mask folders for the U-Net validation split.
UNET_VAL_SPLIT_FOLDER = ROOT_FOLDER + '/input/split_unet/val_split/'
UNET_VALMASK_SPLIT_FOLDER = ROOT_FOLDER + '/input/split_unet/val_mask_split/'

#################
# other parameters
#################
# Class sub-folder names of the training set.
ClassNames = ['Type_1', 'Type_2', 'Type_3']

from sys import platform
# True on Linux / macOS; passed as `follow_links` to the Keras directory iterators.
use_symlinks = platform == "linux" or platform == "linux2" or platform == "darwin"

# Seed shared by the image and mask generators.
seed = 20170804
# Only used in this cell to build the `info` folder name.
split_proportion = 0.8

# Optimiser / training-loop settings.
learning_rate = 0.0001
nbr_epochs = 400
batch_size = 32

# Size could be: 64, 80, 144, 128
img_width = 128
img_height = 128
nb_channels = 3

# Augmentation (forwarded to keras ImageDataGenerator)
shear_range = 0.78
zoom_range = 0.4
rotation_range = 180
vflip = True
hflip = True
width_shift_range = 0.3
height_shift_range = 0.3

# preprocessing: pixel values are multiplied by `rescale` before entering the network
rescale = 1. / 255.
preprocessing_function = None

# folder name: encodes the input size, split proportion and augmentation settings;
# used as the directory where weights, plots and logs are written.
info = 'unet' \
       + '_' + str(img_height) + 'x' + str(img_width) + 'x' + str(nb_channels) \
       + '_sp' + str(split_proportion) \
       + '_sh' + str(shear_range) \
       + '_zm' + str(zoom_range) \
       + '_rt' + str(rotation_range) \
       + '_vf' + str(int(vflip)) \
       + '_hf' + str(int(hflip)) \
       + '_ws' + str(width_shift_range) \
       + '_hs' + str(height_shift_range)

In [2]:
from keras import backend as K
from keras.layers import Input, MaxPooling2D, UpSampling2D, Conv2D
from keras.layers import concatenate
from keras.models import Model
from keras.optimizers import Adam


smooth = 1.


def dice_coef(y_true, y_pred):
    """Smoothed Dice coefficient between a ground-truth and a predicted tensor.

    Both tensors are flattened first, so the score is computed over every
    element of the batch at once:

        (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth)

    where ``smooth`` is the module-level constant.
    """
    truth_flat = K.flatten(y_true)
    pred_flat = K.flatten(y_pred)
    overlap = K.sum(truth_flat * pred_flat)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth_flat) + K.sum(pred_flat) + smooth
    return numerator / denominator


def dice_coef_loss(y_true, y_pred):
    """Loss to minimise: the negated Dice coefficient of the two tensors."""
    score = dice_coef(y_true, y_pred)
    return -score


def create_model(img_height, img_width, nb_channels, learning_rate):
    """Build and compile the U-Net style segmentation network.

    Architecture (layers are created in the same order as listed):
      * four encoder stages (32, 64, 128, 256 filters), each made of two
        3x3 ReLU convolutions followed by 2x2 max pooling;
      * a 512-filter bottleneck of two 3x3 ReLU convolutions;
      * four decoder stages (256, 128, 64, 32 filters), each up-sampling by 2,
        concatenating the matching encoder output on the channel axis and
        applying two 3x3 ReLU convolutions;
      * a final 1x1 sigmoid convolution with ``nb_channels`` outputs.

    :param img_height: input image height
    :param img_width: input image width
    :param nb_channels: number of input (and output) channels
    :param learning_rate: learning rate handed to the Adam optimiser
    :return: model compiled with ``dice_coef_loss`` and the ``dice_coef`` metric
    """
    # Channel position depends on the backend's image dimension ordering.
    if K.image_dim_ordering() == 'th':
        channel_axis = 1
        inputs = Input((nb_channels, img_height, img_width))
    else:
        channel_axis = 3
        inputs = Input((img_height, img_width, nb_channels))
    print('K.image_dim_ordering={} Channel axis={}'.format(K.image_dim_ordering(), channel_axis))

    def double_conv(tensor, filters):
        # Two consecutive 3x3 "same"-padded ReLU convolutions.
        for _ in range(2):
            tensor = Conv2D(filters, (3, 3), padding="same", activation="relu")(tensor)
        return tensor

    def upsample_and_merge(tensor, skip):
        # 2x up-sampling followed by concatenation with the encoder feature map.
        return concatenate([UpSampling2D(size=(2, 2))(tensor), skip], axis=channel_axis)

    # Contracting path: remember each stage's output for the skip connections.
    skip_connections = []
    features = inputs
    for filters in (32, 64, 128, 256):
        features = double_conv(features, filters)
        skip_connections.append(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    # Bottleneck.
    features = double_conv(features, 512)

    # Expanding path: consume the skip connections from deepest to shallowest.
    for filters in (256, 128, 64, 32):
        features = upsample_and_merge(features, skip_connections.pop())
        features = double_conv(features, filters)

    outputs = Conv2D(nb_channels, (1, 1), activation='sigmoid')(features)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(lr=learning_rate), loss=dice_coef_loss, metrics=[dice_coef])
    return model


def load_model(img_height, img_width, nb_channels, learning_rate, weight_file):
    """Create the network via ``create_model`` and load weights from ``weight_file``.

    The first four arguments are forwarded unchanged to ``create_model``.
    Prints the path being loaded and returns the model with weights applied.
    """
    print('Loading {} ...'.format(weight_file))
    network = create_model(img_height, img_width, nb_channels, learning_rate)
    network.load_weights(weight_file)
    return network

Using TensorFlow backend.


In [3]:
from keras.preprocessing.image import ImageDataGenerator


def getCombinedImageDataGenerator(x_folder, y_folder, debug=False):
    """Return an iterator yielding ``(image_batch, mask_batch)`` tuples.

    Two ``ImageDataGenerator`` instances are built from identical arguments and
    read ``x_folder`` (images) and ``y_folder`` (masks) with the same ``seed``,
    which is intended to keep the random augmentation of an image and of its
    mask in sync.

    Fix over the previous version: ``target_size`` is now passed as
    ``(img_height, img_width)``. Keras expects ``(height, width)`` and the model
    input is declared as ``(img_height, img_width, nb_channels)``; the old
    ``(img_width, img_height)`` order only worked because both are equal.

    :param x_folder: directory (with sub-folders) holding the input images
    :param y_folder: directory (with sub-folders) holding the matching masks
    :param debug: when True, augmented samples are also written to
                  ``VISUAL_RESIZED_FOLDER`` with a 'train' / 'mask' prefix
    :return: lazy iterator over ``(images, masks)`` batches
    """
    # we create two instances with the same arguments
    data_gen_args = dict(
        featurewise_center=False,
        featurewise_std_normalization=False,
        rescale=rescale,
        preprocessing_function=preprocessing_function,
        shear_range=shear_range,
        zoom_range=zoom_range,
        rotation_range=rotation_range,
        width_shift_range=width_shift_range,
        height_shift_range=height_shift_range,
        vertical_flip=vflip,
        horizontal_flip=hflip)

    def _flow(folder, debug_prefix):
        # One fresh generator per stream; everything except folder/prefix is shared.
        return ImageDataGenerator(**data_gen_args).flow_from_directory(
            folder,
            class_mode=None,
            seed=seed,
            target_size=(img_height, img_width),  # Keras order is (height, width)
            batch_size=batch_size,
            shuffle=True,
            save_to_dir=VISUAL_RESIZED_FOLDER if debug else None,
            save_prefix=debug_prefix if debug else None,
            follow_links=use_symlinks
        )

    image_generator = _flow(x_folder, 'train')
    mask_generator = _flow(y_folder, 'mask')

    # combine generators into one which yields image and masks
    # (izip on Python 2 so the endless generators are zipped lazily)
    try:
        from itertools import izip
    except ImportError:  #python3.x
        izip = zip
    combined_generator = izip(image_generator, mask_generator)
    return combined_generator

In [4]:
def cv2_morph_close(binary_image, size=5):
    """Apply OpenCV's MORPH_CLOSE to ``binary_image``.

    The structuring element is ``skimage.morphology.disk(size)``.
    Returns the result of ``cv2.morphologyEx``.
    """
    import cv2
    from skimage.morphology import disk
    return cv2.morphologyEx(binary_image, cv2.MORPH_CLOSE, disk(size))


def cv2_morph_open(binary_image, size=5):
    """Apply OpenCV's MORPH_OPEN to ``binary_image``.

    The structuring element is ``skimage.morphology.disk(size)``.
    Returns the result of ``cv2.morphologyEx``.
    """
    import cv2
    from skimage.morphology import disk
    return cv2.morphologyEx(binary_image, cv2.MORPH_OPEN, disk(size))


def morphology_clean(mask_binary):
    """Clean a binary mask: morphological opening first, then closing (default sizes)."""
    opened = cv2_morph_open(mask_binary)
    closed = cv2_morph_close(opened)
    return closed


def getTimestamp():
    """Return the current local time as a ``YYYYMMDD_HHMMSS`` string."""
    from datetime import datetime
    now = datetime.now()
    return now.strftime("%Y%m%d_%H%M%S")


def save_training_history(info, history):
    """Save the training curves and the raw history of a Keras run.

    Writes three files into the ``./<info>/`` folder, all sharing one timestamp:
      * ``loss_history.<ts>.jpg``      - train / validation loss per epoch
      * ``dice_coef_history.<ts>.jpg`` - train / validation dice_coef per epoch
      * ``log.<ts>.json``              - ``history.history`` dumped as JSON

    Fix over the previous version: each curve is drawn on its own figure.
    Before, both plots shared pyplot's current figure, so the dice_coef image
    also contained the loss curves and its two-entry legend labelled the wrong
    lines. Figures are closed after saving so they do not accumulate.

    :param info: name of the output folder (relative to the working directory)
    :param history: object exposing a ``history`` dict with the keys 'loss',
                    'val_loss', 'dice_coef' and 'val_dice_coef'
    """
    import json

    import matplotlib.pyplot as plt

    # list all data in history
    print(history.history.keys())

    # One timestamp for every artifact so files of the same run are easy to match.
    timestamp = getTimestamp()

    def _save_curve(metric, file_stem):
        # Plot <metric> and val_<metric> on a dedicated figure and save it.
        fig, ax = plt.subplots()
        ax.plot(history.history[metric])
        ax.plot(history.history['val_' + metric])
        ax.set_title('model ' + metric)
        ax.set_ylabel(metric)
        ax.set_xlabel('epoch')
        ax.legend(['train', 'test'], loc='upper left')
        fig.savefig('./' + info + '/' + file_stem + '.' + timestamp + '.jpg')
        plt.close(fig)

    # summarize history for loss
    _save_curve('loss', 'loss_history')
    # summarize history for dice_coef
    _save_curve('dice_coef', 'dice_coef_history')

    # history to json file
    # `default=float` is only consulted for values json cannot encode natively
    # (e.g. NumPy scalar types, should the history contain any).
    with open('./' + info + '/log.' + timestamp + '.json', 'w') as fp:
        json.dump(history.history, fp, indent=True, default=float)

In [5]:
import glob
import math
import os
from shutil import copyfile

os.environ['KMP_DUPLICATE_LIB_OK']='True'

from PIL import ImageFile
from keras.callbacks import ModelCheckpoint

# Training entry point: builds (or resumes) the U-Net, trains it with the paired
# image/mask generators and stores the best weights plus the training history
# inside the `info` folder.
if __name__ == "__main__":
    # Let PIL load truncated (incomplete) JPG files
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    # Output folder for weights, plots and logs.
    if not os.path.exists('./' + info):
        os.makedirs('./' + info)

    UNET_IMAGE_FORMAT = '*.png'

    # Count the PNG files found exactly one sub-folder below each split folder.
    nbr_train_samples = len(glob.glob(os.path.join(UNET_TRAIN_SPLIT_FOLDER, '*', UNET_IMAGE_FORMAT)))
    nbr_validation_samples = len(glob.glob(os.path.join(UNET_VAL_SPLIT_FOLDER, '*', UNET_IMAGE_FORMAT)))

    # autosave best Model (checkpoint written only when val_loss improves)
    best_model_file = os.path.join(info, 'weights.h5')
    best_model = ModelCheckpoint(best_model_file, monitor='val_loss', verbose=1, save_best_only=True)

    if os.path.exists(best_model_file):
        print('WARNING: Resume model and weights from previous training ...')
        # Backup previous model file (copy suffixed with the current timestamp)
        copyfile(best_model_file, best_model_file + '.' + getTimestamp())
        model = load_model(img_height, img_width, nb_channels, learning_rate, best_model_file)
        model.summary()
    else:
        print('Using UNET impls  ... save best model to:{}'.format(best_model_file))
        model = create_model(img_height, img_width, nb_channels, learning_rate)
        model.summary()

    # Number of batches needed to cover each data set once (rounded up).
    steps_per_epoch = math.ceil(1. * nbr_train_samples / batch_size)
    validation_steps = math.ceil(1. * nbr_validation_samples / batch_size)
    print('steps_per_epoch={} , validation_steps={} epochs={}'.format(steps_per_epoch, validation_steps, nbr_epochs))
    # NOTE: these sanity checks run only after the model has already been built.
    if steps_per_epoch <= 0:
        raise AssertionError("Found 0 train samples")
    if validation_steps <= 0:
        raise AssertionError("Found 0 validation samples")


    # Paired (image, mask) generators; the same augmentation settings are used
    # for the training and the validation split.
    train_generator = getCombinedImageDataGenerator(
        x_folder=UNET_TRAIN_SPLIT_FOLDER,
        y_folder=UNET_TRAINMASK_SPLIT_FOLDER
    )
    validation_generator = getCombinedImageDataGenerator(
        x_folder=UNET_VAL_SPLIT_FOLDER,
        y_folder=UNET_VALMASK_SPLIT_FOLDER
    )

    print('Start training using ImageDataGenerator:')
    history = model.fit_generator(
        generator=train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=nbr_epochs,
        validation_data=validation_generator,
        validation_steps=validation_steps,
        callbacks=[best_model],
        verbose=1)

    # Persist loss / dice_coef curves and the raw history under ./<info>/.
    save_training_history(info, history)

Loading unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5 ...
K.image_dim_ordering=tf Channel axis=3
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 128, 128, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 128, 128, 32) 896         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 128, 128, 32) 9248        conv2d_1[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 64, 64, 32)   0           conv2d_2[0][0]       

Epoch 3/400

Epoch 00003: val_loss improved from -0.54965 to -0.59444, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 4/400

Epoch 00004: val_loss improved from -0.59444 to -0.60225, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 5/400

Epoch 00005: val_loss did not improve from -0.60225
Epoch 6/400

Epoch 00006: val_loss improved from -0.60225 to -0.61180, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 7/400

Epoch 00007: val_loss improved from -0.61180 to -0.62137, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 8/400

Epoch 00008: val_loss improved from -0.62137 to -0.62342, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 9/400

Epoch 00009: val_loss did not improve from -0.62342
Epoch 10/400

Epoch 00010: val_loss improved from -0.62342 to -0.64487, saving m


Epoch 00038: val_loss improved from -0.67677 to -0.67995, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 39/400

Epoch 00039: val_loss did not improve from -0.67995
Epoch 40/400

Epoch 00040: val_loss did not improve from -0.67995
Epoch 41/400

Epoch 00041: val_loss did not improve from -0.67995
Epoch 42/400

Epoch 00042: val_loss did not improve from -0.67995
Epoch 43/400

Epoch 00043: val_loss improved from -0.67995 to -0.69064, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 44/400

Epoch 00044: val_loss improved from -0.69064 to -0.69360, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 45/400

Epoch 00045: val_loss did not improve from -0.69360
Epoch 46/400

Epoch 00046: val_loss did not improve from -0.69360
Epoch 47/400

Epoch 00047: val_loss did not improve from -0.69360
Epoch 48/400

Epoch 00048: val_loss did not improve from -0.69360
Epoch 49


Epoch 00075: val_loss did not improve from -0.72116
Epoch 76/400

Epoch 00076: val_loss did not improve from -0.72116
Epoch 77/400

Epoch 00077: val_loss did not improve from -0.72116
Epoch 78/400

Epoch 00078: val_loss did not improve from -0.72116
Epoch 79/400

Epoch 00079: val_loss did not improve from -0.72116
Epoch 80/400

Epoch 00080: val_loss did not improve from -0.72116
Epoch 81/400

Epoch 00081: val_loss improved from -0.72116 to -0.72611, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 82/400

Epoch 00082: val_loss did not improve from -0.72611
Epoch 83/400

Epoch 00083: val_loss did not improve from -0.72611
Epoch 84/400

Epoch 00084: val_loss improved from -0.72611 to -0.73332, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 85/400

Epoch 00085: val_loss did not improve from -0.73332
Epoch 86/400

Epoch 00086: val_loss did not improve from -0.73332
Epoch 87/400

Epoch 00087: val_loss


Epoch 00115: val_loss improved from -0.73690 to -0.73759, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 116/400

Epoch 00116: val_loss did not improve from -0.73759
Epoch 117/400

Epoch 00117: val_loss did not improve from -0.73759
Epoch 118/400

Epoch 00118: val_loss did not improve from -0.73759
Epoch 119/400

Epoch 00119: val_loss did not improve from -0.73759
Epoch 120/400

Epoch 00120: val_loss improved from -0.73759 to -0.74373, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 121/400

Epoch 00121: val_loss did not improve from -0.74373
Epoch 122/400

Epoch 00122: val_loss did not improve from -0.74373
Epoch 123/400

Epoch 00123: val_loss did not improve from -0.74373
Epoch 124/400

Epoch 00124: val_loss did not improve from -0.74373
Epoch 125/400

Epoch 00125: val_loss did not improve from -0.74373
Epoch 126/400

Epoch 00126: val_loss did not improve from -0.74373
Epoch 127/400

Epoch 001


Epoch 00153: val_loss did not improve from -0.75660
Epoch 154/400

Epoch 00154: val_loss did not improve from -0.75660
Epoch 155/400

Epoch 00155: val_loss did not improve from -0.75660
Epoch 156/400

Epoch 00156: val_loss did not improve from -0.75660
Epoch 157/400

Epoch 00157: val_loss did not improve from -0.75660
Epoch 158/400

Epoch 00158: val_loss did not improve from -0.75660
Epoch 159/400

Epoch 00159: val_loss did not improve from -0.75660
Epoch 160/400

Epoch 00160: val_loss did not improve from -0.75660
Epoch 161/400

Epoch 00161: val_loss improved from -0.75660 to -0.75917, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 162/400

Epoch 00162: val_loss did not improve from -0.75917
Epoch 163/400

Epoch 00163: val_loss did not improve from -0.75917
Epoch 164/400

Epoch 00164: val_loss did not improve from -0.75917
Epoch 165/400

Epoch 00165: val_loss did not improve from -0.75917
Epoch 166/400

Epoch 00166: val_loss did not impro


Epoch 00192: val_loss did not improve from -0.76256
Epoch 193/400

Epoch 00193: val_loss did not improve from -0.76256
Epoch 194/400

Epoch 00194: val_loss did not improve from -0.76256
Epoch 195/400

Epoch 00195: val_loss did not improve from -0.76256
Epoch 196/400

Epoch 00196: val_loss did not improve from -0.76256
Epoch 197/400

Epoch 00197: val_loss improved from -0.76256 to -0.76355, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 198/400

Epoch 00198: val_loss did not improve from -0.76355
Epoch 199/400

Epoch 00199: val_loss did not improve from -0.76355
Epoch 200/400

Epoch 00200: val_loss did not improve from -0.76355
Epoch 201/400

Epoch 00201: val_loss did not improve from -0.76355
Epoch 202/400

Epoch 00202: val_loss did not improve from -0.76355
Epoch 203/400

Epoch 00203: val_loss did not improve from -0.76355
Epoch 204/400

Epoch 00204: val_loss did not improve from -0.76355
Epoch 205/400

Epoch 00205: val_loss did not impro


Epoch 00232: val_loss did not improve from -0.76520
Epoch 233/400

Epoch 00233: val_loss did not improve from -0.76520
Epoch 234/400

Epoch 00234: val_loss did not improve from -0.76520
Epoch 235/400

Epoch 00235: val_loss did not improve from -0.76520
Epoch 236/400

Epoch 00236: val_loss improved from -0.76520 to -0.76695, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 237/400

Epoch 00237: val_loss did not improve from -0.76695
Epoch 238/400

Epoch 00238: val_loss did not improve from -0.76695
Epoch 239/400

Epoch 00239: val_loss did not improve from -0.76695
Epoch 240/400

Epoch 00240: val_loss did not improve from -0.76695
Epoch 241/400

Epoch 00241: val_loss did not improve from -0.76695
Epoch 242/400

Epoch 00242: val_loss did not improve from -0.76695
Epoch 243/400

Epoch 00243: val_loss did not improve from -0.76695
Epoch 244/400

Epoch 00244: val_loss did not improve from -0.76695
Epoch 245/400

Epoch 00245: val_loss improved from


Epoch 00271: val_loss did not improve from -0.76870
Epoch 272/400

Epoch 00272: val_loss did not improve from -0.76870
Epoch 273/400

Epoch 00273: val_loss did not improve from -0.76870
Epoch 274/400

Epoch 00274: val_loss did not improve from -0.76870
Epoch 275/400

Epoch 00275: val_loss did not improve from -0.76870
Epoch 276/400

Epoch 00276: val_loss did not improve from -0.76870
Epoch 277/400

Epoch 00277: val_loss did not improve from -0.76870
Epoch 278/400

Epoch 00278: val_loss improved from -0.76870 to -0.77275, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 279/400

Epoch 00279: val_loss did not improve from -0.77275
Epoch 280/400

Epoch 00280: val_loss did not improve from -0.77275
Epoch 281/400

Epoch 00281: val_loss did not improve from -0.77275
Epoch 282/400

Epoch 00282: val_loss did not improve from -0.77275
Epoch 283/400

Epoch 00283: val_loss did not improve from -0.77275
Epoch 284/400

Epoch 00284: val_loss did not impro


Epoch 00311: val_loss did not improve from -0.77275
Epoch 312/400

Epoch 00312: val_loss did not improve from -0.77275
Epoch 313/400

Epoch 00313: val_loss did not improve from -0.77275
Epoch 314/400

Epoch 00314: val_loss did not improve from -0.77275
Epoch 315/400

Epoch 00315: val_loss did not improve from -0.77275
Epoch 316/400

Epoch 00316: val_loss did not improve from -0.77275
Epoch 317/400

Epoch 00317: val_loss did not improve from -0.77275
Epoch 318/400

Epoch 00318: val_loss did not improve from -0.77275
Epoch 319/400

Epoch 00319: val_loss did not improve from -0.77275
Epoch 320/400

Epoch 00320: val_loss did not improve from -0.77275
Epoch 321/400

Epoch 00321: val_loss did not improve from -0.77275
Epoch 322/400

Epoch 00322: val_loss did not improve from -0.77275
Epoch 323/400

Epoch 00323: val_loss did not improve from -0.77275
Epoch 324/400

Epoch 00324: val_loss did not improve from -0.77275
Epoch 325/400

Epoch 00325: val_loss did not improve from -0.77275
Epoch 326


Epoch 00351: val_loss did not improve from -0.77275
Epoch 352/400

Epoch 00352: val_loss did not improve from -0.77275
Epoch 353/400

Epoch 00353: val_loss did not improve from -0.77275
Epoch 354/400

Epoch 00354: val_loss improved from -0.77275 to -0.77446, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 355/400

Epoch 00355: val_loss did not improve from -0.77446
Epoch 356/400

Epoch 00356: val_loss did not improve from -0.77446
Epoch 357/400

Epoch 00357: val_loss did not improve from -0.77446
Epoch 358/400

Epoch 00358: val_loss did not improve from -0.77446
Epoch 359/400

Epoch 00359: val_loss improved from -0.77446 to -0.77623, saving model to unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5
Epoch 360/400

Epoch 00360: val_loss did not improve from -0.77623
Epoch 361/400

Epoch 00361: val_loss did not improve from -0.77623
Epoch 362/400

Epoch 00362: val_loss did not improve from -0.77623
Epoch 363/400

Epoch 003


Epoch 00390: val_loss did not improve from -0.77963
Epoch 391/400

Epoch 00391: val_loss did not improve from -0.77963
Epoch 392/400

Epoch 00392: val_loss did not improve from -0.77963
Epoch 393/400

Epoch 00393: val_loss did not improve from -0.77963
Epoch 394/400

Epoch 00394: val_loss did not improve from -0.77963
Epoch 395/400

Epoch 00395: val_loss did not improve from -0.77963
Epoch 396/400

Epoch 00396: val_loss did not improve from -0.77963
Epoch 397/400

Epoch 00397: val_loss did not improve from -0.77963
Epoch 398/400

Epoch 00398: val_loss did not improve from -0.77963
Epoch 399/400

Epoch 00399: val_loss did not improve from -0.77963
Epoch 400/400

Epoch 00400: val_loss did not improve from -0.77963
dict_keys(['val_loss', 'val_dice_coef', 'loss', 'dice_coef'])


In [14]:
def maxHist(hist):
    """Find the largest-area rectangle under the histogram ``hist``.

    A single left-to-right scan keeps two parallel stacks: ``height`` (bar
    heights still "open") and ``position`` (the column where each of those
    heights starts). Whenever a lower bar is met, taller open bars are popped
    and their rectangle area (height * width) is compared with the best so far.

    :param hist: indexable sequence of bar heights
    :return: 3-tuple whose first item is the best area. The other two items are
             the column bounds of that rectangle, but their order is NOT
             consistent: rectangles closed inside the main loop are stored as
             ``(area, start, end)`` while rectangles closed in the final flush
             are stored as ``(area, len(hist), start)``. ``(0, 0, 0)`` is
             returned when no positive bar is found.
    """
    maxArea = (0, 0, 0)
    height = []
    position = []
    for i in range(len(hist)):
        if (len(height) == 0):
            # Nothing open yet: only a positive bar starts a new rectangle.
            if (hist[i] > 0):
                height.append(hist[i])
                position.append(i)
        else:
            if (hist[i] > height[-1]):
                # Taller bar: open a new candidate starting at this column.
                height.append(hist[i])
                position.append(i)
            elif (hist[i] < height[-1]):
                # Shorter bar: close every open bar taller than it; column i is
                # the exclusive right edge of each closed rectangle.
                while (height[-1] > hist[i]):
                    maxHeight = height.pop()
                    area = maxHeight * (i - position[-1])
                    if (area > maxArea[0]):
                        maxArea = (area, position[-1], i)
                    last_position = position.pop()
                    if (len(height) == 0):
                        break
                # Re-use the start column of the last closed bar: a bar of the
                # current (lower) height extends back to that column.
                position.append(last_position)
                if (len(height) == 0):
                    height.append(hist[i])
                elif (height[-1] < hist[i]):
                    height.append(hist[i])
                else:
                    # A bar of the same height is already open; drop the
                    # start column that was just re-pushed.
                    position.pop()
    # Flush: every bar still open extends to the end of the histogram.
    while (len(height) > 0):
        maxHeight = height.pop()
        last_position = position.pop()
        area = maxHeight * (len(hist) - last_position)
        if (area > maxArea[0]):
            # NOTE: bounds are stored as (end, start) here, unlike the main loop.
            maxArea = (area, len(hist), last_position)
    return maxArea


def maxRect(img):
    """Search a 2-D mask for the rectangle with the largest area.

    For each row a histogram of running column sums is built (the sum restarts
    from zero wherever ``img`` is zero) and handed to ``maxHist``; the best
    result over all rows wins.

    :param img: 2-D array; zero entries break a column run
    :return: 5-tuple ``(top_row, col_b, bottom_row, col_a, area)`` where
             ``bottom_row`` is the row at which the best histogram was found,
             ``col_a`` / ``col_b`` are the two column bounds reported by
             ``maxHist`` (in that function's order) and ``top_row`` is
             ``bottom_row + 1 - area / width``, truncated to int
    """
    best = (0, 0, 0)
    column_runs = np.zeros(img.shape)
    for row in range(img.shape[0]):
        if row == 0:
            column_runs[row] = img[row]
        else:
            # Extend the run from the row above, then reset it where img is zero.
            column_runs[row] = img[row] + column_runs[row - 1]
            column_runs[row][img[row] == 0] *= 0
        candidate = maxHist(column_runs[row])
        if candidate[0] > best[0]:
            # Remember the row next to (area, bound, bound).
            best = candidate + (row,)
    top_row = int(best[3] + 1 - best[0] / abs(best[1] - best[2]))
    return (top_row, best[2], best[3], best[1], best[0])


def cropCircle(img, resize=None):
    """Crop ``img`` to the largest rectangle found inside its main bright region.

    Steps: optionally resize so the longer side equals ``resize``; threshold
    the grayscale image at 10; take the contour with the largest area; draw it
    (value 1, thickness 15) on an empty mask and flood-fill the mask from the
    image centre; locate the biggest rectangle in that mask with ``maxRect``
    and crop the image to it.

    :param img: 3-channel image array
    :param resize: target length of the longer side, or None / 0 to keep the size
    :return: list ``[img_crop, rectangle, tile_size]`` where ``rectangle`` is
             ``[row_min, row_max, col_min, col_max]`` and ``tile_size`` is the
             ``dsize`` passed to ``cv2.resize`` when resizing, else ``img.shape``
    """
    if resize:
        n_rows, n_cols = img.shape[0], img.shape[1]
        if n_rows > n_cols:
            tile_size = (int(n_cols * resize / n_rows), resize)
        else:
            tile_size = (resize, int(n_rows * resize / n_cols))
        img = cv2.resize(img, dsize=tile_size, interpolation=cv2.INTER_CUBIC)
    else:
        tile_size = img.shape

    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    _, thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)

    contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

    # Largest contour by area comes first after the descending sort.
    by_area = sorted(contours, key=cv2.contourArea, reverse=True)
    main_contour = by_area[0]

    gray_rows, gray_cols = gray.shape[0], gray.shape[1]
    ff = np.zeros((gray_rows, gray_cols), 'uint8')
    cv2.drawContours(ff, main_contour, -1, 1, 15)
    # floodFill requires a mask two pixels larger than the image in each dimension.
    ff_mask = np.zeros((gray_rows + 2, gray_cols + 2), 'uint8')
    seed_point = (int(gray_cols / 2), int(gray_rows / 2))
    cv2.floodFill(ff, ff_mask, seed_point, 1)

    rect = maxRect(ff)
    row_min, row_max = min(rect[0], rect[2]), max(rect[0], rect[2])
    col_min, col_max = min(rect[1], rect[3]), max(rect[1], rect[3])
    rectangle = [row_min, row_max, col_min, col_max]
    img_crop = img[row_min:row_max, col_min:col_max]
    # Outline drawn on the local mask only; the mask itself is not returned.
    cv2.rectangle(ff, (col_min, row_min), (col_max, row_max), 3,
                  2)

    return [img_crop, rectangle, tile_size]


# Script entry point: crops every test image with cropCircle() and writes the
# result, under the same file name, to the '<ROOT_FOLDER>/input/test_cropped' folder.
# NOTE: cv2 and ntpath are imported in a later cell of this notebook export.
if __name__ == '__main__':

    #### TRAIN SET (disabled: same procedure applied per class sub-folder)

    # INPUT_FOLDER = ROOT_FOLDER + '/input/train'
    # CROPSET_FOLDER = ROOT_FOLDER + '/input/train_cropped'
    #
    # total_images = glob.glob(os.path.join(INPUT_FOLDER, FILE_PATTERN))
    # total = len(total_images)
    #
    # for clazz in ClassNames:
    #     OUTPUT_FOLDER = os.path.join(CROPSET_FOLDER, clazz)
    #     if not os.path.exists(OUTPUT_FOLDER): os.makedirs(OUTPUT_FOLDER)
    #
    #     total_images = glob.glob(os.path.join(INPUT_FOLDER, clazz, FILE_PATTERN))
    #     total = len(total_images)
    #     for i, input_filename in enumerate(total_images):
    #         img = cv2.imread(input_filename)
    #
    #         img_crop, rectangle, tile_size = cropCircle(img, resize=None)
    #
    #         basename = ntpath.basename(input_filename)
    #         output_filename = os.path.join(OUTPUT_FOLDER, basename)
    #         cv2.imwrite(output_filename, img_crop)
    #
    #         if i % 20 == 0:
    #             print("Cropped {}/{} images".format(i, total))
    #

    #### TEST SET
    INPUT_FOLDER = ROOT_FOLDER + '/input/test'
    CROPSET_FOLDER = ROOT_FOLDER + '/input/test_cropped'

    # NOTE: this listing is repeated, unchanged, a few lines below.
    total_images = glob.glob(os.path.join(INPUT_FOLDER, FILE_PATTERN))
    total = len(total_images)

    OUTPUT_FOLDER = CROPSET_FOLDER
    if not os.path.exists(OUTPUT_FOLDER): os.makedirs(OUTPUT_FOLDER)

    total_images = glob.glob(os.path.join(INPUT_FOLDER, FILE_PATTERN))
    total = len(total_images)
    for i, input_filename in enumerate(total_images):
        img = cv2.imread(input_filename)

        # Crop at the original resolution (no resizing).
        img_crop, rectangle, tile_size = cropCircle(img, resize=None)

        # Keep the original file name in the output folder.
        basename = ntpath.basename(input_filename)
        output_filename = os.path.join(OUTPUT_FOLDER, basename)
        cv2.imwrite(output_filename, img_crop)

        # Progress message every 20 images.
        if i % 20 == 0:
            print("Cropped {}/{} images".format(i, total))

Cropped 0/512 images
Cropped 20/512 images
Cropped 40/512 images
Cropped 60/512 images
Cropped 80/512 images
Cropped 100/512 images
Cropped 120/512 images
Cropped 140/512 images
Cropped 160/512 images
Cropped 180/512 images
Cropped 200/512 images
Cropped 220/512 images
Cropped 240/512 images
Cropped 260/512 images
Cropped 280/512 images
Cropped 300/512 images
Cropped 320/512 images
Cropped 340/512 images
Cropped 360/512 images
Cropped 380/512 images
Cropped 400/512 images
Cropped 420/512 images
Cropped 440/512 images
Cropped 460/512 images
Cropped 480/512 images
Cropped 500/512 images


In [15]:
import ntpath

import cv2
import numpy as np

def preprocessing(img):
    """Scale pixel values by multiplying with the module-level ``rescale`` factor."""
    scaled = img * rescale
    return scaled


def inverse_preprocessing(img):
    """Undo ``preprocessing`` by dividing by the module-level ``rescale`` factor."""
    restored = img / rescale
    return restored


def to_binary_mask(mask, t=0.00001):
    """Turn a predicted 3-channel mask into a single-channel binary mask.

    :param mask: network output for one image (rescaled values)
    :param t: threshold applied after undoing the rescaling; values above it
              become 255, values at or below it become 0
    :return: binary mask produced by OpenCV's Otsu thresholding of the
             grayscale, min-max normalised 8-bit image
    """
    pixels = inverse_preprocessing(mask)

    ### Threshold the RGB image  - This step increase sensitivity
    pixels[pixels > t] = 255
    pixels[pixels <= t] = 0

    ### To grayscale and normalize to the 0..255 8-bit range
    gray = cv2.cvtColor(pixels, cv2.COLOR_BGR2GRAY)
    gray_8bit = cv2.normalize(src=gray, dst=None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)

    ### Auto binary threshold (the threshold value chosen by OpenCV is discarded)
    _, binary = cv2.threshold(gray_8bit, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    return binary


def find_bbox(mask_binary, margin_factor=None):
    """Bounding box of the largest contour in a binary mask, optionally enlarged.

    Fixes over the previous version:
      * the "no contour" fallback now returns ``(0, 0, width, height, False)``;
        it used to return ``shape[0]`` (height) in the width slot and
        ``shape[1]`` (width) in the height slot;
      * the margin is added to each side independently and then clipped to the
        mask. Before, clamping x / y to 0 and then adding the full enlarged
        width / height pushed the right / bottom edge further out than the
        requested margin;
      * ``findContours`` is unpacked with ``[-2:]`` so both the 2-value and the
        3-value return conventions of OpenCV are accepted.

    :param mask_binary: single-channel mask, indexed as (rows, cols)
    :param margin_factor: fraction of the box width / height to add on every
                          side; None or a value <= 0 disables the margin
    :return: ``(x, y, w, h, success)`` with integer box coordinates;
             ``success`` is False when no contour was found, in which case the
             box covers the whole mask
    """
    mask_h, mask_w = mask_binary.shape[0], mask_binary.shape[1]

    ret, thresh = cv2.threshold(mask_binary, 127, 255, 0)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    if len(contours) == 0:
        return (0, 0, mask_w, mask_h, False)

    # Largest contour by area (first one wins on ties).
    cnt = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(cnt)

    if margin_factor is not None and margin_factor > 0:
        wm = w * margin_factor
        hm = h * margin_factor
        # Grow every edge by the margin, then clip each edge to the mask.
        left = max(0, x - wm)
        top = max(0, y - hm)
        right = min(x + w + wm, mask_w)
        bottom = min(y + h + hm, mask_h)
        x, y, w, h = left, top, right - left, bottom - top
    return (int(x), int(y), int(w), int(h), True)


def transform_bbox(bbox, from_dim, to_dim):
    """Rescale a bounding box from one image size to another.

    :param bbox: ``(x, y, w, h)`` in the source image
    :param from_dim: ``(height, width)`` of the source image
    :param to_dim: ``(height, width)`` of the target image
    :return: ``(x, y, w, h)`` in the target image; every value is floored to an
             int and x / y are additionally clamped to be >= 0
    """
    src_h, src_w = from_dim
    dst_h, dst_w = to_dim
    x, y, w, h = bbox

    scale_x = 1. * dst_w / src_w
    scale_y = 1. * dst_h / src_h

    new_x = max(0, int(math.floor(x * scale_x)))
    new_y = max(0, int(math.floor(y * scale_y)))
    new_w = int(math.floor(w * scale_x))
    new_h = int(math.floor(h * scale_y))
    return new_x, new_y, new_w, new_h


def predict_and_crop(model, original_folder, resized_folder, output_folder, margin_factor,
                     generate_previews=False, generate_crops=True, generate_masks=False):
    """Predict a mask for every resized image and crop the matching original.

    For each ``*.png`` in ``resized_folder`` the model predicts a mask, the
    mask is binarised and cleaned, a bounding box is located on it, and the
    box is mapped back onto the same-named ``.jpg`` in ``original_folder``.

    Parameters
    ----------
    model
        Object exposing ``predict(batch, batch_size=..., verbose=...)``.
    original_folder : str
        Folder holding the full-size ``.jpg`` originals.
    resized_folder : str
        Folder holding the down-scaled ``.png`` network inputs.
    output_folder : str
        Destination folder; created if it does not exist.
    margin_factor : float or None
        Extra margin passed through to ``find_bbox``.
    generate_previews : bool
        Also write ``<name>_preview.jpg`` with the mask and box drawn on the
        original (debug aid).
    generate_crops : bool
        Write the cropped image as ``<name>`` + ``OUTPUT_FILE_EXT``.
    generate_masks : bool
        Also write the cleaned mask as ``<name>_mask.png`` (debug aid).

    Raises
    ------
    AssertionError
        If a resized image or its original cannot be read.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    print('Input folder: {}'.format(resized_folder))
    test_image_files = sorted(glob.glob(os.path.join(resized_folder, '*.png')))
    total = len(test_image_files)
    for i, filename in enumerate(test_image_files):
        if i > 0 and i % 50 == 0:
            print('Processed {}/{} files ...'.format(i, total))

        basename = ntpath.basename(filename)
        # Strip only the extension; str.replace would also rewrite a '.png'
        # occurring in the middle of the name.
        stem = os.path.splitext(basename)[0]

        resized = cv2.imread(filename)
        if resized is None:
            raise AssertionError("Cannot read the resized image:{}".format(filename))
        # cv2.resize takes dsize as (width, height), not (height, width).
        img1 = cv2.resize(resized, dsize=(img_width, img_height))
        img = preprocessing(img1)
        img = img[None,]  # add the leading batch dimension

        predict = model.predict(img, batch_size=1, verbose=0)

        # Binarise and clean the predicted mask, then locate the ROI on it.
        binary_mask = to_binary_mask(predict[0])
        morphed_mask = morphology_clean(binary_mask)
        x, y, w, h, success = find_bbox(morphed_mask, margin_factor)

        original_img_file = os.path.join(original_folder, stem + '.jpg')
        original = cv2.imread(original_img_file)
        if original is None:
            raise AssertionError("Cannot read the original image:{}".format(original_img_file))

        # Map the box from mask coordinates back to the original resolution.
        x1, y1, w1, h1 = transform_bbox(bbox=(x, y, w, h), from_dim=morphed_mask.shape,
                                        to_dim=original.shape[0:2])

        if generate_crops:
            cropped = original[y1:y1 + h1, x1:x1 + w1, :]
            cropped_filename = os.path.join(output_folder, stem + OUTPUT_FILE_EXT)
            # Fall back to cropCircle when no box was found, when the box is
            # degenerate (zero area, which cv2.imwrite cannot save), or when
            # the crop is essentially black.
            if not success or cropped.size == 0 or cropped.mean() <= 15:
                img_crop, _, _ = cropCircle(original, resize=None)
                cv2.imwrite(cropped_filename, img_crop)
            else:
                cv2.imwrite(cropped_filename, cropped)

        # For debug & preview
        if generate_masks:
            cv2.imwrite(os.path.join(output_folder, stem + '_mask.png'), morphed_mask)

        if generate_previews:
            # Paint the mask into the blue channel of a copy of the original
            # and outline the final box in green.
            img_highlighted = original.copy()
            original_mask = cv2.resize(morphed_mask, dsize=(original.shape[1], original.shape[0]),
                                       interpolation=cv2.INTER_NEAREST)
            blue_channel = img_highlighted[:, :, 0]
            blue_channel[original_mask > 0] = 255
            cv2.rectangle(img_highlighted, (x1, y1), (x1 + w1, y1 + h1), (0, 255, 0), 3)
            preview_filename = os.path.join(output_folder, stem + '_preview.jpg')
            cv2.imwrite(preview_filename, img_highlighted)


if __name__ == '__main__':
    # Load the trained segmentation model from the run folder named by `info`.
    weight_file = os.path.join(info, 'weights.h5')
    model = load_model(img_height, img_width, nb_channels, learning_rate, weight_file)

    # The test set is a single flat folder.
    predict_and_crop(model, TESTSET_INPUT_FOLDER, TESTSET_RESIZED_FOLDER, TESTSET_OUTPUT_FOLDER, margin)

    # The train and additional sets hold one sub-folder per class.
    # Tuple layout: (originals root, resized root, output root, skip-if-missing).
    per_class_sets = (
        (TRAINSET_INPUT_FOLDER, TRAINSET_RESIZED_FOLDER, TRAINSET_OUTPUT_FOLDER, False),
        (ADDSET_INPUT_FOLDER, ADDSET_RESIZED_FOLDER, ADDSET_OUTPUT_FOLDER, True),
    )
    for originals_root, resized_root, output_root, optional in per_class_sets:
        # The additional set is only processed when its input folder exists.
        if optional and not os.path.exists(originals_root):
            continue
        for c in ClassNames:
            ORIGINAL_FOLDER = os.path.join(originals_root, c)
            INPUT_FOLDER = os.path.join(resized_root, c)
            OUTPUT_FOLDER = os.path.join(output_root, c)
            predict_and_crop(model, ORIGINAL_FOLDER, INPUT_FOLDER, OUTPUT_FOLDER, margin)

Loading unet_128x128x3_sp0.8_sh0.78_zm0.4_rt180_vf1_hf1_ws0.3_hs0.3/weights.h5 ...
K.image_dim_ordering=tf Channel axis=3
Input folder: /Users/keerat/Documents/Research//input/test_resized
Processed 50/512 files ...
Processed 100/512 files ...
Processed 150/512 files ...
Processed 200/512 files ...
Processed 250/512 files ...
Processed 300/512 files ...
Processed 350/512 files ...
Processed 400/512 files ...
Processed 450/512 files ...
Processed 500/512 files ...
Input folder: /Users/keerat/Documents/Research//input/train_resized/Type_1
Processed 50/249 files ...
Processed 100/249 files ...
Processed 150/249 files ...
Processed 200/249 files ...
Input folder: /Users/keerat/Documents/Research//input/train_resized/Type_2
Processed 50/772 files ...
Processed 100/772 files ...
Processed 150/772 files ...
Processed 200/772 files ...
Processed 250/772 files ...
Processed 300/772 files ...
Processed 350/772 files ...
Processed 400/772 files ...
Processed 450/772 files ...
Processed 500/772 fi