In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
import random
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb

from tqdm import tqdm

import skimage.io
import skimage.segmentation
import skimage.morphology

import sys
__file__ = 'full_experiment.ipynb'
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import utils.dirtools  # utils package should has __init__.py in it
import utils.augmentation
import utils.model_builder
import utils.data_provider
import utils.metrics
import utils.objectives
import utils.evaluation

import keras.backend
import keras.callbacks
import keras.layers
import keras.models
import keras.optimizers
import tensorflow as tf

from config import config_vars

import warnings


def warn(*args, **kwargs):
    """No-op replacement for ``warnings.warn``: accepts any arguments, emits nothing."""
    return None


# Monkey-patch the stdlib entry point so that every later ``warnings.warn(...)``
# call made in this kernel is silently discarded.
warnings.warn = warn

def empty_dir(folder):
    """Delete every entry inside ``folder`` while keeping ``folder`` itself.

    Real sub-directories are removed recursively. Everything else (regular
    files, symlinks -- including dangling ones and links that point at
    directories -- and other special files) is unlinked, so a symlink is
    removed without touching its target.

    Removal is best-effort: a failure on one entry is printed and the loop
    continues with the next entry.

    Args:
        folder: Path of the directory to empty.

    Raises:
        OSError: If ``folder`` itself cannot be listed (e.g. it does not exist).
    """
    print('empty directory: ', folder)
    for entry_name in os.listdir(folder):
        entry_path = os.path.join(folder, entry_name)
        try:
            # shutil.rmtree refuses to operate on symlinks, so only real
            # directories go through it; links are unlinked like files.
            if os.path.isdir(entry_path) and not os.path.islink(entry_path):
                shutil.rmtree(entry_path)
            else:
                os.unlink(entry_path)
        except Exception as e:
            # Deliberate best-effort: report and keep going.
            print(e)

# Expose only the GPU with index 3 to this process.
# NOTE(review): this is assumed to take effect because it is set before the
# session below is created -- confirm if the cell order ever changes.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

# Build the TensorFlow session on the device selected above, with
# gpu_options.allow_growth enabled (presumably so GPU memory is claimed on
# demand instead of all at once -- confirm against the TF docs).
configuration = tf.ConfigProto()
configuration.gpu_options.allow_growth = True
# configuration.gpu_options.visible_device_list = "0, 1"
session = tf.Session(config = configuration)

# Register the session with the Keras backend.
keras.backend.set_session(session)

In [None]:
# Set up global variables: dataset root, experiment id and derived directories.

# Single source of truth for the dataset root; every path below derives from it
# so switching datasets only requires changing this one value.
root_directory = 'DATA/FISH/'
config_vars["root_directory"] = root_directory
experiment_name = '26'

config_vars = utils.dirtools.setup_working_directories(config_vars)
config_vars = utils.dirtools.setup_experiment(config_vars, experiment_name)

# Extra label directory used by this notebook, kept under the same root.
config_vars["no_boundary_labels_dir"] = os.path.join(root_directory, 'no_boundary_labels/')

# Make sure every directory this notebook writes to exists.
for dir_key in ("normalized_images_dir", "boundary_labels_dir", "no_boundary_labels_dir"):
    os.makedirs(config_vars[dir_key], exist_ok=True)

In [None]:
# Display the resolved configuration dictionary for inspection.
config_vars

### TRAIN

#### Set Up Data Split
Do this every time before training or prediction.

In [None]:
"""For Lineage_Tracking Datasets
save train/val/test image pathnames into txt files

'path_files_training': 'DATA/LineageTracking/training.txt',
'path_files_validation': 'DATA/LineageTracking/validation.txt',
'path_files_test': 'DATA/LineageTracking/test.txt',

folder 001 - 095 total 1585 images, 16.68 for one folder
setup dirs: `normalized` and `boundary label`
img list should contain file names like 001/0000.tif

"""  

lineage_raw_dir = 'DATA/LineageTracking/raw_images/'
lineage_labels_dir = 'DATA/LineageTracking/boundary_labels/'
# The first n_train_folders sequence folders are used for training,
# all remaining folders for validation.
n_train_folders = 60


def _list_label_pngs(folders, labels_root):
    """Return 'folder/name' entries for every PNG found in each folder under labels_root.

    Folders are visited in the given order and the file names inside each
    folder are sorted, so the result is deterministic.
    """
    paths = []
    for folder in folders:
        names = [x for x in os.listdir(os.path.join(labels_root, folder)) if x.endswith('png')]
        paths.extend(folder + '/' + name for name in sorted(names))
    return paths


# Sequence folders (001, 002, ...). Stray non-directory entries are ignored
# because they would break the per-folder listing below.
fd_list = sorted(
    name for name in os.listdir(lineage_raw_dir)
    if os.path.isdir(os.path.join(lineage_raw_dir, name))
)

# Mirror the folder structure under the normalized-image and boundary-label
# directories. os.path.join keeps this correct with or without a trailing slash.
for f in fd_list:
    os.makedirs(os.path.join(config_vars["normalized_images_dir"], f), exist_ok=True)
    os.makedirs(os.path.join(config_vars["boundary_labels_dir"], f), exist_ok=True)

# Split train / valid / test by folder (image name lists); no test set is used here.
train_fd_list = fd_list[:n_train_folders]
valid_fd_list = fd_list[n_train_folders:]

list_train = _list_label_pngs(train_fd_list, lineage_labels_dir)
list_valid = _list_label_pngs(valid_fd_list, lineage_labels_dir)
list_test = []

utils.dirtools.write_path_files(config_vars["path_files_training"], list_train)
utils.dirtools.write_path_files(config_vars["path_files_validation"], list_valid)
utils.dirtools.write_path_files(config_vars["path_files_test"], list_test)

---------------------

In [None]:
"""For FISH Datasets
save train/val/test image pathnames into txt files

"""

# set up train-valid split EVERY-TIME
def create_image_lists(dir_raw_images, n_validation=48, seed=None):
    """Split the PNG files found in ``dir_raw_images`` into train/validation lists.

    File names ending in "png" are sorted; the first ``n_validation`` names form
    the validation list and the remaining names, shuffled, form the training
    list. The test and augmented-training lists are always empty.

    Args:
        dir_raw_images: Directory whose entries are listed.
        n_validation: Number of leading (sorted) images reserved for validation.
            Defaults to 48, the value previously hard-coded.
        seed: Optional seed for a reproducible training order. When ``None``
            (default) the global ``random`` state is used, as before.

    Returns:
        Tuple ``(train, test, validation, train_aug)`` of file-name lists.

    Raises:
        ValueError: If ``n_validation`` is negative.
    """
    if n_validation < 0:
        raise ValueError("n_validation must be non-negative")

    image_list = sorted(x for x in os.listdir(dir_raw_images) if x.endswith("png"))

    image_list_validation = image_list[:n_validation]
    image_list_train = image_list[n_validation:]
    if seed is None:
        random.shuffle(image_list_train)
    else:
        # A private generator keeps the split reproducible without touching
        # the global random state.
        random.Random(seed).shuffle(image_list_train)

    image_list_test = []
    image_list_train_aug = []
    return image_list_train, image_list_test, image_list_validation, image_list_train_aug

# Build the split from the normalized-images directory.
list_training, list_test, list_validation, list_training_aug = create_image_lists(
    config_vars["normalized_images_dir"],
)

# Write each partition's file names into its txt file.
for partition_key, partition_files in (
    ("path_files_training", list_training),
    ("path_files_validation", list_validation),
    ("path_files_test", list_test),
):
    utils.dirtools.write_path_files(config_vars[partition_key], partition_files)

##### data generator

In [None]:
"""read split filenames txt files into data_partitions dictionary"""

# Build data_partitions from the paths in config_vars; augmented lists are not requested.
data_partitions = utils.dirtools.read_data_partitions(
    config_vars,
    load_augmented=False,
)

In [None]:
# Set up the data generators.

# Image/label directories shared by both generators.
# Label directory alternatives: boundary_labels_dir / no_boundary_labels_dir.
gen_images_dir = config_vars["normalized_images_dir"]
gen_labels_dir = config_vars["boundary_labels_dir"]

# Trailing positional arguments common to both generators:
# pixel depth, crop size (passed twice) and the rescale_labels setting.
gen_shared_args = (
    config_vars["pixel_depth"],
    config_vars["crop_size"],
    config_vars["crop_size"],
    config_vars["rescale_labels"],
)

train_gen = utils.data_provider.random_sample_generator(
    gen_images_dir,
    gen_labels_dir,
    data_partitions["training"],
    config_vars["batch_size"],
    *gen_shared_args
)

val_gen = utils.data_provider.single_data_from_images(
    gen_images_dir,
    gen_labels_dir,
    data_partitions["validation"],
    config_vars["val_batch_size"],
    *gen_shared_args
)

#### Training Model

In [None]:
from keras import metrics

# Output head options:
#   boundary deleted -> one binary channel:    output_channel=1, activation="sigmoid"
#   with boundary    -> three binary channels: output_channel=3, activation=None
crop_size = config_vars["crop_size"]
model = utils.model_builder.get_model(crop_size, crop_size, output_channel=3, activation=None)

# loss = "binary_crossentropy"
loss = utils.objectives.weighted_crossentropy

# Overall categorical accuracy, followed by recall and precision for each of
# the three output channels (same order as before: recall, then precision).
my_metrics = [keras.metrics.categorical_accuracy]
for channel_index, recall_name, precision_name in (
    (0, "background_recall", "background_precision"),
    (1, "interior_recall", "interior_precision"),
    (2, "boundary_recall", "boundary_precision"),
):
    my_metrics.append(utils.metrics.channel_recall(channel=channel_index, name=recall_name))
    my_metrics.append(utils.metrics.channel_precision(channel=channel_index, name=precision_name))

optimizer = keras.optimizers.RMSprop(lr=config_vars["learning_rate"])

# Single-channel alternative:
# model.compile(loss=loss, metrics=[metrics.binary_accuracy], optimizer=optimizer)
model.compile(optimizer=optimizer, loss=loss, metrics=my_metrics)


In [None]:
# Keras Callbacks
log_folder = 'logs/'
experiment_log_dir = log_folder + experiment_name

# NOTE(review): the original note here said "append", but no append argument
# is passed to CSVLogger -- confirm the intended behaviour.
csv = keras.callbacks.CSVLogger(filename=config_vars["csv_log_file"])

tboard = keras.callbacks.TensorBoard(
    log_dir=experiment_log_dir,
    histogram_freq=0,
    batch_size=16,
    write_graph=True,
    write_grads=False,
    write_images=True,
    update_freq='epoch',
)

# Two weight checkpoints that differ only in the monitored quantity
# (validation loss vs. training loss); the shared settings live in one place.
checkpoint_options = dict(verbose=1, save_weights_only=True, period=1, save_best_only=True)

weights_filename1 = experiment_log_dir + '/model-{epoch:02d}-{val_loss:.2f}.h5'
modelckp1 = keras.callbacks.ModelCheckpoint(weights_filename1, monitor='val_loss', **checkpoint_options)
weights_filename2 = experiment_log_dir + '/model-{epoch:02d}-{loss:.2f}.h5'
modelckp2 = keras.callbacks.ModelCheckpoint(weights_filename2, monitor='loss', **checkpoint_options)

reducelr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    verbose=1,
    mode='min',
    min_lr=1e-7,
    cooldown=10,
    min_delta=1e-4,
)

earlystop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=1e-3,
    patience=15,
    verbose=1,
    mode='auto',
    baseline=None,
    restore_best_weights=False,
)

callbacks = [csv, tboard, modelckp1, modelckp2, reducelr, earlystop]

In [None]:
# TRAIN
steps_per_epoch = config_vars["steps_per_epoch"]

statistics = model.fit_generator(
    generator=train_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=config_vars["epochs"],
    validation_data=val_gen,
    # One sixth of the training steps (original note: must be bigger than val_batch_size).
    validation_steps=int(steps_per_epoch / 6),
    callbacks=callbacks,
    verbose=1,
)
print('Done! :)')

# Save the weights as they are at the end of training.
model.save_weights(config_vars["model_file"])

### PREDICT

In [None]:
# Paths of the validation images inside the normalized-images directory.
image_names = [os.path.join(config_vars["normalized_images_dir"], f) for f in data_partitions["validation"]]
# Load the validation images as a collection and merge them into one array.
# NOTE(review): concatenate() presumably requires all images to share one shape -- confirm.
imagebuffer = skimage.io.imread_collection(image_names)
images = imagebuffer.concatenate()

# Axes 1 and 2 of the stacked array are taken as the image dimensions; the
# reshape appends a trailing axis of size 1 so the array is 4-dimensional.
dim1, dim2 = images.shape[1], images.shape[2]
images = images.reshape((-1, dim1, dim2, 1))
# preprocess (assuming images are encoded as 8-bits in the preprocessing step)
images = images / 255

### build model and load weights
# The model is rebuilt with the image dimensions (dim1, dim2) read above and
# then loads the weights stored at config_vars["model_file"].
# model = utils.model_builder.get_model(dim1, dim2, output_channel=1, activation="sigmoid")
model = utils.model_builder.get_model(dim1, dim2, output_channel=3, activation=None)

model.load_weights(config_vars["model_file"])
# model.load_weights('logs/15/model-01-0.19.h5')

# Run inference with one image per batch.
predictions = model.predict(images, batch_size=1)

"""prepare gt annot & bd image names

rl: abrev. for raw label
bl: abrev. for boundary label
"""
# Collections over the raw annotations and the boundary labels that belong to
# the same validation file names; later cells use their .files path lists.
rl_names = [os.path.join(config_vars["raw_annotations_dir"], f) for f in data_partitions["validation"]]
rl_buffer = skimage.io.imread_collection(rl_names) 
bl_names = [os.path.join(config_vars["boundary_labels_dir"], f) for f in data_partitions["validation"]]
bl_buffer = skimage.io.imread_collection(bl_names) 

In [None]:
# Clear both prediction output directories before new results are written.
for output_dir_key in ("probmap_out_dir", "labels_out_dir"):
    empty_dir(config_vars[output_dir_key])

In [None]:
# Save a probability map and a label image for every predicted image and
# visualise the first few results next to their ground truth.
n_preview = 15  # number of leading images shown as figures

# The output directories do not change inside the loop: create them once.
probmap_out_dir = config_vars["probmap_out_dir"]
labels_out_dir = config_vars["labels_out_dir"]
os.makedirs(probmap_out_dir, exist_ok=True)
os.makedirs(labels_out_dir, exist_ok=True)

for i in range(len(images)):
    filename = imagebuffer.files[i]
    imgname = os.path.basename(filename)

    probmap = predictions[i].squeeze()
    # NOTE(review): the uint8 cast discards fractional values; if probmap holds
    # values in [0, 1] the saved image is almost entirely zeros -- confirm
    # whether scaling to 0-255 is intended before changing this.
    skimage.io.imsave(os.path.join(probmap_out_dir, imgname), probmap.astype('uint8'))

    pred = utils.metrics.probmap_to_pred(probmap, config_vars["boundary_boost_factor"])
    label = utils.metrics.pred_to_label(pred, config_vars["cell_min_size"])

    # uint8 can only hold values up to 255: larger label values would wrap
    # around (256 -> 0). Widen to uint16 only when needed so the output is
    # unchanged for images whose values fit in uint8.
    label_dtype = 'uint8' if label.max() <= 255 else 'uint16'
    skimage.io.imsave(os.path.join(labels_out_dir, imgname), label.astype(label_dtype))

    if i < n_preview:
        # Ground-truth images are only needed for the preview figures, so they
        # are read lazily instead of once per predicted image.
        original_image = skimage.io.imread(filename)
        rl_image = skimage.io.imread(rl_buffer.files[i])
        bl_image = skimage.io.imread(bl_buffer.files[i])

        # Panels in row-major order, matching the 2x3 axes grid.
        panels = (
            (original_image, 'original image'),
            (bl_image, 'ground truth boundary'),
            (rl_image, 'ground truth label'),
            (pred, 'predict boundary'),
            (probmap, 'predict boundary probmap'),
            (label, 'predict label'),
        )
        fig, ax = plt.subplots(2, 3, figsize=(18, 12))
        for axis, (panel_image, panel_title) in zip(ax.ravel(), panels):
            axis.imshow(panel_image)
            axis.title.set_text(panel_title)
            axis.set_xticks([])
            axis.set_yticks([])
        plt.show()
        # Release the figure so repeated previews do not accumulate in memory.
        plt.close(fig)


In [None]:
"""For Lineage Tracking data with second order directories

"""

# Save a probability map and a label image for every predicted image, keeping
# the one-level sub-folder layout of the inputs (e.g. 001/0000.png), and
# visualise the first few results next to their ground truth.
n_preview = 15  # number of leading images shown as figures

probmap_out_dir = config_vars["probmap_out_dir"]
labels_out_dir = config_vars["labels_out_dir"]

for i in range(len(images)):
    filename = imagebuffer.files[i]
    # Parent folder and file name taken with os.path instead of splitting on
    # '/', so the result does not depend on the path separator.
    folder_name = os.path.basename(os.path.dirname(filename))
    imgname = os.path.join(folder_name, os.path.basename(filename))

    probmap = predictions[i].squeeze()
    os.makedirs(os.path.join(probmap_out_dir, folder_name), exist_ok=True)
    # NOTE(review): the uint8 cast discards fractional values; if probmap holds
    # values in [0, 1] the saved image is almost entirely zeros -- confirm
    # whether scaling to 0-255 is intended before changing this.
    skimage.io.imsave(os.path.join(probmap_out_dir, imgname), probmap.astype('uint8'))

    pred = utils.metrics.probmap_to_pred(probmap, config_vars["boundary_boost_factor"])
    label = utils.metrics.pred_to_label(pred, config_vars["cell_min_size"])

    os.makedirs(os.path.join(labels_out_dir, folder_name), exist_ok=True)
    # uint8 can only hold values up to 255: larger label values would wrap
    # around (256 -> 0). Widen to uint16 only when needed so the output is
    # unchanged for images whose values fit in uint8.
    label_dtype = 'uint8' if label.max() <= 255 else 'uint16'
    skimage.io.imsave(os.path.join(labels_out_dir, imgname), label.astype(label_dtype))

    if i < n_preview:
        # Ground-truth images are only needed for the preview figures, so they
        # are read lazily instead of once per predicted image.
        original_image = skimage.io.imread(filename)
        rl_image = skimage.io.imread(rl_buffer.files[i])
        bl_image = skimage.io.imread(bl_buffer.files[i])

        # Panels in row-major order, matching the 2x3 axes grid.
        panels = (
            (original_image, 'original image'),
            (bl_image, 'ground truth boundary'),
            (rl_image, 'ground truth label'),
            (pred, 'predict boundary'),
            (probmap, 'predict boundary probmap'),
            (label, 'predict label'),
        )
        fig, ax = plt.subplots(2, 3, figsize=(12, 8))
        for axis, (panel_image, panel_title) in zip(ax.ravel(), panels):
            axis.imshow(panel_image)
            axis.title.set_text(panel_title)
            axis.set_xticks([])
            axis.set_yticks([])
        plt.show()
        # Release the figure so repeated previews do not accumulate in memory.
        plt.close(fig)
