In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers.experimental import preprocessing
#from cnnArchitectures.Xception import get_xception_model
from sklearn.model_selection import StratifiedKFold
import os
import datetime

%matplotlib inline
%reload_ext tensorboard

In [None]:
# Dataset locations (all derived from the Kaggle input directory)
# E.g. D:\DATASETS\ALL_2\training\fold_0\all
dataset_path = os.path.abspath('../input/leukemia-classification/C-NMC_Leukemia')
train_data_path = f"{dataset_path}/training_data"
# One entry per pre-defined training fold directory: fold_0, fold_1, fold_2.
train_data_paths = [
    f"{dataset_path}/training_data/fold_{fold_idx}/" for fold_idx in range(3)
]
test_data_path = os.path.abspath(
    '../input/leukemia-classification/C-NMC_Leukemia/validation_data/C-NMC_test_prelim_phase_data'
)
image_format = ".bmp"

# Absolute, normalized versions of the fold directories.
data_paths = [os.path.abspath(fold_dir) for fold_dir in train_data_paths]

In [None]:
def get_pathes_labels(path=None):
    """Collect image file paths and integer class labels from a directory tree.

    Every file found in (or below) a directory named ``all`` gets label 0 and
    every file in (or below) a directory named ``hem`` gets label 1. Files in
    any other directory are ignored.

    The class is decided only from the name of the walked root directory
    itself and the directory names below it. Directories *above* the root are
    ignored, so an unrelated ancestor such as ``/data/small_set/`` or
    ``/home/chemist/`` can no longer leak into the labels through a substring
    match.

    Args:
        path: Root directory to walk. When ``None`` the module-level
            ``train_data_path`` is used (i.e. all training folds).

    Returns:
        ``[image_pathes, labels]`` - two NumPy arrays of equal length. The
        order follows ``os.walk`` and is therefore not guaranteed to be sorted.
    """
    root = train_data_path if path is None else path
    # Name of the root directory itself, so that passing ".../fold_0/all"
    # directly keeps working.
    root_name = os.path.basename(os.path.abspath(root))

    image_pathes = []
    labels = []
    for dirpath, _subdirs, file_names in os.walk(root):
        relative = os.path.relpath(dirpath, root)
        parts = [root_name]
        if relative != os.curdir:
            parts.extend(relative.split(os.sep))

        # "all" is checked first, mirroring the original precedence.
        if "all" in parts:
            label = 0
        elif "hem" in parts:
            label = 1
        else:
            continue

        for img_name in file_names:
            image_pathes.append(dirpath + "/" + img_name)
            labels.append(label)
    image_pathes, labels = np.array(image_pathes), np.array(labels)
    return [image_pathes, labels]

In [None]:
def augment(image, max_delta=0.1, seed=None):
    """Apply random brightness and random vertical/horizontal flips.

    The original body defined a local ``seed`` that was never handed to any
    op (each op received ``seed=None``). It is now an optional parameter whose
    default keeps the previous unseeded behaviour.

    Args:
        image: Image tensor accepted by the ``tf.image`` random ops.
        max_delta: Upper bound passed to ``tf.image.random_brightness``.
        seed: Optional op-level seed forwarded to all three random ops.
            ``None`` (default) leaves the ops unseeded, as before.

    Returns:
        The augmented image tensor.
    """
    image = tf.image.random_brightness(image, max_delta=max_delta, seed=seed)
    image = tf.image.random_flip_up_down(image, seed=seed)
    image = tf.image.random_flip_left_right(image, seed=seed)
    return image
# Keras-layer augmentation pipeline: random flips on both axes, random
# rotation (factor 0.2) and random contrast (factor 0.1).
aug_model = tf.keras.Sequential()
aug_model.add(preprocessing.RandomFlip("horizontal_and_vertical"))
aug_model.add(preprocessing.RandomRotation(0.2))
aug_model.add(preprocessing.RandomContrast(0.1))

In [None]:
def preprocess(image):
    """Resize an image to 256x256 and standardize it.

    Args:
        image: Image tensor to prepare for the network.

    Returns:
        The resized tensor after ``tf.image.per_image_standardization``.
    """
    target_hw = (256, 256)
    resized = tf.image.resize(image, target_hw)
    return tf.image.per_image_standardization(resized)

In [None]:
def load_image(path):
    """Read a BMP file from ``path`` and decode it into a 3-channel tensor."""
    raw_bytes = tf.io.read_file(path)
    return tf.io.decode_bmp(raw_bytes, channels=3)

In [None]:
def get_ds(filenames, labels, batch_size, pref_buf_size):
    """Build a batched ``tf.data`` pipeline of (image, label) pairs.

    Args:
        filenames: Sequence of image file paths.
        labels: Sequence of labels, aligned index-by-index with ``filenames``.
        batch_size: Number of elements per batch.
        pref_buf_size: Buffer size handed to ``Dataset.prefetch`` (counted in
            batches, since prefetching happens after batching).

    Returns:
        A dataset yielding batches of (preprocessed image, label).
    """
    autotune = tf.data.experimental.AUTOTUNE

    # Paths -> decoded images -> resized and standardized images.
    path_ds = tf.data.Dataset.from_tensor_slices(filenames)
    decoded_ds = path_ds.map(load_image, autotune)
    images_ds = decoded_ds.map(preprocess, autotune)

    target_ds = tf.data.Dataset.from_tensor_slices(labels)
    paired_ds = tf.data.Dataset.zip((images_ds, target_ds))
    return paired_ds.batch(batch_size).prefetch(pref_buf_size)

In [None]:
def get_stratified_datasets(X, Y, n_splits=4, random_state=None):
    """Yield stratified, shuffled train/validation splits of ``X`` and ``Y``.

    Args:
        X: Array of samples; must support NumPy integer-array indexing.
        Y: Array of class labels aligned with ``X``.
        n_splits: Number of stratified folds (default 4, the value that was
            previously hard-coded).
        random_state: ``None`` (default) keeps the previous behaviour of
            drawing from NumPy's global random state. An ``int`` makes both
            the fold assignment and the training-set shuffle reproducible.

    Yields:
        ``[[X_train, y_train], [X_test, y_test]]`` once per fold. The training
        part is additionally permuted; the held-out part keeps fold order.
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    # Use the global NumPy RNG unless a seed was requested explicitly.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    for train_index, test_index in skf.split(X, Y):
        # Permuting the index array is equivalent to permuting the selected rows.
        shuffled_train_index = train_index[rng.permutation(len(train_index))]
        X_train, y_train = X[shuffled_train_index], Y[shuffled_train_index]
        X_test, y_test = X[test_index], Y[test_index]
        yield [[X_train, y_train], [X_test, y_test]]

In [None]:
# Input-pipeline settings
BATCH_SIZE = 16              # samples per batch
IMAGE_SIZE = (256, 256)      # spatial size of the network input
SEED = 322
PREFETCH_BUFFER_SIZE = 400   # passed to Dataset.prefetch
SHUFFLE_BUFFER_SIZE = 1000
CACHE_DIR = "caches/ds_cache"

# Keyword arguments describing a directory-based image dataset.
ds_params = {
    "labels": "inferred",
    "label_mode": "categorical",
    "class_names": ["all", "hem"],
    "color_mode": "rgb",
    "batch_size": BATCH_SIZE,
    "image_size": IMAGE_SIZE,
    "seed": SEED,
}

In [None]:
def test_model(model, callbacks=None):
    """Evaluate ``model`` on the preliminary-phase test images.

    Image names are read from ``<dataset_path>/validation_data/
    C-NMC_test_prelim_phase_data`` and their labels from
    ``C-NMC_test_prelim_phase_data_labels.csv`` (columns ``new_names`` and
    ``labels``) in the parent directory.

    The file names are sorted once and both the path list and the label list
    are derived from that single ordering. Previously only the paths were
    sorted while the labels followed raw ``os.walk`` order, which does not
    guarantee any ordering and could pair images with the wrong labels.

    Args:
        model: Compiled Keras model to evaluate.
        callbacks: Optional list of Keras callbacks forwarded to
            ``model.evaluate``.

    Raises:
        KeyError: If an image file has no row in the labels CSV.
    """
    test_dir = dataset_path + "/validation_data"
    test_data_csv = pd.read_csv(
        test_dir + "/C-NMC_test_prelim_phase_data_labels.csv"
    )
    test_data_dir = test_dir + "/C-NMC_test_prelim_phase_data"

    # Single source of truth for ordering: sorted file names of the top-level dir.
    image_names = sorted(list(os.walk(test_data_dir))[0][2])
    filenames = [test_data_dir + "/" + name for name in image_names]

    # One dict lookup per image instead of scanning the DataFrame for each file.
    # drop_duplicates keeps the first row per name, as the old lookup did.
    unique_rows = test_data_csv.drop_duplicates("new_names")
    label_by_name = dict(
        zip(unique_rows["new_names"].to_list(), unique_rows["labels"].to_list())
    )
    # The CSV label is inverted (1 - label) to match the training encoding
    # used in this notebook (all -> 0, hem -> 1).
    # NOTE(review): presumably the CSV uses 1 for ALL - confirm against the dataset docs.
    labels = [1 - label_by_name[name] for name in image_names]

    test_ds = get_ds(filenames, labels, BATCH_SIZE, PREFETCH_BUFFER_SIZE)

    # callbacks=None is evaluate's default, so no branching is needed.
    model.evaluate(test_ds, callbacks=callbacks)

In [None]:
def get_cnn_model_1(input_shape):
    """Build the sequential CNN binary classifier.

    Layout: four 3x3 convolution stages (32, 64, 128, 256 filters) with SELU
    activations, followed by a flattened dense head (512, 256, 128, 64 units)
    and a single sigmoid output unit.

    Args:
        input_shape: Shape of one input sample, passed to the first
            ``Conv2D`` layer (channels last).

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    weight_init = 'lecun_uniform'
    bias_init = 'lecun_uniform'
    regularizer = None
    act_name = "selu"

    net = tf.keras.Sequential()

    # (filters, layer class placed after the activation, its kwargs)
    conv_stages = (
        (32, MaxPooling2D, {"pool_size": (2, 2)}),
        (64, MaxPooling2D, {"pool_size": (3, 3)}),
        (128, MaxPooling2D, {"pool_size": (3, 3)}),
        (256, Dropout, {"rate": 0.6}),
    )
    for stage_idx, (filters, tail_cls, tail_kwargs) in enumerate(conv_stages):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {"input_shape": input_shape} if stage_idx == 0 else {}
        net.add(Conv2D(
            filters, (3, 3),
            data_format="channels_last",
            kernel_initializer=weight_init,
            bias_initializer=bias_init,
            kernel_regularizer=regularizer,
            **first_layer_kwargs
        ))
        net.add(Activation(act_name))
        net.add(tail_cls(**tail_kwargs))

    # Fully connected head: (units, dropout rate applied after the activation)
    net.add(Flatten())
    for units, drop_rate in ((512, 0.8), (256, 0.8), (128, 0.7)):
        net.add(Dense(units, kernel_initializer=weight_init, bias_initializer=bias_init))
        net.add(Activation(act_name))
        net.add(Dropout(drop_rate))

    # The 64-unit layer has no activation or dropout after it.
    net.add(Dense(64, kernel_initializer=weight_init, bias_initializer=bias_init))

    # Single-unit sigmoid output.
    net.add(Dense(1))
    net.add(Activation('sigmoid'))

    return net

In [None]:
# Build the CNN for 3-channel inputs of IMAGE_SIZE and print its layer table.
model = get_cnn_model_1((*IMAGE_SIZE, 3))
model.summary()

In [None]:
# Metrics tracked during fit/evaluate; the explicit name "precision" is what
# makes the validation metric appear as "val_precision".
metrics = ["accuracy", tf.keras.metrics.Precision(name="precision")]
# Adam with the AMSGrad variant and a small learning rate.
adam_opt = tf.keras.optimizers.Adam(amsgrad=True, learning_rate=0.0001)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=metrics,
    optimizer=adam_opt
)

In [None]:
# Training schedule: number of eras, and number of epochs per fit call.
ERAS, EPOCHS = 50, 1
#update_freq = 250

In [None]:
def _best_weights_checkpoint(filepath, monitor):
    """Create a checkpoint callback that keeps only the weights with the highest ``monitor`` value."""
    return tf.keras.callbacks.ModelCheckpoint(
        filepath=filepath,
        monitor=monitor,
        mode='max',
        save_best_only=True,
        save_weights_only=True)


# Best weights by validation precision.
model_checkpoint_callback1 = _best_weights_checkpoint("./prec_checkpoints/", 'val_precision')
# Best weights by validation accuracy.
model_checkpoint_callback2 = _best_weights_checkpoint("./", 'val_accuracy')
#log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
#tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, update_freq = update_freq,
#                                                     profile_batch = '500,520')

In [None]:
# Gather every training image path and its label (None -> walk train_data_path).
images, labels = get_pathes_labels(None)

In [None]:

# Each era walks once through all stratified folds, fitting the same model on
# every fold's training part, then evaluates on the test set and saves weights.
# NOTE(review): because one model instance is trained on every fold, samples
# held out in one fold are training samples in the others, so the validation
# metrics are optimistic.
for era in range(ERAS):
    print("Era ", era)
    data_gen = get_stratified_datasets(images, labels)
    # A plain for-loop replaces the manual next()/StopIteration handling. The
    # old try block also wrapped model.fit, so a StopIteration raised anywhere
    # inside the pipeline would silently end the era.
    for train_data, valid_data in data_gen:
        train_ds = get_ds(*train_data, BATCH_SIZE, PREFETCH_BUFFER_SIZE)
        # Augmentation is applied to the training stream only.
        train_ds = train_ds.map(
            lambda x, y: (augment(x), y), tf.data.experimental.AUTOTUNE
        )
        valid_ds = get_ds(*valid_data, BATCH_SIZE, PREFETCH_BUFFER_SIZE)
        # The datasets are already batched by get_ds, so batch_size is not
        # passed to fit (it must not be specified for Dataset inputs).
        model.fit(
            train_ds, validation_data=valid_ds, epochs=EPOCHS,
            callbacks=[model_checkpoint_callback1])
    print("Model test")
    test_model(model, [model_checkpoint_callback1, model_checkpoint_callback2])
    model.save_weights("./w.h5")

In [None]:
# Final evaluation on the preliminary-phase test set.
test_model(
    model,
    callbacks=[model_checkpoint_callback1, model_checkpoint_callback2],
)

In [None]:
# Persist the final weights in HDF5 format.
model.save_weights(filepath="./w.h5")