# Available digest modules:
tif2tensor_utils, cif2tensor_utils
cif2png_utils, tif2png_utils, montage2png_utils

In [None]:
import cif2tensor_utils

In [1]:
import keras
import numpy
import matplotlib.pyplot
import os.path
import pandas
import seaborn
import sklearn.metrics
import keras.applications
import keras.preprocessing.image
import tensorflow
import random

Using TensorFlow backend.


# User's settings

In [10]:
# Folder passed to the digest step; the .npy tensors are loaded from here later.
directory = "/data1/Minh/IFC/DeepLearning/Allchannels_digested_except113_190_207_209"

# Class name -> source folder for that class (presumably the raw .cif files).
data = dict(
    normal='/data1/Minh/IFC/DeepLearning/cif/normal/',
    leukemic='/data1/Minh/IFC/DeepLearning/cif/abnormal/',
)

# Warning: neural networks often require a combination of 1, 3 or 4 channels.
# List ALL desired channels here; downstream modules can duplicate channels
# if a specific count is needed.
channels = [0, 1, 2, 3, 5, 6, 10, 11]

# Value handed to the digest step as the image size.
image_size = 32

# Fraction of the data assigned to each subset.
split = dict(Training=0.8, Validation=0.1, Testing=0.1)

# Number of classes = number of entries in `data`.
classes = len(data)

# Save digested data to tensor:

In [None]:
# Run the digest step with the settings above. What parse() writes is defined
# in cif2tensor_utils, which is not visible from this notebook.
cif2tensor_utils.parse(directory, data, channels, image_size, split)
# NOTE(review): if class_weights.npy was saved from a dict, numpy.load returns a
# 0-d object array and `.item()` is needed before using `.keys()` — confirm.
class_weights = numpy.load(os.path.join(directory, "class_weights.npy"))

Adjust class weights if needed

In [None]:
# for k in class_weights.keys():
#     class_weights[k] = 1/class_weights[k]
# print(class_weights)

In [2]:
# Heavy weights
class_weights = {0: 1.0300000712336965, 1: 5.333254184969725}

# Optional: Manual flip rotate a selected class

In [None]:
training_x = numpy.load(os.path.join(directory, "training_x.npy"))

training_y = numpy.load(os.path.join(directory, "training_y.npy"))

In [None]:
# Count each class:
# Tally how many training samples carry each distinct label row, print the
# tallies and collect them in `size`.

from collections import Counter
import collections

# Rows of training_y become hashable tuples so they can be counted.
tuple_L = [tuple(element) for element in training_y]

freq = Counter(tuple_L)

# Walk the labels in sorted order instead of set order: set iteration order is
# arbitrary, and a later cell indexes `size[1]`, so the position of each count
# must be reproducible. With two one-hot labels, (0, 1) sorts before (1, 0).
size = []
for label in sorted(freq):
    print('%s : %d' % (label, freq[label]))
    size.append(freq[label])

In [None]:
# Split the training images into the two classes without a hard-coded count.
# Assumes the last training sample is "normal" (the label cell below makes the
# same assumption) and that every leukemic sample precedes every normal one;
# the second assumption is verified here. TODO: confirm the class order
# produced by the digest step.
_is_normal = numpy.all(training_y == training_y[-1], axis=1)
_first_normal = int(numpy.argmax(_is_normal))  # index of the first normal sample
if not _is_normal[_first_normal:].all():
    raise ValueError(
        "training_y is not grouped by class; cannot split training_x with a single boundary"
    )

# Plain slices keep these as views of training_x (no copy of the image data).
training_normal_x = training_x[_first_normal:, ...]
training_leukemic_x = training_x[:_first_normal, ...]

In [None]:
import skimage.io
import skimage.transform
import numpy
skimage.io.use_plugin('freeimage')

In [None]:
# Augment the "normal" class eight-fold: every image is rotated by 0, 90, 180
# and 270 degrees, and each rotation is also stored flipped upside down.
# Output order per source image: rot0, flip(rot0), rot90, flip(rot90), ...
training_normal_augmented_x = []

n_channels = training_normal_x.shape[-1]

for image in training_normal_x:

    for quarter_turns in range(4):

        angle = 90 * quarter_turns

        # Rotate channel by channel and restack as (height, width, channel).
        # preserve_range=True keeps the original intensity scale; without it
        # rotate() converts integer images to floats in [0, 1], which would put
        # the augmented images on a different scale from the un-augmented
        # leukemic images they are concatenated with later.
        rotated = numpy.stack(
            [
                skimage.transform.rotate(image[:, :, c], angle, preserve_range=True)
                for c in range(n_channels)
            ],
            axis=-1,
        )

        training_normal_augmented_x.append(rotated)
        # flipud reverses axis 0 (rows), i.e. the same vertical flip applied
        # to every channel.
        training_normal_augmented_x.append(numpy.flipud(rotated))

# Stack the (height, width, channel) images into one (n, height, width, channel) array.
training_normal_augmented_x = numpy.stack(training_normal_augmented_x)
print(training_normal_augmented_x.shape)

In [None]:
# Labels for the augmented training set.
# Every augmented image is "normal": repeat the label row of the last training
# sample (assumed to be a normal sample) once per augmented image. The number
# of label columns comes from training_y instead of being fixed at 2.
training_normal_augmented_y = numpy.full(
    (training_normal_augmented_x.shape[0], training_y.shape[1]),
    training_y[-1, :],
)
# Take exactly as many leading label rows as there are leukemic images, so the
# x and y arrays concatenated in the next cell always have matching lengths.
training_leukemic_y = training_y[:training_leukemic_x.shape[0], :]

In [None]:
# Rebuild the training set along the sample axis: leukemic samples first,
# augmented normal samples after them; images and labels use the same order.
training_xx = numpy.concatenate([training_leukemic_x, training_normal_augmented_x], axis=0)
training_yy = numpy.concatenate([training_leukemic_y, training_normal_augmented_y], axis=0)

In [None]:
# Persist the augmented training images and labels in `directory`; the
# "Load data and labels" cell reads these two file names back.
numpy.save(os.path.join(directory, "augmented_training_x.npy"), training_xx)
numpy.save(os.path.join(directory, "augmented_training_y.npy"), training_yy)

# Load data and labels:

In [None]:
# Use this function to normalize signal intensities across images
def min_max_norm(x, minimum=None, maximum=None):
    """Linearly rescale *x* so that ``minimum`` maps to 0 and ``maximum`` to 100.

    Args:
        x: numpy array of intensities.
        minimum: lower reference value; defaults to ``x.min()``.
        maximum: upper reference value; defaults to ``x.max()``.

    Returns:
        A tuple ``(result, minimum, maximum)``. ``result`` is the rescaled
        floating-point array; the two bounds are returned so the same scaling
        can be reused on another array.

    Where ``maximum == minimum`` the divisor is replaced by 1, so a constant
    input yields zeros instead of NaN/inf from a division by zero.
    """
    if minimum is None:
        minimum = x.min()
    if maximum is None:
        maximum = x.max()
    span = maximum - minimum
    # Guard against a zero range (constant image or identical bounds).
    safe_span = numpy.where(span == 0, 1, span)
    shifted = x.astype(numpy.float32) - minimum
    result = 100.0 * (shifted / safe_span)
    return (result, minimum, maximum)

In [3]:
def data_generator(input_x, input_y, batch_size):
    """Return an endless generator of random ``(x_batch, y_batch)`` pairs.

    Each batch draws ``batch_size`` distinct row indices from ``input_x`` with
    ``random.sample``, sorts them ascending, and uses the same indices to
    select rows from both ``input_x`` and ``input_y``. Nothing is sampled
    until the returned generator is first advanced.
    """
    def _endless_batches():
        while True:
            picked = random.sample(range(input_x.shape[0]), batch_size)
            picked.sort()
            yield input_x[picked, ...], input_y[picked, ...]

    return _endless_batches()

In [4]:
# Folder holding the digested / augmented .npy tensors.
directory = "/data1/Minh/IFC/DeepLearning/Allchannels_digested_except113_190_207_209"

# Augmented training images and their labels (file names written by the
# optional augmentation cells above).
training_x = numpy.load(os.path.join(directory, "augmented_training_x.npy"))

training_y = numpy.load(os.path.join(directory, "augmented_training_y.npy"))

# Use this function to normalize signal intensities across images
# training_x, pix_min, pix_max = min_max_norm(training_x)

# Endless generator of random training batches of 32 samples.
training_generator = data_generator(training_x, training_y, 32) 

In [5]:
# Validation images and labels from the digested-data folder.
validation_x = numpy.load(os.path.join(directory, "validation_x.npy"))

validation_y = numpy.load(os.path.join(directory, "validation_y.npy"))

# Use this function to normalize signal intensities across images
# (reuses pix_min / pix_max obtained from the training set)
# validation_x, pix_min, pix_max = min_max_norm(validation_x, pix_min, pix_max)

# Batches are drawn at random, so validation metrics are computed on random
# samples of the validation set rather than one fixed pass over it.
validation_generator = data_generator(validation_x, validation_y, 32)

In [6]:
# Testing images and labels from the digested-data folder.
testing_x = numpy.load(os.path.join(directory, "testing_x.npy"))

testing_y = numpy.load(os.path.join(directory, "testing_y.npy"))

# Use this function to normalize signal intensities across images
# (reuses pix_min / pix_max obtained from the training set)
# testing_x, pix_min, pix_max = min_max_norm(testing_x, pix_min, pix_max)

# Batches are drawn at random, so evaluate_generator scores random samples of
# the testing set rather than one fixed pass over it.
test_generator = data_generator(testing_x, testing_y, 32)

# Construct convolutional neural network:

In [7]:
shape = (training_x.shape[1], training_x.shape[2], training_x.shape[3])

x = keras.layers.Input(shape)

In [11]:
# Keyword arguments shared by every convolution layer.
options = {"activation": "relu", "kernel_size": (3, 3), "padding": "same"}

# Convolutional blocks 1-3: two 3x3 convolutions followed by 2x2 max-pooling,
# with 32, 64 and 128 filters respectively. Layers are created in the same
# order as when written out block by block. Append 256 to the tuple to enable
# the optional fourth block.
y = x
for filters in (32, 64, 128):
    y = keras.layers.Conv2D(filters, **options)(y)
    y = keras.layers.Conv2D(filters, **options)(y)
    y = keras.layers.MaxPooling2D(pool_size=2, strides=None, padding='same')(y)

# Classifier head.
y = keras.layers.Flatten()(y)

# This intermediate_layer will be used for embeddings.
intermediate_layer = keras.layers.Dense(1024, activation="relu")(y)

y = keras.layers.Dropout(0.5)(intermediate_layer)

# One output unit per class, turned into probabilities by a softmax layer.
y = keras.layers.Dense(classes)(y)
y = keras.layers.Activation("softmax")(y)

In [12]:
model = keras.models.Model(x, y)

In [13]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 32, 32, 8)         0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 32, 32, 32)        2336      
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 32, 32, 32)        9248      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 16, 16, 64)        18496     
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 16, 16, 64)        36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 8, 8, 64)          0         
__________

In [14]:
# Categorical cross-entropy loss, Adam with a learning rate of 1e-5, and
# accuracy reported as the only extra metric.
loss = keras.losses.categorical_crossentropy
optimizer = keras.optimizers.Adam(1e-5)

model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

# Train the network

In [15]:
# New output directory:
# From this cell on, `directory` names the training-output folder rather than
# the digested-data folder, so the cells that load the .npy tensors must run
# before this one (they reset `directory` themselves in the training-data cell).
directory = "/home/minh-doan/Leukemia_DeepLearning/Allchannels_augmented_training"
# Create the folder on first use.
if not os.path.exists(directory):
    os.makedirs(directory)

In [16]:
# Write the per-epoch metrics to training.csv in the output directory; the
# plotting cells read this file back.
csv_logger = keras.callbacks.CSVLogger(os.path.join(directory, 'training.csv'))

# Stop training once the monitored quantity (Keras default) has not improved
# for 64 consecutive epochs. Only takes effect if passed to fit_generator.
early_stopping = keras.callbacks.EarlyStopping(patience=64)

# checkpoint
# Keep only the best weights by validation accuracy. The model is compiled
# with metrics=["accuracy"], and this notebook reads that metric back from the
# log as "acc" / "val_acc", so the monitored key must be 'val_acc';
# 'val_categorical_accuracy' is never logged and would make the checkpoint
# skip every save.
filepath = os.path.join(directory, "weights.best.hdf5")
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

In [17]:
# Session whose GPU memory allocation grows on demand, registered as the
# session Keras uses.
configuration = tensorflow.ConfigProto(
    gpu_options=tensorflow.GPUOptions(allow_growth=True)
)

session = tensorflow.Session(config=configuration)

keras.backend.set_session(session)

In [18]:
# Train for 15 epochs of 2000 random batches each, validating on 2000 random
# validation batches per epoch. Only the CSV logger is attached; `checkpoint`
# and `early_stopping` are not passed here.
with tensorflow.device("/gpu:0"):
    model.fit_generator(
        generator=training_generator,
        steps_per_epoch=2000,
        epochs=15,
        validation_data=validation_generator,
        validation_steps=2000,
        class_weight=class_weights,
        max_q_size=256,
        callbacks=[
            #checkpoint,
            csv_logger
        ]
    )

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


Evaluate testing set

In [19]:
model.evaluate_generator(
    generator=test_generator, 
    steps=256
)

[1.2798961628577672, 0.777587890625]

# Generate Graph for visualization in Tensorboard

In [None]:
# Write the model graph to ./ for TensorBoard.
# The graph is written directly with a FileWriter. Running model.fit on
# testing_x / testing_y just to trigger a TensorBoard callback would update
# the weights on the test set, so every later use of the test data (embeddings,
# model.predict for the confusion matrix) would be scored on data the model
# had been trained on.
graph_writer = tensorflow.summary.FileWriter('./', graph=keras.backend.get_session().graph)
# Closing flushes the event file to disk.
graph_writer.close()

# Generate embeddings for visualization in Tensorboard

Converting one-hot labels into class names

In [None]:
class_names = sorted(list(data.keys()))

def save_metadata(file):
    """Write the class name of every test sample to *file*, one per line.

    Reads the module-level ``testing_y`` (one label row per sample) and
    ``class_names``. The class of a row is the column index of its largest
    entry, which for one-hot rows is the position of the 1. The indices are
    computed once for the whole array rather than once per sample and class.

    Args:
        file: path of the text file to create or overwrite.

    Raises:
        IndexError: if a row's class index has no entry in ``class_names``.
    """
    label_indices = numpy.argmax(testing_y, axis=-1)
    with open(file, 'w') as f:
        f.writelines('{}\n'.format(class_names[k]) for k in label_indices)
            
save_metadata('./metadata.tsv')

In [None]:
from tensorflow.contrib.tensorboard.plugins import projector

intermediate_values = intermediate_layer.eval(feed_dict = {x : testing_x}, session = keras.backend.get_session())
print(intermediate_values.shape)

In [None]:
# Store the intermediate-layer activations as a TensorFlow variable, save it
# to a checkpoint and write the projector configuration that points
# TensorBoard at it.
embedding_var = tensorflow.Variable(intermediate_values)

# file names
ROOT_DIR = './' # Warning! If specified as absolute path, the resulted files will be BOUND to that folder path
OUTPUT_MODEL_FILE_NAME = os.path.join(ROOT_DIR,'tf.ckpt')

# A dedicated session is used only to initialise and save the embedding
# variable; the `with` block closes it afterwards so it does not keep holding
# resources.
with tensorflow.Session() as embedSess:

    # save variable in session
    embedSess.run(embedding_var.initializer)

    # save session (only used variable) to file
    saver = tensorflow.train.Saver([embedding_var])
    saver.save(embedSess, OUTPUT_MODEL_FILE_NAME)

summary_writer = tensorflow.summary.FileWriter(ROOT_DIR)

# The dotted path below resolves because the previous cell imported the
# projector submodule.
config = tensorflow.contrib.tensorboard.plugins.projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name
embedding.metadata_path = 'metadata.tsv' # this metadata_path need to be modified later. See note.
tensorflow.contrib.tensorboard.plugins.projector.visualize_embeddings(summary_writer, config)

# Flush and release the event file.
summary_writer.close()

# Note:
Carefully check the generated file "projector_config.pbtxt":
its metadata_path must be the absolute path "/path/to/logdir/metadata.tsv"; a relative path such as "./metadata.tsv" or a home-relative path such as "~/metadata.tsv" will NOT work.

Type command in terminal: tensorboard --logdir="/path/to/logdir"

Next, open web-browser, connect to http://localhost:6006.

# Plot categorical accuracy and loss

In [None]:
metrics = pandas.read_csv(os.path.join(directory, 'training.csv') )

In [None]:
print(metrics)

In [None]:
%matplotlib inline

In [None]:
matplotlib.pyplot.plot(metrics["acc"])
matplotlib.pyplot.plot(metrics["val_acc"])

In [None]:
matplotlib.pyplot.plot(metrics["loss"])
matplotlib.pyplot.plot(metrics["val_loss"])

# Confusion matrix

In [None]:
predicted = model.predict(
    batch_size=50,
    x=testing_x
#     x=testing_xx # for 3-channel RGB
)

predicted = numpy.argmax(predicted, -1)

In [None]:
expected = numpy.argmax(testing_y[:, :], -1)

In [None]:
%matplotlib inline

In [None]:
# Confusion matrix of the test predictions: rows are the expected (true)
# class indices, columns the predicted ones.
confusion = sklearn.metrics.confusion_matrix(expected, predicted)

confusion = pandas.DataFrame(confusion)

matplotlib.pyplot.figure(figsize=(12, 8))

# fmt="d" prints the integer counts in full; the default annotation format
# abbreviates large counts to two significant digits in scientific notation.
axes = seaborn.heatmap(confusion, annot=True, fmt="d")
axes.set_xlabel("Predicted class index")
axes.set_ylabel("Expected class index")

In [28]:
session.close()