In [None]:
import ifc_utils
import keras
import numpy
import matplotlib.pyplot
import os.path
import pandas

import keras.applications
import keras.preprocessing.image
import tensorflow

In [None]:
# Build a TensorFlow session whose GPU memory allocation grows on demand
# (allow_growth) and register it as the session Keras uses.
configuration = tensorflow.ConfigProto()

configuration.gpu_options.allow_growth = True

# `session` is kept as a notebook-level name so it can be closed explicitly later.
session = tensorflow.Session(config=configuration)

keras.backend.set_session(session)

# User's settings:

In [None]:
# Working directory passed to ifc_utils.parse / ifc_utils.split; the later
# cells read the Training/Validation/Testing folders and the *.npy arrays from it.
directory = "/home/minh-doan/Cell_cycle/temp_processed"

# Class name -> folder holding that class's input images.
# The number of keys determines the size of the softmax output layer.
data = {
    "G1": "/home/minh-doan/Cell_cycle/Step2_input_single_tifs/G1/",
    "S_phase": "/home/minh-doan/Cell_cycle/Step2_input_single_tifs/S_phase/",
    "G2": "/home/minh-doan/Cell_cycle/Step2_input_single_tifs/G2/",
    "Prophase": "/home/minh-doan/Cell_cycle/Step2_input_single_tifs/Prophase/",
    "Anaphase": "/home/minh-doan/Cell_cycle/Step2_input_single_tifs/Anaphase/",
    "Metaphase": "/home/minh-doan/Cell_cycle/Step2_input_single_tifs/Metaphase/",
    "Telophase": "/home/minh-doan/Cell_cycle/Step2_input_single_tifs/Telophase/"
}

# Channel identifiers handed to ifc_utils.parse.
# NOTE(review): presumably the imaging channels to extract — confirm in ifc_utils.
channels = [3, 6]

# Edge length used for the from-scratch model's (image_size, image_size, 1) input.
image_size = 40

# Fractions of the data assigned to each subset (they sum to 1.0).
split = {
    "Training" : 0.8,
    "Validation" : 0.15,
    "Testing" : 0.05
}

In [None]:
# Preprocess the per-class input folders listed in `data` using the settings above.
# NOTE(review): the exact files produced are defined by ifc_utils.parse — confirm there.
ifc_utils.parse(directory, data, channels, image_size)

In [None]:
# Partition the parsed data by the fractions in `split`.
# NOTE(review): presumably this creates the Training/Validation/Testing folders
# and the *_x.npy / *_y.npy arrays read by later cells — confirm in ifc_utils.
ifc_utils.split(directory, data.keys(), split, image_size)

In [None]:
# Per-class weights computed from the training folder; passed as `class_weight`
# to model.fit later on. The path is derived from `directory` so that changing
# the working directory in the settings cell is enough.
# The trailing "" keeps the trailing slash of the original path.
class_weights = ifc_utils.class_weights(os.path.join(directory, "Training", ""), data)

In [None]:
# Number of output classes = number of entries in the `data` mapping.
classes = len(data)

# Utilize prebuilt VGG19 network

In [None]:
# Change the number of channels here: VGG19 expects (224, 224, 3), i.e. 3 channels;
# the homemade model uses (image_size, image_size, 1).
shape = (224, 224, 3)

In [None]:
# VGG19 convolutional base without its fully connected top; a custom
# classification head is attached in the following cells.
VGG19 = keras.applications.VGG19(
    include_top=False,
    classes=classes
)

In [None]:
# Freeze the whole VGG19 base so only the new head is trained.
for base_layer in VGG19.layers:
    base_layer.trainable = False

# Collapse the spatial dimensions of the final feature maps into one vector per image.
x = keras.layers.GlobalAveragePooling2D()(VGG19.output)

In [None]:
# Classification head on top of the pooled VGG19 features:
# Dense(4096, relu) -> BatchNormalization -> Dropout(0.5) -> softmax.
#
# The dense activations are kept under their own name (`intermediate_layer`)
# because the TensorBoard embedding cells evaluate that tensor later. For plain
# classification the name could be dropped and the result assigned to `x` directly.
intermediate_layer = keras.layers.Dense(4096, activation='relu')(x)
x = keras.layers.BatchNormalization()(intermediate_layer)
x = keras.layers.Dropout(rate=0.5)(x)

predictions = keras.layers.Dense(classes, activation='softmax')(x)

# This is the model we will train. Keras 2 names these arguments
# `inputs`/`outputs`; the singular `input`/`output` are the Keras 1 spellings.
model = keras.models.Model(inputs=VGG19.input, outputs=predictions)

In [None]:
# Print the layer-by-layer overview of the assembled model.
model.summary()

In [None]:
# Training configuration for the VGG19-based classifier: Adam with a small
# learning rate, categorical cross-entropy loss, categorical accuracy metric.
optimizer = keras.optimizers.Adam(lr=0.0001)
loss = keras.losses.categorical_crossentropy
metrics = [keras.metrics.categorical_accuracy]

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [None]:
# Augment training images with random horizontal/vertical flips.
training_generator = keras.preprocessing.image.ImageDataGenerator(horizontal_flip = True, vertical_flip = True)

# Stream batches of 32 RGB images resized to 224x224 from the training folder.
# The folder is derived from `directory` rather than repeating the absolute path;
# the trailing "" keeps the trailing slash of the original path.
training_generator = training_generator.flow_from_directory(
    batch_size=32,
    color_mode="rgb",
    directory=os.path.join(directory, "Training", ""),
    target_size=(224, 224)
)

In [None]:
# NOTE(review): validation images are randomly flipped as well, so validation
# metrics are not strictly deterministic — confirm this is intended.
validation_generator = keras.preprocessing.image.ImageDataGenerator(horizontal_flip = True, vertical_flip = True)

# Stream batches of 32 RGB images resized to 224x224 from the validation folder.
# The folder is derived from `directory` rather than repeating the absolute path;
# the trailing "" keeps the trailing slash of the original path.
validation_generator = validation_generator.flow_from_directory(
    batch_size=32,
    color_mode="rgb",
    directory=os.path.join(directory, "Validation", ""),
    target_size=(224, 224)
)

In [None]:
# csv_logger = keras.callbacks.CSVLogger("training.csv")

# Early-stopping callback with a patience of 64 epochs.
# NOTE(review): this instance is not included in the callbacks of the
# fit_generator call below, so it currently has no effect there.
early_stopping = keras.callbacks.EarlyStopping(patience=64)

In [None]:
# checkpoint
filepath="/home/minh-doan/Cell_cycle/temp_processed/weights.best.hdf5"
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_categorical_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = checkpoint

In [None]:
# Log per-epoch metrics to training.csv in the working directory: the plotting
# cells further down read "./training.csv", so without this logger a fresh
# Restart & Run All has no file to load.
csv_logger = keras.callbacks.CSVLogger("training.csv")

# Train the VGG19-based classifier from the directory generators on GPU 2.
with tensorflow.device("/gpu:2"):
    model.fit_generator(
        callbacks=[
            callbacks_list,
            csv_logger
        ],
        epochs=1,
        generator=training_generator,
        # NOTE(review): newer Keras 2 releases call this `max_queue_size` and accept
        # `max_q_size` only with a deprecation warning — rename once the installed
        # version is confirmed to support it.
        max_q_size=256,
        steps_per_epoch=2000,
        validation_data=validation_generator,
        validation_steps=2000
    )

In [None]:
# NOTE(review): random flips are applied to the test images as well, so the
# reported numbers vary from run to run — confirm this is intended.
test_generator = keras.preprocessing.image.ImageDataGenerator(horizontal_flip = True, vertical_flip = True)

# Evaluate at the same 224x224 resolution used for training and validation.
# Without an explicit target_size, flow_from_directory resizes to its 256x256
# default, so the model would be scored on differently scaled images.
# The folder is derived from `directory`; the trailing "" keeps the trailing slash.
test_generator = test_generator.flow_from_directory(
    batch_size=1,
    color_mode="rgb",
    directory=os.path.join(directory, "Testing", ""),
    target_size=(224, 224)
)

# Score 256 single-image batches drawn from the test folder.
model.evaluate_generator(
    generator=test_generator,
    steps=256
)

In [None]:
# Load the held-out test arrays from the working directory configured in the
# settings cell, matching how the from-scratch section loads its training arrays.
test_x = numpy.load(os.path.join(directory, "testing_x.npy"))

test_y = numpy.load(os.path.join(directory, "testing_y.npy"))

In [None]:
# VGG19 needs 3-channel RGB input, not single-channel grayscale:
# stack three copies of the test images along the channel axis (axis 3).
test_xx = numpy.concatenate([test_x] * 3, axis=3)

# Generate Graph for visualization in Tensorboard

In [None]:
# TensorBoard callback writing the graph and images to the current directory;
# histograms are disabled (histogram_freq=0).
tbCallBack = keras.callbacks.TensorBoard(
    log_dir='./',
    histogram_freq=0,
    write_graph=True,
    write_images=True
)

In [None]:
options = {
    "batch_size": 1,
    "class_weight": class_weights,
    "callbacks": [
        tbCallBack
    ],
    "epochs": 1,
    "verbose": 0,
    "x": test_xx,
    "y": test_y    
}

%time model.fit(**options)

# Generate embeddings for visualization in Tensorboard

In [None]:
# Converting one-hot labels into serial numbers [0,1,2,3...]
def save_metadata(file, labels=None):
    """Write one class index per line to `file` (TensorBoard projector metadata).

    Parameters
    ----------
    file : str
        Destination path; an existing file is overwritten.
    labels : array-like of shape (n_samples, n_classes), optional
        One-hot encoded labels. Defaults to the notebook-level `test_y`.

    Notes
    -----
    The class index of each row is its argmax. It is computed once for the whole
    array instead of re-scanning all labels on every iteration, and it stays
    aligned with the rows even if a row is not strictly one-hot.
    """
    if labels is None:
        labels = test_y

    class_indices = numpy.argmax(labels, axis=1)

    with open(file, 'w') as f:
        for c in class_indices:
            f.write('{}\n'.format(c))
            
# Write the class index of every test sample to metadata.tsv in the current directory.
save_metadata('./metadata.tsv')

In [None]:
from tensorflow.contrib.tensorboard.plugins import projector

# Dimensions of the embedding matrix exported for the TensorBoard projector.
N = len(test_y)  # Number of items
D = 4096  # Dimensionality of the embedding Dense layer
print(N)

In [None]:
# Evaluate the 4096-unit dense layer for all test images in the Keras session.
keras_session = keras.backend.get_session()
intermediate_values = keras_session.run(intermediate_layer, feed_dict={VGG19.input: test_xx})

print(intermediate_values.shape)
print(type(intermediate_values))

In [None]:
# Wrap the computed dense-layer activations in a TensorFlow variable so they
# can be stored in a checkpoint.
embedding_var = tensorflow.Variable(intermediate_values)

# Output location and checkpoint file name.
ROOT_DIR = './' # Pay attention here! If this is given as an absolute path, the resulting files have to be read from that same folder path.
OUTPUT_MODEL_FILE_NAME = os.path.join(ROOT_DIR,'tf.ckpt')

# A separate session is used here instead of keras.backend.get_session().
# NOTE(review): this session is never closed in the cells shown.
embedSess = tensorflow.Session()

# Initialize only the embedding variable in this session.
embedSess.run(embedding_var.initializer)
# embedSess.run(tensorflow.global_variables_initializer())

# Save just the embedding variable to the checkpoint file.
saver = tensorflow.train.Saver([embedding_var])
saver.save(embedSess, OUTPUT_MODEL_FILE_NAME)

In [None]:
# Write the projector configuration next to the embedding checkpoint.
summary_writer = tensorflow.summary.FileWriter(ROOT_DIR)

config = tensorflow.contrib.tensorboard.plugins.projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name
# The projector needs an absolute metadata path. Derive it from the file
# written by save_metadata('./metadata.tsv') instead of hard-coding a folder on
# another machine, so the configuration points at a file that exists here.
# If the log directory is copied elsewhere for viewing, update this path to match.
embedding.metadata_path = os.path.abspath(os.path.join(ROOT_DIR, 'metadata.tsv'))
tensorflow.contrib.tensorboard.plugins.projector.visualize_embeddings(summary_writer, config)

# Plot the loss and accuracy of classification

In [None]:
%matplotlib inline

In [None]:
import pandas
import seaborn
import sklearn.metrics
import sklearn.preprocessing
import sklearn.utils

In [None]:
# Load the per-epoch training log from the current directory.
# NOTE(review): this rebinds `metrics`, which earlier held the list of metric
# functions passed to model.compile.
metrics = pandas.read_csv("./training.csv")

In [None]:
# Training vs. validation accuracy per epoch; labelled so the two curves
# can be told apart when the figure is read on its own.
matplotlib.pyplot.plot(metrics["categorical_accuracy"], label="training")
matplotlib.pyplot.plot(metrics["val_categorical_accuracy"], label="validation")
matplotlib.pyplot.xlabel("epoch")
matplotlib.pyplot.ylabel("categorical accuracy")
matplotlib.pyplot.legend();

In [None]:
# Training vs. validation loss per epoch; labelled so the two curves
# can be told apart when the figure is read on its own.
matplotlib.pyplot.plot(metrics["loss"], label="training")
matplotlib.pyplot.plot(metrics["val_loss"], label="validation")
matplotlib.pyplot.xlabel("epoch")
matplotlib.pyplot.ylabel("loss")
matplotlib.pyplot.legend();

Building confusion matrix

In [None]:
# The VGG19-based model takes 3-channel input while the stored test array is
# single-channel, so replicate the channel axis (axis 3) before predicting —
# the same conversion used to build `test_xx` earlier in the notebook.
test_images = numpy.load(os.path.join(directory, "testing_x.npy"))

predicted = model.predict(
    batch_size=50,
    x=numpy.concatenate((test_images, test_images, test_images), axis=3)
)

# Index of the most probable class for every test sample.
predicted = numpy.argmax(predicted, -1)

# When done, close the session to release the GPU/CPU memory

In [None]:
# Release the resources held by the session created at the top of the notebook.
session.close()

# Keras still holds a reference to the now-closed session. Clearing its state
# lets it create a fresh session and graph on demand, so the cells below (which
# build and train another model) do not fail on a closed session.
# The fresh session uses Keras' default configuration.
keras.backend.clear_session()

# Alternative: train a network from scratch

In [None]:
# Change the number of channels here: VGG19 expects (224, 224, 3), i.e. 3 channels;
# the homemade model uses (image_size, image_size, 1).
shape = (image_size, image_size, 1)

In [None]:
# Input tensor of the from-scratch model.
x = keras.layers.Input(shape=shape)

The classifier architecture is adapted from the LeNet-5 convolutional neural network by Yann LeCun et al.

In [None]:
# Shared settings for both convolution layers.
options = {"activation": "relu", "kernel_size": (3, 3)}

# Layers in the order they are applied: two 3x3 ReLU convolutions, max pooling
# and dropout, then a 128-unit dense layer with dropout and a softmax output.
stack = [
    keras.layers.Conv2D(32, **options),
    keras.layers.Conv2D(64, **options),
    keras.layers.MaxPooling2D(),
    keras.layers.Dropout(0.5),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation="relu"),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(classes),
    keras.layers.Activation("softmax")
]

# Thread the input tensor through the stack; `y` ends up as the model output.
y = x
for stage in stack:
    y = stage(y)

In [None]:
# Assemble the from-scratch model from its input and output tensors.
model = keras.models.Model(inputs=x, outputs=y)

In [None]:
# Training configuration for the from-scratch model: Adam (0.0001),
# categorical cross-entropy loss, accuracy metric.
optimizer = keras.optimizers.Adam(0.0001)
loss = keras.losses.categorical_crossentropy

model.compile(optimizer, loss, metrics=["accuracy"])

In [None]:
# Load the training images and labels stored in the working directory.
training_x, training_y = (
    numpy.load(os.path.join(directory, array_file))
    for array_file in ("training_x.npy", "training_y.npy")
)

In [None]:
# Per-epoch metrics are written to training.csv (read back by the plotting cell).
csv_logger = keras.callbacks.CSVLogger("training.csv")

# Stop after 32 epochs without improvement of the monitored quantity.
early_stopping = keras.callbacks.EarlyStopping(patience=32)

# Keep only the best weights seen so far. Without save_best_only the file is
# overwritten every epoch, so reloading checkpoint.hdf5 after early stopping
# would restore the last epoch instead of the best one.
model_checkpoint = keras.callbacks.ModelCheckpoint("checkpoint.hdf5", save_best_only=True)

# Lower the learning rate when the monitored quantity stops improving (Keras defaults).
reduce_learning_rate_on_plateau = keras.callbacks.ReduceLROnPlateau()

In [None]:
options = {
    "batch_size": 4096,
    "class_weight": class_weights,
    "callbacks": [
        csv_logger,
        early_stopping,
        model_checkpoint,
        reduce_learning_rate_on_plateau
    ],
    "epochs": 256,
    "validation_split": 0.25,
    "verbose": 0,
    "x": training_x,
    "y": training_y    
}

%time model.fit(**options)

In [None]:
%matplotlib inline

In [None]:
# Plot the logged training history: accuracy on the left, loss on the right;
# blue = training, red = validation.
observations = pandas.read_csv("training.csv")

figure, (accuracy_axis, loss_axis) = matplotlib.pyplot.subplots(nrows=1, ncols=2, figsize=(12, 4))

accuracy_axis.plot(observations["acc"], "b")
accuracy_axis.plot(observations["val_acc"], "r")

loss_axis.plot(observations["loss"], "b")
loss_axis.plot(observations["val_loss"], "r")

In [None]:
# Restore the weights stored by the ModelCheckpoint("checkpoint.hdf5") callback.
model.load_weights("./checkpoint.hdf5")

In [None]:
# Load the held-out test arrays. Earlier cells read them from this same
# directory as "testing_x.npy"/"testing_y.npy" (matching the "training_*.npy"
# naming), so the same names are used here instead of "test_*.npy".
test_x = numpy.load(os.path.join(directory, "testing_x.npy"))

test_y = numpy.load(os.path.join(directory, "testing_y.npy"))

In [None]:
# Evaluate the model on the test arrays; the result is shown as the cell output.
model.evaluate(test_x, test_y)