In [55]:
import cv2
import numpy as np
import pandas as pd
import os
import sys
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import shutil
from os import path
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image_dataset_from_directory
from keras.preprocessing.image import ImageDataGenerator

import logging
# Only let records from the "tensorflow" logger through at ERROR level or above.
logging.getLogger("tensorflow").setLevel(logging.ERROR)

import warnings
# The 'always' action prints every matching warning each time it is raised.
# NOTE(review): if the goal was to quiet the notebook, 'ignore' is the action
# that suppresses warnings — confirm which behaviour was intended.
warnings.filterwarnings('always')

# Utility Functions

In [130]:

def continueTrain_model(model3,model4, meta, batch_data, y):
    """Continue training the meta-learner on one batch, then report test metrics.

    Only the meta-learner is updated (two epochs via ``train_meta``); the two
    sub-models are used for prediction only and are not fitted here.

    Parameters
    ----------
    model3, model4 : objects exposing ``predict``
        The sub-models whose stacked outputs feed the meta-learner.
    meta : object exposing ``fit`` and ``predict``
        The meta-learner to keep training.
    batch_data : array-like
        Input batch passed to the sub-models.
    y : array-like
        Target labels for ``batch_data``.

    Notes
    -----
    Reads the notebook-level globals ``x_test`` and ``y_test`` for the
    evaluation step, so those must exist before this is called.
    """
    # Step 1: two more epochs for the meta-learner on this batch.
    train_meta(model3, model4, meta, batch_data, y, epochs=2)

    # Step 2: re-score the whole stack on the held-out test set and print it.
    multiclass_classification_report(
        y_test,
        model_predict(model3, model4, meta, x_test),
    )

def train_meta(model3,model4,meta,x, y, epochs):
    """Fit the meta-learner on the concatenated predictions of both sub-models.

    Parameters
    ----------
    model3, model4 : objects exposing ``predict``
        Sub-models; their outputs are concatenated as ``[model3 | model4]``
        along axis 1.
    meta : object exposing ``fit``
        The meta-learner; fitted in place.
    x : array-like
        Inputs fed to both sub-models to build the training features.
    y : array-like
        Training targets for the meta-learner.
    epochs : int
        Number of epochs passed to ``meta.fit``.

    Returns
    -------
    The same ``meta`` object that was passed in.

    Notes
    -----
    Validation features and labels come from the notebook-level globals
    ``x_val`` and ``y_val``.
    """
    def _stack_predictions(inputs):
        # model4 is queried before model3 (as in the original cell order),
        # but model3's columns come first in the stacked feature matrix.
        out4 = model4.predict(inputs)
        out3 = model3.predict(inputs)
        return np.concatenate((out3, out4), axis=1)

    train_features = _stack_predictions(x)
    val_features = _stack_predictions(x_val)

    meta.fit(
        train_features,
        y,
        validation_data=(val_features, y_val),
        epochs=epochs,
        batch_size=256,
    )
    return meta
    
def model_predict(model3,model4, meta,x):
    """Run the full stack: both sub-models, then the meta-learner.

    The outputs of ``model3`` and ``model4`` for ``x`` are concatenated along
    axis 1 (``model3`` columns first) and passed to ``meta.predict``.

    Returns whatever ``meta.predict`` returns for the stacked features.
    """
    # Query the sub-models in a fixed order so the column layout is stable.
    base_outputs = [base.predict(x) for base in (model3, model4)]
    return meta.predict(np.concatenate(base_outputs, axis=1))

def multiclass_classification_report(y_test, prediction):
    """Print an sklearn classification report for per-class score predictions.

    ``prediction`` holds one score vector per sample; it is first converted to
    class indices with ``encode_pred`` and then compared against ``y_test``.
    Nothing is returned — the report is printed.
    """
    report = classification_report(y_test, encode_pred(prediction))
    print(report)

def encode_pred(prediction):
    """Convert per-sample score vectors into class indices.

    Parameters
    ----------
    prediction : array-like
        One row of scores per sample (e.g. a softmax output of shape
        ``(n_samples, n_classes)``).

    Returns
    -------
    numpy.ndarray
        1-D integer array with, for each sample, the index of its largest
        score (first index on ties).  Empty input yields an empty integer
        array rather than an empty float array.
    """
    scores = np.asarray(prediction)
    if len(scores) == 0:
        # Keep an integer dtype so downstream label comparisons stay consistent.
        return np.array([], dtype=np.intp)

    # Flatten each sample to one row so the result matches a per-sample
    # np.argmax(sample), then take all the argmaxes in one vectorised call.
    return scores.reshape(len(scores), -1).argmax(axis=1)

# Load Images

In [101]:

# Load images and integer labels for the multiclass cell-type task (task 2)
def celltype_classify_data(class_list, root):
    """Load every image under ``root/<class>`` for each class in ``class_list``.

    Parameters
    ----------
    class_list : sequence
        Class folder names.  A class's label is its position in this
        sequence (0, 1, 2, ...), not the folder name itself.
    root : str
        Directory that contains one sub-directory per class.

    Returns
    -------
    tuple(list, list)
        ``(images, labels)`` — ``images`` holds each decoded image divided by
        255, ``labels`` holds the matching integer class index.

    Notes
    -----
    Files that OpenCV cannot decode (``cv2.imread`` returns ``None``) are
    skipped with a warning instead of crashing on ``None / 255``.  Directory
    entries are processed in sorted order so the sample order is reproducible.
    """
    images = []
    labels = []

    for class_index, class_name in enumerate(class_list):
        img_dir = os.path.join(root, f"{class_name}")

        # os.listdir returns entries in arbitrary order; sort for determinism.
        for file_name in sorted(os.listdir(img_dir)):
            img_path = os.path.join(img_dir, file_name)
            pixels = cv2.imread(img_path)
            if pixels is None:
                # Stray or corrupt file: keep going, but make the gap visible.
                warnings.warn(f"Skipping unreadable image file: {img_path}")
                continue

            # Scale pixel values by 1/255 (8-bit images end up in the 0-1 range).
            images.append(pixels / 255)
            labels.append(class_index)

    return (images, labels)

# Model definitions

In [9]:
def get_multiclass_model():
    """Build and compile the four-class CNN.

    Architecture: two (3x3 conv, 64 filters, ReLU -> 2x2 max-pool, stride 1)
    stages on a 27x27x3 input, then Flatten, three ReLU dense layers
    (256, 128, 32) and a 4-unit softmax output.  Compiled with Adam,
    sparse categorical cross-entropy and accuracy.
    """
    layers = tf.keras.layers

    model = tf.keras.Sequential([
        # Stage 1: convolution followed by stride-1 max-pooling.
        layers.Conv2D(64, (3, 3), strides=1, activation="relu", input_shape=[27, 27, 3]),
        layers.MaxPool2D(pool_size=(2, 2), strides=1),
        # Stage 2: same pattern, input shape inferred from stage 1.
        layers.Conv2D(64, (3, 3), strides=1, activation="relu"),
        layers.MaxPool2D(pool_size=(2, 2), strides=1),
        # Classifier head.
        layers.Flatten(),
        layers.Dense(256, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(4, activation="softmax"),
    ])

    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

def get_subclass_model():
    """Build and compile the three-class CNN.

    Architecture: one 3x3 conv (64 filters, ReLU) on a 27x27x3 input, a 2x2
    max-pool with default strides, Flatten, three ReLU dense layers
    (256, 128, 32) and a 3-unit softmax output.  Compiled with Adam,
    sparse categorical cross-entropy and accuracy.
    """
    layers = tf.keras.layers

    model = tf.keras.Sequential([
        # Single convolution + pooling stage.
        layers.Conv2D(64, (3, 3), activation="relu", input_shape=[27, 27, 3]),
        layers.MaxPool2D(pool_size=(2, 2)),
        # Classifier head.
        layers.Flatten(),
        layers.Dense(256, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(3, activation="softmax"),
    ])

    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

def get_transfer_model():
    """Build and compile the dense meta-learner with a 4-unit softmax output.

    Layers, in order: Flatten, Dense(256, relu), Dense(128, softmax),
    Dense(32, relu), Dense(3, sigmoid), Dense(4, softmax).  No input shape is
    declared, so the model is built on first use.  Compiled with Adam,
    sparse categorical cross-entropy and accuracy.
    """
    layers = tf.keras.layers

    model = tf.keras.Sequential([
        layers.Flatten(),
        layers.Dense(256, activation="relu"),
        # NOTE(review): a softmax activation on a hidden layer and a 3-unit
        # sigmoid layer right before the output are unusual choices — confirm
        # they are intentional before reusing this architecture.
        layers.Dense(128, activation="softmax"),
        layers.Dense(32, activation="relu"),
        layers.Dense(3, activation="sigmoid"),
        # Output layer: one probability per class.
        layers.Dense(4, activation="softmax"),
    ])

    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# Train model

In [95]:
# Load the train / val / test splits for the full class list and convert each
# (images, labels) pair to NumPy arrays.
# NOTE(review): `all_class` is not defined in any cell visible here — it must
# be defined (with the class order the labels rely on) before this cell runs.
image_train, label_train = celltype_classify_data(
    all_class, root="Image_classification_data/split3-multi-task/train"
)
x_train = np.array(image_train)
y_train = np.array(label_train)

image_val, label_val = celltype_classify_data(
    all_class, root="Image_classification_data/split3-multi-task/val"
)
x_val = np.array(image_val)
y_val = np.array(label_val)

image_test, label_test = celltype_classify_data(
    all_class, root="Image_classification_data/split3-multi-task/test"
)
x_test = np.array(image_test)
y_test = np.array(label_test)


In [102]:
# Three-class subset: labels 0/1/2 follow the order of this list.
classes3 = ["epithelial", "fibroblast", "inflammatory"]

# Note: this rebinds image_train / label_train / image_val / label_val to the
# three-class lists.
image_train, label_train = celltype_classify_data(
    classes3, root="Image_classification_data/split3-multi-task/train"
)
x3_train = np.array(image_train)
y3_train = np.array(label_train)

image_val, label_val = celltype_classify_data(
    classes3, root="Image_classification_data/split3-multi-task/val"
)
x3_val = np.array(image_val)
y3_val = np.array(label_val)

## Train model of 3 classes

In [104]:
# Build the three-class CNN and fit it on the three-class subset
# (x3_train / y3_train), validating on x3_val / y3_val.
model3 = get_subclass_model()
model3.fit(x3_train, y3_train, validation_data=(x3_val, y3_val), epochs=15, batch_size=128)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7ff41381d5d0>

## Train sub-model of 4 classes

In [106]:
# Build the four-class CNN and fit it on the full training split
# (x_train / y_train), validating on x_val / y_val.
model4 = get_multiclass_model()
model4.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=20, batch_size=64)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7ff306283410>

## Train Meta-model

In [115]:
# Build a fresh meta-learner and fit it for 100 epochs on the stacked
# predictions of model3 and model4 for the training split.
# train_meta() performs the stacking (model3 columns first, then model4) and
# validates against the notebook-level x_val / y_val, so the logic lives in one
# place instead of being copy-pasted into this cell.
# NOTE(review): model4 was fitted on x_train itself, and model3 on a subset
# loaded from the same train directory, so the meta-learner is trained on
# in-sample sub-model predictions — confirm this is acceptable for the
# experiment.
meta = get_transfer_model()
meta = train_meta(model3, model4, meta, x_train, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7ff304c2b7d0>

In [118]:
# Persist the three trained models under results/models/ so they can be
# reloaded later without retraining.
model3.save("results/models/model3")
model4.save("results/models/model4")
meta.save("results/models/meta")

# Load models

In [132]:
# Reload the saved models from disk; this rebinds model3 / model4 / meta to the
# loaded copies.
model3 = tf.keras.models.load_model("results/models/model3")
model4 = tf.keras.models.load_model("results/models/model4")
meta = tf.keras.models.load_model("results/models/meta")

## Evaluate model with the test dataset

In [120]:
# Score the full stack (model3 + model4 -> meta) on the held-out test split and
# print the per-class precision / recall / F1 report.
pred_test = model_predict(model3,model4,meta,x_test)
multiclass_classification_report(y_test, pred_test)

              precision    recall  f1-score   support

           0       0.94      0.97      0.96       817
           1       0.89      0.87      0.88       379
           2       0.86      0.93      0.89       510
           3       0.79      0.64      0.70       278

    accuracy                           0.89      1984
   macro avg       0.87      0.85      0.86      1984
weighted avg       0.89      0.89      0.89      1984



# Extra data

In [133]:
import pandas as pd

# Load the extra images from the "1" and "0" folders as NumPy arrays.
# NOTE: celltype_classify_data labels each image by its class's position in the
# list it is given, so with a single-element list both label1 and label0 are
# all zeros — they do not encode the folder name.
extra1_x, label1 = (
    np.array(part)
    for part in celltype_classify_data(["1"], root="Image_classification_data/extra")
)
extra0_x, label0 = (
    np.array(part)
    for part in celltype_classify_data(["0"], root="Image_classification_data/extra")
)

In [122]:
# Sanity check: show the array shapes of the two extra image sets.
print(extra1_x.shape)
print(extra0_x.shape)

(2990, 27, 27, 3)
(7394, 27, 27, 3)


## Train model with extra data in batches

In [134]:
# Self-training on the extra images: for each batch, let the current stack
# predict labels, then continue training the meta-learner on those labels and
# re-print the test report.
datagen = ImageDataGenerator()
batch_size = 1024
tf.config.run_functions_eagerly(False)

# datagen.flow() keeps yielding batches until the loop is stopped, so the
# number of batches is bounded explicitly.  Ceiling division makes one pass
# cover every image, including a final partial batch; the previous
# `batches >= len / batch_size` check ran after the increment and therefore
# always dropped the last batch (a whole one when the size divides evenly).
n_batches = -(-len(extra0_x) // batch_size)
extra0_batches = datagen.flow(extra0_x, batch_size=batch_size)

batches = 0
# zip() stops as soon as the range is exhausted, without pulling an extra batch.
for batches, x_batch in zip(range(1, n_batches + 1), extra0_batches):
    print(batches)
    # The stack predicts class scores for the batch ...
    pred_extra0 = model_predict(model3, model4, meta, x_batch)
    # ... which are turned into hard class labels ...
    pred_extra0 = encode_pred(pred_extra0)
    # ... and used as targets for further meta-learner training.
    continueTrain_model(model3, model4, meta, x_batch, pred_extra0)

1
Epoch 1/2
Epoch 2/2
              precision    recall  f1-score   support

           0       0.94      0.97      0.96       817
           1       0.88      0.89      0.89       379
           2       0.87      0.92      0.89       510
           3       0.82      0.63      0.71       278

    accuracy                           0.90      1984
   macro avg       0.88      0.85      0.86      1984
weighted avg       0.89      0.90      0.89      1984

2
Epoch 1/2
Epoch 2/2
              precision    recall  f1-score   support

           0       0.94      0.97      0.96       817
           1       0.87      0.90      0.89       379
           2       0.87      0.92      0.89       510
           3       0.83      0.62      0.71       278

    accuracy                           0.90      1984
   macro avg       0.88      0.85      0.86      1984
weighted avg       0.89      0.90      0.89      1984

3
Epoch 1/2
Epoch 2/2
              precision    recall  f1-score   support

         