In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Make the project folder on the mounted drive the working directory; the
# relative paths used later in this notebook ('dataset/', 'models/', the
# patch folders) are resolved against it.
import os
os.chdir('/content/drive/My Drive/lymhphoma')

In [None]:
import os
import numpy as np
import time
from sklearn.metrics import accuracy_score
import psutil
from sklearn.metrics import confusion_matrix
import GPUtil
from PIL import Image
import h5py
from tensorflow.keras.models import model_from_json
from skimage.color import rgb2lab, rgb2hed
from pathlib import Path
from skimage.exposure import rescale_intensity
from sklearn.ensemble import VotingClassifier
from skimage.util import view_as_windows
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from keras.optimizers import RMSprop
from tensorflow.keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import keras
from collections import Counter
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from tensorflow.keras.layers import Add, Multiply, Lambda, Dense
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import mixed_precision
from tensorflow.keras.optimizers import legacy as legacy_optimizers
import random

In [None]:
# Install the notebook's third-party dependencies into the runtime.
# NOTE(review): this cell sits after the import cell, so on a fresh runtime a
# missing package fails at import before this line runs; 'scikit-learn' is
# also listed twice.
!pip install tensorflow keras scikit-image matplotlib seaborn scikit-learn gputil scikit-learn scikit-metrics


In [None]:
# One seed shared by TensorFlow, NumPy and Python's `random`; the same value is
# passed as `random_state` to the train/val/test splits further down.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [None]:
# Set the global Keras dtype policy to 'float32'.
mixed_precision.set_global_policy('float32')

In [None]:
# List every physical device TensorFlow can see (no device-type filter).
devices = tf.config.list_physical_devices()
print("\nDevices: ", devices)

In [None]:
# Turn on memory growth for every visible GPU and print details of the first one.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        details = tf.config.experimental.get_device_details(gpus[0])
        print("GPU details: ", details)
    except RuntimeError as e:
        # The error is only printed, so the notebook continues without the
        # memory-growth setting if TensorFlow rejects it.
        print(e)
else:
    print("No GPU found")

In [None]:
def load_data(data_dir, color, class_list):
    """Load every ``<class_name>/*.tif`` image under ``data_dir`` in one color space.

    Args:
        data_dir: ``pathlib.Path`` whose sub-folders are named after the classes.
        color: One of ``'rgb'``, ``'grayscale'`` or ``'lab'``.
        class_list: Class folder names. The integer label of an image is the
            position of its class in this list, which matches how the rest of
            the notebook pairs label ``i`` with ``class_list[i]``
            (for ``['CLL', 'FL', 'MCL']`` this gives 0, 1, 2).

    Returns:
        ``(X, Y)``: ``X`` is a float32 array of images divided by 255 with an
        explicit channel axis (grayscale gets a trailing axis of size 1);
        ``Y`` is an integer array with one label per image.

    Raises:
        ValueError: If ``color`` is not a supported color space. Without this
            check an unknown value would collect labels but no images.
    """
    def _open_as(img_path, mode):
        # The context manager releases the file handle; convert() returns a
        # fully loaded copy, so the pixel data outlives the `with` block.
        with Image.open(img_path) as img:
            return np.array(img.convert(mode))

    conversion_functions = {
        'rgb': lambda img_path: _open_as(img_path, 'RGB'),
        'grayscale': lambda img_path: _open_as(img_path, 'L'),
        'lab': lambda img_path: rgb2lab(_open_as(img_path, 'RGB')),
    }
    if color not in conversion_functions:
        raise ValueError(
            f"Unsupported color space {color!r}; expected one of {sorted(conversion_functions)}"
        )
    convert = conversion_functions[color]
    class_index = {class_name: idx for idx, class_name in enumerate(class_list)}

    X, Y = [], []
    for class_name in class_list:
        # glob() yields files in filesystem order; sorting makes the sample
        # order, and therefore the seeded train/val/test split, reproducible.
        for filename in sorted(data_dir.glob(f'{class_name}/*.tif')):
            X.append(np.array(convert(str(filename))))
            Y.append(class_index[class_name])

    X = np.asarray(X, dtype=np.float32)
    Y = np.asarray(Y, dtype=int)

    # A stack of single-channel images is (N, H, W): add the channel axis.
    if len(X.shape) == 3:
        X = X.reshape((X.shape[0], X.shape[1], X.shape[2], 1))

    # NOTE(review): the same /255 scaling is applied to 'lab' data, whose
    # values are presumably not 0-255 pixel intensities - confirm this is the
    # intended normalisation for that color space.
    X = X / 255.0

    print(f"Loaded data for color space {color} with shape {X.shape}")
    return X, Y

In [None]:
def rebuild_model(model_path, custom_objects):
    """Rebuild a model from its JSON architecture file and load its weights.

    The architecture is read from ``<model_path without extension>_config.json``
    and the weights from ``model_path`` itself.

    Args:
        model_path: Path (``str`` or ``os.PathLike``) of the weights file,
            e.g. ``'models/foo.h5'``.
        custom_objects: Mapping of custom layer names to classes, forwarded to
            ``model_from_json``.

    Returns:
        The reconstructed model with weights loaded.
    """
    model_path = os.fspath(model_path)
    # Swap only the trailing extension. A plain str.replace('.h5', ...) would
    # also rewrite '.h5' occurring elsewhere in the path (e.g. in a folder
    # name) and would leave a path with another extension untouched.
    root, _ext = os.path.splitext(model_path)
    config_path = root + '_config.json'
    with open(config_path, 'r') as json_file:
        json_config = json_file.read()
    model = tf.keras.models.model_from_json(json_config, custom_objects=custom_objects)
    model.load_weights(model_path)
    return model

In [None]:
def save_patches(patches, labels, prefix, batch_size=10000):
    """Write patches and labels to disk in consecutive batches.

    Batch ``i`` is stored as ``{prefix}_patches_{i}.npy`` and
    ``{prefix}_labels_{i}.npy``.

    Args:
        patches: Array of patches, sliced along the first axis.
        labels: Array of labels aligned with ``patches``.
        prefix: Path prefix for the output files.
        batch_size: Maximum number of patches per file.
    """
    # Ceiling division: the previous `len // batch_size + 1` produced an extra,
    # empty batch file whenever the length was an exact multiple of batch_size.
    # At least one batch is still written so empty input keeps yielding a file.
    num_batches = max(1, -(-len(patches) // batch_size))
    for i in range(num_batches):
        start, stop = i * batch_size, (i + 1) * batch_size
        patch_file = f'{prefix}_patches_{i}.npy'
        label_file = f'{prefix}_labels_{i}.npy'
        np.save(patch_file, patches[start:stop])
        np.save(label_file, labels[start:stop])
        print(f"Saved patches to {patch_file} and labels to {label_file}")

In [None]:
def create_patches(X, Y, size, stride, prefix, batch_size=10000):
    """Cut every image into square patches and save them with ``save_patches``.

    Args:
        X: Image array indexed as ``(image, row, col, channel)``.
        Y: One label per image; every patch inherits its image's label.
        size: Patch height and width in pixels.
        stride: Step between neighbouring windows.
        prefix: Path prefix handed to ``save_patches``; files are written as
            ``{prefix}_patches_{i}.npy`` / ``{prefix}_labels_{i}.npy``.
        batch_size: Maximum number of patches per saved file.
    """
    # The files are written next to `prefix`, not inside it, so only the
    # parent folder has to exist. Calling makedirs(prefix) would additionally
    # create an empty directory named after the prefix.
    out_dir = os.path.dirname(prefix)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    patch_groups, label_groups = [], []
    for i in range(X.shape[0]):
        windows = view_as_windows(X[i], (size, size, X.shape[3]), step=stride)
        # The window spans all channels, so the third grid axis has length 1.
        nR, nC, _, H, W, C = windows.shape
        windows = windows.reshape(nR * nC, H, W, C)
        patch_groups.append(windows)
        label_groups.append(np.full(windows.shape[0], Y[i]))

    if patch_groups:
        # One concatenate per split instead of a Python list holding every
        # single patch as a separate array.
        patches = np.concatenate(patch_groups, axis=0).astype(np.float32, copy=False)
        labels = np.concatenate(label_groups)
    else:
        patches = np.asarray([], dtype=np.float32)
        labels = np.asarray([])

    save_patches(patches, labels, prefix, batch_size=batch_size)
    print(f"Created and saved patches for prefix {prefix} with shape {patches.shape}")

In [None]:
class Attention(Layer):
    """Additive attention pooling over axis 1 of the input.

    For an input ``x`` whose last axis has size ``D`` the layer learns a
    ``(D, D)`` projection, a ``(D,)`` bias and a ``(D, 1)`` context vector,
    scores every position along axis 1, softmax-normalises the scores along
    that axis and returns the score-weighted sum of ``x`` over axis 1.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        feature_dim = input_shape[-1]
        # Creation order (projection, bias, context vector) is kept so the
        # layer's weight list stays compatible with previously saved weights.
        self.W = self.add_weight(
            name='attention_weight',
            shape=(feature_dim, feature_dim),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(feature_dim,),
            initializer='zeros',
            trainable=True,
        )
        self.u = self.add_weight(
            name='context_vector',
            shape=(feature_dim, 1),
            initializer='random_normal',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        # Hidden representation of every position: tanh(x . W + b).
        projected = tf.tensordot(x, self.W, axes=[-1, 0]) + self.b
        hidden = tf.tanh(projected)
        # One scalar score per position, normalised along axis 1.
        alphas = tf.nn.softmax(tf.tensordot(hidden, self.u, axes=[-1, 0]), axis=1)
        # Weighted sum collapses axis 1.
        return tf.reduce_sum(alphas * x, axis=1)

In [None]:
def resnet_block(inputs, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
    """Two-convolution residual block with an optional projection shortcut.

    Args:
        inputs: Input Keras tensor.
        filters: Number of filters of both convolutions (and the shortcut).
        kernel_size: Kernel size of the two main-path convolutions.
        stride: Stride of the first convolution and of the projection shortcut.
        conv_shortcut: If True the shortcut is a 1x1 convolution + batch norm;
            otherwise ``inputs`` is added unchanged, so it must already have
            the main path's output shape.
        name: Prefix for the layer names. With the default ``None`` the layers
            receive Keras' automatic names (previously ``None`` raised a
            ``TypeError`` on ``name + '_conv1'``).

    Returns:
        Output tensor of the block after the final ReLU.
    """
    def _n(suffix):
        # Explicit layer names only when the caller supplied a prefix.
        return None if name is None else name + suffix

    bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1

    # Main path: conv -> BN -> ReLU -> conv -> BN.
    x = Conv2D(filters, kernel_size, strides=stride, padding='same', use_bias=False, name=_n('_conv1'))(inputs)
    x = BatchNormalization(axis=bn_axis, name=_n('_bn1'))(x)
    x = Activation('relu', name=_n('_relu1'))(x)
    x = Conv2D(filters, kernel_size, padding='same', use_bias=False, name=_n('_conv2'))(x)
    x = BatchNormalization(axis=bn_axis, name=_n('_bn2'))(x)

    if conv_shortcut:
        # The projection matches the main path's channel count and stride.
        shortcut = Conv2D(filters, 1, strides=stride, use_bias=False, name=_n('_0_conv'))(inputs)
        shortcut = BatchNormalization(axis=bn_axis, name=_n('_0_bn'))(shortcut)
        x = Add(name=_n('_add'))([x, shortcut])
    else:
        x = Add(name=_n('_add'))([x, inputs])
    x = Activation('relu', name=_n('_out'))(x)
    return x

In [None]:
def create_resnet_model(input_shape, num_classes):
    """Build and compile the residual network used in this notebook.

    Layout: 7x7 stride-2 stem, 3x3 max-pool, four stages of ``resnet_block``
    (3, 4, 6 and 3 blocks with 64, 128, 256 and 512 filters), global average
    pooling, an ``Attention`` layer, a 512-unit dense layer with dropout and a
    softmax classifier.

    Args:
        input_shape: Shape of one input sample (without the batch axis).
        num_classes: Number of output classes.

    Returns:
        A compiled Keras ``Model`` named ``'resnet50'``.
    """
    # Same axis rule as resnet_block, instead of a hard-coded axis=3.
    bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1

    inputs = Input(shape=input_shape)
    x = Conv2D(64, 7, strides=2, padding='same', use_bias=False, name='conv1_conv')(inputs)
    x = BatchNormalization(axis=bn_axis, name='conv1_bn')(x)
    x = Activation('relu', name='conv1_relu')(x)
    x = MaxPooling2D(3, strides=2, padding='same', name='pool1_pool')(x)

    # (stage index, filters, number of blocks, stride of the first block)
    stages = (
        (2, 64, 3, 1),
        (3, 128, 4, 2),
        (4, 256, 6, 2),
        (5, 512, 3, 2),
    )
    for stage, filters, num_blocks, first_stride in stages:
        for block in range(1, num_blocks + 1):
            x = resnet_block(
                x,
                filters,
                stride=first_stride if block == 1 else 1,
                name=f'conv{stage}_block{block}',
            )

    x = GlobalAveragePooling2D(name='avg_pool')(x)
    # Attention reduces over axis 1, so the pooled vector gets a length-1
    # sequence axis. A Reshape layer is used rather than a raw tf.expand_dims
    # so the step is an ordinary, serialisable Keras layer. With a single
    # position the softmax weight is 1, so Attention returns the vector as is.
    x = Reshape((1, x.shape[-1]))(x)
    x = Attention()(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax', name='fc1000')(x)

    model = Model(inputs, outputs, name='resnet50')
    # Note: train_and_save_model recompiles the model with
    # sparse_categorical_crossentropy before fitting on integer labels.
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
def extract_features(model, x_data):
    """Return the activations of the model's second-to-last layer for ``x_data``.

    A helper model sharing ``model``'s input and ending at
    ``model.layers[-2]`` is built and run with ``predict``.
    """
    penultimate_output = model.layers[-2].output
    return Model(inputs=model.input, outputs=penultimate_output).predict(x_data)

In [None]:
def grad_cam(model, img, layer_name="conv2d_3"):
    """Overlay a Grad-CAM heatmap for the model's top prediction on ``img``.

    Args:
        model: Keras model to explain.
        img: A single image as an array of shape ``(H, W, C)``, channels in
            RGB order. A ``uint8`` image is used as is; any other dtype is
            converted to ``uint8`` for display (values are assumed to lie in
            [0, 1] when the maximum is <= 1 - TODO confirm for non-RGB inputs).
        layer_name: Name of the convolutional layer whose feature maps are
            weighted.

    Returns:
        ``uint8`` BGR image: 60 % original image blended with 40 % JET heatmap.
    """
    # cv2 is used below but is not part of the notebook's import cell, so it
    # is brought into scope here.
    import cv2

    grad_model = Model([model.inputs], [model.get_layer(layer_name).output, model.output])
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(np.array([img]))
        # Score of the most probable class for the single image in the batch.
        loss = predictions[:, np.argmax(predictions[0])]
    grads = tape.gradient(loss, conv_outputs)[0]

    # Work in NumPy from here on. The previous code called `.numpy()` on the
    # result of np.dot, which is already an ndarray and has no such method.
    feature_maps = conv_outputs[0].numpy()
    channel_weights = tf.reduce_mean(grads, axis=(0, 1)).numpy()
    cam = np.dot(feature_maps, channel_weights)

    cam = cv2.resize(cam, (img.shape[1], img.shape[0]))
    cam = np.maximum(cam, 0)

    # A constant map (e.g. all zeros after the ReLU) would divide by zero.
    value_range = cam.max() - cam.min()
    if value_range > 0:
        heatmap = (cam - cam.min()) / value_range
    else:
        heatmap = np.zeros_like(cam)
    heatmap = np.uint8(255 * heatmap)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)

    # addWeighted needs both inputs in the same depth; the heatmap is uint8.
    base = np.asarray(img)
    if base.dtype != np.uint8:
        scale = 255.0 if base.max() <= 1.0 else 1.0
        base = np.clip(base * scale, 0, 255).astype(np.uint8)
    superimposed_img = cv2.addWeighted(cv2.cvtColor(base, cv2.COLOR_RGB2BGR), 0.6, heatmap, 0.4, 0)
    return superimposed_img

In [None]:
def create_cnn_model_with_attention(input_shape, num_classes):
    """Build and compile the plain CNN classifier with an attention stage.

    Four 5x5 conv/BN/ReLU stages (32, 48, 64, 128 filters; the first three are
    followed by 2x2 max-pooling), then Dense(128) and Dense(64) with dropout.
    The 64 dense outputs are reshaped to ``(64, 1)`` and pooled by
    ``Attention`` before the softmax layer.

    Args:
        input_shape: Shape of one input patch (without the batch axis).
        num_classes: Number of output classes.

    Returns:
        A compiled Keras ``Model``.
    """
    def conv_bn_relu(tensor, filters):
        tensor = Conv2D(filters=filters, kernel_size=(5, 5), strides=(1, 1), padding='same')(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation('relu')(tensor)

    inputs = Input(shape=input_shape)

    features = inputs
    # Only the first pooling layer uses 'valid' padding (the Keras default).
    for filters, pool_padding in ((32, 'valid'), (48, 'same'), (64, 'same')):
        features = conv_bn_relu(features, filters)
        features = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding=pool_padding)(features)
    features = conv_bn_relu(features, 128)
    features = Dropout(0.2)(features)

    head = Flatten()(features)
    for units in (128, 64):
        head = Dense(units, activation='relu')(head)
        head = Dropout(0.4)(head)

    # (batch, 64) -> (batch, 64, 1); Attention then sums over axis 1.
    head = Reshape((64, 1))(head)
    head = Attention()(head)
    head = Flatten()(head)
    outputs = Dense(num_classes, activation='softmax')(head)

    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
def create_rnn_model_with_attention(input_shape, num_classes):
    """Build and compile the GRU classifier with attention pooling.

    Each image row becomes one time step: single-channel input is reshaped to
    ``(H, W)``, multi-channel input to ``(H, W * C)``. Two GRU layers
    (256 and 128 units, returning sequences) feed an ``Attention`` layer,
    followed by Dense(32) and the softmax output.

    Args:
        input_shape: ``(H, W, C)`` shape of one input patch.
        num_classes: Number of output classes.

    Returns:
        A compiled Keras ``Model``.
    """
    inputs = Input(shape=input_shape)

    single_channel = input_shape[-1] == 1
    if single_channel:
        sequence = Reshape(input_shape[0:2])(inputs)
    else:
        rows, cols, channels = input_shape[0], input_shape[1], input_shape[2]
        sequence = Reshape((rows, cols * channels))(inputs)
        sequence = TimeDistributed(Flatten())(sequence)

    for units in (256, 128):
        sequence = GRU(units, activation='relu', return_sequences=True)(sequence)
        sequence = Dropout(0.2)(sequence)

    pooled = Attention()(sequence)
    pooled = Flatten()(pooled)
    pooled = Dense(32, activation='relu')(pooled)
    pooled = Dropout(0.2)(pooled)
    outputs = Dense(num_classes, activation='softmax')(pooled)

    model = Model(inputs, outputs)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

In [None]:
def create_cnn_rnn_model_with_attention(input_shape, num_classes):
    """Build and compile the CNN + GRU classifier with attention pooling.

    The convolutional front end (four 5x5 conv/BN/ReLU stages with 32, 48, 64
    and 128 filters, max-pooling after the first three) produces a feature
    map whose spatial positions are flattened into a sequence. Two GRU layers
    (256 and 128 units) process that sequence, ``Attention`` pools it, and
    Dense(32) plus a softmax layer classify.

    Args:
        input_shape: Shape of one input patch (without the batch axis).
        num_classes: Number of output classes.

    Returns:
        A compiled Keras ``Model``.
    """
    def conv_bn_relu(tensor, filters):
        tensor = Conv2D(filters=filters, kernel_size=(5, 5), strides=(1, 1), padding='same')(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation('relu')(tensor)

    inputs = Input(shape=input_shape)

    features = inputs
    # Only the first pooling layer uses 'valid' padding (the Keras default).
    for filters, pool_padding in ((32, 'valid'), (48, 'same'), (64, 'same')):
        features = conv_bn_relu(features, filters)
        features = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding=pool_padding)(features)
    features = conv_bn_relu(features, 128)
    features = Dropout(0.2)(features)

    # (batch, h, w, c) -> (batch, h * w, c): every spatial position is a step.
    steps = features.shape[1] * features.shape[2]
    sequence = Reshape((steps, features.shape[3]))(features)
    for units in (256, 128):
        sequence = GRU(units, activation='relu', return_sequences=True)(sequence)
        sequence = Dropout(0.2)(sequence)

    pooled = Attention()(sequence)
    pooled = Flatten()(pooled)
    pooled = Dense(32, activation='relu')(pooled)
    pooled = Dropout(0.2)(pooled)
    outputs = Dense(num_classes, activation='softmax')(pooled)

    model = Model(inputs, outputs)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=["accuracy"])
    return model

In [None]:
class PrintEpochMetrics(Callback):
    """Print training and validation accuracy at the end of every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print one summary line for ``epoch`` (reported 1-based).

        A metric missing from ``logs`` (for example ``val_accuracy`` when no
        validation data is used) is printed as ``n/a``; formatting ``None``
        with ``:.4f`` would raise a ``TypeError`` and abort training.
        """
        logs = logs or {}

        def _fmt(key):
            value = logs.get(key)
            return 'n/a' if value is None else f"{value:.4f}"

        print(f"Epoch {epoch + 1} Accuracy: {_fmt('accuracy')}, "
              f"Val Accuracy: {_fmt('val_accuracy')}")

In [None]:
def reset_energy_usage():
    """Return a fresh resource-usage log: one independent empty list per metric."""
    metric_names = (
        'epoch',
        'time',
        'cpu_percent',
        'memory_percent',
        'gpu_percent',
        'gpu_memory_used',
    )
    return {metric: [] for metric in metric_names}

In [None]:
def log_energy_usage(epoch, energy_usage, start_time):
    """Append one resource-usage sample for ``epoch`` to ``energy_usage``.

    Args:
        epoch: Epoch index stored with the sample.
        energy_usage: Dict of lists as returned by ``reset_energy_usage``;
            modified in place.
        start_time: ``time.time()`` reference used for the elapsed time.
    """
    # Elapsed time is taken first so the 1-second CPU sampling interval below
    # is not included in it.
    elapsed_time = time.time() - start_time
    cpu_percent = psutil.cpu_percent(interval=1)
    memory_percent = psutil.virtual_memory().percent

    # Only the first GPU is sampled; without a GPU both GPU metrics are 0.
    detected_gpus = GPUtil.getGPUs()
    first_gpu = detected_gpus[0] if detected_gpus else None

    sample = {
        'epoch': epoch,
        'time': elapsed_time,
        'cpu_percent': cpu_percent,
        'memory_percent': memory_percent,
        'gpu_percent': first_gpu.load * 100 if first_gpu is not None else 0,
        'gpu_memory_used': first_gpu.memoryUsed if first_gpu is not None else 0,
    }
    for metric, value in sample.items():
        energy_usage[metric].append(value)

In [None]:
class EnergyUsageCallback(tf.keras.callbacks.Callback):
    """Keras callback that records one resource-usage sample per epoch."""

    def __init__(self, energy_usage, start_time):
        """Store the log dict to fill and the reference start time."""
        super().__init__()
        self.start_time = start_time
        self.energy_usage = energy_usage

    def on_epoch_end(self, epoch, logs=None):
        """Delegate to ``log_energy_usage`` with this callback's log and start time."""
        log_energy_usage(epoch, self.energy_usage, self.start_time)

In [None]:
# Training-time augmentation: random rotations, shifts, shear, zoom and
# horizontal flips; pixels exposed by a transform are filled with the nearest
# edge value.
datagen = ImageDataGenerator(rotation_range=20,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest')

# Validation data is only batched, never augmented.
val_datagen = ImageDataGenerator()

In [None]:
def infinite_generator(generator):
    """Yield the items of ``generator`` forever, restarting it after each pass."""
    while True:
        yield from generator

In [None]:
def train_and_save_model(model_fn, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, model_name):
    """Build, train and save one model and log resource usage per epoch.

    Args:
        model_fn: Factory called as ``model_fn(input_shape, num_classes)``.
        input_shape: Shape of one input sample.
        num_classes: Number of output classes.
        train_generator: Generator yielding training batches.
        val_generator: Generator yielding validation batches.
        steps_per_epoch: Training batches per epoch.
        validation_steps: Validation batches per epoch.
        model_name: Used for the model file (``models/{model_name}.h5``) and
            the metrics file (``energy_usage_metrics_{model_name}.csv``).

    Returns:
        Path of the saved model file.
    """
    model_save_path = f'models/{model_name}.h5'
    # Create the output folder before training. Otherwise a missing 'models'
    # directory only surfaces in model.save(), after all epochs have run.
    os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

    energy_usage = reset_energy_usage()

    model = model_fn(input_shape, num_classes)
    # Recompile so every architecture trains with the sparse loss, matching
    # the integer labels, whatever loss the factory compiled with.
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=["accuracy"])

    # NOTE(review): early stopping watches the *training* accuracy while the
    # learning-rate schedule watches the validation accuracy - confirm that
    # 'accuracy' (not 'val_accuracy') is intended here.
    early_stopping = EarlyStopping(monitor='accuracy', patience=3, verbose=1, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.2, patience=3, min_lr=0.0001, verbose=1)
    print_metrics = PrintEpochMetrics()

    # Kept as a module-level name because the original code published it.
    global start_time
    start_time = time.time()

    energy_usage_callback = EnergyUsageCallback(energy_usage, start_time)

    history = model.fit(train_generator,
                        steps_per_epoch=steps_per_epoch,
                        validation_data=val_generator,
                        validation_steps=validation_steps,
                        epochs=20,
                        verbose=1,
                        callbacks=[early_stopping, reduce_lr, print_metrics, energy_usage_callback])

    model.save(model_save_path)
    print(f"Model saved to {model_save_path}")

    energy_usage_df = pd.DataFrame(energy_usage)
    energy_usage_df.to_csv(f'energy_usage_metrics_{model_name}.csv', index=False)
    print(f"Energy usage metrics saved to energy_usage_metrics_{model_name}.csv")

    # Drop the model and reset Keras' global state before the next model is built.
    del model
    K.clear_session()

    return model_save_path

In [None]:
def reshape(X_patches, Y_patches, mode='eval'):
    """Return patches as ``(-1, d1, d2, d3)`` and labels shaped for ``mode``.

    In ``'test'`` mode the labels become a column vector ``(-1, 1)``; in any
    other mode the label array is returned unchanged.
    """
    trailing_dims = (X_patches.shape[1], X_patches.shape[2], X_patches.shape[3])
    X_reshaped = X_patches.reshape((-1,) + trailing_dims)
    Y_reshaped = Y_patches.reshape(-1, 1) if mode == 'test' else Y_patches
    return X_reshaped, Y_reshaped

In [None]:
def majority_voting(model, X_test, Y_test):
    """Predict classes for ``X_test`` and take a majority vote per group.

    The predicted classes are reshaped to ``(Y_test.shape[0], -1)``, so each
    row of ``Y_test`` owns one group of consecutive predictions.

    Returns:
        ``(cm_prediction, cm_winner)``: the grouped class predictions and a
        list with the most frequent class of every group (ties go to the
        lowest class index).
    """
    class_ids = np.argmax(model.predict(X_test), axis=1)
    cm_prediction = class_ids.reshape(Y_test.shape[0], -1)

    cm_winner = []
    for group in cm_prediction:
        votes = np.bincount(group)
        cm_winner.append(np.argmax(votes))
    return cm_prediction, cm_winner

In [None]:
def plot_confusion_matrix(cm, classes, title='Confusion Matrix', cmap=plt.cm.Blues):
    """Draw an annotated confusion-matrix heat map and show it.

    Args:
        cm: 2-D integer confusion matrix (rows: true labels, columns:
            predicted labels, as labelled on the axes).
        classes: Tick labels, one per class.
        title: Figure title.
        cmap: Matplotlib colormap for the heat map.
    """
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_positions = np.arange(len(classes))
    plt.xticks(tick_positions, classes, rotation=45)
    plt.yticks(tick_positions, classes)

    # Cells above half of the maximum count get white text for contrast.
    half_max = cm.max() / 2.
    for row, col in np.ndindex(*cm.shape):
        count = cm[row, col]
        if count > half_max:
            text_color = "white"
        else:
            text_color = "black"
        plt.text(col, row, f"{count:d}", horizontalalignment="center", color=text_color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

In [None]:
# Dataset location (relative to the working directory), the color spaces the
# notebook works with, and the class folder names in label order.
data_dir = Path('dataset/')
color_spaces = ['rgb', 'grayscale', 'lab']
class_list = ['CLL', 'FL', 'MCL']

In [None]:
# Collects the path returned by every train_and_save_model call below.
# A later cell rebinds this name to a dict keyed by color space.
all_model_paths = []

In [None]:
# Patch geometry in pixels. All splits use 36x36 patches; the test split uses
# a smaller stride (28 instead of 32), i.e. more overlap between windows.
patch_size_train_val = 36
stride_train_val = 32
patch_size_test = 36
stride_test = 28

In [None]:
# Prepare the data for the 'rgb' color space.
color = 'rgb'
X, Y = load_data(data_dir, color, class_list)

# Stratified 60/20/20 split on whole images, done before patch extraction so
# the patches of one image all land in the same split.
X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.4, random_state=seed, stratify=Y)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=seed, stratify=Y_temp)

# Cut every split into patches and write them to disk in batches.
create_patches(X_train, Y_train, size=patch_size_train_val, stride=stride_train_val, prefix=f'patches_{color}/train_patches_{color}')
create_patches(X_val, Y_val, size=patch_size_train_val, stride=stride_train_val, prefix=f'patches_{color}/val_patches_{color}')
create_patches(X_test, Y_test, size=patch_size_test, stride=stride_test, prefix=f'patches_{color}/test_patches_{color}')

# Collect the batch files. Patch and label files carry the same batch index,
# and both lists are sorted the same way, so they stay aligned pairwise.
train_patches_files = sorted(Path('.').glob(f'patches_{color}/train_patches_{color}_patches_*.npy'))
val_patches_files = sorted(Path('.').glob(f'patches_{color}/val_patches_{color}_patches_*.npy'))
test_patches_files = sorted(Path('.').glob(f'patches_{color}/test_patches_{color}_patches_*.npy'))

train_labels_files = sorted(Path('.').glob(f'patches_{color}/train_patches_{color}_labels_*.npy'))
val_labels_files = sorted(Path('.').glob(f'patches_{color}/val_patches_{color}_labels_*.npy'))
test_labels_files = sorted(Path('.').glob(f'patches_{color}/test_patches_{color}_labels_*.npy'))

if train_patches_files and val_patches_files and test_patches_files:
    # Re-assemble each split from its batch files.
    X_train_patches = np.vstack([np.load(f) for f in train_patches_files])
    Y_train_patches = np.hstack([np.load(f) for f in train_labels_files])

    X_val_patches = np.vstack([np.load(f) for f in val_patches_files])
    Y_val_patches = np.hstack([np.load(f) for f in val_labels_files])

    X_test_patches = np.vstack([np.load(f) for f in test_patches_files])
    Y_test_patches = np.hstack([np.load(f) for f in test_labels_files])

    print(f"Loaded patches for color space {color} with shapes: ")
    print(f"  X_train_patches: {X_train_patches.shape}")
    print(f"  Y_train_patches: {Y_train_patches.shape}")
    print(f"  X_val_patches: {X_val_patches.shape}")
    print(f"  Y_val_patches: {Y_val_patches.shape}")
    print(f"  X_test_patches: {X_test_patches.shape}")
    print(f"  Y_test_patches: {Y_test_patches.shape}")

    # Augmented training batches and plain validation batches of 32 patches,
    # each wrapped so it can be iterated indefinitely.
    train_generator = datagen.flow(X_train_patches, Y_train_patches, batch_size=32, shuffle=True)
    train_generator = infinite_generator(train_generator)

    val_generator = val_datagen.flow(X_val_patches, Y_val_patches, batch_size=32, shuffle=True)
    val_generator = infinite_generator(val_generator)

    # Whole batches per epoch; a trailing partial batch is not counted.
    steps_per_epoch = len(X_train_patches) // 32
    validation_steps = len(X_val_patches) // 32
else:
    print(f"Skipping color space {color} due to missing patches.")

In [None]:
# Rebuild the generators (same arguments as at the end of the previous cell)
# and derive the model input shape and class count. The same generator
# objects are then passed to all four training calls for this color space.
train_generator = datagen.flow(X_train_patches, Y_train_patches, batch_size=32, shuffle=True)
train_generator = infinite_generator(train_generator)
val_generator = val_datagen.flow(X_val_patches, Y_val_patches, batch_size=32, shuffle=True)
val_generator = infinite_generator(val_generator)
steps_per_epoch = len(X_train_patches) // 32
validation_steps = len(X_val_patches) // 32
input_shape = X_train_patches.shape[1:]
num_classes = len(class_list)

In [None]:
# Train and save the CNN + attention model for the current color space.
cnn_model_path = train_and_save_model(create_cnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'cnn_attention_{color}')
all_model_paths.append(cnn_model_path)

In [None]:
# Train and save the GRU + attention model for the current color space.
rnn_model_path = train_and_save_model(create_rnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'rnn_attention_{color}')
all_model_paths.append(rnn_model_path)

In [None]:
# Train and save the CNN + GRU + attention model for the current color space.
cnn_rnn_model_path = train_and_save_model(create_cnn_rnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'cnn_rnn_attention_{color}')
all_model_paths.append(cnn_rnn_model_path)

In [None]:
# Train and save the residual network for the current color space.
resnet_model_path = train_and_save_model(create_resnet_model, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'resnet_{color}')
all_model_paths.append(resnet_model_path)

In [None]:
# Prepare the data for the 'rgb' color space.
# NOTE(review): an earlier cell already runs this exact preparation for
# 'rgb'; this cell regenerates the same patch files - confirm the repetition
# is intended.
color = 'rgb'
X, Y = load_data(data_dir, color, class_list)

# Stratified 60/20/20 split on whole images, done before patch extraction so
# the patches of one image all land in the same split.
X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.4, random_state=seed, stratify=Y)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=seed, stratify=Y_temp)

# Cut every split into patches and write them to disk in batches.
create_patches(X_train, Y_train, size=patch_size_train_val, stride=stride_train_val, prefix=f'patches_{color}/train_patches_{color}')
create_patches(X_val, Y_val, size=patch_size_train_val, stride=stride_train_val, prefix=f'patches_{color}/val_patches_{color}')
create_patches(X_test, Y_test, size=patch_size_test, stride=stride_test, prefix=f'patches_{color}/test_patches_{color}')

# Collect the batch files. Patch and label files carry the same batch index,
# and both lists are sorted the same way, so they stay aligned pairwise.
train_patches_files = sorted(Path('.').glob(f'patches_{color}/train_patches_{color}_patches_*.npy'))
val_patches_files = sorted(Path('.').glob(f'patches_{color}/val_patches_{color}_patches_*.npy'))
test_patches_files = sorted(Path('.').glob(f'patches_{color}/test_patches_{color}_patches_*.npy'))

train_labels_files = sorted(Path('.').glob(f'patches_{color}/train_patches_{color}_labels_*.npy'))
val_labels_files = sorted(Path('.').glob(f'patches_{color}/val_patches_{color}_labels_*.npy'))
test_labels_files = sorted(Path('.').glob(f'patches_{color}/test_patches_{color}_labels_*.npy'))

# Show which files were found, for a quick sanity check.
print(f"Train patches files: {train_patches_files}")
print(f"Val patches files: {val_patches_files}")
print(f"Test patches files: {test_patches_files}")

print(f"Train labels files: {train_labels_files}")
print(f"Val labels files: {val_labels_files}")
print(f"Test labels files: {test_labels_files}")

if train_patches_files and val_patches_files and test_patches_files:
    # Re-assemble each split from its batch files.
    X_train_patches = np.vstack([np.load(f) for f in train_patches_files])
    Y_train_patches = np.hstack([np.load(f) for f in train_labels_files])

    X_val_patches = np.vstack([np.load(f) for f in val_patches_files])
    Y_val_patches = np.hstack([np.load(f) for f in val_labels_files])

    X_test_patches = np.vstack([np.load(f) for f in test_patches_files])
    Y_test_patches = np.hstack([np.load(f) for f in test_labels_files])

    print(f"Loaded patches for color space {color} with shapes: ")
    print(f"  X_train_patches: {X_train_patches.shape}")
    print(f"  Y_train_patches: {Y_train_patches.shape}")
    print(f"  X_val_patches: {X_val_patches.shape}")
    print(f"  Y_val_patches: {Y_val_patches.shape}")
    print(f"  X_test_patches: {X_test_patches.shape}")
    print(f"  Y_test_patches: {Y_test_patches.shape}")

    # Augmented training batches and plain validation batches of 32 patches,
    # each wrapped so it can be iterated indefinitely.
    train_generator = datagen.flow(X_train_patches, Y_train_patches, batch_size=32, shuffle=True)
    train_generator = infinite_generator(train_generator)

    val_generator = val_datagen.flow(X_val_patches, Y_val_patches, batch_size=32, shuffle=True)
    val_generator = infinite_generator(val_generator)

    # Whole batches per epoch; a trailing partial batch is not counted.
    steps_per_epoch = len(X_train_patches) // 32
    validation_steps = len(X_val_patches) // 32
else:
    print(f"Skipping color space {color} due to missing patches.")

In [None]:
# Rebuild the generators (same arguments as at the end of the previous cell)
# and derive the model input shape and class count. The same generator
# objects are then passed to all four training calls for this color space.
train_generator = datagen.flow(X_train_patches, Y_train_patches, batch_size=32, shuffle=True)
train_generator = infinite_generator(train_generator)
val_generator = val_datagen.flow(X_val_patches, Y_val_patches, batch_size=32, shuffle=True)
val_generator = infinite_generator(val_generator)
steps_per_epoch = len(X_train_patches) // 32
validation_steps = len(X_val_patches) // 32
input_shape = X_train_patches.shape[1:]
num_classes = len(class_list)

In [None]:
# Train and save the CNN + attention model for the current color space.
# With color == 'rgb' this writes the same model file name as the earlier
# 'rgb' run and appends the same path to all_model_paths again.
cnn_model_path = train_and_save_model(create_cnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'cnn_attention_{color}')
all_model_paths.append(cnn_model_path)

In [None]:
# Train and save the GRU + attention model for the current color space.
# With color == 'rgb' this writes the same model file name as the earlier
# 'rgb' run and appends the same path to all_model_paths again.
rnn_model_path = train_and_save_model(create_rnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'rnn_attention_{color}')
all_model_paths.append(rnn_model_path)

In [None]:
# Train and save the CNN + GRU + attention model for the current color space.
# With color == 'rgb' this writes the same model file name as the earlier
# 'rgb' run and appends the same path to all_model_paths again.
cnn_rnn_model_path = train_and_save_model(create_cnn_rnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'cnn_rnn_attention_{color}')
all_model_paths.append(cnn_rnn_model_path)

In [None]:
# Train and save the residual network for the current color space.
# With color == 'rgb' this writes the same model file name as the earlier
# 'rgb' run and appends the same path to all_model_paths again.
resnet_model_path = train_and_save_model(create_resnet_model, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'resnet_{color}')
all_model_paths.append(resnet_model_path)

In [None]:
# Prepare the data for the 'lab' color space.
color = 'lab'
X, Y = load_data(data_dir, color, class_list)

# Stratified 60/20/20 split on whole images, done before patch extraction so
# the patches of one image all land in the same split.
X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.4, random_state=seed, stratify=Y)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=seed, stratify=Y_temp)

# Cut every split into patches and write them to disk in batches.
create_patches(X_train, Y_train, size=patch_size_train_val, stride=stride_train_val, prefix=f'patches_{color}/train_patches_{color}')
create_patches(X_val, Y_val, size=patch_size_train_val, stride=stride_train_val, prefix=f'patches_{color}/val_patches_{color}')
create_patches(X_test, Y_test, size=patch_size_test, stride=stride_test, prefix=f'patches_{color}/test_patches_{color}')

# Collect the batch files. Patch and label files carry the same batch index,
# and both lists are sorted the same way, so they stay aligned pairwise.
train_patches_files = sorted(Path('.').glob(f'patches_{color}/train_patches_{color}_patches_*.npy'))
val_patches_files = sorted(Path('.').glob(f'patches_{color}/val_patches_{color}_patches_*.npy'))
test_patches_files = sorted(Path('.').glob(f'patches_{color}/test_patches_{color}_patches_*.npy'))

train_labels_files = sorted(Path('.').glob(f'patches_{color}/train_patches_{color}_labels_*.npy'))
val_labels_files = sorted(Path('.').glob(f'patches_{color}/val_patches_{color}_labels_*.npy'))
test_labels_files = sorted(Path('.').glob(f'patches_{color}/test_patches_{color}_labels_*.npy'))

# Show which files were found, for a quick sanity check.
print(f"Train patches files: {train_patches_files}")
print(f"Val patches files: {val_patches_files}")
print(f"Test patches files: {test_patches_files}")

print(f"Train labels files: {train_labels_files}")
print(f"Val labels files: {val_labels_files}")
print(f"Test labels files: {test_labels_files}")

if train_patches_files and val_patches_files and test_patches_files:
    # Re-assemble each split from its batch files.
    X_train_patches = np.vstack([np.load(f) for f in train_patches_files])
    Y_train_patches = np.hstack([np.load(f) for f in train_labels_files])

    X_val_patches = np.vstack([np.load(f) for f in val_patches_files])
    Y_val_patches = np.hstack([np.load(f) for f in val_labels_files])

    X_test_patches = np.vstack([np.load(f) for f in test_patches_files])
    Y_test_patches = np.hstack([np.load(f) for f in test_labels_files])

    print(f"Loaded patches for color space {color} with shapes: ")
    print(f"  X_train_patches: {X_train_patches.shape}")
    print(f"  Y_train_patches: {Y_train_patches.shape}")
    print(f"  X_val_patches: {X_val_patches.shape}")
    print(f"  Y_val_patches: {Y_val_patches.shape}")
    print(f"  X_test_patches: {X_test_patches.shape}")
    print(f"  Y_test_patches: {Y_test_patches.shape}")

    # Augmented training batches and plain validation batches of 32 patches,
    # each wrapped so it can be iterated indefinitely.
    train_generator = datagen.flow(X_train_patches, Y_train_patches, batch_size=32, shuffle=True)
    train_generator = infinite_generator(train_generator)

    val_generator = val_datagen.flow(X_val_patches, Y_val_patches, batch_size=32, shuffle=True)
    val_generator = infinite_generator(val_generator)

    # Whole batches per epoch; a trailing partial batch is not counted.
    steps_per_epoch = len(X_train_patches) // 32
    validation_steps = len(X_val_patches) // 32
else:
    print(f"Skipping color space {color} due to missing patches.")

In [None]:
# Rebuild the generators (same arguments as at the end of the previous cell)
# and derive the model input shape and class count. The same generator
# objects are then passed to all four training calls for this color space.
train_generator = datagen.flow(X_train_patches, Y_train_patches, batch_size=32, shuffle=True)
train_generator = infinite_generator(train_generator)
val_generator = val_datagen.flow(X_val_patches, Y_val_patches, batch_size=32, shuffle=True)
val_generator = infinite_generator(val_generator)
steps_per_epoch = len(X_train_patches) // 32
validation_steps = len(X_val_patches) // 32
input_shape = X_train_patches.shape[1:]
num_classes = len(class_list)

In [None]:
# Train and save the CNN + attention model for the current color space.
cnn_model_path = train_and_save_model(create_cnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'cnn_attention_{color}')
all_model_paths.append(cnn_model_path)

In [None]:
# Train and save the GRU + attention model for the current color space.
rnn_model_path = train_and_save_model(create_rnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'rnn_attention_{color}')
all_model_paths.append(rnn_model_path)

In [None]:
# Train and save the CNN + GRU + attention model for the current color space.
cnn_rnn_model_path = train_and_save_model(create_cnn_rnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'cnn_rnn_attention_{color}')
all_model_paths.append(cnn_rnn_model_path)

In [None]:
# Train and save the residual network for the current color space.
resnet_model_path = train_and_save_model(create_resnet_model, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'resnet_{color}')
all_model_paths.append(resnet_model_path)

In [None]:
# Prepare the data for the 'grayscale' color space.
color = 'grayscale'
X, Y = load_data(data_dir, color, class_list)

# Stratified 60/20/20 split on whole images, done before patch extraction so
# the patches of one image all land in the same split.
X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.4, random_state=seed, stratify=Y)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=seed, stratify=Y_temp)

# Cut every split into patches and write them to disk in batches.
create_patches(X_train, Y_train, size=patch_size_train_val, stride=stride_train_val, prefix=f'patches_{color}/train_patches_{color}')
create_patches(X_val, Y_val, size=patch_size_train_val, stride=stride_train_val, prefix=f'patches_{color}/val_patches_{color}')
create_patches(X_test, Y_test, size=patch_size_test, stride=stride_test, prefix=f'patches_{color}/test_patches_{color}')

# Collect the batch files. Patch and label files carry the same batch index,
# and both lists are sorted the same way, so they stay aligned pairwise.
train_patches_files = sorted(Path('.').glob(f'patches_{color}/train_patches_{color}_patches_*.npy'))
val_patches_files = sorted(Path('.').glob(f'patches_{color}/val_patches_{color}_patches_*.npy'))
test_patches_files = sorted(Path('.').glob(f'patches_{color}/test_patches_{color}_patches_*.npy'))

train_labels_files = sorted(Path('.').glob(f'patches_{color}/train_patches_{color}_labels_*.npy'))
val_labels_files = sorted(Path('.').glob(f'patches_{color}/val_patches_{color}_labels_*.npy'))
test_labels_files = sorted(Path('.').glob(f'patches_{color}/test_patches_{color}_labels_*.npy'))

# Show which files were found, for a quick sanity check.
print(f"Train patches files: {train_patches_files}")
print(f"Val patches files: {val_patches_files}")
print(f"Test patches files: {test_patches_files}")

print(f"Train labels files: {train_labels_files}")
print(f"Val labels files: {val_labels_files}")
print(f"Test labels files: {test_labels_files}")

if train_patches_files and val_patches_files and test_patches_files:
    # Re-assemble each split from its batch files.
    X_train_patches = np.vstack([np.load(f) for f in train_patches_files])
    Y_train_patches = np.hstack([np.load(f) for f in train_labels_files])

    X_val_patches = np.vstack([np.load(f) for f in val_patches_files])
    Y_val_patches = np.hstack([np.load(f) for f in val_labels_files])

    X_test_patches = np.vstack([np.load(f) for f in test_patches_files])
    Y_test_patches = np.hstack([np.load(f) for f in test_labels_files])

    print(f"Loaded patches for color space {color} with shapes: ")
    print(f"  X_train_patches: {X_train_patches.shape}")
    print(f"  Y_train_patches: {Y_train_patches.shape}")
    print(f"  X_val_patches: {X_val_patches.shape}")
    print(f"  Y_val_patches: {Y_val_patches.shape}")
    print(f"  X_test_patches: {X_test_patches.shape}")
    print(f"  Y_test_patches: {Y_test_patches.shape}")

    # Augmented training batches and plain validation batches of 32 patches,
    # each wrapped so it can be iterated indefinitely.
    train_generator = datagen.flow(X_train_patches, Y_train_patches, batch_size=32, shuffle=True)
    train_generator = infinite_generator(train_generator)

    val_generator = val_datagen.flow(X_val_patches, Y_val_patches, batch_size=32, shuffle=True)
    val_generator = infinite_generator(val_generator)

    # Whole batches per epoch; a trailing partial batch is not counted.
    steps_per_epoch = len(X_train_patches) // 32
    validation_steps = len(X_val_patches) // 32
else:
    print(f"Skipping color space {color} due to missing patches.")

In [None]:
# Rebuild the generators (same arguments as at the end of the previous cell)
# and derive the model input shape and class count. The same generator
# objects are then passed to all four training calls for this color space.
train_generator = datagen.flow(X_train_patches, Y_train_patches, batch_size=32, shuffle=True)
train_generator = infinite_generator(train_generator)
val_generator = val_datagen.flow(X_val_patches, Y_val_patches, batch_size=32, shuffle=True)
val_generator = infinite_generator(val_generator)
steps_per_epoch = len(X_train_patches) // 32
validation_steps = len(X_val_patches) // 32
input_shape = X_train_patches.shape[1:]
num_classes = len(class_list)

In [None]:
# Train and save the CNN + attention model for the current color space.
cnn_model_path = train_and_save_model(create_cnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'cnn_attention_{color}')
all_model_paths.append(cnn_model_path)

In [None]:
# Train and save the GRU + attention model for the current color space.
rnn_model_path = train_and_save_model(create_rnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'rnn_attention_{color}')
all_model_paths.append(rnn_model_path)

In [None]:
# Train and save the CNN + GRU + attention model for the current color space.
cnn_rnn_model_path = train_and_save_model(create_cnn_rnn_model_with_attention, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'cnn_rnn_attention_{color}')
all_model_paths.append(cnn_rnn_model_path)

In [None]:
# Train and save the residual network for the current color space.
resnet_model_path = train_and_save_model(create_resnet_model, input_shape, num_classes, train_generator, val_generator, steps_per_epoch, validation_steps, f'resnet_{color}')
all_model_paths.append(resnet_model_path)

In [None]:
def evaluate_model(model_path, X_test_patches, Y_test_patches, color):
    """Load a saved model, print its test metrics and return its predictions.

    The model is loaded with the custom ``Attention`` layer registered, scored
    with ``model.evaluate`` on the arrays returned by ``reshape(...)``, and then
    passed to ``majority_voting`` on the arrays returned by
    ``reshape(..., mode='test')``. Two confusion matrices (patches, images) are
    plotted and a classification report for the image-level result is printed.

    Args:
        model_path: Path handed to ``load_model``.
        X_test_patches: Test inputs; also fed unchanged to ``model.predict``.
        Y_test_patches: Test labels matching ``X_test_patches``.
        color: Color-space name, used only in printed messages and plot titles.

    Returns:
        The output of ``model.predict(X_test_patches)``.

    Raises:
        Exception: Any error raised while loading or evaluating is printed and
            then re-raised unchanged.
    """
    try:
        model = load_model(model_path, custom_objects={'Attention': Attention})

        # Patch-level loss / accuracy as reported by Keras.
        X_test_eval, Y_test_eval = reshape(X_test_patches, Y_test_patches)
        metrics = model.evaluate(X_test_eval, Y_test_eval, verbose=0)
        print(f'Test loss: {metrics[0]} / Test accuracy: {metrics[1]}')

        # Voting over the 'test'-mode arrays.
        # NOTE(review): presumably axis 0 of Y_test indexes images and column 0
        # holds each image's label — confirm against reshape().
        X_test, Y_test = reshape(X_test_patches, Y_test_patches, mode='test')
        cm_prediction, cm_winner = majority_voting(model, X_test, Y_test)

        # Flatten the 2-D per-patch predictions into a single vector.
        flat_length = cm_prediction.shape[0] * cm_prediction.shape[1]
        cm_prediction = np.asarray(cm_prediction).reshape(flat_length)

        image_labels = Y_test[:, 0]
        cm_patches = confusion_matrix(Y_test_eval, cm_prediction)
        cm_img = confusion_matrix(image_labels, cm_winner)

        print(f'Confusion matrix for patches for color {color}.')
        plot_confusion_matrix(
            cm_patches,
            classes=class_list,
            title=f'Confusion Matrix for patches ({color})',
        )
        print(f'Confusion matrix for images for color {color}.')
        plot_confusion_matrix(
            cm_img,
            classes=class_list,
            title=f'Confusion Matrix for images ({color})',
            cmap=plt.cm.Reds,
        )

        print(f'Classification report for color {color}:')
        print(classification_report(image_labels, cm_winner, target_names=class_list))

        # NOTE(review): predictions are made on the raw X_test_patches, not on
        # X_test_eval — confirm the two have the same layout.
        return model.predict(X_test_patches)

    except Exception as e:
        print(f"Error occurred while evaluating model {model_path}: {e}")
        raise

In [None]:
color_spaces = ['rgb', 'grayscale', 'lab']

# Saved-model files grouped by color space: one .h5 per architecture, named
# models/<architecture>_<color space>.h5, in a fixed architecture order.
all_model_paths = {
    space: [
        f'models/{architecture}_{space}.h5'
        for architecture in ('cnn_attention', 'rnn_attention', 'cnn_rnn_attention', 'resnet')
    ]
    for space in color_spaces
}

In [None]:
# Per-color-space containers: the loaded test arrays, and the list of
# prediction arrays returned by each successfully evaluated model.
X_test_patches_dict = {}
Y_test_patches_dict = {}
ensemble_predictions = {color: [] for color in color_spaces}

for color in color_spaces:
    # Sorted so the patch shards and label shards are read in matching order.
    test_patches_files = sorted(Path(f'patches_{color}').glob(f'test_patches_{color}_patches_*.npy'))
    test_labels_files = sorted(Path(f'patches_{color}').glob(f'test_patches_{color}_labels_*.npy'))

    if not test_patches_files or not test_labels_files:
        print(f"No patches found for color space {color}.")
        continue

    # Patch shards are stacked along axis 0; label shards are joined with hstack.
    X_test_patches = np.vstack([np.load(shard) for shard in test_patches_files])
    Y_test_patches = np.hstack([np.load(shard) for shard in test_labels_files])

    X_test_patches_dict[color] = X_test_patches
    Y_test_patches_dict[color] = Y_test_patches

    print(
        f"Loaded patches for color space {color} with shapes: ",
        f"  X_test_patches: {X_test_patches.shape}",
        f"  Y_test_patches: {Y_test_patches.shape}",
        sep='\n',
    )

    for model_path in all_model_paths[color]:
        print(f'Evaluating model {model_path}')
        # Best effort: a model that fails to evaluate is reported and left out
        # of this color space's ensemble instead of aborting the whole run.
        try:
            y_pred = evaluate_model(model_path, X_test_patches, Y_test_patches, color)
        except Exception as e:
            print(f"Failed to evaluate model {model_path}: {e}")
        else:
            ensemble_predictions[color].append(y_pred)

In [None]:
def get_ensemble_predictions(predictions_list):
    """Average a collection of equally shaped prediction arrays element-wise.

    Args:
        predictions_list: Sequence of arrays (one per model) with identical shapes.

    Returns:
        Array with the shape of a single input, holding the mean over the
        leading (model) axis.
    """
    stacked = np.asanyarray(predictions_list)
    return stacked.mean(axis=0)

# Imported in this cell because the notebook's main import cell does not bring
# these two estimators into scope; without them the lines below raise NameError.
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

# Base estimators for the soft-voting meta-classifier built further down.
log_reg = LogisticRegression(max_iter=1000)
knn = KNeighborsClassifier(n_neighbors=5)

In [None]:
# One averaged prediction array per color space that has at least one
# successfully evaluated model; color spaces without predictions are reported.
final_ensemble_predictions = []
for color in color_spaces:
    if not ensemble_predictions[color]:
        print(f"No predictions available for color space {color}.")
        continue
    ensemble_pred = get_ensemble_predictions(ensemble_predictions[color])
    final_ensemble_predictions.append(ensemble_pred)

In [None]:
if final_ensemble_predictions:
    # Average the per-color-space arrays element-wise, then take the arg-max
    # class for every row.
    merged_ensemble_pred = np.mean(final_ensemble_predictions, axis=0)
    merged_ensemble_pred_labels = np.argmax(merged_ensemble_pred, axis=1)

    # The merged prediction has ONE row per test sample, so it has to be scored
    # against a single copy of the labels. Concatenating the labels of every
    # color space (as was done before) yields a vector several times too long
    # and makes accuracy_score fail.
    # NOTE(review): assumes every color space holds the same test samples in
    # the same order — TODO confirm against the patch-extraction step.
    reference_color = next(
        (
            name
            for name in color_spaces
            if name in Y_test_patches_dict and ensemble_predictions[name]
        ),
        None,
    )
    if reference_color is None:
        print("No ensemble predictions available to merge.")
    else:
        Y_test_combined = np.asarray(Y_test_patches_dict[reference_color])
        # Accept either class indices (1-D) or one-hot rows (2-D).
        if Y_test_combined.ndim > 1:
            Y_test_combined = np.argmax(Y_test_combined, axis=1)

        accuracy = accuracy_score(Y_test_combined, merged_ensemble_pred_labels)
        print(f'Merged ensemble accuracy: {accuracy:.2f}')

        np.save('merged_ensemble_predictions.npy', merged_ensemble_pred)
else:
    print("No ensemble predictions available to merge.")

In [None]:
# Per-color-space feature matrices: the outputs of extract_features for every
# model of that color space, concatenated column-wise, then the three color
# spaces concatenated column-wise again.
#
# Fix: the grayscale arrays are stored under the key 'grayscale' (the name used
# in color_spaces); the previous 'gray' lookup raised KeyError.
#
# NOTE(review): the "train" features are extracted from the *test* patches, so
# features_train and features_test are built from identical inputs and any
# classifier fitted on features_train is fitted on test data. No per-color
# training arrays are visible from this cell — TODO replace with real training
# patches.
features_train_rgb = np.concatenate([extract_features(model, X_test_patches_dict['rgb']) for model in models_rgb], axis=1)
features_train_gray = np.concatenate([extract_features(model, X_test_patches_dict['grayscale']) for model in models_gray], axis=1)
features_train_lab = np.concatenate([extract_features(model, X_test_patches_dict['lab']) for model in models_lab], axis=1)
features_train = np.concatenate([features_train_rgb, features_train_gray, features_train_lab], axis=1)

features_test_rgb = np.concatenate([extract_features(model, X_test_patches_dict['rgb']) for model in models_rgb], axis=1)
features_test_gray = np.concatenate([extract_features(model, X_test_patches_dict['grayscale']) for model in models_gray], axis=1)
features_test_lab = np.concatenate([extract_features(model, X_test_patches_dict['lab']) for model in models_lab], axis=1)
features_test = np.concatenate([features_test_rgb, features_test_gray, features_test_lab], axis=1)

In [None]:
# Soft-voting meta-classifier over the logistic-regression and k-NN estimators.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_reg),
        ('knn', knn),
    ],
    voting='soft',
)

# Targets for the fit. Other cells pass Y_test_patches_dict[...] straight to
# accuracy_score as class indices, so the array may be 1-D; arg-max is applied
# only when the labels really are 2-D (one-hot) to avoid an axis error.
# NOTE(review): features_train is derived from the test patches, so this fit
# uses test data — confirm that is intended.
rgb_fit_labels = np.asarray(Y_test_patches_dict['rgb'])
if rgb_fit_labels.ndim > 1:
    rgb_fit_labels = np.argmax(rgb_fit_labels, axis=1)

voting_clf.fit(features_train, rgb_fit_labels)


In [None]:
# Re-fits voting_clf on the same inputs as the fit at the end of the previous
# cell. Arg-max is applied only when the labels are 2-D (one-hot); 1-D class
# indices are used as they are, which avoids an axis error.
rgb_refit_labels = np.asarray(Y_test_patches_dict['rgb'])
if rgb_refit_labels.ndim > 1:
    rgb_refit_labels = np.argmax(rgb_refit_labels, axis=1)
voting_clf.fit(features_train, rgb_refit_labels)

In [None]:
print("Ensemble Model Classification Report (RGB):")
if ensemble_predictions['rgb']:
    # Use the RGB models' own averaged output, converted to class indices.
    # The global `ensemble_pred` left behind by earlier loops belongs to
    # whichever color space was processed last and may still hold
    # probabilities rather than labels.
    rgb_report_pred = np.argmax(get_ensemble_predictions(ensemble_predictions['rgb']), axis=1)

    # Accept either class indices (1-D) or one-hot rows (2-D) as ground truth.
    rgb_report_true = np.asarray(Y_test_patches_dict['rgb'])
    if rgb_report_true.ndim > 1:
        rgb_report_true = np.argmax(rgb_report_true, axis=1)

    print(classification_report(rgb_report_true, rgb_report_pred, target_names=['CLL', 'FL', 'MCL']))
else:
    print("No predictions available for color space rgb.")

In [None]:
print("Ensemble Model Classification Report (Grayscale):")
if ensemble_predictions['grayscale']:
    # Use the grayscale models' own averaged output, converted to class
    # indices. The global `ensemble_pred` left behind by earlier loops belongs
    # to whichever color space was processed last and may still hold
    # probabilities rather than labels.
    grayscale_report_pred = np.argmax(get_ensemble_predictions(ensemble_predictions['grayscale']), axis=1)

    # Accept either class indices (1-D) or one-hot rows (2-D) as ground truth.
    grayscale_report_true = np.asarray(Y_test_patches_dict['grayscale'])
    if grayscale_report_true.ndim > 1:
        grayscale_report_true = np.argmax(grayscale_report_true, axis=1)

    print(classification_report(grayscale_report_true, grayscale_report_pred, target_names=['CLL', 'FL', 'MCL']))
else:
    print("No predictions available for color space grayscale.")

In [None]:
print("Ensemble Model Classification Report (LAB):")
if ensemble_predictions['lab']:
    # Use the LAB models' own averaged output, converted to class indices.
    # The global `ensemble_pred` left behind by earlier loops may still hold
    # probabilities rather than labels.
    lab_report_pred = np.argmax(get_ensemble_predictions(ensemble_predictions['lab']), axis=1)

    # Accept either class indices (1-D) or one-hot rows (2-D) as ground truth.
    lab_report_true = np.asarray(Y_test_patches_dict['lab'])
    if lab_report_true.ndim > 1:
        lab_report_true = np.argmax(lab_report_true, axis=1)

    print(classification_report(lab_report_true, lab_report_pred, target_names=['CLL', 'FL', 'MCL']))
else:
    print("No predictions available for color space lab.")

In [None]:
# Accuracy of each color space's averaged ensemble, scored against that color
# space's own stored labels.
for color in color_spaces:
    if not ensemble_predictions[color]:
        print(f"No predictions available for color space {color}.")
        continue

    # Mean over the models, then the arg-max class for every row.
    ensemble_pred = np.argmax(get_ensemble_predictions(ensemble_predictions[color]), axis=1)
    accuracy = accuracy_score(Y_test_patches_dict[color], ensemble_pred)
    print(f'Ensemble accuracy for {color} color space: {accuracy:.2f}')


In [None]:
# Write each color space's averaged prediction array to
# ensemble_predictions_<color>.npy; color spaces without predictions are skipped.
for color in color_spaces:
    if not ensemble_predictions[color]:
        continue
    ensemble_pred = get_ensemble_predictions(ensemble_predictions[color])
    np.save(f'ensemble_predictions_{color}.npy', ensemble_pred)

In [None]:
# Rebuild the list of per-color-space averaged predictions; color spaces
# without any model output are reported and left out.
final_ensemble_predictions = []
for color in color_spaces:
    if not ensemble_predictions[color]:
        print(f"No predictions available for color space {color}.")
        continue
    ensemble_pred = get_ensemble_predictions(ensemble_predictions[color])
    final_ensemble_predictions.append(ensemble_pred)

In [None]:
if final_ensemble_predictions:
    # Average the per-color-space arrays element-wise, then take the arg-max
    # class for every row.
    merged_ensemble_pred = np.mean(final_ensemble_predictions, axis=0)
    merged_ensemble_pred_labels = np.argmax(merged_ensemble_pred, axis=1)

    # The merged prediction has ONE row per test sample, so it has to be scored
    # against a single copy of the labels. Concatenating the labels of every
    # color space (as was done before) yields a vector several times too long
    # and makes accuracy_score fail.
    # NOTE(review): assumes every color space holds the same test samples in
    # the same order — TODO confirm against the patch-extraction step.
    reference_color = next(
        (
            name
            for name in color_spaces
            if name in Y_test_patches_dict and ensemble_predictions[name]
        ),
        None,
    )
    if reference_color is None:
        print("No ensemble predictions available to merge.")
    else:
        Y_test_combined = np.asarray(Y_test_patches_dict[reference_color])
        # Accept either class indices (1-D) or one-hot rows (2-D).
        if Y_test_combined.ndim > 1:
            Y_test_combined = np.argmax(Y_test_combined, axis=1)

        accuracy = accuracy_score(Y_test_combined, merged_ensemble_pred_labels)
        print(f'Merged ensemble accuracy: {accuracy:.2f}')

        np.save('merged_ensemble_predictions.npy', merged_ensemble_pred)
else:
    print("No ensemble predictions available to merge.")