In [None]:
!pip install -q kaggle

In [None]:
# Open Colab's upload dialog; the cells below expect a `kaggle.json` file to be uploaded here.
from google.colab import files
files.upload()

In [None]:
! mkdir ~/.kaggle

In [None]:
# Copy the uploaded kaggle.json into ~/.kaggle/ for the kaggle CLI call below.
! cp kaggle.json ~/.kaggle/

In [None]:
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the lung and colon histopathology dataset archive with the kaggle CLI.
!kaggle datasets download -d andrewmvd/lung-and-colon-cancer-histopathological-images

In [None]:
!unzip /content/lung-and-colon-cancer-histopathological-images.zip

In [None]:
import os
import time
import shutil
import pathlib
import itertools
from PIL import Image

import cv2
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras import regularizers

import warnings
warnings.filterwarnings("ignore")

print ('modules loaded')

In [None]:
# Build a DataFrame with one row per image: its file path and a readable class label.
# Directory layout walked here: <data_dir>/<image set>/<class folder>/<image file>.
data_dir = '/content/lung_colon_image_set'

# Class-folder name -> label used throughout the notebook.
label_names = {
    'colon_aca': 'Colon Adenocarcinoma',
    'colon_n': 'Colon Benign Tissue',
    'lung_aca': 'Lung Adenocarcinoma',
    'lung_n': 'Lung Benign Tissue',
    'lung_scc': 'Lung Squamous Cell Carcinoma',
}

filepaths = []
labels = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order (and
# therefore the seeded train/validation/test split) identical between runs.
folds = sorted(os.listdir(data_dir))
for fold in folds:
    foldpath = os.path.join(data_dir, fold)
    if not os.path.isdir(foldpath):
        continue

    for f in sorted(os.listdir(foldpath)):
        f_path = os.path.join(foldpath, f)
        label = label_names.get(f)
        # Skip anything that is not a known class folder. Previously such an entry
        # added a path without a label, silently misaligning the two lists.
        if label is None or not os.path.isdir(f_path):
            continue

        for file in sorted(os.listdir(f_path)):
            filepaths.append(os.path.join(f_path, file))
            labels.append(label)

Fseries = pd.Series(filepaths, name= 'filepaths')
Lseries = pd.Series(labels, name='labels')
data = pd.concat([Fseries, Lseries], axis= 1)

In [None]:
# Preview the first rows of the file-path/label table.
data.head()

In [None]:
# Preview the last rows of the file-path/label table.
data.tail()

In [None]:
# (number of images, number of columns)
data.shape

### Comment

In [None]:
# import pandas as pd

# # Optional: subsample the `data` DataFrame built above
# # 'filepaths' is the file-path column
# # 'labels' is the label column

# # Define the number of samples you want per class
# n_samples = 500

# # Group by the label and sample n_samples from each group
# data = data_x.groupby('labels', group_keys=False).apply(lambda x: x.sample(min(len(x), n_samples)))

# # Reset the index if needed
# data = data.reset_index(drop=True)

# # Now `data` contains at most n_samples (500) files for each class


In [None]:
# Shape of `data` as used for the split below (unchanged while the sampling cell above stays commented out).
data.shape

In [None]:
# Print the number of files for each class.
# NOTE: `class_counts` is a per-class Series here; the model sections below rebind
# the same name to the integer number of classes.
class_counts = data['labels'].value_counts()
print(class_counts)


In [None]:
# Split the data: 80% for training, the remaining 20% held out in ts_df.
train_df, ts_df = train_test_split(data, train_size = 0.8, shuffle = True, random_state = 42)

# Split the held-out 20% in half: 10% validation and 10% test (relative to the full data).
valid_df, test_df = train_test_split(ts_df, train_size = 0.5, shuffle = True, random_state = 42)

In [None]:
# Batch size and input geometry shared by every model in the notebook.
batch_size = 32
img_size = (224, 224)
channels = 3
img_shape = img_size + (channels,)

# Both generators only rescale pixel values to [0, 1]; no augmentation is applied.
tr_gen = ImageDataGenerator(rescale=1. / 255)
ts_gen = ImageDataGenerator(rescale=1. / 255)

# Options common to the three iterators; only the frame and the shuffle flag differ.
flow_options = dict(x_col='filepaths', y_col='labels', target_size=img_size,
                    class_mode='categorical', color_mode='rgb', batch_size=batch_size)

train_gen = tr_gen.flow_from_dataframe(train_df, shuffle=True, **flow_options)

valid_gen = ts_gen.flow_from_dataframe(valid_df, shuffle=True, **flow_options)

# The test iterator is not shuffled, so predictions line up with test_gen.classes.
test_gen = ts_gen.flow_from_dataframe(test_df, shuffle=False, **flow_options)

In [None]:
# Pie chart of how the images are divided between the three splits.
# Uses the number of images per iterator (len(gen) would be the number of batches).
split_sizes = [train_gen.samples, valid_gen.samples, test_gen.samples]
plt.pie(split_sizes,
        labels=['train', 'validation', 'test'], autopct='%.1f%%', colors=['aqua', 'red', 'green'], explode=(0.05, 0, 0))
# Save before plt.show(): after the figure has been shown, savefig writes an empty canvas.
plt.savefig('dataset_pie.png')
plt.show()

In [None]:
# Print the class-name -> index mapping of each iterator so they can be compared.
print(train_gen.class_indices)
print(test_gen.class_indices)
print(valid_gen.class_indices)

In [None]:
g_dict = train_gen.class_indices      # dictionary {'class name': index}
classes = list(g_dict.keys())         # class names as strings
images, labels = next(train_gen)      # one batch of images with their one-hot labels

# The grid has 4 x 4 = 16 cells, so at most 16 images of the batch are shown.
# (Looping over all `batch_size` = 32 images raised a ValueError at subplot 17.)
n_rows, n_cols = 4, 4
n_show = min(len(images), n_rows * n_cols)

plt.figure(figsize= (20, 20))

for i in range(n_show):
    plt.subplot(n_rows, n_cols, i + 1)
    image = images[i]
    plt.imshow(image)
    index = np.argmax(labels[i])  # position of the 1 in the one-hot label
    class_name = classes[index]   # class name for that index
    plt.title(class_name, color= 'black', fontsize= 16)
    plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Displaying the model performance
def model_performance(history, Epochs):
    """Plot training vs. validation loss and accuracy for every recorded epoch.

    Args:
        history: object returned by `model.fit`; its `history` dict must contain
            the keys 'accuracy', 'loss', 'val_accuracy' and 'val_loss'.
        Epochs: accepted for backward compatibility only. The x-axis is always
            derived from the number of epochs actually recorded in `history`.
    """
    tr_acc = history.history['accuracy']
    tr_loss = history.history['loss']
    val_acc = history.history['val_accuracy']
    val_loss = history.history['val_loss']

    # 1-based epoch numbers for the x-axis.
    epoch_axis = list(range(1, len(tr_acc) + 1))

    plt.figure(figsize= (20, 8))
    plt.style.use('fivethirtyeight')

    plt.subplot(1, 2, 1)
    plt.plot(epoch_axis, tr_loss, 'r', label= 'Training loss')
    plt.plot(epoch_axis, val_loss, 'g', label= 'Validation loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epoch_axis, tr_acc, 'r', label= 'Training Accuracy')
    plt.plot(epoch_axis, val_acc, 'g', label= 'Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # tight_layout has to be called; the bare attribute reference did nothing.
    plt.tight_layout()
    plt.show()


# Evaluate the model
def model_evaluation(model):
    """Evaluate `model` on the global train, validation and test iterators and
    print the loss and accuracy of each split.

    Element 0 of each `model.evaluate` result is printed as the loss and
    element 1 as the accuracy.
    """
    splits = (('Train', train_gen), ('Validation', valid_gen), ('Test', test_gen))

    # Run all three evaluations first, then print the report.
    scores = [(split_name, model.evaluate(gen, verbose= 1)) for split_name, gen in splits]

    for position, (split_name, score) in enumerate(scores):
        if position > 0:
            print('-' * 20)
        print(f"{split_name} Loss: ", score[0])
        print(f"{split_name} Accuracy: ", score[1])


# Get Predictions
def get_pred(model, test_gen):
    """Return the predicted class index (argmax over axis 1 of `model.predict`)
    for every sample produced by `test_gen`."""
    return np.argmax(model.predict(test_gen), axis = 1)


# Confusion Matrix
def plot_confusion_matrix(test_gen, y_pred):
    """Draw the confusion matrix of `test_gen.classes` (true) against `y_pred`.

    Args:
        test_gen: iterator exposing `class_indices` (names for the ticks) and
            `classes` (true class index per sample).
        y_pred: predicted class index per sample, in the same order as `test_gen.classes`.
    """
    class_names = list(test_gen.class_indices.keys())
    matrix = confusion_matrix(test_gen.classes, y_pred)

    plt.figure(figsize= (10, 10))
    plt.imshow(matrix, interpolation= 'nearest', cmap= plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()

    positions = np.arange(len(class_names))
    plt.xticks(positions, class_names, rotation= 45)
    plt.yticks(positions, class_names)

    # Cells darker than half the maximum count get white text so the number stays readable.
    half_max = matrix.max() / 2.
    for row, col in np.ndindex(matrix.shape):
        text_color = 'white' if matrix[row, col] > half_max else 'black'
        plt.text(col, row, matrix[row, col], horizontalalignment= 'center', color= text_color)

    plt.tight_layout()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')

    plt.show()


# Defining a convolutional NN block for a sequential CNN model
def conv_block(filters, act='relu'):
    """Return a Sequential block: two 3x3 'same'-padded Conv2D layers with
    `filters` filters and activation `act`, then BatchNormalization and MaxPooling2D."""
    return Sequential([
        Conv2D(filters, 3, activation=act, padding='same'),
        Conv2D(filters, 3, activation=act, padding='same'),
        BatchNormalization(),
        MaxPooling2D(),
    ])


# Defining a dense NN block for a sequential CNN model
def dense_block(units, dropout_rate, act='relu'):
    """Return a Sequential block: Dense(`units`, activation `act`), then
    BatchNormalization and Dropout(`dropout_rate`)."""
    return Sequential([
        Dense(units, activation=act),
        BatchNormalization(),
        Dropout(dropout_rate),
    ])

# Classification

### EfficientNetB3

In [None]:
# Input geometry for the model (same values as in the generator cell).
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# Number of target classes. Note: this rebinds `class_counts`, which earlier held the per-class file counts.
class_counts = len(list(train_gen.class_indices.keys()))

In [None]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB3

In [None]:
# ############ OLD
# # get the pre-trained model (EfficientNetB3)
# base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape = img_shape, pooling= None)

# # fine-tune EfficientNetB3 (Adding some custom layers on top)
# x = base_model.output
# x = GlobalAveragePooling2D()(x)
# x = BatchNormalization()(x)
# x = dense_block(128, 0.5)(x)
# x = dense_block(32, 0.2)(x)
# predictions = Dense(class_counts, activation = "sigmoid")(x)    # output layer with softmax activation

# # the model
# EfficientNetB3_model = Model(inputs = base_model.input, outputs = predictions)

In [None]:
# from tensorflow.keras.utils import plot_model

# # Visualize the model
# plot_model(EfficientNetB3_model, to_file='EfficientNetB3_model.png', show_shapes=True, show_layer_names=True)

In [None]:
############## NEW #########################
# get the pre-trained model (EfficientNetB3) without its ImageNet classification head
# NOTE(review): the generators feed pixels rescaled to [0, 1]; Keras documents
# EfficientNet as expecting raw [0, 255] pixels — confirm the rescaling is intended.
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=img_shape, pooling=None)

# fine-tune EfficientNetB3 (Adding some custom layers on top)
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)

# First dense stage: Dense -> BatchNorm -> ReLU -> Dropout
x = Dense(128)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.4)(x)

# Second, narrower dense stage
x = Dense(64)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.3)(x)

# Output layer: each image has exactly one class and the model is trained with
# categorical cross-entropy, so the outputs must be a softmax distribution
# (independent sigmoids do not sum to 1).
predictions = Dense(class_counts, activation="softmax")(x)

# the model
EfficientNetB3_model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Compile with Adamax (learning rate 0.001) and categorical cross-entropy, matching
# the one-hot labels produced by the generators (class_mode='categorical').
EfficientNetB3_model.compile(Adamax(learning_rate= 0.001), loss= 'categorical_crossentropy', metrics= ['accuracy'])

# Print the layer-by-layer summary.
EfficientNetB3_model.summary()

In [None]:
from tensorflow.keras.utils import plot_model

# Render the model graph, with layer names and tensor shapes, to EfficientNetB3_model.png
plot_model(EfficientNetB3_model, to_file='EfficientNetB3_model.png', show_shapes=True, show_layer_names=True)

In [None]:
epochs = 10   # number of all epochs in training

# Train on train_gen with valid_gen as validation data.
# NOTE: Keras ignores `shuffle` when the input is a generator; batch order comes from train_gen itself.
history = EfficientNetB3_model.fit(train_gen, epochs= epochs, verbose= 1, validation_data= valid_gen, shuffle= False)

In [None]:
# Plot the training/validation loss and accuracy curves recorded in `history`.
model_performance(history, epochs)

In [None]:
# Print loss and accuracy on the train, validation and test iterators.
model_evaluation(EfficientNetB3_model)

In [None]:
# Predicted class index per test image (test_gen is unshuffled, so the order matches test_gen.classes)
y_pred = get_pred(EfficientNetB3_model, test_gen)

# plot the confusion matrix
plot_confusion_matrix(test_gen, y_pred)

In [None]:
# Plot the loss/accuracy curves of the latest `history` and mark the epoch with
# the best validation value on each panel.
tr_acc = history.history['accuracy']
tr_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']
index_loss = np.argmin(val_loss)      # 0-based epoch with the lowest validation loss
val_lowest = val_loss[index_loss]
index_acc = np.argmax(val_acc)        # 0-based epoch with the highest validation accuracy
acc_highest = val_acc[index_acc]
Epochs = [i+1 for i in range(len(tr_acc))]
loss_label = f'best epoch= {str(index_loss + 1)}'
acc_label = f'best epoch= {str(index_acc + 1)}'

plt.figure(figsize= (20, 8))
plt.style.use('fivethirtyeight')

plt.subplot(1, 2, 1)
plt.plot(Epochs, tr_loss, 'r', label= 'Training loss')
plt.plot(Epochs, val_loss, 'g', label= 'Validation loss')
plt.scatter(index_loss + 1, val_lowest, s= 150, c= 'blue', label= loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(Epochs, tr_acc, 'r', label= 'Training Accuracy')
plt.plot(Epochs, val_acc, 'g', label= 'Validation Accuracy')
plt.scatter(index_acc + 1 , acc_highest, s= 150, c= 'blue', label= acc_label)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# tight_layout has to be called; the bare attribute reference did nothing.
plt.tight_layout()
plt.show()

In [None]:
# Per-class classification report for the test-set predictions in `y_pred`.
print(classification_report(test_gen.classes, y_pred, target_names= classes))

### Xception

In [None]:
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adamax

# Input geometry for the model (same values as in the generator cell).
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# Number of target classes.
class_counts = len(list(train_gen.class_indices.keys()))

# Load the pre-trained Xception model without its ImageNet classification head
base_model = Xception(weights='imagenet', include_top=False, input_shape=img_shape, pooling=None)

In [None]:
# Pool the backbone's feature maps with global average pooling
x = GlobalAveragePooling2D()(base_model.output)

# Softmax classification head with one unit per class
output = Dense(class_counts, activation='softmax')(x)

# Create the new model (bound to the generic name `model`)
model = Model(inputs=base_model.input, outputs=output)

In [None]:
# Compile with Adamax (learning rate 0.001) and categorical cross-entropy for the one-hot labels
model.compile(optimizer=Adamax(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model summary
model.summary()

In [None]:
# Number of epochs
epochs = 10

# Train on train_gen with valid_gen as validation data; rebinds the global `history`.
# NOTE: Keras ignores `shuffle` when the input is a generator.
history = model.fit(train_gen, epochs=epochs, verbose=1, validation_data=valid_gen, shuffle=False)

In [None]:
# Plot the training/validation loss and accuracy curves recorded in `history`
model_performance(history, epochs)

In [None]:
# Print loss and accuracy on the train, validation and test iterators
model_evaluation(model)

In [None]:
# Predicted class index per test image (same order as test_gen.classes)
y_pred = get_pred(model, test_gen)

# plot the confusion matrix
plot_confusion_matrix(test_gen, y_pred)

In [None]:

# Plot the loss/accuracy curves of the latest `history` and mark the epoch with
# the best validation value on each panel.
tr_acc = history.history['accuracy']
tr_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']
index_loss = np.argmin(val_loss)      # 0-based epoch with the lowest validation loss
val_lowest = val_loss[index_loss]
index_acc = np.argmax(val_acc)        # 0-based epoch with the highest validation accuracy
acc_highest = val_acc[index_acc]
Epochs = [i+1 for i in range(len(tr_acc))]
loss_label = f'best epoch= {str(index_loss + 1)}'
acc_label = f'best epoch= {str(index_acc + 1)}'

plt.figure(figsize= (20, 8))
plt.style.use('fivethirtyeight')

plt.subplot(1, 2, 1)
plt.plot(Epochs, tr_loss, 'r', label= 'Training loss')
plt.plot(Epochs, val_loss, 'g', label= 'Validation loss')
plt.scatter(index_loss + 1, val_lowest, s= 150, c= 'blue', label= loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(Epochs, tr_acc, 'r', label= 'Training Accuracy')
plt.plot(Epochs, val_acc, 'g', label= 'Validation Accuracy')
plt.scatter(index_acc + 1 , acc_highest, s= 150, c= 'blue', label= acc_label)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# tight_layout has to be called; the bare attribute reference did nothing.
plt.tight_layout()
plt.show()


In [None]:
# Per-class classification report for the test-set predictions in `y_pred`.
print(classification_report(test_gen.classes, y_pred, target_names= classes))

### InceptionV3

In [None]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adamax

# Input geometry for the model (same values as in the generator cell).
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# Number of target classes.
class_counts = len(list(train_gen.class_indices.keys()))

# Load the pre-trained InceptionV3 model without its ImageNet classification head
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=img_shape, pooling=None)

In [None]:
# Pool the backbone's feature maps with global average pooling
x = GlobalAveragePooling2D()(base_model.output)

# Softmax classification head with one unit per class
output = Dense(class_counts, activation='softmax')(x)

# Create the new model
model_Iv3 = Model(inputs=base_model.input, outputs=output)

In [None]:
# Compile with Adamax (learning rate 0.001) and categorical cross-entropy for the one-hot labels
model_Iv3.compile(optimizer=Adamax(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model summary
model_Iv3.summary()

In [None]:
epochs = 10   # number of all epochs in training

# Train on train_gen with valid_gen as validation data; rebinds the global `history`.
# NOTE: Keras ignores `shuffle` when the input is a generator.
history = model_Iv3.fit(train_gen, epochs=epochs, verbose=1, validation_data=valid_gen, shuffle=False)

In [None]:
# Plot the training/validation loss and accuracy curves recorded in `history`
model_performance(history, epochs)

In [None]:
# Print loss and accuracy on the train, validation and test iterators
model_evaluation(model_Iv3)

In [None]:
# Predicted class index per test image (same order as test_gen.classes)
y_pred = get_pred(model_Iv3, test_gen)

# plot the confusion matrix
plot_confusion_matrix(test_gen, y_pred)

In [None]:

# Plot the loss/accuracy curves of the latest `history` and mark the epoch with
# the best validation value on each panel.
tr_acc = history.history['accuracy']
tr_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']
index_loss = np.argmin(val_loss)      # 0-based epoch with the lowest validation loss
val_lowest = val_loss[index_loss]
index_acc = np.argmax(val_acc)        # 0-based epoch with the highest validation accuracy
acc_highest = val_acc[index_acc]
Epochs = [i+1 for i in range(len(tr_acc))]
loss_label = f'best epoch= {str(index_loss + 1)}'
acc_label = f'best epoch= {str(index_acc + 1)}'

plt.figure(figsize= (20, 8))
plt.style.use('fivethirtyeight')

plt.subplot(1, 2, 1)
plt.plot(Epochs, tr_loss, 'r', label= 'Training loss')
plt.plot(Epochs, val_loss, 'g', label= 'Validation loss')
plt.scatter(index_loss + 1, val_lowest, s= 150, c= 'blue', label= loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(Epochs, tr_acc, 'r', label= 'Training Accuracy')
plt.plot(Epochs, val_acc, 'g', label= 'Validation Accuracy')
plt.scatter(index_acc + 1 , acc_highest, s= 150, c= 'blue', label= acc_label)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# tight_layout has to be called; the bare attribute reference did nothing.
plt.tight_layout()
plt.show()


In [None]:
# Per-class classification report for the test-set predictions in `y_pred`.
print(classification_report(test_gen.classes, y_pred, target_names= classes))

### ResNet50

In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adamax

In [None]:
# Input geometry for the model (same values as in the generator cell).
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# Number of target classes.
class_counts = len(list(train_gen.class_indices.keys()))

In [None]:
# Load the pre-trained ResNet50 model without its ImageNet classification head
# NOTE(review): the generators feed pixels rescaled to [0, 1], not the output of
# resnet50.preprocess_input that the ImageNet weights were trained with — confirm this is intended.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=img_shape)

# Pool the backbone's feature maps with global average pooling
x = GlobalAveragePooling2D()(base_model.output)

# Softmax classification head with one unit per class
output = Dense(class_counts, activation='softmax')(x)

# Create the new model
model_resnet50 = Model(inputs=base_model.input, outputs=output)

# Compile with Adamax (learning rate 0.001) and categorical cross-entropy for the one-hot labels
model_resnet50.compile(optimizer=Adamax(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model summary
model_resnet50.summary()

In [None]:
# Number of epochs
epochs = 10

# Train on train_gen with valid_gen as validation data; rebinds the global `history`.
# NOTE: Keras ignores `shuffle` when the input is a generator.
history = model_resnet50.fit(train_gen, epochs=epochs, verbose=1, validation_data=valid_gen, shuffle=False)

In [None]:
# Plot the training/validation loss and accuracy curves recorded in `history`
model_performance(history, epochs)

In [None]:
# Print loss and accuracy on the train, validation and test iterators
model_evaluation(model_resnet50)

In [None]:
# Predicted class index per test image (same order as test_gen.classes)
y_pred = get_pred(model_resnet50, test_gen)

# plot the confusion matrix
plot_confusion_matrix(test_gen, y_pred)

In [None]:

# Plot the loss/accuracy curves of the latest `history` and mark the epoch with
# the best validation value on each panel.
tr_acc = history.history['accuracy']
tr_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']
index_loss = np.argmin(val_loss)      # 0-based epoch with the lowest validation loss
val_lowest = val_loss[index_loss]
index_acc = np.argmax(val_acc)        # 0-based epoch with the highest validation accuracy
acc_highest = val_acc[index_acc]
Epochs = [i+1 for i in range(len(tr_acc))]
loss_label = f'best epoch= {str(index_loss + 1)}'
acc_label = f'best epoch= {str(index_acc + 1)}'

plt.figure(figsize= (20, 8))
plt.style.use('fivethirtyeight')

plt.subplot(1, 2, 1)
plt.plot(Epochs, tr_loss, 'r', label= 'Training loss')
plt.plot(Epochs, val_loss, 'g', label= 'Validation loss')
plt.scatter(index_loss + 1, val_lowest, s= 150, c= 'blue', label= loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(Epochs, tr_acc, 'r', label= 'Training Accuracy')
plt.plot(Epochs, val_acc, 'g', label= 'Validation Accuracy')
plt.scatter(index_acc + 1 , acc_highest, s= 150, c= 'blue', label= acc_label)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# tight_layout has to be called; the bare attribute reference did nothing.
plt.tight_layout()
plt.show()


In [None]:
# Per-class classification report for the test-set predictions in `y_pred`.
print(classification_report(test_gen.classes, y_pred, target_names= classes))

### VGG16

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adamax

# Input geometry for the model (same values as in the generator cell).
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# Number of target classes.
class_counts = len(list(train_gen.class_indices.keys()))

# (disabled) earlier variant of the backbone load; the active one is in the next cell
# # Load the pre-trained VGG16 model
# base_model = VGG16(weights='imagenet', include_top=False, input_shape=img_shape, pooling=None)

In [None]:
# Load the pre-trained VGG16 model without its ImageNet classification head
# NOTE(review): the generators feed pixels rescaled to [0, 1], not the output of
# vgg16.preprocess_input that the ImageNet weights were trained with — confirm this is intended.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=img_shape)

# Pool the backbone's feature maps with global average pooling
x = GlobalAveragePooling2D()(base_model.output)

# Softmax classification head with one unit per class
output = Dense(class_counts, activation='softmax')(x)

# Create the new model
model_vgg16 = Model(inputs=base_model.input, outputs=output)

# Compile with Adamax (learning rate 0.001) and categorical cross-entropy for the one-hot labels
model_vgg16.compile(optimizer=Adamax(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model summary
model_vgg16.summary()

In [None]:
# Number of epochs
epochs = 10

# Train on train_gen with valid_gen as validation data; rebinds the global `history`.
# NOTE: Keras ignores `shuffle` when the input is a generator.
history = model_vgg16.fit(train_gen, epochs=epochs, verbose=1, validation_data=valid_gen, shuffle=False)

In [None]:
# Plot the training/validation loss and accuracy curves recorded in `history`
model_performance(history, epochs)

In [None]:
# Print loss and accuracy on the train, validation and test iterators
model_evaluation(model_vgg16)

In [None]:
# Predicted class index per test image (same order as test_gen.classes)
y_pred = get_pred(model_vgg16, test_gen)

# plot the confusion matrix
plot_confusion_matrix(test_gen, y_pred)

In [None]:

# Plot the loss/accuracy curves of the latest `history` and mark the epoch with
# the best validation value on each panel.
tr_acc = history.history['accuracy']
tr_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']
index_loss = np.argmin(val_loss)      # 0-based epoch with the lowest validation loss
val_lowest = val_loss[index_loss]
index_acc = np.argmax(val_acc)        # 0-based epoch with the highest validation accuracy
acc_highest = val_acc[index_acc]
Epochs = [i+1 for i in range(len(tr_acc))]
loss_label = f'best epoch= {str(index_loss + 1)}'
acc_label = f'best epoch= {str(index_acc + 1)}'

plt.figure(figsize= (20, 8))
plt.style.use('fivethirtyeight')

plt.subplot(1, 2, 1)
plt.plot(Epochs, tr_loss, 'r', label= 'Training loss')
plt.plot(Epochs, val_loss, 'g', label= 'Validation loss')
plt.scatter(index_loss + 1, val_lowest, s= 150, c= 'blue', label= loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(Epochs, tr_acc, 'r', label= 'Training Accuracy')
plt.plot(Epochs, val_acc, 'g', label= 'Validation Accuracy')
plt.scatter(index_acc + 1 , acc_highest, s= 150, c= 'blue', label= acc_label)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# tight_layout has to be called; the bare attribute reference did nothing.
plt.tight_layout()
plt.show()


In [None]:
# Per-class classification report for the test-set predictions in `y_pred`.
print(classification_report(test_gen.classes, y_pred, target_names= classes))

### CNN

In [None]:
# Input geometry for the model (same values as in the generator cell).
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)

# Number of target classes.
class_counts = len(list(train_gen.class_indices.keys()))

In [None]:
# Custom CNN: five convolutional stages (16 -> 256 filters), then three dense
# blocks and a softmax classifier.
cnn_model = Sequential()

# first conv block (single convolution; it also declares the input shape)
cnn_model.add(Conv2D(filters=16, kernel_size=(3,3), padding="same", activation="relu", input_shape= img_shape))
cnn_model.add(BatchNormalization())
cnn_model.add(MaxPooling2D())

# second conv block
cnn_model.add(conv_block(32))

# third conv block
cnn_model.add(conv_block(64))

# fourth conv block
cnn_model.add(conv_block(128))

# fifth conv block
cnn_model.add(conv_block(256))

# flatten layer
cnn_model.add(Flatten())

# first dense block
cnn_model.add(dense_block(128, 0.5))

# second dense block
cnn_model.add(dense_block(64, 0.3))

# third dense block
cnn_model.add(dense_block(32, 0.2))

# output layer: every image belongs to exactly one class, so the outputs must form
# a softmax distribution rather than independent sigmoid scores
cnn_model.add(Dense(class_counts, activation = "softmax"))

In [None]:
# Single-label multi-class problem with one-hot targets -> categorical cross-entropy.
# With 'binary_crossentropy', Keras resolves the 'accuracy' string to binary accuracy,
# which scores each of the one-hot positions separately and inflates the reported value.
cnn_model.compile(Adamax(learning_rate= 0.001), loss= 'categorical_crossentropy', metrics= ['accuracy'])

# Print the layer-by-layer summary.
cnn_model.summary()

In [None]:
epochs = 10   # number of all epochs in training

# Train on train_gen with valid_gen as validation data; rebinds the global `history`.
# NOTE: Keras ignores `shuffle` when the input is a generator.
history = cnn_model.fit(train_gen, epochs= epochs, verbose= 1, validation_data= valid_gen, shuffle= False)

In [None]:
# Plot the training/validation loss and accuracy curves recorded in `history`
model_performance(history, epochs)

In [None]:
# Print loss and accuracy on the train, validation and test iterators
model_evaluation(cnn_model)

In [None]:
# Predicted class index per test image (same order as test_gen.classes)
y_pred = get_pred(cnn_model, test_gen)

# plot the confusion matrix
plot_confusion_matrix(test_gen, y_pred)

In [None]:

# Plot the loss/accuracy curves of the latest `history` and mark the epoch with
# the best validation value on each panel.
tr_acc = history.history['accuracy']
tr_loss = history.history['loss']
val_acc = history.history['val_accuracy']
val_loss = history.history['val_loss']
index_loss = np.argmin(val_loss)      # 0-based epoch with the lowest validation loss
val_lowest = val_loss[index_loss]
index_acc = np.argmax(val_acc)        # 0-based epoch with the highest validation accuracy
acc_highest = val_acc[index_acc]
Epochs = [i+1 for i in range(len(tr_acc))]
loss_label = f'best epoch= {str(index_loss + 1)}'
acc_label = f'best epoch= {str(index_acc + 1)}'

plt.figure(figsize= (20, 8))
plt.style.use('fivethirtyeight')

plt.subplot(1, 2, 1)
plt.plot(Epochs, tr_loss, 'r', label= 'Training loss')
plt.plot(Epochs, val_loss, 'g', label= 'Validation loss')
plt.scatter(index_loss + 1, val_lowest, s= 150, c= 'blue', label= loss_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(Epochs, tr_acc, 'r', label= 'Training Accuracy')
plt.plot(Epochs, val_acc, 'g', label= 'Validation Accuracy')
plt.scatter(index_acc + 1 , acc_highest, s= 150, c= 'blue', label= acc_label)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# tight_layout has to be called; the bare attribute reference did nothing.
plt.tight_layout()
plt.show()


In [None]:
# Per-class classification report for the test-set predictions in `y_pred`.
print(classification_report(test_gen.classes, y_pred, target_names= classes))

# Explainable AI


In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model

In [None]:
# Show up to 25 images of one validation batch in a 5 x 5 grid, titled with their class.
test_dict=test_gen.class_indices
classes=list(test_dict.keys())
x_batch,labels=next(valid_gen)   # one batch of images with their one-hot labels
plt.figure(figsize=(20, 20))
length=len(labels)
r=min(length, 25)   # the grid holds at most 25 images
for i, (sample, one_hot) in enumerate(zip(x_batch[:r], labels[:r])):
    plt.subplot(5, 5, i + 1)
    plt.imshow(sample)
    index=np.argmax(one_hot)     # position of the 1 in the one-hot label
    class_name=classes[index]
    plt.title(class_name, color='blue', fontsize=16)
    plt.axis('off')
plt.show()

In [None]:
# Show a 4 x 3 grid of randomly chosen images from one test batch, each titled
# with its own true class.
fig,ax=plt.subplots(4,3)
fig.set_size_inches(30,30)

# One batch from the test iterator; `x_batch` is also the input of the LIME cells below.
x_batch, y_batch = next(test_gen)
print(y_batch)
for i in range (0,4):
    for j in range(3):
        # Draw from the actual batch length (a final batch can be shorter than batch_size).
        random_example = np.random.randint(0, len(x_batch))
        ax[i,j].imshow(x_batch[random_example])
        # Title from this image's own label; previously every panel reused the stale
        # `class_name` left over from the preceding cell.
        ax[i,j].set_title(classes[np.argmax(y_batch[random_example])])

### LIME

In [None]:
!pip install lime

In [None]:
# Create the LIME image explainer used by the cells below.
import lime
from lime import lime_image
explainer = lime_image.LimeImageExplainer()

In [None]:
# Explain the EfficientNetB3 prediction for image 9 of the current `x_batch`,
# using 10000 perturbed samples and keeping the top 5 labels.
explanation = explainer.explain_instance(x_batch[9], EfficientNetB3_model.predict, top_labels=5, hide_color=0, num_samples=10000)

In [None]:
from skimage.segmentation import mark_boundaries

# Panel 2: only the superpixels that support the top label, everything else hidden.
temp_1, mask_1 = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=10000, hide_rest=True)
# Panel 3: supporting and opposing superpixels on the full image. The original made
# the same call twice, so panels 2 and 3 were identical.
temp_2, mask_2 = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=10000, hide_rest=False)

fig, (ax,ax1, ax2) = plt.subplots(1, 3, figsize=(15,15))
ax.imshow(x_batch[9])                          # the explained image
ax1.imshow(mark_boundaries(temp_1, mask_1))
ax2.imshow(mark_boundaries(temp_2, mask_2))

ax.axis('off')
ax1.axis('off')
ax2.axis('off')

plt.savefig('mask_default.png')

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from lime import lime_image
from skimage.segmentation import mark_boundaries

img_path = "/content/lung_colon_image_set/colon_image_sets/colon_aca/colonca1009.jpeg"

# Load the image the way the training generators present data to the models:
# RGB, resized to img_size, pixel values scaled to [0, 1].
img = np.asarray(Image.open(img_path).convert('RGB').resize(img_size), dtype=np.float64) / 255.0

def predict_fn(images):
    """Return the trained EfficientNetB3 class probabilities for a batch of images.

    LIME calls this with batches of perturbed copies of `img`. The previous
    implementation returned random numbers, so the explanation was noise.
    """
    return EfficientNetB3_model.predict(np.asarray(images), verbose=0)

explainer = lime_image.LimeImageExplainer()

explanation = explainer.explain_instance(img, predict_fn, top_labels=5, hide_color=0, num_samples=1000)

plt.imshow(img)
plt.axis('off')
plt.title('Original Image')
plt.show()

temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=5, hide_rest=False)
# `temp` is already in [0, 1], so it is drawn as is (the `/ 2 + 0.5` rescaling only
# applies to images stored in [-1, 1]).
plt.imshow(mark_boundaries(temp, mask))
plt.axis('off')
plt.title('LIME Explanation')
plt.show()

### SHAP

In [None]:
!pip install shap

In [None]:
import os
import numpy as np
import tensorflow as tf
import shap
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

# Define the dataset directory
# dataset_dir = "/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set"

# Define the path to the image you want to explain
image_path = "/content/lung_colon_image_set/lung_image_sets/lung_aca/lungaca1000.jpeg"

# Define the preprocess function
def preprocess_image(image_path):
    """Load a JPEG as a float32 RGB array resized to 224 x 224; pixel values stay in [0, 255]."""
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224])  # resize to the model's input size
    image = tf.cast(image, tf.float32)          # float32, not normalized
    return image.numpy()

# Preprocess the image
image = preprocess_image(image_path)

def f(x):
    """Model function for SHAP: map a batch of [0, 255] images to class scores.

    The pixels are scaled by 1/255, exactly as the training generators do. The
    previous version applied ResNet50's `preprocess_input` instead, which does not
    match how EfficientNetB3_model was trained.
    """
    tmp = x.copy().astype(np.float32) / 255.0
    return EfficientNetB3_model(tmp)

# Create an Image masker for SHAP (masked regions are replaced by a blurred version)
masker_blur = shap.maskers.Image("blur(224,224)", shape=(224, 224, 3))

# Create the SHAP explainer
explainer_blur = shap.Explainer(f, masker_blur)

# Explain the four highest-scoring classes for the image
shap_values_fine = explainer_blur(image[np.newaxis, :, :, :], max_evals=500, outputs=shap.Explanation.argsort.flip[:4])

# Plot the SHAP values
shap.image_plot(shap_values_fine)

In [None]:
def preprocess_image(image_path):
    """Load a JPEG as a float32 RGB array resized to 224 x 224 with pixel values in [0, 1]."""
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [224, 224])  # resize to the model's input size
    image = tf.cast(image, tf.float32) / 255.0   # normalize pixel values to [0, 1]
    return image.numpy()

image = preprocess_image(image_path)

def f(x):
    """Model function for SHAP: map a batch of [0, 1] images to class scores.

    The images are already scaled like the training data, so they go to the model
    unchanged. The previous version applied ResNet50's `preprocess_input`, which
    does not match how EfficientNetB3_model was trained.
    """
    return EfficientNetB3_model(x.copy())

# Create an Image masker for SHAP (masked regions are replaced by a blurred version)
masker_blur = shap.maskers.Image("blur(224,224)", shape=(224, 224, 3))

# Create the SHAP explainer
explainer_blur = shap.Explainer(f, masker_blur)

# Explain predictions on the image.
# NOTE(review): the slice [2:8:2] over the ranked outputs keeps only the 3rd and 5th
# ranked of the 5 classes — confirm this selection is intended.
shap_values_fine = explainer_blur(image[np.newaxis, :, :, :], max_evals=5000, outputs=shap.Explanation.argsort.flip[2:8:2])

# Plot the SHAP values
shap.image_plot(shap_values_fine)