In [None]:
# Print a message to indicate the start of the process
print("Uploading packages...")

# Import necessary libraries
import os               # For operating system-related functions
import pandas as pd    # For data manipulation and analysis
import numpy as np     # For numerical operations
import tensorflow as tf # For deep learning with TensorFlow
import matplotlib.pyplot as plt # For plotting graphs and charts
import seaborn as sns  # For creating informative and attractive visualizations

# Import specific modules and functions from TensorFlow and scikit-learn
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# Print a message to indicate that the packages have been successfully loaded
print("Packages loaded.")

In [None]:
def training_validation_plots(history_model, start_epoch=0):
    '''
    Function to generate plots of loss and accuracy for training and validation sets.

    Parameters:
    - history_model: Object exposing a `.history` dict with per-epoch lists under
                     the keys 'accuracy', 'loss', 'val_accuracy' and 'val_loss'
                     (e.g. the value returned by `model.fit`).
    - start_epoch: Zero-based index of the first epoch to draw (default is 0).
                   The same slice is applied to BOTH subplots.

    Output:
    - Two side-by-side plots: the first one shows training and validation losses,
      the second one shows training and validation accuracies. Each plot marks
      the best epoch over the whole history (lowest validation loss / highest
      validation accuracy), even if that epoch lies before `start_epoch`.
    '''

    # Extract relevant historical training data
    history = history_model.history
    tr_acc = history['accuracy']        # Training accuracy
    tr_loss = history['loss']           # Training loss
    val_acc = history['val_accuracy']   # Validation accuracy
    val_loss = history['val_loss']      # Validation loss

    # Best epochs are computed over the full history, not only the drawn slice
    index_loss = np.argmin(val_loss)
    val_lowest = val_loss[index_loss]
    index_acc = np.argmax(val_acc)
    acc_highest = val_acc[index_acc]

    # 1-based epoch numbers for the x-axis, restricted to the requested window
    Epochs = list(range(1, len(tr_acc) + 1))
    shown_epochs = Epochs[start_epoch:]

    # Labels to indicate the best epochs for loss and accuracy
    loss_label = f'best epoch= {index_loss + 1}'
    acc_label = f'best epoch= {index_acc + 1}'

    # Plot training history
    sns.set(font_scale=1.5)
    plt.figure(figsize=(20, 8), facecolor="w")
    plt.style.use('fivethirtyeight')

    # Subplot 1: Training and Validation Loss
    plt.subplot(1, 2, 1)
    plt.plot(shown_epochs, tr_loss[start_epoch:], 'r', label='Training loss')
    plt.plot(shown_epochs, val_loss[start_epoch:], 'g', label='Validation loss')
    plt.scatter(index_loss + 1, val_lowest, s=150, c='blue', label=loss_label)
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Subplot 2: Training and Validation Accuracy
    # (previously drew every epoch regardless of start_epoch)
    plt.subplot(1, 2, 2)
    plt.plot(shown_epochs, tr_acc[start_epoch:], 'r', label='Training Accuracy')
    plt.plot(shown_epochs, val_acc[start_epoch:], 'g', label='Validation Accuracy')
    plt.scatter(index_acc + 1, acc_highest, s=150, c='blue', label=acc_label)
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # Ensure tight layout and display the plots
    plt.tight_layout()
    plt.show()


In [None]:
def plot_confusion_matrix(test_true_labels, test_predictions, class_names, title=None):
    '''
    Function to create and display a confusion matrix plot.

    Parameters:
    - test_true_labels: True class labels for the test data.
    - test_predictions: Predicted class labels for the test data.
    - class_names: List of class names used as tick labels on both axes.
    - title: Optional plot title. When omitted, the title is
             "<Word> Confusion Matrix" if every class name starts with the same
             word (e.g. "Colon ..." -> "Colon Confusion Matrix"), otherwise
             just "Confusion Matrix".

    Output:
    - Displayed confusion matrix plot.
    '''

    # The title used to be hard-coded to "Colon Confusion Matrix", which
    # mislabelled plots of other datasets; infer it unless one is supplied.
    if title is None:
        leading_words = {str(name).split()[0] for name in class_names if str(name).split()}
        prefix = f"{leading_words.pop()} " if len(leading_words) == 1 else ""
        title = f"{prefix}Confusion Matrix"

    # Create a confusion matrix
    confusion = confusion_matrix(test_true_labels, test_predictions)

    # Create a figure for the confusion matrix plot
    plt.figure(figsize=(5.7, 3.7), facecolor="w")

    # Set the font scale for better readability
    sns.set(font_scale=1.4)

    # Create a heatmap of the confusion matrix (integer counts, no colour bar)
    sns.heatmap(confusion, annot=True, fmt="d", cmap="Blues", cbar=False,
                xticklabels=class_names, yticklabels=class_names)

    # Add labels for the x and y axes
    plt.xlabel("Predicted", labelpad=40)
    plt.ylabel("True", labelpad=40)

    # Add a title to the plot
    plt.title(title)

    # Display the plot
    plt.show()

In [None]:
# Define the directory where the data is located
data_dir = "/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set"

# Map each class directory name to the label string used in the DataFrame
label_by_dir = {
    "colon_aca": "Colon Adenocarcinoma",
    "colon_n": "Colon Benign Tissue",
    "lung_aca": "Lung Adenocarcinoma",
    "lung_n": "Lung Benign Tissue",
    "lung_scc": "Lung Squamous Cell Carcinoma",
}

# Parallel lists: filepaths[i] is labelled labels[i]
filepaths = []
labels = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore the seeded train/val/test splits made later) reproducible.
folds = sorted(os.listdir(data_dir))

# Walk data_dir/<fold>/<class directory>/<image file>
for fold in folds:
    foldpath = os.path.join(data_dir, fold)
    if not os.path.isdir(foldpath):
        continue  # stray file next to the fold directories

    for f in sorted(os.listdir(foldpath)):
        f_path = os.path.join(foldpath, f)
        label = label_by_dir.get(f)

        # Skip anything that is not a known class directory. Previously the
        # path was appended even when no label matched, which left the two
        # lists with different lengths and misaligned the DataFrame rows.
        if label is None or not os.path.isdir(f_path):
            continue

        for file in sorted(os.listdir(f_path)):
            filepaths.append(os.path.join(f_path, file))
            labels.append(label)

# Create two Pandas Series for file paths and labels
Fseries = pd.Series(filepaths, name="filepaths")
Lseries = pd.Series(labels, name="labels")

# Concatenate the two Series into one DataFrame
df = pd.concat([Fseries, Lseries], axis=1)

In [None]:
# Partition the combined frame into colon and lung subsets by label membership
colon_labels = ["Colon Benign Tissue", "Colon Adenocarcinoma"]
lung_labels = ["Lung Adenocarcinoma", "Lung Benign Tissue", "Lung Squamous Cell Carcinoma"]

df_colon = df[df["labels"].isin(colon_labels)]
df_lung = df[df["labels"].isin(lung_labels)]

In [None]:
# Per-class image counts in the lung subset (shown as the cell output)
lung_class_counts = df_lung["labels"].value_counts()
lung_class_counts

In [None]:
# Per-class image counts in the colon subset (shown as the cell output)
colon_class_counts = df_colon["labels"].value_counts()
colon_class_counts

# Colon

In [None]:
# Colon data: stratified train / validation / test split (80% / 10% / 10%)

# Options shared by both splitting steps
colon_split_options = dict(shuffle=True, random_state=42)

# Step 1: keep 80% for training, hold out the remaining 20%
train_df_colon, tmp_df_colon = train_test_split(
    df_colon,
    train_size=0.8,
    stratify=df_colon["labels"],
    **colon_split_options,
)

# Step 2: halve the held-out part into validation and test, again stratified by label
strat_colon = tmp_df_colon["labels"]
val_df_colon, test_df_colon = train_test_split(
    tmp_df_colon,
    train_size=0.5,
    stratify=strat_colon,
    **colon_split_options,
)

In [None]:
# Build the train / validation / test image generators for the colon data

# Batch size and target image dimensions (also used when defining the models)
batch_size = 128
X = Y = 224

# Settings common to all three colon generators
# NOTE(review): ImageDataGenerator() is created with default arguments, so no
# rescaling or augmentation is configured here — confirm that is intended.
colon_flow_options = dict(
    x_col="filepaths",           # column holding the image paths
    y_col="labels",              # column holding the class labels
    class_mode="binary",         # two-class problem, one label per image
    target_size=(X, Y),          # resize every image to X x Y
    color_mode="rgb",
    batch_size=batch_size,
    seed=42,
)

# Training and validation batches are shuffled
train_generator_colon = ImageDataGenerator().flow_from_dataframe(
    train_df_colon, shuffle=True, **colon_flow_options)

val_generator_colon = ImageDataGenerator().flow_from_dataframe(
    val_df_colon, shuffle=True, **colon_flow_options)

# Test batches keep the DataFrame order so predictions line up with `.classes`
test_generator_colon = ImageDataGenerator().flow_from_dataframe(
    test_df_colon, shuffle=False, **colon_flow_options)

In [None]:
# Class-name -> integer-index mapping assigned by the colon training generator
class_indices = train_generator_colon.class_indices

# Show the mapping so the label order used for reports can be checked by eye
print(class_indices)

In [None]:
# Class names for colon image classification, ordered by the integer index the
# training generator assigned to each class. Deriving the list from
# `class_indices` (instead of hard-coding it) guarantees that the names used in
# reports and confusion matrices match the label encoding.
_colon_class_indices = train_generator_colon.class_indices
class_names_colon = sorted(_colon_class_indices, key=_colon_class_indices.get)

In [None]:
# True class indices of the colon test images, read from the test generator
# (created with shuffle=False, so the order follows test_df_colon)
test_true_labels_colon = test_generator_colon.classes

# Colon Model 1

In [None]:
# Colon model 1: binary classifier built layer by layer
model_1_colon = keras.models.Sequential()

# Convolutional stages (ReLU conv followed by 2x2 max pooling);
# only the first layer declares the input shape
model_1_colon.add(keras.layers.Conv2D(64, 3, activation="relu", padding="same",
                                      input_shape=(X, Y, 3)))
model_1_colon.add(keras.layers.MaxPooling2D(2))
for n_filters in (128, 256, 512):
    model_1_colon.add(keras.layers.Conv2D(n_filters, 3, activation="relu", padding="same"))
    model_1_colon.add(keras.layers.MaxPooling2D(2))

# Flatten the feature maps for the dense head
model_1_colon.add(keras.layers.Flatten())

# Fully connected layers, each followed by 50% dropout
for n_units in (512, 256):
    model_1_colon.add(keras.layers.Dense(n_units, activation="relu"))
    model_1_colon.add(keras.layers.Dropout(0.5))

# Single sigmoid unit for the binary output
model_1_colon.add(keras.layers.Dense(1, activation="sigmoid"))

# Adamax optimizer with binary cross-entropy loss; track accuracy
model_1_colon.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer summary
model_1_colon.summary()

In [None]:
# Fit colon model 1 for 30 epochs, validating on the colon validation generator
colon_m1_fit_options = dict(
    epochs=30,
    validation_data=val_generator_colon,
    steps_per_epoch=len(train_generator_colon),    # one pass over all training batches
    validation_steps=len(val_generator_colon),     # one pass over all validation batches
)
history_1_colon = model_1_colon.fit(train_generator_colon, **colon_m1_fit_options)


In [None]:
# Run colon model 1 over every test batch to obtain sigmoid outputs
colon_m1_test_probabilities = model_1_colon.predict(
    test_generator_colon,
    steps=len(test_generator_colon),
    verbose=1,
)

# Outputs above 0.5 become class 1, everything else class 0
test_predictions_model_1_colon = (colon_m1_test_probabilities > 0.5).astype(int)

In [None]:
# Loss and accuracy curves of model_1_colon over all epochs
training_validation_plots(history_model=history_1_colon)

In [None]:
# Same curves for model_1_colon, this time plotted from index 1 (skipping the first epoch)
training_validation_plots(history_model=history_1_colon, start_epoch=1)

In [None]:
# Per-class precision / recall / F1 for colon model 1 on the test set (4 decimals)
class_report = classification_report(
    test_true_labels_colon,
    test_predictions_model_1_colon,
    target_names=class_names_colon,
    digits=4,
)

# Heading and report, one per line
print("Classification Report:", class_report, sep="\n")

In [None]:
# Confusion matrix of colon model 1: true test labels vs. thresholded predictions
plot_confusion_matrix(test_generator_colon.classes,
                      test_predictions_model_1_colon,
                      class_names_colon)


# Colon Model 2

In [None]:
# Colon model 2: binary classifier with five tanh convolutional stages,
# trained with SGD + momentum

model_2_colon = keras.models.Sequential()

# Convolutional stages (tanh conv followed by 2x2 max pooling);
# only the first layer declares the input shape
model_2_colon.add(keras.layers.Conv2D(64, 3, activation="tanh", padding="same",
                                      input_shape=(X, Y, 3)))
model_2_colon.add(keras.layers.MaxPooling2D(2))
for n_filters in (128, 256, 512, 700):
    model_2_colon.add(keras.layers.Conv2D(n_filters, 3, activation="tanh", padding="same"))
    model_2_colon.add(keras.layers.MaxPooling2D(2))

# Flatten the feature maps for the dense head
model_2_colon.add(keras.layers.Flatten())

# Fully connected ReLU layers, each followed by 50% dropout
for n_units in (512, 256):
    model_2_colon.add(keras.layers.Dense(n_units, activation="relu"))
    model_2_colon.add(keras.layers.Dropout(0.5))

# Single sigmoid unit for the binary output
model_2_colon.add(keras.layers.Dense(1, activation="sigmoid"))

# Legacy SGD optimizer with momentum and learning-rate decay
colon_m2_optimizer = tf.keras.optimizers.legacy.SGD(momentum=0.9,
                                                    learning_rate=0.001,
                                                    decay=0.01)
model_2_colon.compile(
    optimizer=colon_m2_optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer summary
model_2_colon.summary()

In [None]:
# Fit colon model 2 for 30 epochs, validating on the colon validation generator
colon_m2_fit_options = dict(
    epochs=30,
    validation_data=val_generator_colon,
    steps_per_epoch=len(train_generator_colon),    # one pass over all training batches
    validation_steps=len(val_generator_colon),     # one pass over all validation batches
)
history_2_colon = model_2_colon.fit(train_generator_colon, **colon_m2_fit_options)

In [None]:
# Run colon model 2 over every test batch to obtain sigmoid outputs
colon_m2_test_probabilities = model_2_colon.predict(
    test_generator_colon,
    steps=len(test_generator_colon),
    verbose=1,
)

# Outputs above 0.5 become class 1, everything else class 0
test_predictions_model_2_colon = (colon_m2_test_probabilities > 0.5).astype(int)

In [None]:
# Loss and accuracy curves of model_2_colon over all epochs
training_validation_plots(history_model=history_2_colon)

In [None]:
# Per-class precision / recall / F1 for colon model 2 on the test set (4 decimals)
class_report = classification_report(
    test_true_labels_colon,
    test_predictions_model_2_colon,
    target_names=class_names_colon,
    digits=4,
)
print("Classification Report:", class_report, sep="\n")

In [None]:
# Confusion matrix of colon model 2 on the test set
plot_confusion_matrix(test_true_labels=test_true_labels_colon,
                      test_predictions=test_predictions_model_2_colon,
                      class_names=class_names_colon)

# Lung

In [None]:
# Lung data: stratified train / validation / test split (80% / 10% / 10%)

# Options shared by both splitting steps
lung_split_options = dict(shuffle=True, random_state=42)

# Step 1: keep 80% for training, hold out the remaining 20%
train_df_lung, tmp_df_lung = train_test_split(
    df_lung,
    train_size=0.8,
    stratify=df_lung["labels"],
    **lung_split_options,
)

# Step 2: halve the held-out part into validation and test, again stratified by label
strat_lung = tmp_df_lung["labels"]
val_df_lung, test_df_lung = train_test_split(
    tmp_df_lung,
    train_size=0.5,
    stratify=strat_lung,
    **lung_split_options,
)

In [None]:
# Build the train / validation / test image generators for the lung data

# Batch size and target image dimensions (also used when defining the models)
batch_size = 128
X = Y = 224

# Settings common to all three lung generators
# NOTE(review): ImageDataGenerator() is created with default arguments, so no
# rescaling or augmentation is configured here — confirm that is intended.
lung_flow_options = dict(
    x_col="filepaths",           # column holding the image paths
    y_col="labels",              # column holding the class labels
    class_mode="categorical",    # multi-class labels, one-hot encoded
    target_size=(X, Y),          # resize every image to X x Y
    color_mode="rgb",
    batch_size=batch_size,
    seed=42,
)

# Training and validation batches are shuffled
train_generator_lung = ImageDataGenerator().flow_from_dataframe(
    train_df_lung, shuffle=True, **lung_flow_options)

val_generator_lung = ImageDataGenerator().flow_from_dataframe(
    val_df_lung, shuffle=True, **lung_flow_options)

# Test batches keep the DataFrame order so predictions line up with `.classes`
test_generator_lung = ImageDataGenerator().flow_from_dataframe(
    test_df_lung, shuffle=False, **lung_flow_options)


In [None]:
# Class-name -> integer-index mapping assigned by the lung training generator
# (this rebinds the `class_indices` name used earlier for the colon generator)
class_indices = train_generator_lung.class_indices

# Show the mapping so the label order used for reports can be checked by eye
print(class_indices)

In [None]:
# Class names for lung image classification, ordered by the integer index the
# training generator assigned to each class. Deriving the list from
# `class_indices` (instead of hard-coding it) guarantees that the names used in
# reports and confusion matrices match the label encoding.
_lung_class_indices = train_generator_lung.class_indices
class_names_lung = sorted(_lung_class_indices, key=_lung_class_indices.get)

In [None]:
# True class indices of the lung test images, read from the test generator
# (created with shuffle=False, so the order follows test_df_lung)
test_true_labels_lung = test_generator_lung.classes

# Lung Model 1

In [None]:
# Number of output units = number of classes known to the lung training generator
class_number = len(train_generator_lung.class_indices)

# Lung model 1: multi-class classifier built layer by layer
model_1_lung = keras.models.Sequential()

# Convolutional stages (ReLU conv followed by 2x2 max pooling);
# only the first layer declares the input shape
model_1_lung.add(keras.layers.Conv2D(64, 3, activation="relu", padding="same",
                                     input_shape=(X, Y, 3)))
model_1_lung.add(keras.layers.MaxPooling2D(2))
for n_filters in (128, 256, 512):
    model_1_lung.add(keras.layers.Conv2D(n_filters, 3, activation="relu", padding="same"))
    model_1_lung.add(keras.layers.MaxPooling2D(2))

# Flatten the feature maps for the dense head
model_1_lung.add(keras.layers.Flatten())

# Fully connected layers, each followed by 50% dropout
for n_units in (512, 256):
    model_1_lung.add(keras.layers.Dense(n_units, activation="relu"))
    model_1_lung.add(keras.layers.Dropout(0.5))

# Softmax over the classes for the multi-class output
model_1_lung.add(keras.layers.Dense(class_number, activation="softmax"))

# Adamax optimizer with categorical cross-entropy loss; track accuracy
model_1_lung.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer summary
model_1_lung.summary()

In [None]:
# Fit lung model 1 for 30 epochs, validating on the lung validation generator
lung_m1_fit_options = dict(
    epochs=30,
    validation_data=val_generator_lung,
    steps_per_epoch=len(train_generator_lung),    # one pass over all training batches
    validation_steps=len(val_generator_lung),     # one pass over all validation batches
)
history_1_lung = model_1_lung.fit(train_generator_lung, **lung_m1_fit_options)

In [None]:
# Softmax outputs of lung model 1 for the test data (one row per image)
lung_m1_test_probabilities = model_1_lung.predict(test_generator_lung)

# Predicted class = index of the largest output in each row
test_predictions_model_1_lung = lung_m1_test_probabilities.argmax(axis=1)

In [None]:
# Loss and accuracy curves of model_1_lung over all epochs
training_validation_plots(history_model=history_1_lung)

In [None]:
# Same curves for model_1_lung, plotted from index 1 (skipping the first epoch)
training_validation_plots(history_model=history_1_lung, start_epoch=1)

In [None]:
# Generate a classification report using the true labels and model predictions
class_report = classification_report(test_true_labels_lung,
                                     test_predictions_model_1_lung,
                                     target_names=class_names_lung,
                                     digits=4)

# Print the classification report
print("Classification Report:")
print(class_report)

In [None]:
# Confusion matrix of lung model 1 on the test set
plot_confusion_matrix(test_true_labels=test_true_labels_lung,
                      test_predictions=test_predictions_model_1_lung,
                      class_names=class_names_lung)

# Lung Model 2

In [None]:
# Number of output units = number of classes known to the lung training generator
class_number = len(train_generator_lung.class_indices)

# Lung model 2: multi-class classifier with five tanh convolutional stages,
# trained with SGD + momentum
model_2_lung = keras.models.Sequential()

# Convolutional stages (tanh conv followed by 2x2 max pooling);
# only the first layer declares the input shape
model_2_lung.add(keras.layers.Conv2D(64, 3, activation="tanh", padding="same",
                                     input_shape=(X, Y, 3)))
model_2_lung.add(keras.layers.MaxPooling2D(2))
for n_filters in (128, 256, 512, 700):
    model_2_lung.add(keras.layers.Conv2D(n_filters, 3, activation="tanh", padding="same"))
    model_2_lung.add(keras.layers.MaxPooling2D(2))

# Flatten the feature maps for the dense head
model_2_lung.add(keras.layers.Flatten())

# Fully connected ReLU layers, each followed by 50% dropout
for n_units in (512, 256):
    model_2_lung.add(keras.layers.Dense(n_units, activation="relu"))
    model_2_lung.add(keras.layers.Dropout(0.5))

# Softmax over the classes for the multi-class output
model_2_lung.add(keras.layers.Dense(class_number, activation="softmax"))

# Legacy SGD optimizer with momentum and learning-rate decay
lung_m2_optimizer = tf.keras.optimizers.legacy.SGD(momentum=0.9,
                                                   learning_rate=0.001,
                                                   decay=0.01)
model_2_lung.compile(
    optimizer=lung_m2_optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer summary
model_2_lung.summary()

In [None]:
# Fit lung model 2 for 30 epochs, validating on the lung validation generator
lung_m2_fit_options = dict(
    epochs=30,
    validation_data=val_generator_lung,
    steps_per_epoch=len(train_generator_lung),    # one pass over all training batches
    validation_steps=len(val_generator_lung),     # one pass over all validation batches
)
history_2_lung = model_2_lung.fit(train_generator_lung, **lung_m2_fit_options)

In [None]:
# Softmax outputs of lung model 2 for the test data (one row per image)
lung_m2_test_probabilities = model_2_lung.predict(test_generator_lung)

# Predicted class = index of the largest output in each row
test_predictions_model_2_lung = lung_m2_test_probabilities.argmax(axis=1)

In [None]:
# Loss and accuracy curves of model_2_lung over all epochs
training_validation_plots(history_model=history_2_lung)

In [None]:
# Predict on test data with lung model 2, keeping the raw (un-argmaxed) outputs.
# NOTE(review): this repeats the inference already performed when
# `test_predictions_model_2_lung` was computed, and neither `test_predictions`
# nor `test_true_labels` is referenced by any later cell shown here — the
# report and confusion matrix use the `*_lung` variables. Confirm whether this
# cell is still needed.
test_predictions = model_2_lung.predict(test_generator_lung,
                                        steps=len(test_generator_lung), 
                                        verbose=1)

# Take true labels from test data generator (same values as test_true_labels_lung)
test_true_labels = test_generator_lung.classes

In [None]:
# Per-class precision / recall / F1 for lung model 2 on the test set (4 decimals)
class_report = classification_report(
    test_true_labels_lung,
    test_predictions_model_2_lung,
    target_names=class_names_lung,
    digits=4,
)
print("Classification Report:", class_report, sep="\n")

In [None]:
# Confusion matrix of lung model 2 on the test set
plot_confusion_matrix(test_true_labels=test_true_labels_lung,
                      test_predictions=test_predictions_model_2_lung,
                      class_names=class_names_lung)

# Experiments

In [None]:
# Experiment: colon model 1 architecture with the dropout layers removed

model_1_colon_no_dropout = keras.models.Sequential()

# Convolutional stages (ReLU conv followed by 2x2 max pooling);
# only the first layer declares the input shape
model_1_colon_no_dropout.add(keras.layers.Conv2D(64, 3, activation="relu", padding="same",
                                                 input_shape=(X, Y, 3)))
model_1_colon_no_dropout.add(keras.layers.MaxPooling2D(2))
for n_filters in (128, 256, 512):
    model_1_colon_no_dropout.add(keras.layers.Conv2D(n_filters, 3, activation="relu", padding="same"))
    model_1_colon_no_dropout.add(keras.layers.MaxPooling2D(2))

# Dense head without any dropout between the layers
model_1_colon_no_dropout.add(keras.layers.Flatten())
for n_units in (512, 256):
    model_1_colon_no_dropout.add(keras.layers.Dense(n_units, activation="relu"))

# Single sigmoid unit for the binary output
model_1_colon_no_dropout.add(keras.layers.Dense(1, activation="sigmoid"))

# Adamax optimizer with binary cross-entropy loss; track accuracy
model_1_colon_no_dropout.compile(
    optimizer=tf.keras.optimizers.Adamax(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer summary
model_1_colon_no_dropout.summary()

In [None]:
# Fit the no-dropout colon model for 30 epochs with the colon generators
no_dropout_fit_options = dict(
    epochs=30,
    validation_data=val_generator_colon,
    steps_per_epoch=len(train_generator_colon),    # one pass over all training batches
    validation_steps=len(val_generator_colon),     # one pass over all validation batches
)
history_1_colon_no_dropout = model_1_colon_no_dropout.fit(train_generator_colon,
                                                          **no_dropout_fit_options)

In [None]:
# Loss and accuracy curves of the no-dropout colon model over all epochs
training_validation_plots(history_model=history_1_colon_no_dropout)

In [None]:
# Same curves for the no-dropout colon model, plotted from index 1 (skipping the first epoch)
training_validation_plots(history_model=history_1_colon_no_dropout, start_epoch=1)

In [None]:
# Experiment: colon model 1 layout with tanh convolutions, trained with SGD + momentum

model_1_colon_tanh = keras.models.Sequential()

# Convolutional stages (tanh conv followed by 2x2 max pooling);
# only the first layer declares the input shape
model_1_colon_tanh.add(keras.layers.Conv2D(64, 3, activation="tanh", padding="same",
                                           input_shape=(X, Y, 3)))
model_1_colon_tanh.add(keras.layers.MaxPooling2D(2))
for n_filters in (128, 256, 512):
    model_1_colon_tanh.add(keras.layers.Conv2D(n_filters, 3, activation="tanh", padding="same"))
    model_1_colon_tanh.add(keras.layers.MaxPooling2D(2))

# Flatten the feature maps for the dense head
model_1_colon_tanh.add(keras.layers.Flatten())

# Fully connected ReLU layers, each followed by 50% dropout
for n_units in (512, 256):
    model_1_colon_tanh.add(keras.layers.Dense(n_units, activation="relu"))
    model_1_colon_tanh.add(keras.layers.Dropout(0.5))

# Single sigmoid unit for the binary output
model_1_colon_tanh.add(keras.layers.Dense(1, activation="sigmoid"))

# Legacy SGD optimizer with momentum and learning-rate decay
colon_tanh_optimizer = tf.keras.optimizers.legacy.SGD(momentum=0.9,
                                                      learning_rate=0.001,
                                                      decay=0.01)
model_1_colon_tanh.compile(
    optimizer=colon_tanh_optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Print the layer-by-layer summary
model_1_colon_tanh.summary()


In [None]:
# Fit the tanh colon model for 30 epochs with the colon generators
colon_tanh_fit_options = dict(
    epochs=30,
    validation_data=val_generator_colon,
    steps_per_epoch=len(train_generator_colon),    # one pass over all training batches
    validation_steps=len(val_generator_colon),     # one pass over all validation batches
)
history_1_colon_tanh = model_1_colon_tanh.fit(train_generator_colon, **colon_tanh_fit_options)

In [None]:
# Loss and accuracy curves of the tanh colon model over all epochs
training_validation_plots(history_model=history_1_colon_tanh)

In [None]:
# Run the tanh colon model over every test batch to obtain sigmoid outputs
colon_tanh_test_probabilities = model_1_colon_tanh.predict(
    test_generator_colon,
    steps=len(test_generator_colon),
    verbose=1,
)

# Outputs above 0.5 become class 1, everything else class 0
test_predictions_model_1_colon_tanh = (colon_tanh_test_probabilities > 0.5).astype(int)

In [None]:
# Per-class precision / recall / F1 for the tanh colon model on the test set (4 decimals)
class_report = classification_report(
    test_true_labels_colon,
    test_predictions_model_1_colon_tanh,
    target_names=class_names_colon,
    digits=4,
)

# Heading and report, one per line
print("Classification Report:", class_report, sep="\n")

In [None]:
# Confusion matrix of the tanh colon model on the test set
plot_confusion_matrix(test_true_labels=test_true_labels_colon,
                      test_predictions=test_predictions_model_1_colon_tanh,
                      class_names=class_names_colon)