In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import cv2
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm
import os
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import EfficientNetV2B0, ResNet50V2, DenseNet121
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from warnings import filterwarnings

# Silence every Python warning for the rest of the session so the notebook output stays readable
filterwarnings('ignore')

# Global constants shared by the loading, training and evaluation code below
dataset_path = '/kaggle/input/the-iqothnccd-lung-cancer-dataset/The IQ-OTHNCCD lung cancer dataset'
output_path = '/kaggle/working/'  # Path to save models in Kaggle output
# Class names. load_data() looks for a folder named "<Label> cases" per entry, and the
# position in this list is the class index used when labels are one-hot encoded.
# NOTE(review): 'bengin' presumably mirrors the folder spelling on disk - confirm before "fixing" it.
labels = ['bengin', 'malignant', 'normal']
image_size = 150  # Images are resized to image_size x image_size pixels
batch_size = 32  # Passed to model.fit() as batch_size
epochs = 2  # Passed to model.fit() as epochs

# Function to load and preprocess data
def load_data(dataset_path, labels, image_size):
    """Load every readable image of every class as a resized RGB array.

    For each entry of ``labels`` the folder ``"<Label> cases"`` (label
    capitalised) under ``dataset_path`` is scanned. Files that OpenCV cannot
    decode are skipped and counted.

    Args:
        dataset_path: Directory that contains one sub-folder per class.
        labels: Class names; each one selects a sub-folder and is stored
            verbatim as the sample's label.
        image_size: Side length (pixels) every image is resized to.

    Returns:
        Tuple ``(X_data, y_data)`` of NumPy arrays holding the images and the
        matching label strings, shuffled together with a fixed seed.
    """
    X_data = []
    y_data = []
    skipped = 0

    for label in labels:
        folder_path = os.path.join(dataset_path, f'{label.capitalize()} cases')
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # seeded shuffle below (and any later seeded split) reproducible.
        for img_name in tqdm(sorted(os.listdir(folder_path))):
            img_path = os.path.join(folder_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # imread signals an unreadable / non-image file by returning None
                skipped += 1
                continue
            img = cv2.resize(img, (image_size, image_size))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Ensure the image is in RGB format
            X_data.append(img)
            y_data.append(label)

    if skipped:
        print(f"Skipped {skipped} file(s) that could not be read as images.")

    X_data = np.array(X_data)
    y_data = np.array(y_data)

    # Shuffle images and labels in unison
    X_data, y_data = shuffle(X_data, y_data, random_state=101)

    return X_data, y_data

# Read the whole dataset into memory
X_data, y_data = load_data(dataset_path, labels, image_size)

# Hold out 20% of the samples for testing, keeping class proportions equal in both parts
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=101,
    stratify=y_data,
)

# One-hot encode the string labels; a class index is the label's position in `labels`
y_train = tf.keras.utils.to_categorical(
    list(map(labels.index, y_train)),
    num_classes=len(labels),
)
y_test = tf.keras.utils.to_categorical(
    list(map(labels.index, y_test)),
    num_classes=len(labels),
)

# Function to build and train individual models
def build_and_train_model(base_model, model_name, X_train, y_train):
    """Put a softmax head on an ImageNet backbone and fit it.

    Args:
        base_model: Callable from ``tf.keras.applications`` that builds the
            backbone (called with ``weights``, ``include_top``, ``input_shape``).
        model_name: Used for the TensorBoard log folder and checkpoint file name.
        X_train: Training images.
        y_train: One-hot training labels.

    Returns:
        Tuple ``(model, history)``: the fitted model as it stands after the
        last epoch, and the object returned by ``fit``.

    NOTE(review): images are passed in as loaded, without a per-architecture
    ``preprocess_input`` step - confirm that matches what each backbone's
    ImageNet weights expect.
    """
    backbone = base_model(
        weights='imagenet',
        include_top=False,
        input_shape=(image_size, image_size, 3),
    )

    # Classification head: pool the feature maps, regularise, one unit per class
    head = [
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(len(labels), activation='softmax'),
    ]
    model = tf.keras.models.Sequential([backbone, *head])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    checkpoint_file = os.path.join(output_path, f"{model_name}.keras")
    training_callbacks = [
        TensorBoard(log_dir=f'logs/{model_name}'),
        # Keep only the epoch with the highest validation accuracy on disk
        ModelCheckpoint(checkpoint_file, monitor="val_accuracy", save_best_only=True,
                        mode="max", verbose=1),
        ReduceLROnPlateau(monitor='val_accuracy', factor=0.3, patience=2,
                          min_delta=0.001, mode='max', verbose=1),
    ]

    history = model.fit(
        X_train,
        y_train,
        validation_split=0.1,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=training_callbacks,
    )
    return model, history

# Function to build and train ensemble model
def build_ensemble_model(models, X_train, y_train):
    """Average the outputs of several models and fit the combined model.

    Args:
        models: Models that accept an ``(image_size, image_size, 3)`` input;
            each is called on one shared input tensor.
        X_train: Training images.
        y_train: One-hot training labels.

    Returns:
        Tuple ``(ensemble_model, history)``.

    NOTE(review): the member models are reused as layers of the ensemble, so
    fitting presumably keeps updating their weights - confirm this is intended.
    """
    shared_input = tf.keras.Input(shape=(image_size, image_size, 3))

    member_outputs = []
    for member in models:
        member_outputs.append(member(shared_input))
    mean_output = tf.keras.layers.Average()(member_outputs)

    ensemble_model = tf.keras.models.Model(inputs=shared_input, outputs=mean_output)
    ensemble_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    training_callbacks = [
        TensorBoard(log_dir='logs/Ensemble'),
        # Keep only the epoch with the highest validation accuracy on disk
        ModelCheckpoint(os.path.join(output_path, "Ensemble.keras"), monitor="val_accuracy",
                        save_best_only=True, mode="max", verbose=1),
        ReduceLROnPlateau(monitor='val_accuracy', factor=0.3, patience=2,
                          min_delta=0.001, mode='max', verbose=1),
    ]

    history = ensemble_model.fit(
        X_train,
        y_train,
        validation_split=0.1,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=training_callbacks,
    )
    return ensemble_model, history

# Build and train individual models
model_builders = [EfficientNetV2B0, ResNet50V2, DenseNet121]
trained_models = []  # Same order as model_builders
histories = {}  # Training history keyed by model name

for model_builder in model_builders:
    model_name = model_builder.__name__
    model, history = build_and_train_model(model_builder, model_name, X_train, y_train)
    trained_models.append(model)
    histories[model_name] = history

# Save each individual model
# Pair every builder with the model trained from it. Iterating over the
# builders alone would save the leftover `model` variable (the last model
# trained) under every architecture's file name.
# NOTE(review): this writes to the same path as the ModelCheckpoint callback,
# so the best-validation checkpoint is replaced by the final-epoch model -
# confirm that is wanted.
for model_builder, model in zip(model_builders, trained_models):
    model_name = model_builder.__name__
    model.save(os.path.join(output_path, f"{model_name}.keras"))

# Build and train ensemble model using trained individual models
ensemble_model, ensemble_history = build_ensemble_model(trained_models, X_train, y_train)
models = {"Ensemble": ensemble_model}
histories["Ensemble"] = ensemble_history

# Save the ensemble model
ensemble_model.save(os.path.join(output_path, "Ensemble.keras"))

# Function to print model probabilities for a given test image
def print_model_probabilities(models, X_test, labels):
    """Print each model's per-class probabilities for the first test sample.

    Args:
        models: Mapping of display name to an object with a ``predict`` method.
        X_test: Sliceable collection of samples; only the first one is used.
        labels: Class names, in the order of the model's output entries.
    """
    first_sample = X_test[:1]  # A slice, so the batch dimension is kept
    print("Probabilities for the first test image:")
    for name, net in models.items():
        scores = net.predict(first_sample)[0]
        print(f"\nModel: {name}")
        for position, class_name in enumerate(labels):
            print(f"Probability of {class_name}: {scores[position]:.4f}")

# Show the ensemble's class probabilities for the first held-out image
print_model_probabilities(models=models, X_test=X_test, labels=labels)

# Function to plot training history for each model
def plot_history(histories):
    """Draw accuracy and loss curves, one figure per model.

    Args:
        histories: Mapping of model name to an object whose ``history``
            attribute holds the ``accuracy``, ``val_accuracy``, ``loss`` and
            ``val_loss`` series.
    """
    # (subplot position, training series key, validation series key, metric title)
    panels = (
        (1, 'accuracy', 'val_accuracy', 'Accuracy'),
        (2, 'loss', 'val_loss', 'Loss'),
    )

    for model_name, history in histories.items():
        plt.figure(figsize=(14, 5))

        for position, train_key, val_key, metric in panels:
            plt.subplot(1, 2, position)
            plt.plot(history.history[train_key], label=f'Training {metric}')
            plt.plot(history.history[val_key], label=f'Validation {metric}')
            plt.xlabel('Epochs')
            plt.ylabel(metric)
            plt.title(f'{model_name} - Training and Validation {metric}')
            plt.legend()
            plt.grid(True)

        plt.tight_layout()
        plt.show()

# Plot the curves of every trained model, ensemble included
plot_history(histories=histories)

# Function to evaluate models and plot confusion matrix
def evaluate_model(model, X_test, y_test, model_name):
    """Print a classification report and plot a row-normalised confusion matrix.

    Args:
        model: Object whose ``predict`` returns one score vector per sample.
        X_test: Test images.
        y_test: One-hot test labels.
        model_name: Name shown in the report header and the plot title.
    """
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    y_true = np.argmax(y_test, axis=-1)

    # Pin the class indices so the report and the matrix always cover every
    # entry of `labels`, even when a class is absent from this evaluation set.
    class_ids = np.arange(len(labels))

    print(f"Classification Report for {model_name}:")
    print(classification_report(y_true, y_pred, labels=class_ids,
                                target_names=labels, digits=4))  # Display with 4 decimal places

    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    # Convert each row to percentages of that true class; a class with no
    # true samples gets a row of zeros instead of NaN from a 0/0 division.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_percentage = np.divide(
        cm.astype(float),
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    ) * 100

    # Plot confusion matrix with percentages
    disp = ConfusionMatrixDisplay(confusion_matrix=cm_percentage, display_labels=labels)
    disp.plot(cmap=plt.cm.Blues, values_format='.2f')  # Display with 2 decimal places
    plt.title(f'{model_name} - Confusion Matrix (Percentage)')
    plt.show()

# Report and confusion matrix for the ensemble on the held-out test set
evaluate_model(
    model=ensemble_model,
    X_test=X_test,
    y_test=y_test,
    model_name="Ensemble",
)

# Total parameter count of the ensemble, as reported by count_params()
print(f"Ensemble model has {ensemble_model.count_params()} parameters.")


In [None]:
import ipywidgets as widgets
from IPython.display import clear_output

# Function to predict uploaded image
def img_pred(uploader):
    """Classify the first uploaded image with the ensemble and display it.

    Args:
        uploader: ``widgets.FileUpload`` instance whose ``value`` holds the
            uploaded file(s).

    Raises:
        ValueError: If the uploaded bytes cannot be decoded as an image.
    """
    uploaded = uploader.value
    if not uploaded:
        print("No image uploaded!")
        return

    # ipywidgets 7 exposes a {filename: info} dict, ipywidgets 8 a tuple of
    # info dicts; take the first file in either layout.
    if isinstance(uploaded, dict):
        first_file = next(iter(uploaded.values()))
    else:
        first_file = uploaded[0]
    content = first_file['content']

    # Convert uploaded file to image format
    img = cv2.imdecode(np.frombuffer(content, dtype=np.uint8), cv2.IMREAD_COLOR)
    if img is None:
        # imdecode returns None instead of raising when the data is not an image
        raise ValueError("Uploaded file could not be decoded as an image")

    # Preprocess exactly like load_data(): resize, then BGR -> RGB. The models
    # were trained on RGB arrays, so feeding the BGR decode would swap channels.
    img_resized = cv2.resize(img, (image_size, image_size))
    img_resized = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
    img_resized = np.expand_dims(img_resized, axis=0)  # Add the batch dimension

    # Get predictions from ensemble model
    predictions = ensemble_model.predict(img_resized)

    # Display the prediction results
    for i, label in enumerate(labels):
        print(f"Probability of {label}: {predictions[0][i]:.4f}")

    # Show the uploaded image (matplotlib expects RGB)
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title("Uploaded Image")
    plt.axis('off')
    plt.show()

# Create the file uploader widget (image files only, one file at a time)
uploader = widgets.FileUpload(accept='image/*', multiple=False)

# Create the prediction button and the output area that the click handler prints into
button = widgets.Button(description='Predict')
out = widgets.Output()

# Define the button click event
def on_button_clicked(_):
    """Handle a click on the Predict button.

    Clears the output area and runs ``img_pred`` on the uploader. Any
    exception is printed inside the output area instead of propagating.
    The argument passed in by the widget is ignored.
    """
    with out:
        # Drop whatever the previous click printed
        clear_output()
        try:
            img_pred(uploader)
        except Exception as error:
            print(f"Error: {error}")
            print("No Image Uploaded/Invalid Image File")

# Attach the button click event to the function
button.on_click(on_button_clicked)

# Display the widgets stacked vertically: uploader, button, then the output area
# NOTE(review): `display` is not imported in this file; presumably it is the
# notebook-provided builtin - confirm if this is ever run outside Jupyter.
display(widgets.VBox([uploader, button, out]))


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import cv2
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm import tqdm
import os
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import EfficientNetV2B0, ResNet50V2, DenseNet121
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from warnings import filterwarnings

# Suppress every Python warning for the rest of the session to keep the output clean
filterwarnings('ignore')

# Global Constants
dataset_path = '/kaggle/input/the-iqothnccd-lung-cancer-dataset/The IQ-OTHNCCD lung cancer dataset'
output_path = '/kaggle/working/'  # Path to save models in Kaggle output

# Define labels for the dataset. load_data() looks for a folder named
# "<Label> cases" per entry, and the list position is the class index used
# when labels are one-hot encoded.
# NOTE(review): 'bengin' presumably mirrors the folder spelling on disk - confirm before "fixing" it.
labels = ['bengin', 'malignant', 'normal']  # Classes in the dataset

# Define image parameters
image_size = 150  # Images are resized to image_size x image_size pixels
batch_size = 32  # Passed to model.fit() as batch_size
epochs = 2  # Passed to model.fit() as epochs

# Function to load and preprocess data
def load_data(dataset_path, labels, image_size):
    """Load every readable image of every class as a resized RGB array.

    For each entry of ``labels`` the folder ``"<Label> cases"`` (label
    capitalised) under ``dataset_path`` is scanned. Files that OpenCV cannot
    decode are skipped and counted.

    Args:
        dataset_path: Directory that contains one sub-folder per class.
        labels: Class names; each one selects a sub-folder and is stored
            verbatim as the sample's label.
        image_size: Side length (pixels) every image is resized to.

    Returns:
        Tuple ``(X_data, y_data)`` of NumPy arrays holding the images and the
        matching label strings, shuffled together with a fixed seed.
    """
    X_data = []  # List to hold image data
    y_data = []  # List to hold labels
    skipped = 0  # Files OpenCV could not decode

    # Loop through each label (class)
    for label in labels:
        # Construct the folder path for the current label
        folder_path = os.path.join(dataset_path, f'{label.capitalize()} cases')

        # os.listdir() returns entries in arbitrary order; sorting makes the
        # seeded shuffle below (and any later seeded split) reproducible.
        for img_name in tqdm(sorted(os.listdir(folder_path))):
            img_path = os.path.join(folder_path, img_name)
            img = cv2.imread(img_path)  # Read the image

            if img is None:
                # imread signals an unreadable / non-image file by returning None
                skipped += 1
                continue

            # Resize the image
            img = cv2.resize(img, (image_size, image_size))

            # Convert image to RGB format
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # Append processed image and its label to lists
            X_data.append(img)
            y_data.append(label)

    if skipped:
        print(f"Skipped {skipped} file(s) that could not be read as images.")

    # Convert lists to NumPy arrays
    X_data = np.array(X_data)
    y_data = np.array(y_data)

    # Shuffle images and labels in unison
    X_data, y_data = shuffle(X_data, y_data, random_state=101)

    return X_data, y_data

# Read the whole dataset from the configured path
X_data, y_data = load_data(dataset_path, labels, image_size)

# 80/20 train/test split, stratified so both parts keep the class proportions
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=101,
    stratify=y_data,
)

# One-hot encode the string labels; a class index is the label's position in `labels`
y_train = tf.keras.utils.to_categorical(
    list(map(labels.index, y_train)),
    num_classes=len(labels),
)
y_test = tf.keras.utils.to_categorical(
    list(map(labels.index, y_test)),
    num_classes=len(labels),
)

# Function to build and train individual models
def build_and_train_model(base_model, model_name, X_train, y_train):
    """Put a softmax head on an ImageNet backbone and fit it.

    Args:
        base_model: Callable from ``tf.keras.applications`` that builds the
            backbone (called with ``weights``, ``include_top``, ``input_shape``).
        model_name: Used for the TensorBoard log folder and checkpoint file name.
        X_train: Training images.
        y_train: One-hot training labels.

    Returns:
        Tuple ``(model, history)``: the fitted model as it stands after the
        last epoch, and the object returned by ``fit``.

    NOTE(review): images are passed in as loaded, without a per-architecture
    ``preprocess_input`` step - confirm that matches what each backbone's
    ImageNet weights expect.
    """
    # Pretrained feature extractor without its original classifier
    backbone = base_model(
        weights='imagenet',
        include_top=False,
        input_shape=(image_size, image_size, 3),
    )

    # Classification head: pool the feature maps, regularise, one unit per class
    head = [
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(len(labels), activation='softmax'),
    ]
    model = tf.keras.models.Sequential([backbone, *head])
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    checkpoint_file = os.path.join(output_path, f"{model_name}.keras")
    training_callbacks = [
        TensorBoard(log_dir=f'logs/{model_name}'),
        # Keep only the epoch with the highest validation accuracy on disk
        ModelCheckpoint(checkpoint_file, monitor="val_accuracy", save_best_only=True,
                        mode="max", verbose=1),
        ReduceLROnPlateau(monitor='val_accuracy', factor=0.3, patience=2,
                          min_delta=0.001, mode='max', verbose=1),
    ]

    history = model.fit(
        X_train,
        y_train,
        validation_split=0.1,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=training_callbacks,
    )
    return model, history

# Function to build and train an ensemble model
def build_ensemble_model(models, X_train, y_train):
    """Average the outputs of several models and fit the combined model.

    Args:
        models: Models that accept an ``(image_size, image_size, 3)`` input;
            each is called on one shared input tensor.
        X_train: Training images.
        y_train: One-hot training labels.

    Returns:
        Tuple ``(ensemble_model, history)``.

    NOTE(review): the member models are reused as layers of the ensemble, so
    fitting presumably keeps updating their weights - confirm this is intended.
    """
    # One input tensor feeds every member model
    shared_input = tf.keras.Input(shape=(image_size, image_size, 3))
    member_outputs = [member(shared_input) for member in models]

    # Element-wise mean of the members' outputs
    mean_output = tf.keras.layers.Average()(member_outputs)

    ensemble_model = tf.keras.models.Model(inputs=shared_input, outputs=mean_output)
    ensemble_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    training_callbacks = [
        TensorBoard(log_dir='logs/Ensemble'),
        # Keep only the epoch with the highest validation accuracy on disk
        ModelCheckpoint(os.path.join(output_path, "Ensemble.keras"), monitor="val_accuracy",
                        save_best_only=True, mode="max", verbose=1),
        ReduceLROnPlateau(monitor='val_accuracy', factor=0.3, patience=2,
                          min_delta=0.001, mode='max', verbose=1),
    ]

    history = ensemble_model.fit(
        X_train,
        y_train,
        validation_split=0.1,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=training_callbacks,
    )
    return ensemble_model, history

# Build and train individual models
model_builders = [EfficientNetV2B0, ResNet50V2, DenseNet121]
trained_models = []  # Trained models, in the same order as model_builders
histories = {}  # Training history keyed by model name

# Loop through model builders to train each model
for model_builder in model_builders:
    model_name = model_builder.__name__  # Get the model name
    print(f"Training model: {model_name}")  # Log the model being trained

    model, history = build_and_train_model(model_builder, model_name, X_train, y_train)

    # Store the trained model and its training history
    trained_models.append(model)
    histories[model_name] = history

# Save each individual model to the output path
# Pair every builder with the model trained from it. Iterating over the
# builders alone would save the leftover `model` variable (the last model
# trained) under every architecture's file name.
# NOTE(review): this writes to the same path as the ModelCheckpoint callback,
# so the best-validation checkpoint is replaced by the final-epoch model -
# confirm that is wanted.
for model_builder, model in zip(model_builders, trained_models):
    model_name = model_builder.__name__
    model.save(os.path.join(output_path, f"{model_name}.keras"))  # Save model
    print(f"Saved model: {model_name}.keras")  # Log the model saving

# Build and train ensemble model using trained individual models
print("Building and training ensemble model...")  # Log the start of ensemble training
ensemble_model, ensemble_history = build_ensemble_model(trained_models, X_train, y_train)

# Store the ensemble model in a dictionary for reference
models = {"Ensemble": ensemble_model}
histories["Ensemble"] = ensemble_history  # Store history of ensemble training

# Save the ensemble model
ensemble_model.save(os.path.join(output_path, "Ensemble.keras"))
print("Saved ensemble model: Ensemble.keras")  # Log the saving of ensemble model

# Function to print model probabilities for a given test image
def print_model_probabilities(models, X_test, labels):
    """Print each model's per-class probabilities for the first test sample.

    Args:
        models: Mapping of display name to an object with a ``predict`` method.
        X_test: Sliceable collection of samples; only the first one is used.
        labels: Class names, in the order of the model's output entries.
    """
    first_sample = X_test[:1]  # A slice, so the batch dimension is kept
    print("Probabilities for the first test image:")

    for name, net in models.items():
        # Announce the model before predicting so its output is grouped under the header
        print(f"\nPredicting probabilities using model: {name}")
        scores = net.predict(first_sample)[0]

        for position, class_name in enumerate(labels):
            print(f"Probability of {class_name}: {scores[position]:.4f}")

# Show the ensemble's class probabilities for the first held-out image
print_model_probabilities(models=models, X_test=X_test, labels=labels)

# Function to plot training history for each model
def plot_history(histories):
    """Draw accuracy and loss curves, one figure per model.

    Args:
        histories: Mapping of model name to an object whose ``history``
            attribute holds the ``accuracy``, ``val_accuracy``, ``loss`` and
            ``val_loss`` series.
    """
    # (subplot position, metric title, training key/colour, validation key/colour)
    panels = (
        (1, 'Accuracy', ('accuracy', 'blue'), ('val_accuracy', 'orange')),
        (2, 'Loss', ('loss', 'red'), ('val_loss', 'green')),
    )

    for model_name, history in histories.items():
        plt.figure(figsize=(14, 5))

        for position, metric, (train_key, train_color), (val_key, val_color) in panels:
            plt.subplot(1, 2, position)
            plt.plot(history.history[train_key], label=f'Training {metric}', color=train_color)
            plt.plot(history.history[val_key], label=f'Validation {metric}', color=val_color)
            plt.xlabel('Epochs')
            plt.ylabel(metric)
            plt.title(f'{model_name} - Training and Validation {metric}')
            plt.legend()
            plt.grid(True)

        plt.tight_layout()  # Keep the two panels from overlapping
        plt.show()

# Plot the curves of every trained model, ensemble included
print("Plotting training histories...")
plot_history(histories=histories)

# Function to evaluate models and plot confusion matrix
def evaluate_model(model, X_test, y_test, model_name):
    """Print a classification report and plot a row-normalised confusion matrix.

    Args:
        model: Object whose ``predict`` returns one score vector per sample.
        X_test: Test images.
        y_test: One-hot test labels.
        model_name: Name shown in the log line, report header and plot title.
    """
    print(f"Evaluating model: {model_name}")  # Log evaluation
    y_pred = np.argmax(model.predict(X_test), axis=-1)  # Predicted class index per sample
    y_true = np.argmax(y_test, axis=-1)  # True class index per sample

    # Pin the class indices so the report and the matrix always cover every
    # entry of `labels`, even when a class is absent from this evaluation set.
    class_ids = np.arange(len(labels))

    print(f"Classification Report for {model_name}:")  # Print classification report header
    print(classification_report(y_true, y_pred, labels=class_ids,
                                target_names=labels, digits=4))  # Display with 4 decimal places

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    # Convert each row to percentages of that true class; a class with no
    # true samples gets a row of zeros instead of NaN from a 0/0 division.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_percentage = np.divide(
        cm.astype(float),
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    ) * 100

    # Plot confusion matrix with percentages
    disp = ConfusionMatrixDisplay(confusion_matrix=cm_percentage, display_labels=labels)
    disp.plot(cmap=plt.cm.Blues, values_format='.2f')  # Display with 2 decimal places
    plt.title(f'{model_name} - Confusion Matrix (Percentage)')  # Title
    plt.show()  # Show the plot

# Evaluate each individual model and plot confusion matrix
# The models are built without an explicit name, so `model.name` is only an
# auto-generated identifier; take the architecture name from the matching
# builder so reports and plot titles agree with the keys used in `histories`.
# NOTE(review): the ensemble was fitted on these same model objects, so their
# weights are presumably no longer the ones from individual training - confirm.
for model_builder, model in zip(model_builders, trained_models):
    model_name = model_builder.__name__
    evaluate_model(model, X_test, y_test, model_name)

# Evaluate ensemble model and plot confusion matrix
evaluate_model(ensemble_model, X_test, y_test, "Ensemble")

# Print the number of parameters for the ensemble model
print(f"Ensemble model has {ensemble_model.count_params()} parameters.")  # Log parameter count
