In [3]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, regularizers, models
from tensorflow.keras.optimizers import Adamax
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
from google.colab import drive


# Mounting Google Drive to access dataset
drive.mount('/content/drive')
%cd /content/drive/MyDrive/BTC_Dataset/


# Function to crop image and focus on the Region of Interest (ROI) around the tumor
def crop_to_roi(image, margin=20):
    img_copy = image.copy()
    try:
        height, width, channels = img_copy.shape
    except:
        height, width = img_copy.shape


    # Convert to grayscale and apply binary thresholding
    gray_img = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
    _, threshold_img = cv2.threshold(gray_img, 20, 255, cv2.THRESH_BINARY)


    # Find contours and pick the largest one (tumor)
    contours, _ = cv2.findContours(threshold_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    largest_contour = max(contours, key=cv2.contourArea)


    # Get the bounding box of the largest contour
    left = min(largest_contour, key=lambda x: x[0][0])[0][0] - margin
    top = min(largest_contour, key=lambda x: x[0][1])[0][1] - margin
    right = max(largest_contour, key=lambda x: x[0][0])[0][0] + margin
    bottom = max(largest_contour, key=lambda x: x[0][1])[0][1] + margin


    # Ensure bounding box stays within image dimensions
    left = max(0, left)
    top = max(0, top)
    right = min(width, right)
    bottom = min(height, bottom)


    # Crop and return the image
    return image[top:bottom, left:right]


# Function to load and preprocess images from the dataset
def load_and_process_images(dataset_path, target_size):
    images = []
    labels = []
    filenames = []


    # Traverse dataset directory and process each image
    for class_folder in os.listdir(dataset_path):
        for img_file in os.listdir(f"{dataset_path}/{class_folder}"):
            img_path = f"{dataset_path}/{class_folder}/{img_file}"
            img = cv2.imread(img_path)


            # Crop and resize the image
            img = crop_to_roi(img, margin=5)
            img = cv2.resize(img, target_size)


            # Augment by flipping images horizontally and vertically
            images.append(img)
            labels.append(class_folder)
            filenames.append(img_file)


            # Horizontal flip
            images.append(cv2.flip(img, 1))
            labels.append(class_folder)
            filenames.append(img_file)


            # Vertical flip
            images.append(cv2.flip(img, 0))
            labels.append(class_folder)
            filenames.append(img_file)


    return np.array(images), np.array(labels), np.array(filenames)


# Define paths
TRAIN_PATH = "Training"
VAL_PATH = "Validation"
TEST_PATH = "Testing"


# Get list of classes from the training set
class_names = [folder for folder in os.listdir(TRAIN_PATH)]


# Define image target size
image_size = (128, 128, 3)


# Load and preprocess images for training, validation, and testing
train_images, train_labels, _ = load_and_process_images(TRAIN_PATH, image_size[:2])
val_images, val_labels, _ = load_and_process_images(VAL_PATH, image_size[:2])
test_images, test_labels, _ = load_and_process_images(TEST_PATH, image_size[:2])


# Convert string labels to categorical (one-hot encoding)
train_labels_cat = tf.keras.utils.to_categorical([class_names.index(label) for label in train_labels])
val_labels_cat = tf.keras.utils.to_categorical([class_names.index(label) for label in val_labels])
test_labels_cat = tf.keras.utils.to_categorical([class_names.index(label) for label in test_labels])



# Training and evaluation for EfficientNetB4
def train_efficientnet():
    efficient_net = tf.keras.applications.EfficientNetB4(include_top=False, weights="imagenet", input_shape=image_size, pooling=None)
    x = layers.GlobalAveragePooling2D()(efficient_net.output)
    x = layers.BatchNormalization()(x)
    x = layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.02),
                     activity_regularizer=regularizers.l1(0.006))(x)
    x = layers.Dropout(0.45)(x)
    output = layers.Dense(len(class_names), activation='softmax')(x)


    model = models.Model(inputs=efficient_net.input, outputs=output)
    model.compile(optimizer=Adamax(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])



    history = model.fit(train_images, train_labels_cat, epochs=10, validation_data=(val_images, val_labels_cat))



    test_loss, test_acc = model.evaluate(test_images, test_labels_cat)
    print(f"EfficientNetB4 Test Accuracy: {test_acc}")
    print(f"EfficientNetB4 Test Loss: {test_loss}")



    test_predictions = np.argmax(model.predict(test_images), axis=1)
    test_labels_class = np.argmax(test_labels_cat, axis=1)


    print("EfficientNetB4 Confusion Matrix:")
    cm = confusion_matrix(test_labels_class, test_predictions)
    print(cm)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title("EfficientNetB4 Confusion Matrix")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()


    print("EfficientNetB4 Classification Report:")
    print(classification_report(test_labels_class, test_predictions, target_names=class_names))


    plt.plot(history.history['accuracy'], label='train accuracy')
    plt.plot(history.history['val_accuracy'], label='val accuracy')
    plt.title('EfficientNetB4 Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()
    train_efficientnet()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/BTC_Dataset
