In [None]:
# Importing required libraries for image preprocessing, model training, and evaluation
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, regularizers, models
from tensorflow.keras.optimizers import Adamax
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
from google.colab import drive

# Mounting Google Drive to access dataset
# (Colab-only: `drive` comes from `google.colab`; the mount point is /content/drive.)
drive.mount('/content/drive')
# Switch the working directory to the dataset root so that the relative
# "Training" / "Validation" / "Testing" paths defined further down resolve.
%cd /content/drive/MyDrive/BTC/BTC_Dataset/
# Function to crop an image to the bounding box of its largest bright region (the ROI)
def crop_to_roi(image, margin=20):
    """Crop `image` to the bounding box of its largest above-threshold contour.

    Every pixel brighter than 20 is treated as foreground, the contour with the
    largest area is selected, and the image is cropped to that contour's
    bounding box grown by `margin` pixels on each side (clamped to the image).

    Args:
        image: Image array as returned by `cv2.imread` (BGR colour), or a 2-D
            grayscale array.
        margin: Number of pixels added around the bounding box on every side.

    Returns:
        The cropped region of `image` (a NumPy slice of the input), or `image`
        unchanged when no foreground contour is found.
    """
    height, width = image.shape[:2]

    # A 2-D array is already grayscale; cvtColor would reject it.
    if image.ndim == 2:
        gray_img = image
    else:
        gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Binary mask of everything brighter than the (near-black) background
    _, threshold_img = cv2.threshold(gray_img, 20, 255, cv2.THRESH_BINARY)

    # Find contours and pick the one enclosing the largest area
    contours, _ = cv2.findContours(threshold_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        # Nothing above the threshold (e.g. an all-black image): nothing to crop to.
        return image
    largest_contour = max(contours, key=cv2.contourArea)

    # boundingRect gives (x, y, w, h); x + w and y + h are exclusive ends, which
    # is exactly what NumPy slicing expects, so the last contour row/column is
    # kept and the margin is the same on all four sides.
    x, y, box_width, box_height = cv2.boundingRect(largest_contour)

    # Grow the box by the margin while staying inside the image
    left = max(0, x - margin)
    top = max(0, y - margin)
    right = min(width, x + box_width + margin)
    bottom = min(height, y + box_height + margin)

    # Crop and return the image
    return image[top:bottom, left:right]

# Function to load and preprocess images from the dataset
def load_and_process_images(dataset_path, target_size, augment=True):
    """Load every image under `dataset_path`, crop it to its ROI and resize it.

    `dataset_path` is expected to contain one sub-directory per class; the
    sub-directory name is used as the label. Entries that are not directories
    and files that OpenCV cannot decode are skipped instead of aborting the load.

    Args:
        dataset_path: Directory holding one folder per class.
        target_size: `(width, height)` passed to `cv2.resize`.
        augment: When True (default), a horizontally and a vertically flipped
            copy of every image are appended right after the original. Pass
            False for splits where duplicated samples are not wanted.

    Returns:
        Tuple `(images, labels, filenames)` of NumPy arrays with one entry per
        emitted image. All three are empty arrays if nothing could be loaded.
    """
    images = []
    labels = []
    filenames = []

    # Sorted traversal keeps the sample order reproducible across file systems.
    for class_folder in sorted(os.listdir(dataset_path)):
        class_dir = os.path.join(dataset_path, class_folder)
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders are not classes.
            continue

        for img_file in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, img_file)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals an unreadable / non-image file with None.
                print(f"Warning: Could not read image '{img_path}'. Skipping.")
                continue

            # Crop and resize the image
            img = crop_to_roi(img, margin=5)
            img = cv2.resize(img, target_size)

            # Original first, then (optionally) horizontal and vertical flips
            variants = [img]
            if augment:
                variants.append(cv2.flip(img, 1))
                variants.append(cv2.flip(img, 0))

            for variant in variants:
                images.append(variant)
                labels.append(class_folder)
                filenames.append(img_file)

    return np.array(images), np.array(labels), np.array(filenames)

# Define paths (relative to the current working directory)
TRAIN_PATH = "Training"
VAL_PATH = "Validation"
TEST_PATH = "Testing"

# Get list of classes from the training set.
# Only sub-directories are classes, and sorting makes the class -> index mapping
# reproducible (os.listdir returns entries in arbitrary order).
class_names = sorted(
    folder for folder in os.listdir(TRAIN_PATH)
    if os.path.isdir(os.path.join(TRAIN_PATH, folder))
)

# Define image target size as (height, width, channels); the first two values
# are passed to the loader as the resize target.
image_size = (128, 128, 3)

# Load and preprocess images for training, validation, and testing
train_images, train_labels, _ = load_and_process_images(TRAIN_PATH, image_size[:2])
val_images, val_labels, _ = load_and_process_images(VAL_PATH, image_size[:2])
test_images, test_labels, _ = load_and_process_images(TEST_PATH, image_size[:2])

# Display sample images (optional)
def display_image(image):
    """Render `image` inline with matplotlib.

    `cv2.imshow` needs a native GUI window, which a hosted notebook kernel does
    not have, so the image is drawn through matplotlib instead.

    Args:
        image: Image array. A 3-channel array is assumed to be in OpenCV's BGR
            order and is converted to RGB for display; anything else is
            squeezed and shown as a grayscale image.
    """
    if image.ndim == 3 and image.shape[2] == 3:
        # matplotlib expects RGB, OpenCV loads BGR.
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    else:
        plt.imshow(np.squeeze(image), cmap='gray')
    plt.title('Image')
    plt.axis('off')
    plt.show()

# Show the first image of each split, in train / validation / test order
for sample_image in (train_images[0], val_images[0], test_images[0]):
    display_image(sample_image)

# Convert string labels to categorical (one-hot encoding)
# Build the name -> index lookup once instead of scanning class_names per sample.
class_to_index = {name: index for index, name in enumerate(class_names)}

# Fix the one-hot width to the number of classes: without num_classes the width
# is inferred from the largest index present, so a split that lacks the last
# class would produce a matrix narrower than the model's output layer.
num_classes = len(class_names)
train_labels_cat = tf.keras.utils.to_categorical(
    [class_to_index[label] for label in train_labels], num_classes=num_classes)
val_labels_cat = tf.keras.utils.to_categorical(
    [class_to_index[label] for label in val_labels], num_classes=num_classes)
test_labels_cat = tf.keras.utils.to_categorical(
    [class_to_index[label] for label in test_labels], num_classes=num_classes)

# Build the model on top of a pretrained Keras backbone (EfficientNetB4 by default)
def build_model(base_model_name='EfficientNetB4'):
    """Build and compile the classifier.

    The backbone is looked up by name in `tf.keras.applications`, created
    without its classification head, with ImageNet weights and global max
    pooling. A small head (batch norm, regularised dense layer, dropout, softmax)
    is stacked on top. Reads the module-level `image_size` and `class_names`.

    Args:
        base_model_name: Name of a model constructor in `tf.keras.applications`.
            NOTE(review): the loaded images are fed to the network without any
            explicit preprocessing; confirm that is appropriate before switching
            to a backbone other than the default.

    Returns:
        A compiled `tf.keras` model with one softmax output per class.

    Raises:
        ValueError: If `base_model_name` is not a model constructor in
            `tf.keras.applications`.
    """
    # Sub-modules such as `tf.keras.applications.efficientnet` are not callable,
    # so the callable check rejects them as well as unknown names.
    backbone_factory = getattr(tf.keras.applications, base_model_name, None)
    if not callable(backbone_factory):
        raise ValueError(
            f"Unknown base model '{base_model_name}' in tf.keras.applications.")

    base_model = backbone_factory(include_top=False, weights="imagenet",
                                  input_shape=image_size, pooling='max')

    # Add custom layers on top
    x = base_model.output
    x = layers.BatchNormalization()(x)
    x = layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.02),
                     activity_regularizer=regularizers.l1(0.006))(x)
    x = layers.Dropout(0.45)(x)
    output = layers.Dense(len(class_names), activation='softmax')(x)

    model = models.Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer=Adamax(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

    return model

# Initialize the model
# (build_model() is called with its default backbone and returns a compiled model.)
model = build_model()

# Train the model
# Runs 10 epochs with the validation split passed as validation_data; batch size
# is not set here, so the framework default applies. `history` keeps the
# per-epoch accuracy/loss values that are plotted further down.
history = model.fit(train_images, train_labels_cat, epochs=10, validation_data=(val_images, val_labels_cat))

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_images, test_labels_cat, verbose=2)

# Generate predictions on the test set and reduce both predictions and one-hot
# ground truth to class indices
test_preds = model.predict(test_images)
test_preds = np.argmax(test_preds, axis=1)
test_labels_max = np.argmax(test_labels_cat, axis=1)

# Pin the label set to every known class so the report and the matrix always
# have one row per class, even if a class never occurs in the test split.
class_indices = list(range(len(class_names)))

# Print classification report (rows labelled with the class names)
print(classification_report(test_labels_max, test_preds,
                            labels=class_indices, target_names=class_names))

# Print confusion matrix (rows = true class, columns = predicted class)
cm = confusion_matrix(test_labels_max, test_preds, labels=class_indices)
print("Confusion Matrix: \n", cm)

# Per-class accuracy = correct / total per true class; classes without any
# test samples are reported as NaN instead of triggering a divide-by-zero.
row_totals = cm.sum(axis=1)
per_class_accuracy = np.divide(cm.diagonal(), row_totals,
                               out=np.full(len(class_indices), np.nan),
                               where=row_totals > 0)
print("Per-class accuracy: \n", per_class_accuracy)

# Plot accuracy and loss
# Accuracy is bounded by 1 while the loss (which includes the regularisation
# penalties) is not, so each gets its own axes instead of sharing one scale.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 6))

acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(loc='best', fancybox=True, shadow=True)

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc='best', fancybox=True, shadow=True)

fig.tight_layout()
plt.show()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[Errno 20] Not a directory: '/content/drive/MyDrive/BTC/BTC_Dataset-20241127T202613Z-001.zip/BTC_Dataset/'
/content/drive/MyDrive/BTC/BTC_Dataset


In [None]:
# Load, crop, resize and flip-augment every image below a class-per-folder directory
def load_images(path, image_size):
    """Read all images under `path` and return them with labels and file names.

    Each sub-directory of `path` is treated as one class. Non-directory entries
    are ignored, empty class folders and unreadable files are reported with a
    warning and skipped. Every readable image is cropped, resized to
    `image_size` and emitted three times: as-is, flipped horizontally, flipped
    vertically.

    NOTE(review): `cv` and `crop_image` are not defined in the cells visible
    here (the first cell imports `cv2` and defines `crop_to_roi`) - confirm
    another cell provides them before relying on this function.

    Args:
        path: Directory containing one folder per class.
        image_size: Target size handed to `cv.resize`.

    Returns:
        Tuple `(images, labels, names)` of NumPy arrays, one entry per emitted image.

    Raises:
        ValueError: If no image at all could be loaded from `path`.
    """
    images = []
    labels = []
    names = []

    for sub_dir in os.listdir(path):
        sub_path = os.path.join(path, sub_dir)
        if not os.path.isdir(sub_path):
            continue

        # An empty class folder is reported and skipped
        entries = os.listdir(sub_path)
        if len(entries) == 0:
            print(f"Warning: Directory '{sub_path}' is empty.")
            continue

        for img_name in entries:
            img_path = os.path.join(sub_path, img_name)
            loaded = cv.imread(img_path)
            if loaded is None:
                print(f"Warning: Could not read image '{img_path}'. Skipping.")
                continue

            resized = cv.resize(crop_image(loaded, GAP_VAL=5), image_size)

            # Original, horizontal flip, vertical flip - in that order
            augmented = (resized, cv.flip(resized, 1), cv.flip(resized, 0))
            images.extend(augmented)
            labels.extend([sub_dir] * len(augmented))
            names.extend([img_name] * len(augmented))

    # Refuse to hand back an empty dataset
    if len(images) == 0:
        raise ValueError(f"No images found in directory '{path}'.")

    return np.array(images), np.array(labels), np.array(names)