In [1]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input, Conv2D, BatchNormalization, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def load_dataset(base_dir=None, test_size=0.2, random_state=42):
    """Load the segmented thermograms and return a train/test split.

    Images are collected from the class folder whose name contains "DOENTES"
    (label 1) and the one whose name contains "SAUD" (label 0). Only files
    located under a path containing "Segmentadas" are used; each one is passed
    through ``preprocess_image`` and skipped when that returns ``None``.
    Grayscale images are replicated to three channels before splitting.

    Args:
        base_dir: Directory holding the two class folders. Defaults to the
            Kaggle input path used by this notebook.
        test_size: Fraction of the samples held out for testing.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays.

    Raises:
        FileNotFoundError: If one of the class folders cannot be found.
        ValueError: If no image could be loaded.
    """
    if base_dir is None:
        base_dir = "/kaggle/input/thermal-images-for-breast-cancer-diagnosis-dmrir/Imagens e Matrizes da Tese de Thiago Alves Elias da Silva/Desenvolvimento da Metodologia"

    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

    # Sorted so that the folder choice does not depend on filesystem order.
    class_folders = sorted(os.listdir(base_dir))

    def find_class_dir(marker):
        """Return the first folder in base_dir whose name contains `marker`."""
        matches = [folder for folder in class_folders if marker in folder]
        if not matches:
            raise FileNotFoundError(
                f"No folder containing '{marker}' found in {base_dir!r}"
            )
        return os.path.join(base_dir, matches[0])

    doentes_dir = find_class_dir("DOENTES")
    saudaveis_dir = find_class_dir("SAUD")

    def load_images_from_segmentadas(directory, label):
        """Collect preprocessed images (and labels) under "Segmentadas" paths."""
        images = []
        labels = []

        for root, dirs, files in os.walk(directory):
            # os.walk order is filesystem-dependent; sorting in place makes the
            # sample order (and therefore the seeded split) reproducible.
            dirs.sort()

            # Only process files inside "Segmentadas" subdirectories
            if "Segmentadas" not in root:
                continue

            for file_name in sorted(files):
                # Skip non-image files
                if not file_name.lower().endswith(image_extensions):
                    continue

                img = preprocess_image(os.path.join(root, file_name))
                if img is not None:
                    images.append(img)
                    labels.append(label)

        return images, labels

    # Load images from both directories
    print("Loading cancer (DOENTES) images...")
    cancer_images, cancer_labels = load_images_from_segmentadas(doentes_dir, 1)

    print("Loading healthy (SAUDÁVEIS) images...")
    healthy_images, healthy_labels = load_images_from_segmentadas(saudaveis_dir, 0)

    # Combine datasets
    all_images = cancer_images + healthy_images
    all_labels = cancer_labels + healthy_labels

    if not all_images:
        raise ValueError(
            f"No images found under 'Segmentadas' folders in {base_dir!r}"
        )

    # Convert to numpy arrays
    X = np.array(all_images)
    y = np.array(all_labels)

    # Print the shape before conversion
    print(f"Dataset shape before RGB conversion: {X.shape}")

    # Convert grayscale to RGB (duplicate channels)
    if X.ndim == 3:  # If shape is (n, 224, 224)
        X = np.stack([X, X, X], axis=-1)

    # Print the shape after conversion
    print(f"Dataset shape after RGB conversion: {X.shape}")

    # Keep the class ratio identical in both splits. Stratification needs at
    # least two samples per class, so fall back to a plain split otherwise.
    class_counts = np.bincount(y, minlength=2)
    stratify = y if class_counts.min() >= 2 else None

    # NOTE(review): the split is per image. If several images belong to the
    # same patient they can land on both sides of the split — confirm whether
    # a patient-level (grouped) split is required for this dataset.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=stratify
    )

    return X_train, X_test, y_train, y_test


In [3]:
# Function to preprocess images
def preprocess_image(img_path):
    """Read one image and prepare it for the network.

    The image is read as single-channel grayscale, denoised with a 3x3 median
    filter, resized to 224x224 and scaled to the [0, 1] range.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float32 array of shape (224, 224), or ``None`` when the file could
        not be read so that the caller can skip it.
    """
    # Read image in grayscale mode
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports an unreadable/corrupt file by returning None instead
    # of raising; bail out before the filters below choke on it.
    if img is None:
        return None

    # Noise reduction using median filtering
    img = cv2.medianBlur(img, 3)

    # Resize to 224x224 (standard for CNNs)
    img = cv2.resize(img, (224, 224))

    # Normalize pixel values to [0, 1]
    return img.astype('float32') / 255.0

In [19]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG19
import tensorflow_probability as tfp
from sklearn.metrics import cohen_kappa_score, roc_auc_score

# Define Gaussian Process Layer
class GaussianProcessLayer(tf.keras.layers.Layer):
    """Trainable, bias-free linear projection used as the model's "GP" head.

    NOTE(review): despite its name, this layer does not perform Gaussian
    process inference. The previous ``call`` built a multivariate normal whose
    covariance diagonal was the very same projection as its mean (so it could
    be negative, i.e. not a valid covariance) and then returned only the
    distribution's mean. That mean is the projection itself, so the
    distribution has been dropped: the output is unchanged, while the invalid
    covariance and the deprecated distribution class are gone.

    Args:
        num_outputs: Size of the last dimension of the output.
        kernel_initializer: Initializer (name, config or instance) for the
            projection matrix.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, num_outputs, kernel_initializer='glorot_uniform', **kwargs):
        super().__init__(**kwargs)
        self.num_outputs = num_outputs
        # Resolve to an initializer object so it can be serialised in get_config.
        self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)

    def build(self, input_shape):
        """Create the (input_dim, num_outputs) projection matrix."""
        self.kernel = self.add_weight(
            name='kernel',
            shape=(input_shape[-1], self.num_outputs),
            initializer=self.kernel_initializer,
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return ``inputs @ kernel``."""
        return tf.matmul(inputs, self.kernel)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'num_outputs': self.num_outputs,
            'kernel_initializer': tf.keras.initializers.serialize(self.kernel_initializer),
        })
        return config

def build_dgp_mil_model():
    """Assemble the binary classifier: VGG19 backbone + GaussianProcessLayer head.

    The backbone uses ImageNet weights without its top classifier and takes
    224x224x3 inputs. All backbone layers except the last four are frozen.
    Its output is flattened, passed through 50% dropout, projected to a single
    value by ``GaussianProcessLayer`` and finally through a one-unit sigmoid
    ``Dense`` layer.

    Returns:
        The uncompiled Keras ``Model``.
    """
    image_input = Input(shape=(224, 224, 3))

    # ImageNet-pretrained VGG19 acts as the feature extractor.
    backbone = VGG19(weights='imagenet', include_top=False, input_tensor=image_input)

    # Only the last four backbone layers remain trainable.
    for frozen_layer in backbone.layers[:-4]:
        frozen_layer.trainable = False

    extractor = Model(inputs=image_input, outputs=backbone.output)

    features = extractor.output
    features = Flatten()(features)
    features = Dropout(0.5)(features)  # regularisation against overfitting

    # Projection head followed by the sigmoid output unit.
    gp_output = GaussianProcessLayer(num_outputs=1)(features)
    prediction = Dense(1, activation='sigmoid')(gp_output)

    return Model(inputs=extractor.input, outputs=prediction)

def calculate_kappa(y_true, y_pred):
    """Print and return Cohen's kappa for the given true and predicted labels."""
    score = cohen_kappa_score(y_true, y_pred)
    print(f"Cohen's Kappa: {score:.4f}")
    return score

def calculate_auc_roc(y_true, y_pred_prob):
    """Print and return the ROC AUC for the given labels and predicted scores."""
    score = roc_auc_score(y_true, y_pred_prob)
    print(f"AUC-ROC: {score:.4f}")
    return score

# Function to compile, train, and evaluate the model
def train_and_evaluate(X_train, X_test, y_train, y_test,
                       epochs=50, batch_size=32, class_weight=None):
    """Build, train and evaluate the classifier, then return the trained model.

    Prints the test loss/accuracy, Cohen's kappa, ROC AUC, a classification
    report and the confusion matrix for the test split.

    Args:
        X_train, y_train: Training images and binary labels.
        X_test, y_test: Held-out images and binary labels.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.
        class_weight: Mapping ``label -> weight`` passed to ``fit``. Defaults
            to ``{0: 1, 1: 3}``; adjust depending on your dataset.

    Returns:
        The trained Keras model, so later cells can inspect or save it.
    """
    from sklearn.metrics import classification_report, confusion_matrix

    if class_weight is None:
        # Class weights to address class imbalance
        class_weight = {0: 1, 1: 3}

    model = build_dgp_mil_model()

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),  # Smaller learning rate
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # NOTE(review): the test split doubles as validation data. That is fine for
    # monitoring only; do not drive early stopping or checkpoint selection
    # from it, or the reported test metrics become optimistic.
    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, y_test),
        class_weight=class_weight,
    )

    # Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print(f"Test Loss: {test_loss}")
    print(f"Test Accuracy: {test_accuracy}")

    # The model ends in a single sigmoid unit, so predictions come back as a
    # column; flatten them so the metric functions receive 1-D arrays.
    y_pred_prob = model.predict(X_test).ravel()  # probabilities, not labels
    y_pred_binary = (y_pred_prob > 0.5).astype(int)  # threshold at 0.5

    # Calculate Kappa and AUC-ROC
    calculate_kappa(y_test, y_pred_binary)
    calculate_auc_roc(y_test, y_pred_prob)

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred_binary, zero_division=0))

    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred_binary))

    return model


# Load the data in this cell so it runs on a fresh kernel (no earlier cell
# defines the split), and keep the trained model for the cells that follow.
X_train, X_test, y_train, y_test = load_dataset()
model = train_and_evaluate(X_train, X_test, y_train, y_test)


Epoch 1/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 193ms/step - accuracy: 0.6291 - loss: 1.0085 - val_accuracy: 0.9455 - val_loss: 0.1752
Epoch 2/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 110ms/step - accuracy: 0.9218 - loss: 0.2946 - val_accuracy: 0.9805 - val_loss: 0.0450
Epoch 3/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 110ms/step - accuracy: 0.9779 - loss: 0.0941 - val_accuracy: 0.9922 - val_loss: 0.0181
Epoch 4/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 110ms/step - accuracy: 0.9941 - loss: 0.0319 - val_accuracy: 1.0000 - val_loss: 0.0130
Epoch 5/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 110ms/step - accuracy: 0.9962 - loss: 0.0163 - val_accuracy: 1.0000 - val_loss: 0.0013
Epoch 6/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 109ms/step - accuracy: 1.0000 - loss: 0.0022 - val_accuracy: 1.0000 - val_loss: 4.8537e-04
Epoch 7/50
[1m33/33[0

In [17]:
# Print the layer-by-layer summary of `model` (presumably the trained Keras model).
# NOTE(review): this cell needs `model` at notebook scope — confirm an earlier
# cell assigns it before this one runs.
model.summary()