In [1]:
import numpy as np 
import pandas as pd 
import tensorflow as tf
import os
import json
import cv2
import matplotlib.pyplot as plt
from skimage import io
from skimage.feature import graycomatrix, graycoprops
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import preprocess_input, ResNet50
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization, GlobalAveragePooling2D,Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.losses import BinaryFocalCrossentropy
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.layers import Dense, Activation, Flatten, Dropout
from keras.models import Sequential, Model 
from keras.optimizers import SGD, Adam
from keras.callbacks import TensorBoard
import keras
import matplotlib.pyplot as plt


IMAGES_FOLDER = "/kaggle/input/glaucoma-datasets/G1020/Images"
df = pd.read_csv("/kaggle/input/glaucoma-datasets/G1020/G1020.csv")
MASK_FOLDER = "/kaggle/input/glaucoma-datasets/G1020/Masks"
HEIGHT, WIDTH = 256, 256
NUM_CLASSES = 2
BATCH_SIZE = 16
EPOCHS = 150
LR = 1e-4
print(df)

             imageID  binaryLabels
0        image_0.jpg             0
1        image_1.jpg             0
2        image_3.jpg             0
3        image_4.jpg             0
4        image_5.jpg             0
...              ...           ...
1015  image_3198.jpg             0
1016  image_3199.jpg             0
1017  image_3201.jpg             1
1018  image_3202.jpg             1
1019  image_2568.jpg             0

[1020 rows x 2 columns]


In [2]:
def load_data(df, image_size=(HEIGHT, WIDTH)):
    images, labels = [], []
    od_masks, oc_masks = [], []
    print(f"Total images in dataframe: {len(df)}")

    for index, row in df.iterrows():
        img_name = row['imageID']
        label = row['binaryLabels']
        img_path = os.path.join(IMAGES_FOLDER, img_name)

        base_name = os.path.splitext(img_name)[0]  
        mask_name = f"{base_name}.png" 
        mask_path = os.path.join(MASK_FOLDER, mask_name)
        
        if os.path.exists(img_path) and os.path.exists(mask_path):
            image = cv2.imread(img_path, cv2.IMREAD_COLOR)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, image_size)
            
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            mask = cv2.resize(mask, image_size)
            
            optic_disc = np.where(mask == 1, 255, 0).astype(np.uint8).reshape(image_size[0], image_size[1], 1)
            optic_cup = np.where(mask == 2, 255, 0).astype(np.uint8).reshape(image_size[0], image_size[1], 1)
            
            images.append(image)
            labels.append(label)
            od_masks.append(optic_disc)
            oc_masks.append(optic_cup)
    
    return np.array(images), np.array(labels), np.array(od_masks), np.array(oc_masks)

images, labels, od_masks, oc_masks = load_data(df)
images = images.astype('float32') / 255.0
od_masks = od_masks.astype('float32') / 255.0
oc_masks = oc_masks.astype('float32') / 255.0
labels = to_categorical(labels, num_classes=NUM_CLASSES)

X_train, X_test, y_train, y_test, od_train, od_test, oc_train, oc_test = train_test_split(images, labels, od_masks, oc_masks, test_size=0.3, stratify=labels, random_state=42)

labels_indices = np.argmax(labels, axis=1)
print("Raw label counts:", np.unique(labels_indices, return_counts=True))

class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(labels_indices), y=labels_indices)
class_weights = dict(enumerate(class_weights))

print(f"Dataset Shape: {images.shape}")
print(f"Train Shape: {X_train.shape}, Test Shape: {X_test.shape}")

def calculate_cdr(disc_mask, cup_mask):
    disc_area = np.sum(disc_mask)
    cup_area = np.sum(cup_mask)
    return round((2 * cup_area) / disc_area, 4) if disc_area != 0 else 0

def extract_isnt_quadrants(disc_mask, cup_mask):
    disc_mask = disc_mask.squeeze()
    cup_mask = cup_mask.squeeze()

    disc_mask_rotated = np.rot90(disc_mask)
    cup_mask_rotated = np.rot90(cup_mask)

    nrr_mask = cv2.bitwise_xor(disc_mask_rotated, cup_mask_rotated)
    
    height, width = nrr_mask.shape
    I, S = np.sum(nrr_mask[height//2:, :]), np.sum(nrr_mask[:height//2, :])
    N, T = np.sum(nrr_mask[:, :width//2]), np.sum(nrr_mask[:, width//2:])
    
    return round((1 + (I + S)) / (1 + (N + T)), 4) if (N + T) != 0 else 0

def extract_blood_vessels(fundus_image):
    green_channel = fundus_image[:, :, 1].astype(np.uint8) if fundus_image.shape[2] == 3 else fundus_image[:, :, 0].astype(np.uint8)
    clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(6,6))
    enhanced = clahe.apply(green_channel)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    bottom_hat = cv2.morphologyEx(enhanced, cv2.MORPH_BLACKHAT, kernel)
    
    threshold_value = max(10, 3.15 * np.std(bottom_hat))
    _, vessel_mask = cv2.threshold(bottom_hat, threshold_value, 255, cv2.THRESH_BINARY)
    
    height, width = vessel_mask.shape
    I, S = np.sum(vessel_mask[height//2:, :]), np.sum(vessel_mask[:height//2, :])
    N, T = np.sum(vessel_mask[:, :width//2]), np.sum(vessel_mask[:, width//2:])
    
    return round((1 + (I + S)) / (1 + (N + T)), 4) if (N + T) != 0 else 0

def extract_features(image, disc_mask, cup_mask):
    return np.array([
        calculate_cdr(disc_mask, cup_mask),
        extract_isnt_quadrants(disc_mask, cup_mask),
        extract_blood_vessels(image)
    ])

def extract_features_batch(images, od_masks, oc_masks):
    return np.array([extract_features(img, od, oc) for img, od, oc in zip(images, od_masks, oc_masks)])

train_features = extract_features_batch(X_train, od_train, oc_train)
test_features = extract_features_batch(X_test, od_test, oc_test)

class HybridDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, X, features, y, batch_size=32, shuffle=True):
        self.X = np.array(X, dtype=np.float32)
        self.features = np.array(features, dtype=np.float32)
        self.y = np.array(y, dtype=np.float32)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()
        
    def __len__(self):
        return int(np.ceil(len(self.X) / self.batch_size))
    
    def __getitem__(self, index):
        batch_X = self.X[index*self.batch_size:(index+1)*self.batch_size]
        batch_features = self.features[index*self.batch_size:(index+1)*self.batch_size]
        batch_y = self.y[index*self.batch_size:(index+1)*self.batch_size]
        
        return (batch_X, batch_features), batch_y  

    def on_epoch_end(self):
        if self.shuffle:
            indices = np.arange(len(self.X))
            np.random.shuffle(indices)
            self.X = self.X[indices]
            self.features = self.features[indices]
            self.y = self.y[indices]

train_generator = HybridDataGenerator(X_train, train_features, y_train, batch_size=BATCH_SIZE)
test_generator = HybridDataGenerator(X_test, test_features, y_test, batch_size=BATCH_SIZE, shuffle=False)

def build_hybrid_model(base_model, num_features, dropout=0.5, fc_layers=[512, 256], num_classes=2):
    for layer in base_model.layers[:-20]:
        layer.trainable = True

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu', kernel_regularizer='l2')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.6)(x)
    x = Dense(512, activation='relu', kernel_regularizer='l2')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.6)(x)

    feature_input = Input(shape=(num_features,))
    feature_x = Dense(64, activation="relu")(feature_input)

    combined = Concatenate()([x, feature_x])
    predictions = Dense(num_classes, activation='softmax')(combined)
    
    return Model(inputs=[base_model.input, feature_input], outputs=predictions)

base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(HEIGHT, WIDTH, 3))
# base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(HEIGHT, WIDTH, 3))

hybrid_model = build_hybrid_model(base_model, num_features=3)

hybrid_model.compile(optimizer=Adam(learning_rate=LR), 
                     loss=BinaryFocalCrossentropy(gamma=2.0), 
                     metrics=["accuracy"])

checkpoint = ModelCheckpoint("resnet50_model.keras", monitor="val_accuracy", verbose=1, save_best_only=True, mode="max")
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
tensorboard = TensorBoard(log_dir="./logs")
callbacks_list = [checkpoint, tensorboard]

hybrid_model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=test_generator,
    callbacks=[checkpoint, reduce_lr, early_stop],
    class_weight=class_weights
)

test_features = extract_features_batch(X_test, od_test, oc_test)
test_loss, test_acc = hybrid_model.evaluate([X_test, test_features], y_test)
y_pred = hybrid_model.predict([X_test, test_features])
y_pred_labels = np.argmax(y_pred, axis=1)
y_true_labels = np.argmax(y_test, axis=1)

accuracy = accuracy_score(y_true_labels, y_pred_labels)
report = classification_report(y_true_labels, y_pred_labels, target_names=["Normal", "Glaucoma"])
print("\nClassification Report:\n", report)
print(f"Test Accuracy: {accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc * 100:.2f}%")

Total images in dataframe: 1020
Raw label counts: (array([0, 1]), array([724, 296]))
Dataset Shape: (1020, 256, 256, 3)
Train Shape: (714, 256, 256, 3), Test Shape: (306, 256, 256, 3)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
Epoch 1/150


  self._warn_if_super_not_called()


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 656ms/step - accuracy: 0.5135 - loss: 21.0048
Epoch 1: val_accuracy improved from -inf to 0.70915, saving model to resnet50_model.keras
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 856ms/step - accuracy: 0.5135 - loss: 21.0009 - val_accuracy: 0.7092 - val_loss: 19.9523 - learning_rate: 1.0000e-04
Epoch 2/150
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - accuracy: 0.5400 - loss: 20.2499
Epoch 2: val_accuracy did not improve from 0.70915
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 126ms/step - accuracy: 0.5396 - loss: 20.2485 - val_accuracy: 0.7092 - val_loss: 19.5032 - learning_rate: 1.0000e-04
Epoch 3/150
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - accuracy: 0.5832 - loss: 19.7016
Epoch 3: val_accuracy did not improve from 0.70915
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 126ms/step - accuracy: 0.58