In [1]:
import os

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm

from sklearn.model_selection import GroupShuffleSplit, train_test_split
from sklearn.utils import shuffle, resample

from keras.utils import load_img, img_to_array, to_categorical
from keras.models import model_from_json, Model
from keras.optimizers import SGD, RMSprop, Adam

In [2]:
# Read the image-level grading labels into a DataFrame
labels_csv_path = 'path/to/DDR/DR_grading/labels.csv'
df = pd.read_csv(labels_csv_path)

# Discard every row whose label is 5
# NOTE(review): label 5 is presumably the "ungradable" class of the DDR dataset — confirm
df = df.loc[df['label'].ne(5)]

# Shared constants
IMAGE_SIZE = (256, 256)  # Resolution images are loaded at; adjust to your models
NUM_CLASSES = 5  # Grading levels 0-4 (rows labelled 5 are dropped above)

In [None]:
SEGMENTATION_MODEL_DIR = '../models_segmentation'


def load_segmentation_model(lesion_code, model_dir=SEGMENTATION_MODEL_DIR):
    """Rebuild one lesion-segmentation model from its JSON architecture and weights.

    Reads ``<model_dir>/<lesion_code>.json`` for the architecture and
    ``<model_dir>/<lesion_code>.weights.h5`` for the weights, then compiles the
    model with the same settings for every lesion type.

    Args:
        lesion_code: File stem of the model, e.g. ``'EX'``, ``'SE'``, ``'HE'`` or ``'MA'``.
        model_dir: Directory that holds the ``.json`` / ``.weights.h5`` pair.

    Returns:
        The compiled Keras model.
    """
    architecture_path = os.path.join(model_dir, f'{lesion_code}.json')
    weights_path = os.path.join(model_dir, f'{lesion_code}.weights.h5')

    with open(architecture_path, 'r') as json_file:
        model = model_from_json(json_file.read())
    model.load_weights(weights_path)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


# One segmentation model per lesion type
model_hard_exudates = load_segmentation_model('EX')   # EX
model_soft_exudates = load_segmentation_model('SE')   # SE
model_haemorrhages = load_segmentation_model('HE')    # HE
model_microaneurysms = load_segmentation_model('MA')  # MA


In [4]:
def enhance_img(img):
    """Denoise an RGB image and boost its local contrast.

    The image is cast to ``uint8``, median-filtered with a 3x3 kernel, converted
    to LAB, its L channel is equalised with CLAHE (clip limit 6.0, 8x8 tiles),
    and the result is converted back to RGB.

    Args:
        img: RGB image array; it is cast to ``np.uint8`` before filtering.

    Returns:
        The enhanced RGB image as produced by ``cv2.cvtColor``.
    """
    # Median filter (3x3) on the uint8 version of the input
    denoised = cv2.medianBlur(img.astype(np.uint8), ksize=3)

    # Work in LAB so that only the lightness channel is equalised
    lightness, chroma_a, chroma_b = cv2.split(cv2.cvtColor(denoised, cv2.COLOR_RGB2LAB))

    # CLAHE on L; the A and B channels are passed through untouched
    equalizer = cv2.createCLAHE(clipLimit=6.0, tileGridSize=(8, 8))
    recombined = cv2.merge((equalizer.apply(lightness), chroma_a, chroma_b))

    # Back to RGB for the downstream model
    return cv2.cvtColor(recombined, cv2.COLOR_LAB2RGB)

def get_mask(path, target_size):
    """Load an image and blend a colour-threshold mask over it.

    The image is read with OpenCV, resized, and thresholded in HSV space. Two
    ranges are combined: pixels inside ``[0, 100, 50]..[12, 250, 250]`` and
    pixels *outside* ``[10, 60, 70]..[180, 255, 255]``. The combined mask is
    blended onto the RGB image with weights 0.7 (image) and 0.3 (mask).

    Args:
        path: Path of the image file to read.
        target_size: Size passed to ``cv2.resize``.

    Returns:
        The blended RGB image returned by ``cv2.addWeighted``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``path``. OpenCV returns
            ``None`` in that case instead of raising, which would otherwise
            surface as an unrelated error inside ``cv2.resize``.
    """
    image = cv2.imread(path)
    if image is None:
        raise FileNotFoundError(f"Image is missing or not a readable image file: {path}")

    image = cv2.resize(image, target_size)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Convert image to HSV (Hue, Saturation, Value) color space
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # First range: low-hue reds
    lower_red = np.array([0, 100, 50])
    upper_red = np.array([12, 250, 250])
    mask1 = cv2.inRange(hsv_image, lower_red, upper_red)

    # Second range is inverted: it keeps every pixel that falls OUTSIDE these bounds
    lower_red2 = np.array([10, 60, 70])
    upper_red2 = np.array([180, 255, 255])
    mask2 = cv2.bitwise_not(cv2.inRange(hsv_image, lower_red2, upper_red2))

    # Combine masks. NOTE(review): with uint8 masks this addition wraps where both
    # are 255 (giving 254); kept unchanged because the segmentation model presumably
    # expects exactly this preprocessing — confirm before switching to bitwise_or.
    red_mask = mask1 + mask2

    # Overlay the mask on the original image (all three mask channels are equal,
    # so the BGR/RGB channel order of the mask does not matter)
    red_mask_3ch = cv2.cvtColor(red_mask, cv2.COLOR_GRAY2BGR)
    mask = cv2.addWeighted(image_rgb, 0.7, red_mask_3ch, 0.3, 0)

    return mask

# Per-lesion preprocessing functions
def preprocess_image_EX(image_path):
    """Load an image at ``IMAGE_SIZE`` and scale its values by 1/255 (hard-exudate model input)."""
    pixels = img_to_array(load_img(image_path, target_size=IMAGE_SIZE))
    return pixels / 255.0

def preprocess_image_SE(image_path):
    """Load an image at ``IMAGE_SIZE`` and scale its values by 1/255 (soft-exudate model input)."""
    pixels = img_to_array(load_img(image_path, target_size=IMAGE_SIZE))
    return pixels / 255.0

def preprocess_image_HE(image_path):
    """Load an image at ``IMAGE_SIZE``, run ``enhance_img`` on it, then scale by 1/255 (haemorrhage model input)."""
    raw_pixels = img_to_array(load_img(image_path, target_size=IMAGE_SIZE))
    # Contrast enhancement runs on the unscaled pixel values, before normalisation
    enhanced = enhance_img(raw_pixels)
    return enhanced / 255.0

def preprocess_image_MA(image_path):
    """Build the microaneurysm model input for one image.

    The image is loaded and overlaid with its colour-threshold mask by
    ``get_mask`` (resized to ``IMAGE_SIZE``), converted with ``img_to_array``
    and scaled by 1/255.

    Args:
        image_path: Path of the image file.

    Returns:
        The scaled image array.
    """
    img = get_mask(image_path, target_size=IMAGE_SIZE)
    img = img_to_array(img)
    img = img / 255.0
    return img

def generate_feature_maps(image_path, size=(128, 128), threshold=0.1):
    """Run the four lesion-segmentation models on one image and stack their masks.

    Each model receives the image through its own preprocessing function. Its
    prediction is binarised with ``threshold``, resized to ``size`` with
    ``tf.image.resize``, and placed in one channel of the result, in this order:
    hard exudates, soft exudates, haemorrhages, microaneurysms.

    Args:
        image_path: Path of the image file.
        size: ``(height, width)`` each mask is resized to.
        threshold: Predictions strictly above this value count as lesion pixels.

    Returns:
        Array of shape ``(height, width, 4)``.
        NOTE(review): assumes every segmentation model outputs one channel,
        i.e. predictions of shape ``(1, h, w, 1)`` — confirm against the models.
    """
    # (model, preprocessing) pairs, listed in output-channel order
    pipelines = (
        (model_hard_exudates, preprocess_image_EX),
        (model_soft_exudates, preprocess_image_SE),
        (model_haemorrhages, preprocess_image_HE),
        (model_microaneurysms, preprocess_image_MA),
    )

    channels = []
    for model, preprocess in pipelines:
        # Add the batch dimension the model expects
        batch = np.expand_dims(preprocess(image_path), axis=0)
        probabilities = model.predict(batch, verbose=False)
        binary_mask = (probabilities > threshold).astype(int)
        channels.append(tf.image.resize(binary_mask, size))

    # Stack on the channel axis and drop only the batch dimension. A blanket
    # np.squeeze would also collapse a spatial dimension of length 1.
    return np.concatenate(channels, axis=-1)[0]


In [None]:
# Number of rows wanted per grade (the same target for grades 0-4)
target_sizes = {grade: 500 for grade in range(5)}

# One balanced slice of the DataFrame per grade
sliced_dfs = []
for label, size in target_sizes.items():
    class_df = df.loc[df['label'] == label]
    needs_oversampling = len(class_df) < size
    sliced_df = (
        # Too few rows: draw with replacement up to the target
        resample(class_df, replace=True, n_samples=size, random_state=42)
        if needs_oversampling
        # Enough rows: draw the target number without replacement
        else class_df.sample(size, random_state=42)
    )
    sliced_dfs.append(sliced_df)

# Merge the per-grade slices and shuffle the rows
final_df = shuffle(pd.concat(sliced_dfs), random_state=42)

final_df['label'].value_counts()

In [None]:
# Build the classifier inputs: one 4-channel lesion feature map per row of final_df
dataset_path = 'path/to/DDR/DR_grading/all'

X_features = []
y_labels = []

# Rows drawn with replacement repeat the same image. Cache the feature map per
# path so the four segmentation models run once per distinct image.
feature_cache = {}

for image_name, label in tqdm(zip(final_df['image'], final_df['label']),
                              total=len(final_df), desc="Processing rows"):
    image_path = os.path.join(dataset_path, image_name)

    if image_path not in feature_cache:
        feature_cache[image_path] = generate_feature_maps(image_path, size=(256, 256))

    X_features.append(feature_cache[image_path])
    y_labels.append(label)

# Row i of X / y corresponds to row i of final_df
X = np.array(X_features)
y = np.array(y_labels)

X.shape, y.shape

In [7]:
y_present = (y > 0).astype(int)  # Binary: 0 (no disease), 1 (disease present)
y_grades = np.where(y_present == 1, y, 0)  # Multiclass: 1-4 if disease present, 0 otherwise
y_grades = to_categorical(y_grades, num_classes=5)

# final_df was oversampled with replacement, so one image can occur in several
# rows. A plain random split would put copies of that image on both sides and
# inflate validation/test scores. Splitting by image id keeps all copies together.
image_ids = final_df['image'].to_numpy()
assert len(image_ids) == len(X), "final_df and X must be row-aligned"

# 80/20 train+val vs test split, measured in distinct images
# (row counts can differ slightly from an exact 80/20)
outer_split = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_val_idx, test_idx = next(outer_split.split(X, groups=image_ids))

# 90/10 train vs val split of the remaining images; indices are relative to train_val_idx
inner_split = GroupShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
train_rel, val_rel = next(inner_split.split(train_val_idx, groups=image_ids[train_val_idx]))
train_idx, val_idx = train_val_idx[train_rel], train_val_idx[val_rel]

X_train, X_val, X_test = X[train_idx], X[val_idx], X[test_idx]
y_present_train, y_present_val, y_present_test = (
    y_present[train_idx], y_present[val_idx], y_present[test_idx])
y_grades_train, y_grades_val, y_grades_test = (
    y_grades[train_idx], y_grades[val_idx], y_grades[test_idx])

# No image may appear in more than one split
assert not set(image_ids[train_idx]) & set(image_ids[val_idx])
assert not (set(image_ids[train_idx]) | set(image_ids[val_idx])) & set(image_ids[test_idx])

In [None]:
# Sanity check: shapes of the feature arrays in the train / validation / test splits
X_train.shape, X_val.shape, X_test.shape

In [None]:
# Sanity check: shapes of both label arrays for the train and validation splits
# (the test-split labels are not displayed here)
y_present_train.shape, y_grades_train.shape, y_present_val.shape, y_grades_val.shape

In [11]:
# Two-headed CNN on the 4-channel lesion feature maps:
#   * 'present_output' - sigmoid, disease present vs absent
#   * 'grading_output' - softmax over 5 grades
# Layers are created in the same order as before, so auto-generated layer
# names stay the same.

# Input layer
input_layer = tf.keras.layers.Input(shape=(256, 256, 4))

# Shared convolutional trunk: three conv + max-pool stages, then a final conv
features = input_layer
for filters in (32, 64, 128):
    features = tf.keras.layers.Conv2D(filters, (3, 3), activation='relu')(features)
    features = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(features)
features = tf.keras.layers.Conv2D(256, (3, 3), activation='relu')(features)
features = tf.keras.layers.GlobalAveragePooling2D()(features)

# First fully connected layer, shared by both heads
shared = tf.keras.layers.Dense(2048, activation='relu')(features)
shared = tf.keras.layers.Dropout(0.3)(shared)

# Grading branch: a funnel of dense layers, each followed by dropout.
# Fix: the branch now starts from the dropped-out shared layer. It was
# previously wired to the pre-dropout tensor, so the first dropout never
# applied to this head. Dropout has no weights, so the weight layout is unchanged.
grading = shared
for units in (1024, 512, 256, 128, 64, 32, 16):
    grading = tf.keras.layers.Dense(units, activation='relu')(grading)
    grading = tf.keras.layers.Dropout(0.3)(grading)

# Output 1: Presence of disease (binary classification)
present_output = tf.keras.layers.Dense(1, activation='sigmoid', name='present_output')(shared)

# Output 2: Disease grading (multi-class classification)
grade_output = tf.keras.layers.Dense(5, activation='softmax', name='grading_output')(grading)

classification_model = tf.keras.Model(inputs=input_layer, outputs=[present_output, grade_output])

# Compile the model: one loss per output, equally weighted
losses = {
    'present_output': 'binary_crossentropy',
    'grading_output': 'categorical_crossentropy'
}
loss_weights = {
    'present_output': 1.0,  # Weight for disease presence
    'grading_output': 1.0  # Weight for disease grading
}
# Metrics are given per output.
# NOTE(review): a flat ['accuracy'] list is reportedly rejected for multi-output
# models by some Keras 3 releases — confirm against the installed version.
metrics = {
    'present_output': ['accuracy'],
    'grading_output': ['accuracy']
}

# Adam optimizer with a learning rate of 0.001
classification_model.compile(optimizer=Adam(learning_rate=0.001), loss=losses,
                             loss_weights=loss_weights, metrics=metrics)

classification_model.summary()

In [12]:
# Stop when the validation loss has not improved for 10 consecutive epochs,
# then roll the model back to the weights of its best epoch
early_stopping = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor='val_loss',
)

In [None]:
# Train both heads jointly; early stopping ends the run once val_loss plateaus.
# steps_per_epoch is intentionally not set: with in-memory arrays Keras derives
# it from batch_size, while int(len(X_train) / 32) dropped the final partial batch.
# NOTE(review): an explicit steps_per_epoch on array data reportedly also causes
# "input ran out of data" warnings on some Keras versions — confirm.
# The History object is kept so the learning curves can be inspected afterwards.
history = classification_model.fit(
    X_train,
    {'present_output': y_present_train, 'grading_output': y_grades_train},
    batch_size=32,
    epochs=100,
    callbacks=[early_stopping],
    validation_data=(X_val, {'present_output': y_present_val, 'grading_output': y_grades_val}),
    verbose=1,
)

In [None]:
# Evaluate the model on the validation split.
# return_dict=True gives results keyed by metric name, so the report does not
# depend on the order in which Keras lists losses and metrics.
scores = classification_model.evaluate(
    X_val, {'present_output': y_present_val, 'grading_output': y_grades_val},
    return_dict=True)
print("Val Accuracy (Presence): %.2f%%" % (scores['present_output_accuracy'] * 100))  # Accuracy for present_output
print("Val Loss (Presence): %.2f" % scores['present_output_loss'])
print("Val Accuracy (Grading): %.2f%%" % (scores['grading_output_accuracy'] * 100))  # Accuracy for grading_output
print("Val Loss (Grading): %.2f" % scores['grading_output_loss'])

In [None]:
# Evaluate the model on the held-out test split.
# return_dict=True gives results keyed by metric name, so the report does not
# depend on the order in which Keras lists losses and metrics.
scores = classification_model.evaluate(
    X_test, {'present_output': y_present_test, 'grading_output': y_grades_test},
    return_dict=True)
print("Test Accuracy (Presence): %.2f%%" % (scores['present_output_accuracy'] * 100))  # Accuracy for present_output
print("Test Loss (Presence): %.2f" % scores['present_output_loss'])
print("Test Accuracy (Grading): %.2f%%" % (scores['grading_output_accuracy'] * 100))  # Accuracy for grading_output
print("Test Loss (Grading): %.2f" % scores['grading_output_loss'])

In [16]:
# Persist the trained classifier as an architecture JSON plus a weights file
output_dir = '../models_features'
# Create the target directory first so a fresh checkout does not fail on open()
os.makedirs(output_dir, exist_ok=True)

# Architecture only (no weights, no optimizer state)
model_json = classification_model.to_json()
with open(os.path.join(output_dir, 'CNN_all_features.json'), "w") as json_file:
    json_file.write(model_json)

# Weights only
classification_model.save_weights(os.path.join(output_dir, 'CNN_all_features.weights.h5'))