In [1]:
import os
import pandas as pd
import numpy as np
import cv2
import tensorflow as tf

from sklearn.utils import shuffle, resample
from tqdm import tqdm
from sklearn.model_selection import train_test_split

from keras.utils import load_img, img_to_array, to_categorical
from keras.models import model_from_json, Model

In [2]:
# Define constants
IMAGE_SIZE = (256, 256)  # Example size, adjust to your models
NUM_CLASSES = 2

In [3]:
# OD and OD
with open('../models_segmentation/ODOC.json', 'r') as json_file:
    model_json = json_file.read()
model = model_from_json(model_json)
model.load_weights('../models_segmentation/ODOC.weights.h5')
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

2024-12-09 11:38:50.985952: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-12-09 11:38:50.986083: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M2 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



In [4]:
# Define function to preprocess images
def preprocess_image(image_path):
    img = load_img(image_path, target_size=IMAGE_SIZE)
    img = img_to_array(img)
    img = img / 255.0
    return img

def generate_feature_maps(image_path):
    # Apply the specific preprocessing method for each model
    img = preprocess_image(image_path)
    img = np.expand_dims(img, axis=0)
    
    mask = model.predict(img, verbose=False)  # Predict mask    
    mask_discrete = np.zeros_like(mask)  # Initialize with zeros
    mask_discrete[(mask >= 0.25) & (mask < 0.75)] = 0.5  # Set to 0.5 where within range
    mask_discrete[mask >= 0.75] = 1
    # Combine masks into a single feature map
    combined = np.transpose(mask_discrete, (1, 2, 0, 3))
    combined = np.squeeze(combined)
    
    return combined

In [5]:
# Function to load images and labels - ORIGA
def load_data(img_path, csv_path):
    df = pd.read_csv(csv_path)
    X, y = [], []
    
    for row in tqdm(df.itertuples(), total=len(df)):
        img = generate_feature_maps(os.path.join(img_path, row[2])) 
        X.append(img)
        y.append(row[5])
        
    return np.array(X), np.array(y)

def load_balanced_data(img_path, csv_path):
    df = pd.read_csv(csv_path)
    X, y = [], []

    # Define target sample sizes for each label
    target_sizes = {
        0: 200,
        1: 200
    }

    # Initialize a list to hold the sliced DataFrames
    sliced_dfs = []

    # Slice the DataFrame for each label
    for label, size in target_sizes.items():
        class_df = df[df['Glaucoma'] == label]
        if len(class_df) >= size:
            # Undersample if the class size is greater than or equal to the target size
            sliced_df = class_df.sample(size, random_state=42)
        else:
            # Oversample if the class size is smaller than the target size
            sliced_df = resample(class_df, replace=True, n_samples=size, random_state=42)
        sliced_dfs.append(sliced_df)

    # Combine all sliced DataFrames
    final_df = pd.concat(sliced_dfs)

    # Shuffle the final dataset
    final_df = shuffle(final_df, random_state=42)
    
    for row in tqdm(final_df.itertuples(), total=len(final_df)):
        img = generate_feature_maps(os.path.join(img_path, row[2]))
        X.append(img)
        y.append(row[5])
        
    return np.array(X), np.array(y)

# Function to load images and labels - REFUGE
def load_dataset_refuge(train_dir, val_dir):
    X = []
    y = []
    
    train_img_dir = os.path.join(train_dir, 'Images')
    df_train = pd.read_csv(os.path.join(train_dir, 'output.csv'))
    
    val_img_dir = os.path.join(val_dir, 'Images')
    df_val = pd.read_csv(os.path.join(val_dir, 'output.csv'))
    
    # iterate through the train df
    for row in tqdm(df_train.itertuples(), total=len(df_train)):
        img_path = os.path.join(train_img_dir, row[1])
        img = generate_feature_maps(img_path)
        X.append(img)
        y.append(row[6])
    
    # iterate through the val df
    for row in tqdm(df_val.itertuples(), total=len(df_val)):
        img_path = os.path.join(val_img_dir, row[1])
        img = generate_feature_maps(img_path)
        X.append(img)
        y.append(row[2])

    return np.array(X), np.array(y)

# Function to load images and labels - ORIGA
def load_data_origa(img_path, csv_path):
    df = pd.read_csv(csv_path)
    X, y = [], []
    
    for row in tqdm(df.itertuples(), total=len(df)):
        img = generate_feature_maps(os.path.join(img_path, row[2])) 
        X.append(img)
        y.append(row[5])
        
    return np.array(X), np.array(y)

In [6]:
csv_path = 'path/to/ORIGA/OrigaList.csv'
img_path = 'path/to/ORIGA/Images'

X, y = load_data_origa(img_path, csv_path)

# train test validation split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

# convert labels to one-hot encoding
y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
y_val = to_categorical(y_val, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

print(f"Train set: {X_train.shape}, {y_train.shape}")
print(f"Validation set: {X_val.shape}, {y_val.shape}")
print(f"Test set: {X_test.shape}, {y_test.shape}")

  0%|          | 0/650 [00:00<?, ?it/s]2024-12-09 11:39:17.172029: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2024-12-09 11:39:17.250741: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
100%|██████████| 650/650 [00:42<00:00, 15.17it/s]


Train set: (455, 256, 256), (455, 2)
Validation set: (98, 256, 256), (98, 2)
Test set: (97, 256, 256), (97, 2)


In [7]:
classification_model = tf.keras.Sequential([
    # First Conv2D layer
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 1)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),  # Optional pooling layer

    # Second Conv2D layer
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),  # Optional pooling layer

    # Third Conv2D layer
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    
    tf.keras.layers.Conv2D(256, (3, 3), activation='relu'),

    # Global pooling to reduce to 2D tensor
    tf.keras.layers.GlobalAveragePooling2D(),

    # Fully connected layers
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(2, activation='softmax')  # Output layer with 5 classes
])

classification_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
classification_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 32)      320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 127, 127, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 125, 125, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 62, 62, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 60, 60, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 30, 30, 128)      0

In [8]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',  # Metric to monitor
    patience=20,          # Number of epochs with no improvement before stopping
    restore_best_weights=True  # Restore weights from the epoch with the best validation loss
)

In [9]:
# Train the model
history = classification_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=200,
    callbacks=[early_stopping]
)

Epoch 1/200


2024-12-09 11:40:32.726778: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2024-12-09 11:40:37.902329: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 7

In [10]:
# Evaluate the model on the val set
val_loss, val_accuracy = classification_model.evaluate(X_val, y_val, verbose=2)
print(f"Val Accuracy: {val_accuracy * 100:.2f}%")
print(f"Val Loss: {val_loss:.4f}")

4/4 - 0s - loss: 0.5359 - accuracy: 0.7041 - 348ms/epoch - 87ms/step
Val Accuracy: 70.41%
Val Loss: 0.5359


In [11]:
# Evaluate the model on the test set
test_loss, test_accuracy = classification_model.evaluate(X_test, y_test, verbose=2)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")

4/4 - 1s - loss: 0.4940 - accuracy: 0.7629 - 672ms/epoch - 168ms/step
Test Accuracy: 76.29%
Test Loss: 0.4940


In [14]:
# Save the model
model_json = classification_model.to_json()
with open("../models_features/CNN_ODOC.json", "w") as json_file:
    json_file.write(model_json)

# Saving the model and weights
classification_model.save_weights('../models_features/CNN_ODOC.weights.h5')