Link to Kaggle dataset: https://www.kaggle.com/datasets/deathtrooper/multichannel-glaucoma-benchmark-dataset/data

In [1]:
import zipfile
import pandas as pd
import keras
import os
import numpy as np
from keras.layers import Dense, Activation, Flatten, Dropout, GlobalAveragePooling2D, Conv2D, BatchNormalization, MaxPooling2D, Input, Concatenate, ReLU, AveragePooling2D, UpSampling2D, GlobalMaxPooling2D
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import SGD, Adam
from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications.mobilenet_v3 import preprocess_input 
from keras.layers import GlobalMaxPool2D

In [2]:
# Unzip files for processing
with zipfile.ZipFile("full-fundus.zip","r") as zip_ref:
    zip_ref.extractall("full-fundus")
    
with zipfile.ZipFile("blood-vessel.zip","r") as zip_ref:
    zip_ref.extractall("blood-vessel")


In [3]:
# Read in metadata, and filter for a specific subset of images
# TODO: will add more variety of images later
image_data = pd.read_csv('metadata.csv')
image_data = image_data[image_data['names'].str.contains('FIVES', case=True) | image_data['names'].str.contains('HAGIS', case=True)]
image_data = image_data[['types', 'fundus', 'names']]

In [4]:
# There are some invalid data types for relevant columns
print(image_data.info())

<class 'pandas.core.frame.DataFrame'>
Index: 410 entries, 6631 to 12448
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   types   410 non-null    int64 
 1   fundus  410 non-null    object
 2   names   410 non-null    object
dtypes: int64(1), object(2)
memory usage: 12.8+ KB
None


In [5]:
# Change datatypes to the desired values
image_data['types'] = image_data['types'].astype('string')
image_data['fundus'] = image_data['fundus'].astype('string')
image_data['names'] = image_data['names'].astype('string')
image_data['image_names'] = image_data['names'] + '.png'
print(image_data['image_names'])

6631     DR-HAGIS-1.png
6632     DR-HAGIS-2.png
6633     DR-HAGIS-3.png
6634     DR-HAGIS-4.png
6635     DR-HAGIS-5.png
              ...      
12444     FIVES-396.png
12445     FIVES-397.png
12446     FIVES-398.png
12447     FIVES-399.png
12448     FIVES-400.png
Name: image_names, Length: 410, dtype: string


In [6]:
print(image_data.info())

<class 'pandas.core.frame.DataFrame'>
Index: 410 entries, 6631 to 12448
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   types        410 non-null    string
 1   fundus       410 non-null    string
 2   names        410 non-null    string
 3   image_names  410 non-null    string
dtypes: string(4)
memory usage: 16.0 KB
None


In [7]:
# Randomize data 
image_data_random = image_data.sample(frac=1, random_state=1)

# Split into healthy and glaucoma positive sets
healthy = image_data_random[image_data_random['types'] == "0"]
glaucoma = image_data_random[image_data_random['types'] == "1"]

print(f"Number of healthy samples: {len(healthy)}")
print(f"Number of unhealthy samples: {len(glaucoma)}")

Number of healthy samples: 250
Number of unhealthy samples: 160


In [8]:
# Partition data into test and train sets
healthy_train_size = 175
glaucoma_train_size = 105

healthy_test_subset = image_data_random.head(len(healthy) - healthy_train_size)
glaucoma_test_subset = image_data_random.head(len(glaucoma) - glaucoma_train_size)
test_subset = pd.concat([healthy_test_subset, glaucoma_test_subset])

healthy_train_subset = image_data_random.tail(healthy_train_size)
glaucoma_train_subset = image_data_random.tail(glaucoma_train_size)
train_subset = pd.concat([healthy_train_subset, glaucoma_train_subset])

print(f"Test size: {len(test_subset)}")
print(f"Train size: {len(train_subset)}")

Test size: 130
Train size: 280


In [9]:
train_data_generator = ImageDataGenerator(preprocessing_function= preprocess_input)

flow_train_data = train_data_generator.flow_from_dataframe(dataframe=train_subset, 
                                            batch_size= 8, 
                                            shuffle=True, 
                                            x_col="image_names", 
                                            y_col="types", 
                                            validate_filenames=True, 
                                            target_size=(224, 224), 
                                            directory='full-fundus/full-fundus/', 
                                            color_mode='rgb')

test_data_generator = ImageDataGenerator(preprocessing_function= preprocess_input)

flow_test_data = train_data_generator.flow_from_dataframe(dataframe=test_subset, 
                                            batch_size= 1, 
                                            shuffle=False, 
                                            x_col="image_names", 
                                            y_col="types", 
                                            validate_filenames=True, 
                                            target_size=(224, 224), 
                                            directory='full-fundus/full-fundus/', 
                                            color_mode='rgb')



Found 280 validated image filenames belonging to 2 classes.
Found 130 validated image filenames belonging to 2 classes.


In [10]:
def conv_model(image_size):
    
    m_input = keras.Input(shape=(image_size, image_size, 3))
    transfer = keras.applications.MobileNetV3Large(
        weights='imagenet', include_top= False, input_tensor= m_input, alpha=0.75
    )
    m_output = Dropout(0.5)(transfer.output)
    m_output = Conv2D(filters=256, kernel_size=1)(m_output) 
    m_output = GlobalMaxPool2D()(m_output)
    m_output = Dropout(0.5)(m_output)
    m_output = Dense(2, activation='softmax')(m_output)

    return keras.Model(inputs=m_input, outputs=m_output)

model = conv_model(image_size=224)

# Reduce the learning rate if an epoch occurs where there is no improvement to the output of the loss function
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.8, patience=1, min_lr=1e-6)

# Using the Adam optimizer with binary cross entropy, compile the model using the given metrics
model.compile(optimizer= Adam(1e-5), 
              loss='binary_crossentropy', 
              metrics=['binary_accuracy',keras.metrics.AUC(),keras.metrics.Precision(), keras.metrics.Recall()])

trained_model = model.fit(flow_train_data, 
                    steps_per_epoch= len(flow_train_data) // 8,
                    validation_data= flow_test_data, 
                    validation_steps= len(flow_test_data), 
                    epochs=20, 
                    callbacks=[reduce_lr])

Epoch 1/20


  self._warn_if_super_not_called()


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 873ms/step - auc: 0.4747 - binary_accuracy: 0.4875 - loss: 4.7222 - precision: 0.4875 - recall: 0.4875 - val_auc: 0.3005 - val_binary_accuracy: 0.3538 - val_loss: 3.4263 - val_precision: 0.3538 - val_recall: 0.3538 - learning_rate: 1.0000e-05
Epoch 2/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 216ms/step - auc: 0.4089 - binary_accuracy: 0.4167 - loss: 6.7208 - precision: 0.4167 - recall: 0.4167 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.0000e-05
Epoch 3/20


2024-03-29 14:12:04.698651: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
  self.gen.throw(typ, value, traceback)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 636ms/step - auc: 0.4475 - binary_accuracy: 0.4542 - loss: 4.9864 - precision: 0.4542 - recall: 0.4542 - val_auc: 0.3098 - val_binary_accuracy: 0.3846 - val_loss: 3.3431 - val_precision: 0.3846 - val_recall: 0.3846 - learning_rate: 8.0000e-06
Epoch 4/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 199ms/step - auc: 0.2533 - binary_accuracy: 0.2792 - loss: 5.7350 - precision: 0.2792 - recall: 0.2792 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 6.4000e-06
Epoch 5/20


2024-03-29 14:12:07.649573: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 623ms/step - auc: 0.2666 - binary_accuracy: 0.2042 - loss: 6.4997 - precision: 0.2042 - recall: 0.2042 - val_auc: 0.3179 - val_binary_accuracy: 0.3846 - val_loss: 3.2877 - val_precision: 0.3846 - val_recall: 0.3846 - learning_rate: 5.1200e-06
Epoch 6/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 196ms/step - auc: 0.5743 - binary_accuracy: 0.5542 - loss: 4.0969 - precision: 0.5542 - recall: 0.5542 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 4.0960e-06
Epoch 7/20


2024-03-29 14:12:10.507976: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 600ms/step - auc: 0.4787 - binary_accuracy: 0.4958 - loss: 4.6294 - precision: 0.4958 - recall: 0.4958 - val_auc: 0.3239 - val_binary_accuracy: 0.4077 - val_loss: 3.2488 - val_precision: 0.4077 - val_recall: 0.4077 - learning_rate: 4.0960e-06
Epoch 8/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 196ms/step - auc: 0.4932 - binary_accuracy: 0.5083 - loss: 4.9508 - precision: 0.5083 - recall: 0.5083 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 3.2768e-06
Epoch 9/20


2024-03-29 14:12:13.294844: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m3/4[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 190ms/step - auc: 0.5135 - binary_accuracy: 0.5208 - loss: 4.9814 - precision: 0.5208 - recall: 0.5208

2024-03-29 14:12:13.881260: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 551ms/step - auc: 0.5114 - binary_accuracy: 0.5156 - loss: 4.6789 - precision: 0.5156 - recall: 0.5156 - val_auc: 0.3291 - val_binary_accuracy: 0.4077 - val_loss: 3.2238 - val_precision: 0.4077 - val_recall: 0.4077 - learning_rate: 2.6214e-06
Epoch 10/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 227ms/step - auc: 0.4732 - binary_accuracy: 0.5375 - loss: 4.5151 - precision: 0.5375 - recall: 0.5375 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 2.6214e-06
Epoch 11/20


2024-03-29 14:12:16.287599: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 598ms/step - auc: 0.5463 - binary_accuracy: 0.4667 - loss: 3.8188 - precision: 0.4667 - recall: 0.4667 - val_auc: 0.3347 - val_binary_accuracy: 0.4077 - val_loss: 3.2003 - val_precision: 0.4077 - val_recall: 0.4077 - learning_rate: 2.0972e-06
Epoch 12/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 199ms/step - auc: 0.5019 - binary_accuracy: 0.4750 - loss: 4.2038 - precision: 0.4750 - recall: 0.4750 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 2.0972e-06
Epoch 13/20


2024-03-29 14:12:19.069304: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 664ms/step - auc: 0.5147 - binary_accuracy: 0.5167 - loss: 5.0180 - precision: 0.5167 - recall: 0.5167 - val_auc: 0.3428 - val_binary_accuracy: 0.4077 - val_loss: 3.1790 - val_precision: 0.4077 - val_recall: 0.4077 - learning_rate: 1.6777e-06
Epoch 14/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 199ms/step - auc: 0.4897 - binary_accuracy: 0.4667 - loss: 4.0900 - precision: 0.4667 - recall: 0.4667 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.3422e-06
Epoch 15/20


2024-03-29 14:12:22.057989: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 601ms/step - auc: 0.3825 - binary_accuracy: 0.3792 - loss: 5.4520 - precision: 0.3792 - recall: 0.3792 - val_auc: 0.3498 - val_binary_accuracy: 0.4077 - val_loss: 3.1622 - val_precision: 0.4077 - val_recall: 0.4077 - learning_rate: 1.0737e-06
Epoch 16/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 193ms/step - auc: 0.4698 - binary_accuracy: 0.5000 - loss: 4.0032 - precision: 0.5000 - recall: 0.5000 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.0000e-06
Epoch 17/20


2024-03-29 14:12:24.835847: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 603ms/step - auc: 0.5905 - binary_accuracy: 0.5125 - loss: 3.5563 - precision: 0.5125 - recall: 0.5125 - val_auc: 0.3557 - val_binary_accuracy: 0.4077 - val_loss: 3.1473 - val_precision: 0.4077 - val_recall: 0.4077 - learning_rate: 1.0000e-06
Epoch 18/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 137ms/step - auc: 0.3258 - binary_accuracy: 0.4219 - loss: 4.6117 - precision: 0.4219 - recall: 0.4219 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.0000e-06
Epoch 19/20


2024-03-29 14:12:27.443485: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-03-29 14:12:27.449850: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 608ms/step - auc: 0.5750 - binary_accuracy: 0.6417 - loss: 4.2815 - precision: 0.6417 - recall: 0.6417 - val_auc: 0.3621 - val_binary_accuracy: 0.4231 - val_loss: 3.1358 - val_precision: 0.4231 - val_recall: 0.4231 - learning_rate: 1.0000e-06
Epoch 20/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 210ms/step - auc: 0.5051 - binary_accuracy: 0.5125 - loss: 4.3681 - precision: 0.5125 - recall: 0.5125 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.0000e-06


2024-03-29 14:12:30.542519: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


In [11]:
# Demonstration:
img_path = 'full-fundus/full-fundus/OIA-ODIR-TEST-ONLINE-252.png'
img = image.load_img(img_path, target_size=(224, 224))
 
img_array_representation = np.expand_dims(image.img_to_array(img), axis=0)
img_array_representation = preprocess_input(img_array_representation)  # Assuming preprocess_input is defined in your code

prediction = model.predict(img_array_representation)
print(prediction)
# Interpret the prediction
if prediction[0][0] > prediction[0][1]:
    print(f"Prediction: Glaucoma, with {prediction[0][0] * 100}% confidence")
else:
    print(f"Prediction: Healthy, with {prediction[0][1] * 100}% confidence")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 340ms/step
[[0.14959124 0.85040873]]
Prediction: Healthy, with 85.04087328910828% confidence
