Link to Kaggle dataset: https://www.kaggle.com/datasets/deathtrooper/multichannel-glaucoma-benchmark-dataset/data

In [1]:
import zipfile
import pandas as pd
import keras
import os
from keras.layers import Dense, Activation, Flatten, Dropout, GlobalAveragePooling2D, Conv2D, BatchNormalization, MaxPooling2D, Input, Concatenate, ReLU, AveragePooling2D, UpSampling2D, GlobalMaxPooling2D
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import SGD, Adam
from keras.preprocessing import image

In [2]:
# Unzip files for processing
with zipfile.ZipFile("full-fundus.zip","r") as zip_ref:
    zip_ref.extractall("full-fundus")
    
with zipfile.ZipFile("blood-vessel.zip","r") as zip_ref:
    zip_ref.extractall("blood-vessel")


In [3]:
# Read in metadata, and filter for a specific subset of images
# TODO: will add more variety of images later
image_data = pd.read_csv('metadata.csv')
image_data = image_data[image_data['names'].str.contains('FIVES', case=False)]
image_data = image_data[['types', 'fundus', 'names']]

In [4]:
# There are some invalid data types for relevant columns
print(image_data.info())

<class 'pandas.core.frame.DataFrame'>
Index: 400 entries, 12049 to 12448
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   types   400 non-null    int64 
 1   fundus  400 non-null    object
 2   names   400 non-null    object
dtypes: int64(1), object(2)
memory usage: 12.5+ KB
None


In [5]:
# Change datatypes to the desired values
image_data['types'] = image_data['types'].astype('string')
image_data['fundus'] = image_data['fundus'].astype('string')
image_data['names'] = image_data['names'].astype('string')
image_data['image_names'] = image_data['names'] + '.png'
print(image_data['image_names'])

12049      FIVES-1.png
12050      FIVES-2.png
12051      FIVES-3.png
12052      FIVES-4.png
12053      FIVES-5.png
             ...      
12444    FIVES-396.png
12445    FIVES-397.png
12446    FIVES-398.png
12447    FIVES-399.png
12448    FIVES-400.png
Name: image_names, Length: 400, dtype: string


In [6]:
print(image_data.info())

<class 'pandas.core.frame.DataFrame'>
Index: 400 entries, 12049 to 12448
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   types        400 non-null    string
 1   fundus       400 non-null    string
 2   names        400 non-null    string
 3   image_names  400 non-null    string
dtypes: string(4)
memory usage: 15.6 KB
None


In [7]:
# Randomize data 
image_data_random = image_data.sample(frac=1, random_state=1)

# Split into healthy and glaucoma positive sets
healthy = image_data_random[image_data_random['types'] == "0"]
glaucoma = image_data_random[image_data_random['types'] == "1"]

print(f"Number of healthy samples: {len(healthy)}")
print(f"Number of unhealthy samples: {len(glaucoma)}")

Number of healthy samples: 250
Number of unhealthy samples: 150


In [8]:
# Partition data into test and train sets
healthy_train_size = 175
glaucoma_train_size = 105

healthy_test_subset = image_data_random.head(len(healthy) - healthy_train_size)
glaucoma_test_subset = image_data_random.head(len(glaucoma) - glaucoma_train_size)
test_subset = pd.concat([healthy_test_subset, glaucoma_test_subset])

healthy_train_subset = image_data_random.tail(healthy_train_size)
glaucoma_train_subset = image_data_random.tail(glaucoma_train_size)
train_subset = pd.concat([healthy_train_subset, glaucoma_train_subset])

print(f"Test size: {len(test_subset)}")
print(f"Train size: {len(train_subset)}")

Test size: 120
Train size: 280


In [9]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications.mobilenet_v3 import preprocess_input 

train_data_generator = ImageDataGenerator(preprocessing_function= preprocess_input)

flow_train_data = train_data_generator.flow_from_dataframe(dataframe=train_subset, 
                                            batch_size= 8, 
                                            shuffle=True, 
                                            x_col="image_names", 
                                            y_col="types", 
                                            validate_filenames=True, 
                                            target_size=(224, 224), 
                                            directory='full-fundus/full-fundus/', 
                                            color_mode='rgb')

test_data_generator = ImageDataGenerator(preprocessing_function= preprocess_input)

flow_test_data = train_data_generator.flow_from_dataframe(dataframe=test_subset, 
                                            batch_size= 1, 
                                            shuffle=False, 
                                            x_col="image_names", 
                                            y_col="types", 
                                            validate_filenames=True, 
                                            target_size=(224, 224), 
                                            directory='full-fundus/full-fundus/', 
                                            color_mode='rgb')



Found 280 validated image filenames belonging to 2 classes.
Found 120 validated image filenames belonging to 2 classes.


In [10]:
from keras.layers import GlobalMaxPool2D

def conv_model(image_size):
    model_input = keras.Input(shape=(image_size, image_size, 3))
    transfer = keras.applications.MobileNetV3Large(
        weights='imagenet', include_top=False, input_tensor=model_input, alpha=0.75
    )

    model_output = Dense(2, activation='sigmoid')(Dropout(0.5)(GlobalMaxPool2D()(Conv2D(filters=256, kernel_size=1)(Dropout(0.5)(transfer.output)))))

    return keras.Model(inputs=model_input, outputs=model_output)

model = conv_model(image_size=224)
                     
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.8, patience=1, min_lr=1e-6)



model.compile(optimizer= Adam(1e-5), 
              loss='binary_crossentropy', 
              metrics=['binary_accuracy',keras.metrics.AUC(),keras.metrics.Precision(), keras.metrics.Recall()])


trained_model = model.fit(flow_train_data, 
                    steps_per_epoch= len(flow_train_data) // 8,
                    validation_data= flow_test_data, 
                    validation_steps= len(flow_test_data), 
                    epochs=8, 
                    callbacks=[reduce_lr])


Epoch 1/8


  self._warn_if_super_not_called()


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 802ms/step - auc: 0.5827 - binary_accuracy: 0.5063 - loss: 4.1198 - precision: 0.5045 - recall: 0.8917 - val_auc: 0.3353 - val_binary_accuracy: 0.4625 - val_loss: 1.6265 - val_precision: 0.4737 - val_recall: 0.6750 - learning_rate: 1.0000e-05
Epoch 2/8
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 236ms/step - auc: 0.4259 - binary_accuracy: 0.5271 - loss: 4.5874 - precision: 0.5173 - recall: 0.8250 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 1.0000e-05
Epoch 3/8


2024-03-29 09:47:57.973967: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
  self.gen.throw(typ, value, traceback)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 587ms/step - auc: 0.4371 - binary_accuracy: 0.4250 - loss: 4.4563 - precision: 0.4479 - recall: 0.6375 - val_auc: 0.3356 - val_binary_accuracy: 0.4708 - val_loss: 1.5994 - val_precision: 0.4793 - val_recall: 0.6750 - learning_rate: 8.0000e-06
Epoch 4/8
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 237ms/step - auc: 0.5120 - binary_accuracy: 0.5688 - loss: 3.7787 - precision: 0.5463 - recall: 0.8083 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 6.4000e-06
Epoch 5/8


2024-03-29 09:48:00.937318: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 580ms/step - auc: 0.5143 - binary_accuracy: 0.5375 - loss: 3.8440 - precision: 0.5333 - recall: 0.7167 - val_auc: 0.3360 - val_binary_accuracy: 0.4792 - val_loss: 1.5811 - val_precision: 0.4850 - val_recall: 0.6750 - learning_rate: 5.1200e-06
Epoch 6/8
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 202ms/step - auc: 0.5909 - binary_accuracy: 0.6438 - loss: 3.3726 - precision: 0.5926 - recall: 0.9125 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 4.0960e-06
Epoch 7/8


2024-03-29 09:48:03.710004: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 596ms/step - auc: 0.4493 - binary_accuracy: 0.4396 - loss: 5.5110 - precision: 0.4521 - recall: 0.5958 - val_auc: 0.3383 - val_binary_accuracy: 0.4708 - val_loss: 1.5701 - val_precision: 0.4788 - val_recall: 0.6583 - learning_rate: 4.0960e-06
Epoch 8/8
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 236ms/step - auc: 0.3614 - binary_accuracy: 0.4167 - loss: 5.5175 - precision: 0.4479 - recall: 0.7000 - val_auc: 0.0000e+00 - val_binary_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - learning_rate: 3.2768e-06


2024-03-29 09:48:06.640029: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
