In [None]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import os
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
# Colab-only step: attach Google Drive so the files stored under "My Drive"
# become reachable through the notebook's filesystem.
from google.colab import drive

# force_remount=True re-attaches Drive even when it is already mounted.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Number of target classes: the macular-edema risk grade takes the values
# 0, 1 and 2, so the classifier needs three outputs (not ten).
NUM_CLASSES = 3

# Define the CNN model: three convolution stages (the first two followed by
# 2x2 max-pooling) feeding a small fully connected classifier head.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    # Dense layers on top
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # One raw logit per class; no softmax here because the loss below is
    # configured with from_logits=True.
    layers.Dense(NUM_CLASSES),
])

# Compile the model. Sparse categorical cross-entropy takes integer class
# ids as targets, so the labels do not need to be one-hot encoded.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [None]:
def load_and_preprocess_image(path, size=(32, 32)):
    """Load an image file and return it as a normalized RGB array.

    Args:
        path: Path of the image file to open with PIL.
        size: Target ``(width, height)`` in pixels, passed to
            ``Image.resize``. Defaults to ``(32, 32)``.

    Returns:
        A float32 NumPy array of shape ``(height, width, 3)`` with pixel
        values scaled to the 0-1 range.
    """
    with Image.open(path) as img:
        # Convert to RGB *before* resizing: palette ("P") and bilevel ("1")
        # images are otherwise resampled with nearest-neighbour, which
        # degrades the downscaled result.
        rgb = img.convert('RGB').resize(size)
        # float32 halves the memory of the default float64 and is the dtype
        # the network computes in anyway.
        return np.asarray(rgb, dtype=np.float32) / 255.0

image_folder = '/content/drive/My Drive/CS final/Base11'

# os.listdir() returns entries in arbitrary order and may include non-image
# files. Keep only image files and sort them by name so that the order of
# `images` is reproducible across runs and `image_files` can be matched
# against the image names in the annotation sheet.
IMAGE_EXTENSIONS = ('.tif', '.tiff', '.png', '.jpg', '.jpeg', '.bmp')
image_files = sorted(
    name for name in os.listdir(image_folder)
    if name.lower().endswith(IMAGE_EXTENSIONS)
)
if not image_files:
    raise FileNotFoundError(f'No image files found in {image_folder!r}')

# images[i] is the preprocessed pixel array of image_files[i].
images = np.stack([
    load_and_preprocess_image(os.path.join(image_folder, name))
    for name in image_files
])

In [None]:
# Build the annotation path from the Drive mount point rather than from
# os.getcwd(), so it stays valid even if the working directory changes.
train_path = os.path.join('/content/drive', 'My Drive', 'CS final', 'Annotation_Base11.xls')

In [None]:
# Load the annotation spreadsheet into a DataFrame.
train_labels = pd.read_excel(train_path)

In [None]:
# Inspect the available columns. Note that 'Risk of macular edema ' ends with
# a trailing space, which must be included when selecting that column.
train_labels.columns

Index(['Image name', 'Ophthalmologic department', 'Retinopathy grade',
       'Risk of macular edema '],
      dtype='object')

In [None]:
# Align the labels with `images` by file name instead of by row position:
# `images` follows the order of `image_files`, which is not guaranteed to
# match the row order of the annotation sheet.
label_by_name = pd.Series(
    train_labels["Risk of macular edema "].to_numpy(),
    index=train_labels["Image name"].astype(str).str.strip(),
)
labels_aligned = label_by_name.reindex(image_files)

# Fail loudly if any loaded image has no annotation row, rather than
# silently training on a wrong or missing label.
missing = labels_aligned[labels_aligned.isna()].index.tolist()
if missing:
    raise KeyError(f"No annotation found for image files: {missing}")

# labels[i] is the macular-edema risk grade of images[i].
labels = labels_aligned.to_numpy().astype(np.int64)

In [None]:
# Display the label array to check the class values (risk grades 0, 1 and 2).
labels

array([1, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 2, 1, 1, 0, 0, 1, 2,
       2, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2,
       2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 1,
       2, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0])

In [None]:
# Hold out 33% of the samples for testing. The classes are imbalanced (most
# labels are 0), so stratify on the labels to keep the class proportions the
# same in both splits; random_state fixes the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.33, random_state=42, stratify=labels
)


In [None]:
# Train for 10 epochs; the held-out split is passed as validation data so
# its loss and accuracy are reported after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Score the trained model on the held-out split. This is the same data that
# was passed as validation_data during training.
test_loss, test_acc = model.evaluate(
    x=X_test,
    y=y_test,
    verbose=2,
)
print('\nTest accuracy:', test_acc)

2/2 - 0s - loss: 1.2355 - accuracy: 0.5152 - 35ms/epoch - 17ms/step

Test accuracy: 0.5151515007019043


Conclusion: \\
Using a subset of the Messidor macular edema data, I was able to achieve a training accuracy of .6716 and a testing accuracy of .515. While I was unable to recreate the results from the Abramoff et al. paper, some things that could be done to improve these results would be using the whole dataset for training and testing and then increasing the depth of the CNN. Both would be difficult for my project given my device's capacity and the time constraints, but I think even a simple CNN could achieve decent accuracy on this dataset. This in turn suggests that these datasets can not only be used in clinical practice but also be improved upon even further than the Abramoff et al. study does.