# Dataset source
https://www.kaggle.com/datasets/sovitrath/diabetic-retinopathy-224x224-2019-data?select=colored_images

# Install all the needed libraries

In [1]:
# Install the third-party packages this notebook uses into the kernel's environment.
# NOTE(review): versions are unpinned, so a later re-run may resolve different
# releases than the ones shown in the recorded output - consider pinning them.
%pip install matplotlib
%pip install scikit-learn
%pip install scipy
%pip install tensorflow
%pip install numpy
%pip install pandas
%pip install setuptools

Collecting matplotlib
  Using cached matplotlib-3.9.2-cp312-cp312-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (5.4 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.55.0-cp312-cp312-macosx_10_13_universal2.whl.metadata (164 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Using cached kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.3 kB)
Collecting numpy>=1.23 (from matplotlib)
  Downloading numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting pillow>=8 (from matplotlib)
  Using cached pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.1 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Using cached pyparsing-3.2.0-py3-none-any.whl.metadata (5.0 kB)
Using cached matplotlib-3.9.2-cp312-cp312-ma

# Imports

In [2]:
import os
import tensorflow as tf
from tensorflow.image import resize
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

from tensorflow.keras.utils import to_categorical
from keras.metrics import  Recall, CategoricalAccuracy
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import entropy

from helpers.help import *
np.random.seed(0)

# Dataset pre-processing

In [3]:
# Open the Diabetic Retinopathy dataset: one sub-folder per severity grade,
# collapsed here into a binary label (0 = no DR, 1 = any DR grade).
path = os.path.join(os.getcwd(),'gaussian_ds')
label_dict={'Mild':1,'Moderate':1,'Proliferate_DR':1,'Severe':1,'No_DR':0}

# Keep only real class directories. Hidden entries such as macOS '.DS_Store'
# are skipped instead of being removed by name, so the cell also runs on
# machines where that file does not exist. os.listdir returns entries in
# arbitrary order, so sort to keep the seeded shuffle/split reproducible.
folders = sorted(
    entry for entry in os.listdir(path)
    if not entry.startswith('.') and os.path.isdir(os.path.join(path, entry))
)

# Collect [image_path, label] pairs for every sample.
array = []
for i in folders:
    detailPath = os.path.join(path, i)
    # An unexpected folder name raises KeyError here rather than being mislabeled.
    label = label_dict[i.split('.')[0]]
    for j in sorted(os.listdir(detailPath)):
        if j.startswith('.'):
            # Hidden files (e.g. '.DS_Store') are not images.
            continue
        array.append([os.path.join(detailPath, j), label])

if not array:
    raise RuntimeError(f"No samples found under {path}")

# Convert to a 2-column ndarray (path, label); mixing str and int makes both
# columns string-typed, which is why the label is cast back to int later.
dataset = np.array(array)

# Number of samples (displayed as the cell output).
np.size(dataset, 0)

3662

In [4]:
# Column 0 of `dataset` holds the image paths, column 1 the binary labels.
X = dataset[:, 0]
# Cast the labels to int, then one-hot encode them.
y = to_categorical(dataset[:, 1].astype(int))

# Shuffle paths and labels with the same permutation so pairs stay aligned.
p = np.random.permutation(len(X))
X = X[p]
y = y[p]

# Hold out 10% of the samples (drawn without replacement) as the test set.
n_test = int(0.1 * len(X))
test_idxs = np.random.choice(len(X), size=n_test, replace=False)
x_test = X[test_idxs]
y_test = y[test_idxs]

# Drop the held-out samples from the pool used for train/validation.
X = np.delete(X, test_idxs)
y = np.delete(y, test_idxs, axis=0)

# Train/validation split. 11% of the remaining 90% is roughly 10% of the
# full dataset, so the validation set ends up about as large as the test set.
x_train, x_val, y_train, y_val = train_test_split(
    X, y, test_size=0.11, random_state=42
)

In [5]:
# Report how many samples ended up in each split.
for split_name, split_paths in (
    ("Training", x_train),
    ("Validation", x_val),
    ("Test", x_test),
):
    print(f"Samples in {split_name} set: {split_paths.shape[0]}")

Samples in Training set: 2933
Samples in Validation set: 363
Samples in Test set: 366


In [6]:
# Check class balance: count each distinct one-hot label row per split
# (train, test, validation - in that order).
for split_labels in (y_train, y_test, y_val):
    label_rows, label_counts = np.unique(split_labels, axis=0, return_counts=True)
    print((label_rows, label_counts))

(array([[0., 1.],
       [1., 0.]]), array([1506, 1427]))
(array([[0., 1.],
       [1., 0.]]), array([171, 195]))
(array([[0., 1.],
       [1., 0.]]), array([180, 183]))


# Prepares the data to feed to the model

In [7]:
# Wrap each split in a TensorFlow dataset using the custom `build_dataset`
# helper, which returns batched tensors. Per the helper's contract, `repeat`
# defaults to True and must be switched off for everything except training.
# NOTE(review): `build_dataset` is not defined in this notebook - presumably it
# comes from the `helpers.help` star import; confirm.

# Evaluation splits: single pass, batches of 64.
val_dataset = build_dataset(x_val, y_val, repeat=False, batch=64)
test_dataset = build_dataset(x_test, y_test, repeat=False, batch=64)

# Training split: repeating dataset, so the epoch length has to be given
# explicitly. STEPS_PER_EPOCH is a float here and is truncated with int()
# where it is passed to model.fit.
BATCH_SIZE = 32
STEPS_PER_EPOCH = len(x_train) / BATCH_SIZE

train_dataset = build_dataset(x_train, y_train, batch=BATCH_SIZE)

# Model input shape = shape of one image, i.e. the batch spec minus its
# leading batch dimension.
image_spec = train_dataset.element_spec[0]
input_shape = image_spec.shape[1:]

print(input_shape)

(224, 224, 3)


# Define model architecture (the CNN)

In [8]:
# Build the CNN and configure it for two-class, one-hot classification.
# NOTE(review): `simple_model` and `Adam` are not imported explicitly in this
# notebook - presumably both come from the `helpers.help` star import; confirm.
model = simple_model(input_shape)

optimizer = Adam()
accuracy_metric = CategoricalAccuracy()
model.compile(
    optimizer=optimizer,
    loss="categorical_crossentropy",
    metrics=[accuracy_metric],
)

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Model training

In [9]:
# Make sure the output directories exist before any callback writes to them;
# on a fresh checkout 'logger/' (and 'model/') may be missing and the CSV
# logger would fail when opening its file.
os.makedirs('model', exist_ok=True)
os.makedirs('logger', exist_ok=True)

# Save the model whenever the validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath='model/model_baseline.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

# Log per-epoch metrics to a CSV file (overwritten on every run).
csv_logger = keras.callbacks.CSVLogger(
    'logger/trainlog_baseline.csv',
    separator=',',
    append=False,
)

# Stop early if the validation loss has not improved by at least 0.001 for
# 10 consecutive epochs, and roll back to the best weights seen.
early_stopper = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    restore_best_weights=True,
    patience=10,
)

callbacks_list = [checkpoint, early_stopper, csv_logger]

# Upper bound on the number of epochs; early stopping may end training sooner.
# Other values tried: 20 (minimal run), 200.
EPOCHS = 100

# The training dataset repeats, so the epoch length is set explicitly.
# Keep the History object so the curves can be inspected later (this also
# avoids the bare History repr being printed as the cell output).
history = model.fit(
    train_dataset,
    steps_per_epoch=int(STEPS_PER_EPOCH),
    epochs=EPOCHS,
    validation_data=val_dataset,
    validation_steps=None,
    callbacks=callbacks_list,
)

Epoch 1/100
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - categorical_accuracy: 0.8763 - loss: 0.4775
Epoch 1: val_loss improved from inf to 40.15501, saving model to model/model_baseline.keras
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 1s/step - categorical_accuracy: 0.8765 - loss: 0.4759 - val_categorical_accuracy: 0.4959 - val_loss: 40.1550
Epoch 2/100
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - categorical_accuracy: 0.9297 - loss: 0.2193
Epoch 2: val_loss improved from 40.15501 to 16.56611, saving model to model/model_baseline.keras
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 1s/step - categorical_accuracy: 0.9297 - loss: 0.2193 - val_categorical_accuracy: 0.4959 - val_loss: 16.5661
Epoch 3/100
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - categorical_accuracy: 0.9249 - loss: 0.2080
Epoch 3: val_loss improved from 16.56611 to 3.32515, saving model to mode

<keras.src.callbacks.history.History at 0x1454e5160>

# Evaluation

In [10]:
# Reload the checkpoint written during training (lowest validation loss).
model = keras.models.load_model("model/model_baseline.keras")
print("-" * 100)

# Score the reloaded model on the held-out test set and show the metrics dict.
# NOTE(review): in the recorded run the test accuracy (~0.47) is far below the
# training accuracy (~0.93) - worth investigating before trusting the model.
test_metrics = model.evaluate(test_dataset, verbose=0, return_dict=True)
print(test_metrics)

----------------------------------------------------------------------------------------------------
{'categorical_accuracy': 0.46721312403678894, 'loss': 3.36596941947937}


# Measuring Uncertainties
In this section we compute three uncertainty measures from the model's predicted probabilities: least confidence, margin of confidence, and entropy. For each measure we print the predicted probabilities of the 10 most uncertain test samples.

In [11]:
# Predicted class probabilities for the test set: one row per sample, one
# column per class (the following cells reduce over axis=1).
# NOTE(review): assumes test_dataset yields samples in a stable order - confirm
# in build_dataset if rows need to be matched back to x_test / y_test.
y_test_proba = model.predict(test_dataset)  

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 401ms/step


Now that we have the prediction probabilities for the entire test set, we can apply each formula to compute an uncertainty score and select the 10 most uncertain samples.
Let us start with Least Confidence (LC):

$$LC_i = 1 - P_{i,\max}$$

where $P_{i,\max}$ is the maximum predicted class probability of the $i$-th sample.

In [12]:
# Least confidence: one minus the probability of the most likely class.
top_class_proba = np.max(y_test_proba, axis=1)
y_test_uncert = 1 - top_class_proba

# argsort is ascending, so the last 10 indices are the 10 largest scores
# (listed from less to more uncertain).
lc_order = np.argsort(y_test_uncert)
y_test_top_lc = lc_order[-10:]

# Show the predicted probabilities of those 10 samples.
print(y_test_proba[y_test_top_lc])

[[0.00429254 0.99570745]
 [0.004318   0.995682  ]
 [0.00435345 0.99564654]
 [0.00436991 0.9956301 ]
 [0.00466951 0.9953306 ]
 [0.0054029  0.99459714]
 [0.00600469 0.9939953 ]
 [0.00663485 0.99336517]
 [0.0075765  0.99242353]
 [0.0080883  0.9919117 ]]


The margin of confidence of a sample is the difference between its highest and second-highest predicted probabilities, $MC_i = P_{i,(1)} - P_{i,(2)}$. A smaller margin means a more uncertain prediction.

In [13]:
# Partition the negated probabilities around position 1: per row, column 0
# then holds minus the highest probability and column 1 minus the second
# highest.
negated_proba = -y_test_proba
part = np.partition(negated_proba, kth=1, axis=1)

# Margin = highest probability minus second-highest probability.
margin = part[:, 1] - part[:, 0]

# The 10 smallest margins mark the 10 most uncertain samples.
margin_order = np.argsort(margin)
y_test_least_mc = margin_order[:10]

# Show the predicted probabilities of those 10 samples.
print(y_test_proba[y_test_least_mc])

[[0.0080883  0.9919117 ]
 [0.0075765  0.99242353]
 [0.00663485 0.99336517]
 [0.00600469 0.9939953 ]
 [0.0054029  0.99459714]
 [0.00466951 0.9953306 ]
 [0.00436991 0.9956301 ]
 [0.00435345 0.99564654]
 [0.004318   0.995682  ]
 [0.00429254 0.99570745]]


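Finally, the entropy of the $i$-th sample is given by $H_i = -\sum_{c} P_{i,c} \log P_{i,c}$, where $P_{i,c}$ is the predicted probability of class $c$. Thankfully, we don't have to write the code for this calculation, as SciPy provides a function called `entropy` that does precisely that.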

In [14]:
# Entropy of each sample's predicted class distribution (one value per row).
sample_entropy = entropy(y_test_proba, axis=1)

# argsort is ascending, so the last 10 indices have the 10 largest entropies.
y_test_max_ents = np.argsort(sample_entropy)[-10:]

# Show the predicted probabilities of those 10 samples.
print(y_test_proba[y_test_max_ents])

[[0.00429254 0.99570745]
 [0.004318   0.995682  ]
 [0.00435345 0.99564654]
 [0.00436991 0.9956301 ]
 [0.00466951 0.9953306 ]
 [0.0054029  0.99459714]
 [0.00600469 0.9939953 ]
 [0.00663485 0.99336517]
 [0.0075765  0.99242353]
 [0.0080883  0.9919117 ]]
