# Evaluation of Transfer Learning

In [26]:
import os
import tensorflow as tf
from tensorflow.image import resize
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
import pandas as pd

from tensorflow.keras.utils import to_categorical
from keras.metrics import  Recall, CategoricalAccuracy
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import entropy
from sklearn.utils import resample


from helpers.help import *
np.random.seed(0)

# Read data

In [27]:
# load dataset
df = pd.read_csv('hr-dataset/full_df.csv')

#Left eye
# get the diagnostic of hypertensive retinopathy
ds_hr_left = df[df['Left-Diagnostic Keywords'].str.contains('hypertensive retinopathy', na=False)]
# get the diagnostic of diabetic retinopathy
ds_dr_left = df[df['Left-Diagnostic Keywords'].str.contains('diabetic retinopathy', na=False)]
# get the diagnostic of normal fundus
ds_normal_left = df[df['Left-Diagnostic Keywords'] == 'normal fundus']


#Right eye
# get the diagnostic of hypertensive retinopathy
ds_hr_right = df[df['Right-Diagnostic Keywords'].str.contains('hypertensive retinopathy', na=False)]
# get the diagnostic of diabetic retinopathy
ds_dr_right = df[df['Right-Diagnostic Keywords'].str.contains('diabetic retinopathy', na=False)]
# get the diagnostic of normal fundus
ds_normal_right = df[df['Right-Diagnostic Keywords'] == 'normal fundus']


# Specific dataframe
# Left eye
df_hr_left = ds_hr_left[['Left-Diagnostic Keywords', 'Left-Fundus']]
df_dr_left = ds_dr_left[['Left-Diagnostic Keywords', 'Left-Fundus']]
df_normal_left = ds_normal_left[['Left-Diagnostic Keywords', 'Left-Fundus']]

# Right eye
df_hr_right = ds_hr_right[['Right-Diagnostic Keywords', 'Right-Fundus']]
df_dr_right = ds_dr_right[['Right-Diagnostic Keywords', 'Right-Fundus']]
df_normal_right = ds_normal_right[['Right-Diagnostic Keywords', 'Right-Fundus']]


# Droping class
# Left eye
df_hr_left = df_hr_left.drop('Left-Diagnostic Keywords', axis=1)
df_dr_left = df_dr_left.drop('Left-Diagnostic Keywords', axis=1)
df_normal_left = df_normal_left.drop('Left-Diagnostic Keywords', axis=1)
# Right eye
df_hr_right = df_hr_right.drop('Right-Diagnostic Keywords', axis=1)
df_dr_right = df_dr_right.drop('Right-Diagnostic Keywords', axis=1)
df_normal_right = df_normal_right.drop('Right-Diagnostic Keywords', axis=1)


# Undersample
df_hr = pd.concat([df_hr_left, df_hr_right])
df_dr = pd.concat([df_dr_left, df_dr_right])
df_normal = pd.concat([df_normal_left, df_normal_right])

df_hr_downsampled = resample(df_hr, replace=False, n_samples=200, random_state=10)
df_dr_downsampled = resample(df_dr, replace=False, n_samples=165, random_state=10)
df_normal_downsampled = resample(df_normal, replace=False, n_samples=220, random_state=10)

# Class transformation

In [28]:
# Open Diabetic Retinopathy dataset
path = os.path.join(os.getcwd(),'hr-dataset/preprocessed_images')

# 0 - Normal
# 1 - Diabetic Rethinopaty
# 2 - Hipertensive Rethinopaty

# roam Hipertensive rethinopaty
array = []
detailPath = ""
for index, row in df_hr_downsampled.iterrows():
    if type(row['Left-Fundus']) != float:
        detailPath = os.path.join(path,row['Left-Fundus'])
    else:
        detailPath = os.path.join(path,row['Right-Fundus'])
    if(os.path.exists(detailPath)):
        array.append([detailPath,2])


# roam Diabeic rethinopaty
for index, row in df_dr_downsampled.iterrows():
    if type(row['Left-Fundus']) != float:
        detailPath = os.path.join(path,row['Left-Fundus'])
    else:
        detailPath = os.path.join(path,row['Right-Fundus'])
    if(os.path.exists(detailPath)):
        array.append([detailPath,1])

# roam no rethinopaty
for index, row in df_normal_downsampled.iterrows():
    if type(row['Left-Fundus']) != float:
        detailPath = os.path.join(path,row['Left-Fundus'])
    else:
        detailPath = os.path.join(path,row['Right-Fundus'])
    if(os.path.exists(detailPath)):
        array.append([detailPath,0])


    
# transforms the array into nparray
dataset=np.array(array)

np.size(dataset,0)

579

# Dataset division

In [29]:
X,y=dataset[::,0],dataset[::,1]
y = y.astype(int)

#One hot encode the labels
y = to_categorical(y)

#Shuffle the dataset (to make a unbiased model)
p = np.random.permutation(len(X))
X,y = X[p], y[p]

#Strip off 20% samples for hold out test set
test_idxs = np.random.choice(len(X), size=int(0.2*len(X)), replace=False, p=None)
x_test, y_test = X[test_idxs],y[test_idxs]

#Delete the test set samples from X,y 
X = np.delete(X, test_idxs)
y = np.delete(y, test_idxs, axis = 0)

#usual train-val split. We use 20% here just match the test set size to validation set.
x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.11, random_state=42)


val_dataset=build_dataset(x_val,y_val,repeat=False,batch=16)
test_dataset=build_dataset(x_test,y_test,repeat=False,batch=16)

BATCH_SIZE=16
STEPS_PER_EPOCH=len(x_train)/BATCH_SIZE

train_dataset=build_dataset(x_train,y_train,batch=BATCH_SIZE)

# input shape for the model
input_shape=train_dataset.element_spec[0].shape[1:]

input_shape=train_dataset.element_spec[0].shape[1:]

for batch in train_dataset.take(1):
    features, labels = batch  # Unpack the tuple

# Model

In [30]:
# load the best model
model_tl = keras.models.load_model("model/model_transferlearning.keras")

# Evaluation

In [31]:
class_names = {0: 'Normal', 1: 'Diabetic Rethinopaty', 2: 'Hipertensive Rethinopaty'}

# Initialize lists to store predictions and true labels
y_pred = []
y_true = []

# Iterate over the test dataset
for x_batch, y_batch in test_dataset:
    # Predict probabilities for each batch
    y_test_proba = model_tl.predict(x_batch)

    # Convert probabilities to predicted class labels (0, 1, or 2)
    y_pred.extend(np.argmax(y_test_proba, axis=1))

    # Convert true labels from one-hot encoding to class labels (0, 1, or 2)
    y_true.extend(np.argmax(y_batch.numpy(), axis=1))

# Convert lists to numpy arrays
y_pred = np.array(y_pred)
y_true = np.array(y_true)

# Transform numerical labels into class names
y_pred_names = [class_names[label] for label in y_pred]
y_true_names = [class_names[label] for label in y_true]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step


In [34]:
print(model_tl.evaluate(test_dataset, verbose=0,return_dict=True))

conf_matrix = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix:\n", conf_matrix)

# Print classification report
print("\nClassification Report:\n", classification_report(y_true_names, y_pred_names, target_names=list(class_names.values())))

{'categorical_accuracy': 0.40869563817977905, 'loss': 1.0673002004623413}

Confusion Matrix:
 [[46  0  0]
 [30  0  1]
 [37  0  1]]

Classification Report:
                           precision    recall  f1-score   support

                  Normal       0.00      0.00      0.00        31
    Diabetic Rethinopaty       0.50      0.03      0.05        38
Hipertensive Rethinopaty       0.41      1.00      0.58        46

                accuracy                           0.41       115
               macro avg       0.30      0.34      0.21       115
            weighted avg       0.33      0.41      0.25       115



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
