In [None]:
# Import necessary libraries
import tensorflow as tf
import numpy as np
import pandas as pd
from PIL import Image
import sys
from tensorflow.keras.utils import to_categorical
import functools
import mitdeeplearning as mdl
from tqdm import tqdm 
import matplotlib.pyplot as plt
import os

# Switch the working directory to the folder holding the model notebook, then
# execute that notebook inside this kernel via IPython's %run magic.
# NOTE(review): build_cnn, used further down, is not defined in this notebook;
# presumably cnn.ipynb provides it — confirm.
os.chdir(
    r"C:\Users\janan\OneDrive\Desktop\AI\AI Project\Final Project\diabetic-retinopathy-detection\models_build"
)
get_ipython().run_line_magic("run", "cnn.ipynb")

NameError: name 'build_model' is not defined

NameError: name 'build_model' is not defined

In [None]:
# Class to load and preprocess training data
class TrainingDatasetLoader(object):
    """Index the labelled images and serve random, normalised mini-batches.

    Only the label table and the image file names are kept in memory; pixel
    data is read from disk every time ``get_batch`` is called.

    Attributes set by ``__init__``:
        y: one-hot label matrix produced by ``to_categorical``.
        X_location: directory the image files are read from.
        size: number of labelled rows.
        inds: random permutation of ``range(size)``.
        X_train / X_val: row indices of the training / validation split.
        lst_imgs: array of image file names, aligned with ``y``.
    """

    # Locations this notebook has always used. They remain the defaults so
    # that ``TrainingDatasetLoader(nb_classes)`` keeps working unchanged.
    DEFAULT_LABELS_PATH = r"C:\Users\janan\OneDrive\Desktop\AI\AI Project\Final Project\diabetic-retinopathy-detection\preprocessing\trainLabels_augmented.csv"
    DEFAULT_IMAGE_DIR = "C:/Users/janan/OneDrive/Desktop/AI/AI Project/Final Project/diabetic-retinopathy-detection/preprocessing/train_resized/resized-256/"

    def __init__(self, nb_classes, labels_path=None, image_dir=None,
                 train_fraction=0.8, seed=None):
        """Read the label CSV and draw a random train/validation split.

        Args:
            nb_classes: number of output classes. With ``2`` the ``level``
                column is binarised (``level >= 1`` becomes class 1);
                otherwise the raw levels are used as class ids.
            labels_path: CSV with ``level`` and ``train_image_name`` columns.
                Defaults to ``DEFAULT_LABELS_PATH``.
            image_dir: directory containing the image files. Defaults to
                ``DEFAULT_IMAGE_DIR``.
            train_fraction: share of rows assigned to the training split
                (strictly between 0 and 1).
            seed: optional seed for the split. ``None`` keeps the previous
                behaviour of drawing from NumPy's global random state.

        Raises:
            ValueError: if ``train_fraction`` is not strictly between 0 and 1.
        """
        if not 0.0 < train_fraction < 1.0:
            raise ValueError("train_fraction must be strictly between 0 and 1")

        print("Loading data into memory...")
        sys.stdout.flush()
        labels = pd.read_csv(self.DEFAULT_LABELS_PATH if labels_path is None else labels_path)

        levels = np.asarray(labels['level'])
        if nb_classes == 2:
            # Collapse every non-zero level into a single positive class.
            levels = (levels >= 1).astype(int)
        self.y = to_categorical(levels, nb_classes)

        self.X_location = self.DEFAULT_IMAGE_DIR if image_dir is None else image_dir
        self.size = self.y.shape[0]

        # A dedicated RandomState makes the split repeatable across kernel
        # restarts without touching the global NumPy random state.
        split_rng = np.random if seed is None else np.random.RandomState(seed)
        self.inds = split_rng.permutation(np.arange(self.size))
        split = int(train_fraction * self.size)
        self.X_train, self.X_val = self.inds[:split], self.inds[split:]
        self.lst_imgs = np.array(labels['train_image_name'].tolist())
        print("Done.")

    def get_train_size(self):
        """Return the number of rows in the training split."""
        return self.X_train.shape[0]

    def get_val_size(self):
        """Return the number of rows in the validation split."""
        return self.X_val.shape[0]

    def get_train_steps_per_epoch(self, batch_size, factor=10):
        """Return ``train_size // factor // batch_size``.

        ``factor`` shrinks an "epoch" to a fraction of the training split.
        """
        return self.get_train_size() // factor // batch_size

    def _load_image(self, name):
        """Read one image file and return it as an RGB pixel array."""
        # The context manager releases the file handle promptly; convert()
        # gives every image three channels so the batch can be stacked.
        with Image.open(os.path.join(self.X_location, name)) as handle:
            return np.array(handle.convert("RGB"))

    def get_batch(self, n, src='t', return_inds=False):
        """Draw ``n`` distinct random samples from one split.

        Args:
            n: number of samples to draw (without replacement).
            src: ``'t'`` selects the training split; any other value selects
                the validation split.
            return_inds: when true, also return the sampled row indices.

        Returns:
            ``(img, label)`` or ``(img, label, sorted_inds)``. ``img`` is a
            float32 array scaled to ``[0, 1]``; ``label`` holds the matching
            rows of ``self.y``. Rows are ordered by ascending index.

        Raises:
            ValueError: if ``n`` exceeds the size of the chosen split.
        """
        pool = self.X_train if (src == 't') else self.X_val
        if n > pool.shape[0]:
            raise ValueError(
                "Cannot draw %d samples without replacement from a split of %d images"
                % (n, pool.shape[0])
            )
        selected_inds = np.random.choice(pool, size=n, replace=False)
        sorted_inds = np.sort(selected_inds)
        img_names = self.lst_imgs[sorted_inds]
        label = self.y[sorted_inds]
        img = np.array([self._load_image(name) for name in img_names]).astype(np.float32)
        img /= 255.0
        return (img, label, sorted_inds) if return_inds else (img, label)



In [None]:
# Create the dataset loader for a two-class problem and report the split sizes
nb_classes = 2
loader = TrainingDatasetLoader(nb_classes)
print(f"Training Size: {loader.get_train_size()} images")
print(f"Validation Size: {loader.get_val_size()} images")



In [None]:
# Define input shape passed to the model builder.
# NOTE(review): presumably (height, width, channels), matching the "resized-256" image folder — confirm the files are 256x256 RGB.
IMG_SHAPE = (256, 256, 3)
# NOTE(review): build_cnn is not defined in this notebook; presumably it is provided by the %run of cnn.ipynb — verify.
model = build_cnn(IMG_SHAPE, nb_classes)



In [None]:
# Metrics for evaluation
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, precision_recall_fscore_support

# Accuracy and loss histories; they start empty and are filled / read by later cells.
train_acc_results, val_acc_results = [], []
train_loss_results, val_loss_results = [], []



In [None]:
# Training hyperparameters
batch_size, nb_epochs = 128, 100
learning_rate = 1e-3
validation_steps_per_epoch = 100  # validation batches drawn per epoch

# Loss and optimiser shared by the training and evaluation cells below
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)



In [None]:
@tf.function
def train_step(x, y):
    """Run one optimisation step on a single batch.

    Uses the module-level ``model``, ``loss_fn`` and ``optimizer``.

    Args:
        x: batch of model inputs.
        y: batch of targets handed to ``loss_fn`` as ``y_true``.

    Returns:
        Tuple ``(outputs, batch_loss)``: the model outputs computed with
        ``training=True`` and the loss for this batch.
    """
    with tf.GradientTape() as tape:
        # The forward pass and the loss must both be recorded by the tape.
        outputs = model(x, training=True)
        batch_loss = loss_fn(y_true=y, y_pred=outputs)

    gradients = tape.gradient(batch_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return outputs, batch_loss



In [None]:
@tf.function
def test_step(x, y):
    """Return the module-level ``model``'s outputs for ``x`` with ``training=False``.

    ``y`` is accepted for symmetry with ``train_step`` but is not used.
    """
    return model(x, training=False)



In [None]:
# Main training / validation loop.
#
# Per epoch it (1) trains on randomly drawn batches, (2) evaluates on randomly
# drawn validation batches, (3) appends ONE accuracy and ONE mean-loss value per
# split to the history lists, and (4) prints a metric summary.
#
# Changes relative to the earlier version of this cell:
#   * validation loss is now computed and recorded (val_loss_results used to
#     stay empty, so its mean was NaN and its curve was blank);
#   * loss histories hold one mean value per epoch, like the accuracy
#     histories, so the curves share an x-axis;
#   * ROC AUC is computed from the continuous model output, not 0/1 labels.
for epoch in range(nb_epochs):
    # Fresh buffers every epoch. After the loop ends these names still hold
    # the LAST epoch's values, which the summary cell below relies on.
    y_true_train, y_pred_train, y_score_train = [], [], []
    y_true_val, y_pred_val, y_score_val = [], [], []
    epoch_train_losses, epoch_val_losses = [], []

    # Training loop
    for _ in tqdm(range(loader.get_train_steps_per_epoch(batch_size, factor=20))):
        x, y = loader.get_batch(batch_size)
        logits, loss = train_step(x, y)
        train_outputs = logits.numpy()
        epoch_train_losses.append(float(loss.numpy()))
        y_true_train.extend(np.argmax(y, axis=1))
        y_pred_train.extend(np.argmax(train_outputs, axis=1))
        # Last output column is the class-1 score when the model has two outputs.
        y_score_train.extend(train_outputs[:, -1])

    # Validation loop
    for _ in range(validation_steps_per_epoch):
        x, y = loader.get_batch(batch_size, 'v')
        val_logits = test_step(x, y)
        val_outputs = val_logits.numpy()
        # test_step returns only the outputs, so the loss is evaluated here.
        epoch_val_losses.append(float(loss_fn(y_true=y, y_pred=val_logits).numpy()))
        y_true_val.extend(np.argmax(y, axis=1))
        y_pred_val.extend(np.argmax(val_outputs, axis=1))
        y_score_val.extend(val_outputs[:, -1])

    # One loss entry per epoch for each split.
    train_loss_results.append(float(np.mean(epoch_train_losses)))
    val_loss_results.append(float(np.mean(epoch_val_losses)))

    # Calculate and log metrics for training
    report_train = classification_report(y_true_train, y_pred_train, output_dict=True)
    conf_matrix_train = confusion_matrix(y_true_train, y_pred_train)
    # AUC needs a ranking score; hard labels would yield a single operating point.
    roc_auc_train = roc_auc_score(y_true_train, y_score_train)
    precision_train, recall_train, f1_train, _ = precision_recall_fscore_support(y_true_train, y_pred_train, average='binary')

    train_acc_results.append(report_train['accuracy'])

    # Calculate and log metrics for validation
    report_val = classification_report(y_true_val, y_pred_val, output_dict=True)
    conf_matrix_val = confusion_matrix(y_true_val, y_pred_val)
    roc_auc_val = roc_auc_score(y_true_val, y_score_val)
    precision_val, recall_val, f1_val, _ = precision_recall_fscore_support(y_true_val, y_pred_val, average='binary')

    val_acc_results.append(report_val['accuracy'])

    print(f"Epoch {epoch + 1}/{nb_epochs}")
    print(f"Training Metrics: Accuracy={report_train['accuracy']:.2f}, Precision={precision_train:.2f}, Recall={recall_train:.2f}, "
          f"F1 Score={f1_train:.2f}, AUC={roc_auc_train:.2f}")
    print(f"Validation Metrics: Accuracy={report_val['accuracy']:.2f}, Precision={precision_val:.2f}, Recall={recall_val:.2f}, "
          f"F1 Score={f1_val:.2f}, AUC={roc_auc_val:.2f}")



In [None]:
# Summary after training.
# Accuracy and loss are averaged over every entry in the history lists, whereas
# precision / recall / F1 / AUC are computed from the y_true_* / y_pred_* lists
# left behind by the last epoch of the training loop.
final_train_acc, final_val_acc = np.mean(train_acc_results), np.mean(val_acc_results)
final_train_loss, final_val_loss = np.mean(train_loss_results), np.mean(val_loss_results)

# Last-epoch classification metrics
final_train_precision, final_train_recall, final_train_f1, _ = precision_recall_fscore_support(
    y_true_train, y_pred_train, average='binary'
)
final_val_precision, final_val_recall, final_val_f1, _ = precision_recall_fscore_support(
    y_true_val, y_pred_val, average='binary'
)
final_train_auc = roc_auc_score(y_true_train, y_pred_train)
final_val_auc = roc_auc_score(y_true_val, y_pred_val)

# Report: one training line followed by one validation line per metric
summary_rows = (
    ("Accuracy", final_train_acc, final_val_acc),
    ("Loss", final_train_loss, final_val_loss),
    ("Precision", final_train_precision, final_val_precision),
    ("Recall", final_train_recall, final_val_recall),
    ("F1 Score", final_train_f1, final_val_f1),
    ("AUC", final_train_auc, final_val_auc),
)
print("\nFinal Metrics After Training:")
for metric_name, train_value, val_value in summary_rows:
    print(f"Final Training {metric_name}: {train_value:.4f}")
    print(f"Final Validation {metric_name}: {val_value:.4f}")

# Learning curves: accuracy on the left panel, loss on the right panel
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
panels = (
    (ax[0], "Accuracy", train_acc_results, val_acc_results),
    (ax[1], "Loss", train_loss_results, val_loss_results),
)
for panel, panel_title, train_curve, val_curve in panels:
    panel.plot(train_curve, label=f'Training {panel_title}')
    panel.plot(val_curve, label=f'Validation {panel_title}')
    panel.legend()
    panel.set_title(panel_title)

plt.show()
