In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

# Load the dataset
# Read the combined metadata table (one row per image).
data = pd.read_csv("train_test_DDR_APTOS.csv")

# Keep only the rows whose source is DDR, then separate them by the
# split recorded in the "Split" column.
filtered_data = data[data["Data_source"] == "DDR"]
train_data = filtered_data[filtered_data["Split"] == "Train"]
test_data = filtered_data[filtered_data["Split"] == "Test"]

# One output folder per pipeline stage and per split.
preprocessed_output_dir_train = "preprocessed_images_train/"
cropped_output_dir_train = "cropped_images_train/"
normalized_output_dir_train = "normalized_images_train/"
preprocessed_output_dir_test = "preprocessed_images_test/"
cropped_output_dir_test = "cropped_images_test/"
normalized_output_dir_test = "normalized_images_test/"

# Make sure every output folder exists; already-present folders are left alone.
for _output_dir in (
    preprocessed_output_dir_train,
    cropped_output_dir_train,
    normalized_output_dir_train,
    preprocessed_output_dir_test,
    cropped_output_dir_test,
    normalized_output_dir_test,
):
    os.makedirs(_output_dir, exist_ok=True)


# Preprocessing function
def preprocess_image(image_path, output_dir):
    """Enhance contrast and denoise one image, then save it into ``output_dir``.

    The image is read with OpenCV, CLAHE is applied to the L channel in LAB
    colour space, the result is converted back to BGR and smoothed with a 5x5
    Gaussian kernel. The image is NOT resized here.

    Args:
        image_path: Path of the image file to read.
        output_dir: Directory the processed image is written to, under the
            same base file name as ``image_path``.

    Returns:
        The path of the written file.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
        OSError: If the processed image cannot be written.
    """
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface later as an opaque cvtColor assertion error.
    source_image = cv2.imread(image_path)
    if source_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Convert to LAB color space
    lab = cv2.cvtColor(source_image, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)

    # Apply CLAHE to the L channel only; a and b are passed through untouched
    clahe = cv2.createCLAHE(clipLimit=5.0, tileGridSize=(8, 8))
    l_eq = clahe.apply(l)

    # Merge back LAB channels
    lab_eq = cv2.merge((l_eq, a, b))
    enhanced_image = cv2.cvtColor(lab_eq, cv2.COLOR_LAB2BGR)

    # Image noise removal using Gaussian filter
    filtered_image = cv2.GaussianBlur(enhanced_image, (5, 5), 0)

    # Save the preprocessed image under its original file name
    image_name = os.path.basename(image_path)
    output_path = os.path.join(output_dir, image_name)
    # cv2.imwrite reports failure through its boolean return value.
    if not cv2.imwrite(output_path, filtered_image):
        raise OSError(f"Could not write image: {output_path}")

    return output_path


def crop_image(img, threshold=15, resize_flag=False, desired_size=(512, 512)):
    """Crop away the dark border around the bright content of an image.

    A row (or column) is kept when the sum of all its channel values exceeds
    ``threshold`` times the number of pixels in that row (or column). The crop
    spans from the first to the last such row and column, both inclusive.

    Args:
        img: A PIL image or array-like with a channel axis (height, width,
            channels).
        threshold: Brightness cut-off per pixel (channel values summed).
        resize_flag: When true, resize the crop to ``desired_size``.
        desired_size: Target size passed to ``Image.resize``.

    Returns:
        A PIL image with the crop (optionally resized). If no row or no column
        passes the threshold, the image is returned uncropped.
    """
    img_np = np.array(img)
    n_rows = img_np.shape[0]
    n_cols = img_np.shape[1]

    # Collapse the channel axis, then total each row and each column.
    pixel_sums = img_np.sum(axis=2)
    row_totals = pixel_sums.sum(axis=1)
    col_totals = pixel_sums.sum(axis=0)

    bright_rows = np.flatnonzero(row_totals > threshold * n_cols)
    bright_cols = np.flatnonzero(col_totals > threshold * n_rows)

    # An entirely dark image has nothing to crop to; keep it whole instead of
    # failing on an empty index array.
    if bright_rows.size and bright_cols.size:
        # +1 because slice ends are exclusive and the last bright row/column
        # belongs to the content.
        img_np = img_np[
            bright_rows[0] : bright_rows[-1] + 1,
            bright_cols[0] : bright_cols[-1] + 1,
        ]

    new_img = Image.fromarray(img_np)
    if resize_flag:
        new_img = new_img.resize(desired_size)
    return new_img


def normalize_image(img):
    """Standardise each channel, then rescale the whole image to 0..255.

    Every channel is shifted to zero mean and divided by its standard
    deviation (both taken over the first two axes). The standardised image is
    then min-max scaled, using its global minimum and maximum, to [0, 255].

    Args:
        img: A PIL image or array-like of shape (height, width, channels).

    Returns:
        A ``uint8`` numpy array with the same shape as the input. An image in
        which every channel is constant comes back as all zeros.
    """
    img_np = np.asarray(img)

    # Channel-wise statistics.
    mean_channels = img_np.mean(axis=(0, 1))
    std_channels = img_np.std(axis=(0, 1))

    # A constant channel has std 0; dividing by 1 instead maps it to zeros
    # (value - mean == 0) rather than producing NaN.
    safe_std = np.where(std_channels == 0, 1.0, std_channels)
    normalized_image = ((img_np - mean_channels) / safe_std).astype(np.float32)

    # Scale values to be within [0, 255]
    lowest = normalized_image.min()
    value_range = normalized_image.max() - lowest
    if value_range == 0:
        # Nothing to stretch: every value is identical, so avoid 0 / 0.
        return np.zeros(img_np.shape, dtype=np.uint8)
    normalized_image = (normalized_image - lowest) / value_range * 255

    # Clip to guard against rounding overshoot before the integer cast.
    normalized_image = np.clip(normalized_image, 0, 255)
    return normalized_image.astype(np.uint8)


# Build the training arrays: run every training image through the
# preprocess -> crop/resize -> normalize pipeline, saving each intermediate
# result to its output folder along the way.
X_train = []
y_train = []

# Load and preprocess each image
for img_name, label in zip(train_data["Image_ID"], train_data["Retinopathy_Grade"]):
    # Source images are read from the "train_new/" folder, relative to the
    # current working directory.
    image_path = "train_new/" + img_name

    # Preprocess the image; the enhanced copy is written to disk and its path
    # is returned.
    preprocessed_img_path = preprocess_image(image_path, preprocessed_output_dir_train)

    # Crop and resize the image (re-read from disk with PIL)
    preprocessed_img = Image.open(preprocessed_img_path)
    cropped_resized_img = crop_image(
        preprocessed_img, resize_flag=True, desired_size=(512, 512)
    )

    # Save cropped image
    cropped_img_path = os.path.join(cropped_output_dir_train, img_name)
    cropped_resized_img.save(cropped_img_path)

    # Normalize the image (returns a uint8 numpy array)
    normalized_img = normalize_image(cropped_resized_img)

    # Save normalized image. The array is converted RGB -> BGR first because
    # it is written with OpenCV; presumably the PIL image is in RGB mode --
    # TODO confirm for all input files.
    normalized_img_path = os.path.join(normalized_output_dir_train, img_name)
    cv2.imwrite(normalized_img_path, cv2.cvtColor(normalized_img, cv2.COLOR_RGB2BGR))

    # Append to X_train and y_train (the in-memory array keeps the
    # pre-conversion channel order)
    X_train.append(normalized_img)
    y_train.append(label)

# Stack the per-image results into numpy arrays.
X_train = np.array(X_train)
y_train = np.array(y_train)

In [None]:
# Build the test arrays: run every test image through the
# preprocess -> crop/resize -> normalize pipeline, saving each intermediate
# result to its output folder along the way.
X_test = []
y_test = []

# Load and preprocess each image
for img_name, label in zip(test_data["Image_ID"], test_data["Retinopathy_Grade"]):
    # Source images are read from the "test_new/" folder, relative to the
    # current working directory.
    image_path = "test_new/" + img_name

    # Preprocess the image; the enhanced copy is written to disk and its path
    # is returned.
    preprocessed_img_path = preprocess_image(image_path, preprocessed_output_dir_test)

    # Crop and resize the image (re-read from disk with PIL)
    preprocessed_img = Image.open(preprocessed_img_path)
    cropped_resized_img = crop_image(
        preprocessed_img, resize_flag=True, desired_size=(512, 512)
    )

    # Save cropped image
    cropped_img_path = os.path.join(cropped_output_dir_test, img_name)
    cropped_resized_img.save(cropped_img_path)

    # Normalize the image (returns a uint8 numpy array)
    normalized_img = normalize_image(cropped_resized_img)

    # Save normalized image. The array is converted RGB -> BGR first because
    # it is written with OpenCV; presumably the PIL image is in RGB mode --
    # TODO confirm for all input files.
    normalized_img_path = os.path.join(normalized_output_dir_test, img_name)
    cv2.imwrite(normalized_img_path, cv2.cvtColor(normalized_img, cv2.COLOR_RGB2BGR))

    # Append to X_test and y_test (the in-memory array keeps the
    # pre-conversion channel order)
    X_test.append(normalized_img)
    y_test.append(label)

# Stack the per-image results into numpy arrays.
X_test = np.array(X_test)
y_test = np.array(y_test)

### IGNORE THIS PART ONWARDS

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image

# Reload the already-normalized training images from disk instead of
# re-running the preprocessing pipeline.

# Load the dataset
data = pd.read_csv("/workspaces/Diabetic-Retinopathy/train_test_DDR_APTOS.csv")

filtered_data = data[data["Data_source"] == "DDR"]
# Split the dataset into training and testing sets
train_data = filtered_data[filtered_data["Split"] == "Train"]
test_data = filtered_data[filtered_data["Split"] == "Test"]

# Define output directories
normalized_output_dir_train = (
    "/workspaces/Diabetic-Retinopathy/normalized_images_train/"
)
normalized_output_dir_test = "/workspaces/Diabetic-Retinopathy/normalized_images_test/"

X_train = []
y_train = []

# Load each normalized image together with its label
for img_name, label in zip(train_data["Image_ID"], train_data["Retinopathy_Grade"]):
    image_path = normalized_output_dir_train + img_name

    # NOTE(review): cv2.imread yields BGR channel order -- confirm this
    # matches whatever order the model is evaluated with.
    normalized_img = cv2.imread(image_path)
    # cv2.imread returns None for a missing/unreadable file; appending that
    # would silently turn X_train into an object array.
    if normalized_img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Append to X_train and y_train
    X_train.append(normalized_img)
    y_train.append(label)

X_train = np.array(X_train)
y_train = np.array(y_train)

In [None]:
# Reload the already-normalized test images from disk.
X_test = []
y_test = []

# Load each normalized image together with its label. The names and labels
# must come from test_data: the files live in the test folder and the labels
# are used to score the model.
for img_name, label in zip(test_data["Image_ID"], test_data["Retinopathy_Grade"]):
    image_path = normalized_output_dir_test + img_name

    # NOTE(review): cv2.imread yields BGR channel order -- confirm this
    # matches whatever order the model was trained with.
    normalized_img = cv2.imread(image_path)
    # cv2.imread returns None for a missing/unreadable file; appending that
    # would silently turn X_test into an object array.
    if normalized_img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Append to X_test and y_test
    X_test.append(normalized_img)
    y_test.append(label)

X_test = np.array(X_test)
y_test = np.array(y_test)

In [None]:
from tensorflow.keras.layers import (
    Conv2D,
    MaxPooling2D,
    Flatten,
    Dense,
    BatchNormalization,
    ZeroPadding2D,
    Activation,
    Dropout,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import LearningRateScheduler
import math

# Define the CNN512 model architecture
# CNN512: six convolutional stages followed by a dense classifier head.
model = Sequential()

# Input layer: zero-pad the 512x512, 3-channel input by 2 pixels per side.
model.add(ZeroPadding2D(padding=(2, 2), input_shape=(512, 512, 3)))

# Layers 1-24: every stage is Conv(3x3) -> BatchNorm -> ReLU -> MaxPool(2x2);
# only the number of filters changes from stage to stage.
for _n_filters in (32, 64, 96, 96, 128, 200):
    model.add(Conv2D(_n_filters, (3, 3)))
    model.add(BatchNormalization())
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Layer 25: flatten the feature maps for the dense head.
model.add(Flatten())

# Layers 26-33: two Dropout -> Dense -> BatchNorm -> ReLU groups.
for _n_units in (1000, 500):
    model.add(Dropout(0.5))
    model.add(Dense(_n_units))
    model.add(BatchNormalization())
    model.add(Activation("relu"))

# Layer 34 (Dropout)
model.add(Dropout(0.5))

# Layer 35: softmax output, one unit per class.
# NOTE(review): assumes 4 Retinopathy grade classes -- confirm against the
# distinct values in the label column.
model.add(Dense(4, activation="softmax"))

In [None]:
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.callbacks import LearningRateScheduler
import math

# Define other training parameters
# Define other training parameters
batch_size = 32
epochs = 50
learning_rate = 1e-1
momentum = 0.9

# Compile the model. y_train / y_test hold one integer grade per image (not
# one-hot vectors), which is what the sparse variant of the loss expects.
# NOTE(review): assumes grades are integer-coded 0..n_classes-1 -- confirm.
optimizer = SGD(learning_rate=learning_rate, momentum=momentum)
model.compile(
    optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)

# Get class weights. scikit-learn takes `classes` and `y` as keyword-only
# arguments, and Keras wants a {class_index: weight} dict rather than an array.
class_labels = np.unique(train_data["Retinopathy_Grade"])
class_weight_values = compute_class_weight(
    class_weight="balanced",
    classes=class_labels,
    y=train_data["Retinopathy_Grade"],
)
class_weights = {
    int(class_label): float(weight)
    for class_label, weight in zip(class_labels, class_weight_values)
}


# Define learning rate scheduler
def lr_scheduler(epoch, lr):
    """Keep the learning rate for the first 25 epochs, then decay it.

    From epoch 25 on, the current rate is multiplied by exp(-0.1) each epoch.
    """
    if epoch < 25:
        return lr
    else:
        return lr * math.exp(-0.1)


scheduler = LearningRateScheduler(lr_scheduler)

# Define data augmentation parameters
datagen = ImageDataGenerator(
    rotation_range=35,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=15,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.25, 1.25],
)

# Fit the model with augmented data.
# NOTE(review): each generated batch holds 20 * batch_size images. This
# enlarges the batch; it does not replay every image 20 times -- confirm
# that this is the intended setup.
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=20 * batch_size),
    # steps_per_epoch must be an integer; round up so the final partial
    # batch is still consumed.
    steps_per_epoch=math.ceil(len(X_train) / (20 * batch_size)),
    epochs=epochs,
    class_weight=class_weights,
    callbacks=[scheduler],
)

# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)