In [None]:
!pip install imutils
!pip install efficientnet
!pip install Pillow

In [None]:
import pandas as pd
import numpy as np 
from tqdm import tqdm
import os
import glob
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from PIL import Image


import matplotlib as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from efficientnet.tfkeras import EfficientNetB0
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing import image
from keras.layers import Dense, Dropout, Flatten, BatchNormalization
import warnings
warnings.filterwarnings('ignore')

In [None]:
from pathlib import Path

# Root folder that holds one sub-folder per diagnosis class.
# NOTE(review): a later cell points at the Kaggle input path
# ('/kaggle/input/.../gaussian_filtered_images/gaussian_filtered_images/');
# confirm that this root is the intended one for the current environment.
images_directory = Path('/gaussian_filtered_images/gaussian_filtered_images')

# Recursively collect every PNG below the root. Sorting makes the order
# independent of the filesystem so that seeded shuffles downstream are
# reproducible.
file_paths = sorted(images_directory.glob(r'**/*.png'))

# The diagnosis label of an image is the name of the folder that contains it.
# Built once for the whole list (the label list is not recomputed per file).
diagnosis_labels = [fp.parent.name for fp in file_paths]

# `labels` is kept as a separate list holding the same per-file labels.
labels = list(diagnosis_labels)


In [None]:
# Pair every image path (as a string) with the diagnosis label of its folder.
file_paths_series = pd.Series(file_paths, name='Filepaths').astype(str)
labels_series = pd.Series(diagnosis_labels, name='Diagnosis')

images_df = pd.concat([file_paths_series, labels_series], axis=1)

# Shuffle the rows. The seed is fixed (42, as in the other sampling calls of
# this notebook) so that re-running the notebook gives the same order.
images_df = images_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Only the last expression of a cell is rendered automatically, so the preview
# has to be displayed explicitly; otherwise its result is silently discarded.
display(images_df.head(4))

# Number of images per diagnosis class (rendered as the cell output).
images_df['Diagnosis'].value_counts()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the mapping from diagnosis names to integer codes, then overwrite the
# 'Diagnosis' column with those codes. The fitted encoder is kept in
# `label_encoder` so the codes can be mapped back to names later.
label_encoder = LabelEncoder().fit(images_df['Diagnosis'])
images_df['Diagnosis'] = label_encoder.transform(images_df['Diagnosis'])


In [None]:
from sklearn.utils import resample

# Number of rows every diagnosis class should contribute to the balanced set.
n_samples = 400

# Class sizes before balancing; also tells us which classes actually exist.
label_counts = images_df['Diagnosis'].value_counts()
balanced_dataframes = {}

# Iterate over the classes that are present instead of a hard-coded range, so
# an absent class cannot produce an empty subset.
for label in sorted(label_counts.index):
    df_subset = images_df[images_df['Diagnosis'] == label]

    # Classes smaller than the target are oversampled with replacement;
    # larger classes are down-sampled WITHOUT replacement so that no image is
    # duplicated unnecessarily.
    needs_oversampling = df_subset.shape[0] < n_samples
    balanced_dataframes[label] = resample(
        df_subset,
        n_samples=n_samples,
        replace=needs_oversampling,
        random_state=42,
    )

# NOTE: oversampled classes contain repeated rows (same 'Filepaths' value).
# Any later train/test split must keep all copies of a file on the same side,
# otherwise the evaluation is contaminated by training images.
balanced_images_df = pd.concat(list(balanced_dataframes.values()))
balanced_images_df = balanced_images_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Sanity check: every class should now report `n_samples` rows.
balanced_images_df['Diagnosis'].value_counts()


In [None]:
from pathlib import Path
from glob import glob

# Image size tuple defined for this cell.
size = (224, 224)

# Dataset root inside the Kaggle input directory.
directory = '/kaggle/input/diabetic-retinopathy-224x224-gaussian-filtered/gaussian_filtered_images/gaussian_filtered_images/'

# Glob pattern for every PNG located exactly one folder below `directory`.
pattern = os.path.join(directory, '*', '*.png')

# Every path on disk that matches the pattern.
image_paths = glob(pattern)


def resize_image(file_path, target_size=(224, 224)):
    """Load an image file and return it resized as an RGB NumPy array.

    Parameters
    ----------
    file_path : str or path-like
        Location of the image on disk.
    target_size : tuple of int, optional
        Size passed to ``Image.resize``, i.e. ``(width, height)`` in pixels.

    Returns
    -------
    numpy.ndarray
        Pixel data of the resized image with three colour channels.
    """
    # The context manager closes the underlying file as soon as the pixels
    # have been read, instead of leaving one open handle per image.
    with Image.open(file_path) as source_image:
        # Force three channels so that RGBA, palette or grayscale PNGs yield
        # arrays with the same layout as ordinary RGB files; for RGB inputs
        # the conversion does not alter the pixel data.
        resized_image = source_image.convert('RGB').resize(target_size)
    return np.asarray(resized_image)

# Run `resize_image` on every path of the balanced frame and store the
# resulting pixel arrays next to the paths in a new 'ResizedImage' column.
resized_image_column = balanced_images_df['Filepaths'].apply(resize_image)
balanced_images_df['ResizedImage'] = resized_image_column



In [None]:
# Preview only the first few entries; each cell of this column holds a full
# image array, so showing the whole column adds noise without information.
balanced_images_df['ResizedImage'].head()

In [None]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Layout of the preview grid.
n_preview = 6
n_cols = 3
n_rows = (n_preview + n_cols - 1) // n_cols  # ceiling division

# Draw a reproducible random sample from the balanced frame. The pixel arrays
# live in the 'ResizedImage' column of `balanced_images_df`; they were already
# resized when that column was created, so no further resizing is done here.
sampled_images = [
    np.asarray(img)
    for img in balanced_images_df['ResizedImage'].sample(n_preview, random_state=42)
]

# One titled subplot per sampled image ("Img 1" ... "Img 6").
fig = make_subplots(
    rows=n_rows,
    cols=n_cols,
    subplot_titles=[f"Img {i}" for i in range(1, n_preview + 1)],
)

# Fill the grid row by row; plotly rows/columns are 1-based.
for i, img in enumerate(sampled_images):
    fig.add_trace(go.Image(z=img), row=i // n_cols + 1, col=i % n_cols + 1)

fig.update_layout(
    title="Sample Images From Dataset",
    width=800,
    height=600,
    margin=dict(l=20, r=20, t=60, b=20),
)

fig.show()

In [None]:
from sklearn.model_selection import StratifiedGroupKFold
from tensorflow.keras.utils import to_categorical

# Stack the per-row image arrays into one tensor and scale pixels to [0, 1].
# float32 is requested explicitly: dividing the integer pixel data by 255.0
# would otherwise produce a float64 tensor that needs twice the memory.
images_array = np.asarray(balanced_images_df['ResizedImage'].tolist(), dtype=np.float32) / 255.0

# One-hot encode the integer diagnosis codes (5 classes).
labels_array = to_categorical(balanced_images_df['Diagnosis'], num_classes=5)

# The balanced frame contains repeated rows for oversampled classes. A plain
# random split would put copies of the same file into both the training and
# the test set and inflate the test score. Splitting by group (the file path)
# keeps every copy of a file on one side, while stratification keeps the class
# proportions similar in both parts. Taking the first of 5 folds gives a test
# share of roughly 20 %, matching the previous test_size=0.2.
splitter = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=42)
train_idx, test_idx = next(
    splitter.split(
        images_array,
        y=balanced_images_df['Diagnosis'].to_numpy(),
        groups=balanced_images_df['Filepaths'].to_numpy(),
    )
)

X_train, X_test = images_array[train_idx], images_array[test_idx]
Y_train, Y_test = labels_array[train_idx], labels_array[test_idx]


In [None]:
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Model

# Stop when validation accuracy has not improved by at least 0.005 for 10
# epochs, and roll the model back to the best epoch instead of keeping the
# weights of the last (worse) one. The callback is taken from
# `tensorflow.keras`, the same package that provides the model, rather than
# from the standalone `keras` package.
es = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.005,
    patience=10,
    verbose=1,
    mode='auto',
    restore_best_weights=True,
)

# ImageNet-pretrained DenseNet121 without its classification head, used as a
# frozen feature extractor.
base_model = DenseNet121(input_shape=(224, 224, 3), weights='imagenet', include_top=False)
base_model.trainable = False
vgg = base_model  # previous (misleading) name kept as an alias

# Classification head: flatten -> dense(256) -> dropout -> 5-way softmax.
# All layers come from `tensorflow.keras.layers` so they match the backbone.
x = layers.Flatten()(base_model.output)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.4)(x)
prediction = layers.Dense(5, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=prediction)
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# NOTE(review): the test split doubles as the validation set that drives
# early stopping, so the test score reported afterwards is optimistic.
# Consider carving a separate validation split out of the training data.
training_history = model.fit(
    X_train,
    Y_train,
    epochs=25,
    batch_size=8,
    validation_data=(X_test, Y_test),
    callbacks=[es],
)

In [None]:
# Score the trained model on the held-out arrays. The result is unpacked into
# two values: the loss followed by the accuracy.
evaluation_result = model.evaluate(X_test, Y_test)
test_loss, test_accuracy = evaluation_result

print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)


In [None]:
# Re-import pyplot here: the imports cell at the top binds `plt` to the
# top-level `matplotlib` package, which has no plotting functions.
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by `model.fit`.
training_accuracy = training_history.history['accuracy']
training_loss = training_history.history['loss']
validation_accuracy = training_history.history['val_accuracy']
validation_loss = training_history.history['val_loss']

# Epochs are numbered from 1; the length reflects the epochs actually run.
epoch_numbers = range(1, len(training_accuracy) + 1)

# Two panels side by side: loss on the left, accuracy on the right.
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(20, 8))

loss_ax.plot(epoch_numbers, training_loss, 'b-', label='Training Loss')
loss_ax.plot(epoch_numbers, validation_loss, 'g-', label='Validation Loss')
loss_ax.set(title='Training and Validation Loss', xlabel='Epoch', ylabel='Loss')
loss_ax.legend()

accuracy_ax.plot(epoch_numbers, training_accuracy, 'b-', label='Training Accuracy')
accuracy_ax.plot(epoch_numbers, validation_accuracy, 'g-', label='Validation Accuracy')
accuracy_ax.set(title='Training and Validation Accuracy', xlabel='Epoch', ylabel='Accuracy')
accuracy_ax.legend()

plt.show()

