In [None]:
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, BatchNormalization
from keras.models import Sequential
from keras.utils import to_categorical
from keras.callbacks import LearningRateScheduler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
import cv2
import os
import pandas as pd
import warnings

warnings.filterwarnings('ignore')

In [None]:

# --- Configuration ----------------------------------------------------------
DATASET_PATH = r"./dataset/"  # root folder; each sub-folder holds the images of one class
IMAGE_WIDTH = 256
IMAGE_HEIGHT = 256
EPOCHS = 30
BATCH_SIZE = 32
SEED = 42

# --- Dataset inventory ------------------------------------------------------
# Only directories count as class folders: a stray file in the dataset root
# (e.g. .DS_Store) would otherwise make os.listdir() fail below.
class_names = sorted(
    name for name in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, name))
)
dataset_folders = [os.path.join(DATASET_PATH, name) for name in class_names]
print(f"There are {len(dataset_folders)} folder in dataset.")

# Collect one record per image. The class label is the position of the class
# folder in the sorted folder list.
records = []
for class_label, (class_name, folder_path) in enumerate(zip(class_names, dataset_folders)):
    file_names = sorted(os.listdir(folder_path))
    print(f"\t-There are {len(file_names)} images in {class_name} folder.")
    records.extend(
        {'paths': os.path.join(folder_path, file_name), 'class-label': class_label}
        for file_name in file_names
    )
total = len(records)

# Build the frame in one go so each column gets its natural dtype (object for
# paths, int for labels) instead of writing strings into a zero-filled int frame.
df = pd.DataFrame(records, columns=['paths', 'class-label'])
print(df.sample(frac=1, random_state=SEED).head(10))

In [None]:
#normalizes the histogram of images
def normalize_histograms(img):
    """Stretch every channel of an image so its intensities span 0-255.

    Each channel is rescaled independently (min-max contrast stretch): the
    channel minimum maps to 0 and the channel maximum to 255. The input array
    is left untouched.

    Args:
        img: Image array of shape (H, W) or (H, W, C).

    Returns:
        A new array with the same shape and dtype as ``img``. For integer
        dtypes the stretched values are rounded to the nearest integer and
        clipped to [0, 255] before the cast, so the channel maximum really
        reaches 255 instead of being truncated to 254.
    """
    source = np.asarray(img)
    pixels = source.astype(np.float64)

    # Per-channel extrema; keepdims lets them broadcast against the image and
    # makes the same code work for 2-D (single channel) inputs.
    lowest = pixels.min(axis=(0, 1), keepdims=True)
    highest = pixels.max(axis=(0, 1), keepdims=True)

    # Shifting by the minimum moves the range to 0..(max-min); the scale factor
    # stretches it to 0..255. The small epsilon avoids dividing by zero on a
    # perfectly flat channel (which then maps to all zeros).
    stretched = 255 / ((highest - lowest) + 0.0001) * (pixels - lowest)

    if np.issubdtype(source.dtype, np.integer):
        stretched = np.clip(np.rint(stretched), 0, 255)
    return stretched.astype(source.dtype)


#Cropping image to avoid from unuseful black areas
def deleteBlackAreas(filename):
    """Load an image, crop away its dark border and resize it.

    The image is thresholded (pixels brighter than 10 count as content), the
    largest external contour is located, and the image is cropped to that
    contour's bounding rectangle. If no contour is found (e.g. an entirely
    dark image) the full frame is used instead.

    Args:
        filename: Path of the image file to read.

    Returns:
        The cropped image resized to IMAGE_WIDTH x IMAGE_HEIGHT, in RGB
        channel order on every code path.

    Raises:
        OSError: If ``cv2.imread`` cannot read ``filename`` (it returns None
            for missing or undecodable files).
    """
    image_size = (IMAGE_WIDTH, IMAGE_HEIGHT)  # cv2.resize expects (width, height)
    img = cv2.imread(filename)  # read image from file (BGR channel order)
    if img is None:
        raise OSError(f"cv2.imread could not read image: {filename!r}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale
    _, thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)  # binary content mask
    # Taking [-2] picks the contour list under both the 2-value (OpenCV 4.x)
    # and the 3-value (OpenCV 3.x) return conventions of findContours.
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if len(contours) != 0:
        # Crop to the bounding rectangle of the biggest contour.
        x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
        img = img[y:y + h, x:x + w]

    resized = cv2.resize(img, image_size, interpolation=cv2.INTER_AREA)
    # Convert BGR -> RGB for matplotlib; done for the fallback path as well so
    # every returned image uses the same channel order.
    return cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

In [None]:
def _dim_image(img):
    """Return a copy of ``img`` darkened by 7.5%, keeping the input dtype."""
    dimmed = img - 0.075 * img  # float result
    if np.issubdtype(img.dtype, np.integer):
        dimmed = np.rint(dimmed)
    # Cast back so the stacked dataset is not upcast to float64 by this variant.
    return dimmed.astype(img.dtype)


def create_dataset(df, mode=0):
    """Load, preprocess and augment every image listed in ``df``.

    For each row the image is loaded with ``deleteBlackAreas`` and, when
    ``mode == 1``, passed through ``normalize_histograms``. Six samples are
    emitted per source image, in this order: original, vertical flip,
    horizontal flip, dimmed, dimmed vertical flip, dimmed horizontal flip.

    NOTE(review): all augmented variants of a source image are emitted
    together, so a random split of the returned arrays can put variants of
    the same source image on both sides of the split. Splitting ``df`` before
    augmenting would avoid that.

    Args:
        df: DataFrame with a 'paths' column (image file paths) and a
            'class-label' column.
        mode: 0 for no histogram normalization, 1 to normalize each image.

    Returns:
        Tuple ``(images, labels)`` of numpy arrays with ``6 * len(df)``
        entries each.
    """
    images = []
    labels = []
    file_names = df["paths"].tolist()
    class_labels = df["class-label"].tolist()

    for i, (filename, label) in enumerate(zip(file_names, class_labels)):
        print(f"{i}. step---> {filename}")
        img = deleteBlackAreas(filename)
        if mode == 1:
            img = normalize_histograms(img)

        # Original first, then the dimmed copy; each is followed by its flips.
        for base in (img, _dim_image(img)):
            flipped_vertically = cv2.flip(base, 0)    # flip code 0: around the x-axis
            flipped_horizontally = cv2.flip(base, 1)  # flip code 1: around the y-axis
            images.extend((base, flipped_vertically, flipped_horizontally))
            labels.extend((label, label, label))

    print(f"{len(images)} images in images array and {len(labels)} labels in labels array")
    return np.array(images), np.array(labels)


def split_dataset(X, Y):
    """Shuffle samples and labels and split them 80/20 into train/validation.

    Args:
        X: Samples.
        Y: Labels aligned with ``X``.

    Returns:
        List ``[X_train, x_val, y_train, y_val]`` of numpy arrays.
    """
    # Hold out 20% of the data for validation; SEED makes the shuffle repeatable.
    parts = train_test_split(X, Y, test_size=0.2, shuffle=True, random_state=SEED)
    X_train, x_val, y_train, y_val = (np.array(part) for part in parts)

    # Report the partition sizes; label shapes are shown as column vectors.
    shape_report = (
        ("X_train", X_train.shape),
        ("x_val", x_val.shape),
        ("y_train", y_train.reshape(-1, 1).shape),
        ("y_val", y_val.reshape(-1, 1).shape),
    )
    for name, shape in shape_report:
        print("{} shape: {}".format(name, shape))

    return [X_train, x_val, y_train, y_val]

In [5]:
# Load and augment every image listed in df (default mode=0: no histogram normalization).
X, y = create_dataset(df)


0. step---> ./dataset/1_normal/NL_001.png
1. step---> ./dataset/1_normal/NL_002.png
2. step---> ./dataset/1_normal/NL_003.png
3. step---> ./dataset/1_normal/NL_004.png
4. step---> ./dataset/1_normal/NL_005.png
5. step---> ./dataset/1_normal/NL_006.png
6. step---> ./dataset/1_normal/NL_007.png
7. step---> ./dataset/1_normal/NL_008.png
8. step---> ./dataset/1_normal/NL_009.png
9. step---> ./dataset/1_normal/NL_010.png
10. step---> ./dataset/1_normal/NL_011.png
11. step---> ./dataset/1_normal/NL_012.png
12. step---> ./dataset/1_normal/NL_013.png
13. step---> ./dataset/1_normal/NL_014.png
14. step---> ./dataset/1_normal/NL_015.png
15. step---> ./dataset/1_normal/NL_016.png
16. step---> ./dataset/1_normal/NL_017.png
17. step---> ./dataset/1_normal/NL_018.png
18. step---> ./dataset/1_normal/NL_019.png
19. step---> ./dataset/1_normal/NL_020.png
20. step---> ./dataset/1_normal/NL_021.png
21. step---> ./dataset/1_normal/NL_022.png
22. step---> ./dataset/1_normal/NL_023.png
23. step---> ./datase

In [None]:
# Split the loaded samples and labels into training and validation partitions.
X_train, x_val, y_train, y_val = split_dataset(X, y)

In [None]:
def plot_pie_sets(arrays):
    """Draw one pie chart of class frequencies per label array.

    The per-class counts of each array are also printed.

    Args:
        arrays: Sequence of label arrays, paired in order with the titles
            "Train Set" and "Validation Set". Labels are expected to be the
            integer class ids 0..3.
    """
    titles = ["Train Set", "Validation Set"]
    labels = ["Normal", "Cataract", "Glaucoma", "Retina Disease"]

    fig = plt.figure(figsize=(9, 5))
    fig.suptitle("Distribution")

    for position, (title, class_ids) in enumerate(zip(titles, arrays), start=1):
        ax = fig.add_subplot(1, len(titles), position)
        # np.asarray lets plain lists be counted with the same comparison.
        class_ids = np.asarray(class_ids)
        counts = [int((class_ids == class_id).sum()) for class_id in range(len(labels))]
        print(counts)
        ax.set_title(title)
        ax.pie(counts, labels=labels)


# Compare the class distribution of the training and validation labels.
plot_pie_sets([y_train, y_val])

In [None]:
# Convolutional classifier: three conv layers (the first two followed by max
# pooling), then a dense head ending in a softmax over the classes.
# The input shape follows the resize constants, so the network stays in sync
# with the preprocessing if IMAGE_WIDTH / IMAGE_HEIGHT are changed.
NUM_CLASSES = 4  # Normal, Cataract, Glaucoma, Retina Disease
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', padding="same",
           input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3)),
    MaxPooling2D(),
    Conv2D(64, (2, 2), padding="same", activation='relu'),
    MaxPooling2D(),
    Conv2D(64, (2, 2), padding="same", activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(NUM_CLASSES, activation='softmax')
])

In [None]:
# Print Keras' summary of the model defined above.
model.summary()

In [None]:
# Labels are integer class ids (not one-hot), hence the sparse cross-entropy loss.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
sparse_cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(
    optimizer=adam_optimizer,
    loss=sparse_cross_entropy,
    metrics=['accuracy'],
)


In [None]:
def convertPreds(y_pred):
    """Convert per-class score rows into predicted class indices.

    Args:
        y_pred: Sequence of score rows (e.g. the output of ``model.predict``),
            one row per sample.

    Returns:
        List of ints: for every row, the index of its highest score.
    """
    # Iterate over the rows themselves; range() cannot take an array.
    return [int(np.argmax(row)) for row in y_pred]

In [None]:
# Define a StratifiedKFold object (seeded from the shared SEED constant)
FOLD_EPOCHS = 10
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

# Snapshot the current weights so every fold starts from the same state.
# Without the reset, each fold would continue training a model that has
# already seen that fold's validation samples in earlier iterations.
# NOTE(review): the optimizer state is not reset between folds.
initial_weights = model.get_weights()
fold_val_accuracies = []

# Iterate over the K folds
for fold_number, (train_index, test_index) in enumerate(skf.split(X_train, y_train), start=1):
    # Split the data into this fold's training and held-out parts
    X_fold_train, X_fold_test = X_train[train_index], X_train[test_index]
    y_fold_train, y_fold_test = y_train[train_index], y_train[test_index]

    # Train and evaluate the model on the fold, starting from the snapshot
    model.set_weights(initial_weights)
    fold_history = model.fit(X_fold_train, y_fold_train,
                             validation_data=(X_fold_test, y_fold_test),
                             epochs=FOLD_EPOCHS, batch_size=BATCH_SIZE)
    fold_val_accuracies.append(fold_history.history['val_accuracy'][-1])
    print(f"Fold {fold_number}: validation accuracy {fold_val_accuracies[-1]:.4f}")

# After the loop the model holds the weights trained on the last fold.
if fold_val_accuracies:
    print(f"Mean validation accuracy over {len(fold_val_accuracies)} folds: "
          f"{np.mean(fold_val_accuracies):.4f}")



In [None]:
# Score the current model on the held-out split produced by split_dataset.
class_scores = model.predict(x_val)
y_pred = convertPreds(class_scores)
accuracy = accuracy_score(y_val, y_pred)
print(f'Test set accuracy: {accuracy:.2f}')

# Continue training on the full training partition, tracking the held-out
# split after every epoch; the returned history is plotted later.
history_vanilla = model.fit(
    X_train,
    y_train.flatten(),
    validation_data=(x_val, y_val.flatten()),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

In [None]:
def plot_history(history):
    """Plot training and validation accuracy per epoch.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the 'accuracy' and 'val_accuracy' series (as returned by
            ``model.fit``).
    """
    recorded = history.history
    # Training curve first, validation curve second, matching the legend order.
    for metric_name in ('accuracy', 'val_accuracy'):
        plt.plot(recorded[metric_name])
    plt.title('Model accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

# Plot the accuracy curves recorded in history_vanilla.
plot_history(history_vanilla)

In [None]:
# Rebuild the dataset with histogram normalization enabled (mode=1).
# create_dataset returns only (images, labels); the four train/validation
# arrays come from split_dataset.
X_norm, y_norm = create_dataset(df, 1)
X_train, x_val, y_train, y_val = split_dataset(X_norm, y_norm)