In [None]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Sequential

In [None]:
# Mount Google Drive inside the Colab runtime so files under MyDrive can be read and written.
from google.colab import drive
drive.mount('/content/drive')
# Root of the user's Drive; later paths are built by string concatenation from this value.
drivePath = "/content/drive/MyDrive"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Project folder on Drive; the trailing slash lets later cells append file names directly.
filePath = f"{drivePath}/Cataract/"

In [None]:
# Read the metadata table (one row per image file) and display it.
csv_path = filePath + "full_df.csv"
df = pd.read_csv(csv_path)
df

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Fundus,Right-Fundus,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,filepath,labels,target,filename
0,0,69,Female,0_left.jpg,0_right.jpg,cataract,normal fundus,0,0,0,1,0,0,0,0,../input/ocular-disease-recognition-odir5k/ODI...,['N'],"[1, 0, 0, 0, 0, 0, 0, 0]",0_right.jpg
1,1,57,Male,1_left.jpg,1_right.jpg,normal fundus,normal fundus,1,0,0,0,0,0,0,0,../input/ocular-disease-recognition-odir5k/ODI...,['N'],"[1, 0, 0, 0, 0, 0, 0, 0]",1_right.jpg
2,2,42,Male,2_left.jpg,2_right.jpg,laser spot，moderate non proliferative retinopathy,moderate non proliferative retinopathy,0,1,0,0,0,0,0,1,../input/ocular-disease-recognition-odir5k/ODI...,['D'],"[0, 1, 0, 0, 0, 0, 0, 0]",2_right.jpg
3,4,53,Male,4_left.jpg,4_right.jpg,macular epiretinal membrane,mild nonproliferative retinopathy,0,1,0,0,0,0,0,1,../input/ocular-disease-recognition-odir5k/ODI...,['D'],"[0, 1, 0, 0, 0, 0, 0, 0]",4_right.jpg
4,5,50,Female,5_left.jpg,5_right.jpg,moderate non proliferative retinopathy,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0,../input/ocular-disease-recognition-odir5k/ODI...,['D'],"[0, 1, 0, 0, 0, 0, 0, 0]",5_right.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6387,4686,63,Male,4686_left.jpg,4686_right.jpg,severe nonproliferative retinopathy,proliferative diabetic retinopathy,0,1,0,0,0,0,0,0,../input/ocular-disease-recognition-odir5k/ODI...,['D'],"[0, 1, 0, 0, 0, 0, 0, 0]",4686_left.jpg
6388,4688,42,Male,4688_left.jpg,4688_right.jpg,moderate non proliferative retinopathy,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0,../input/ocular-disease-recognition-odir5k/ODI...,['D'],"[0, 1, 0, 0, 0, 0, 0, 0]",4688_left.jpg
6389,4689,54,Male,4689_left.jpg,4689_right.jpg,mild nonproliferative retinopathy,normal fundus,0,1,0,0,0,0,0,0,../input/ocular-disease-recognition-odir5k/ODI...,['D'],"[0, 1, 0, 0, 0, 0, 0, 0]",4689_left.jpg
6390,4690,57,Male,4690_left.jpg,4690_right.jpg,mild nonproliferative retinopathy,mild nonproliferative retinopathy,0,1,0,0,0,0,0,0,../input/ocular-disease-recognition-odir5k/ODI...,['D'],"[0, 1, 0, 0, 0, 0, 0, 0]",4690_left.jpg


In [None]:
def has_cataract(X):
  """Return 1 if the diagnostic-keyword text mentions "cataract", else 0.

  Args:
    X: Diagnostic keywords for one eye, normally a string. Values that do not
      support the ``in`` operator (e.g. the float NaN that pandas uses for an
      empty CSV cell, or None) are treated as "no cataract" instead of
      raising TypeError.

  Returns:
    int: 1 when "cataract" occurs in X, otherwise 0.
  """
  try:
    return 1 if "cataract" in X else 0
  except TypeError:
    # Missing keywords carry no evidence of cataract.
    return 0

In [None]:
# Add one 0/1 column per eye telling whether that eye's keyword text mentions cataract.
df["left_cataract"] = df["Left-Diagnostic Keywords"].apply(has_cataract)
df["right_cataract"] = df["Right-Diagnostic Keywords"].apply(has_cataract)

In [None]:
# File names of eyes whose own keywords mention cataract, restricted to rows whose C column is 1.
c_flag_set = df.C == 1
left_cataract = df.loc[(df.left_cataract == 1) & c_flag_set, "Left-Fundus"].values
right_cataract = df.loc[(df.right_cataract == 1) & c_flag_set, "Right-Fundus"].values

In [None]:
# Draw 250 reproducible samples per eye from rows with C == 0 whose keyword text is exactly "normal fundus".
c_flag_clear = df.C == 0
left_normal = (
    df.loc[c_flag_clear & (df["Left-Diagnostic Keywords"] == "normal fundus"), "Left-Fundus"]
    .sample(250, random_state=42)
    .values
)
right_normal = (
    df.loc[c_flag_clear & (df["Right-Diagnostic Keywords"] == "normal fundus"), "Right-Fundus"]
    .sample(250, random_state=42)
    .values
)

In [None]:
# Number of cataract image file names selected for the left and right eye respectively.
len(left_cataract), len(right_cataract)

(304, 290)

In [None]:
# Number of normal image file names sampled for the left and right eye respectively.
len(left_normal), len(right_normal)

(250, 250)

In [None]:
# Merge the left-eye and right-eye file names into one array per class.
cataract = np.concatenate([left_cataract, right_cataract])
normal = np.concatenate([left_normal, right_normal])

In [None]:
# Total file names per class after merging both eyes.
len(cataract), len(normal)

(594, 500)

In [None]:
import random

# Seed the stdlib RNG so the shuffles (and the later random.shuffle / random.choice calls)
# give the same order on every Restart & Run All, matching the random_state=42 used for sampling.
random.seed(42)
random.shuffle(cataract)
random.shuffle(normal)

In [None]:
# NOTE(review): load_img and img_to_array are not referenced by any other visible cell.
from tensorflow.keras.preprocessing.image import load_img,img_to_array
# Folder holding the image files. filePath already ends with "/", so the result
# contains "//"; POSIX path resolution treats consecutive slashes as a single one.
dataset_dir = filePath + "/preprocessed_images/"

In [None]:
# Length of the path string (number of characters), not the number of files in the folder.
len(dataset_dir)

53

In [None]:
from tqdm import tqdm

image_size=224  # side length in pixels; create_dataset resizes every image to image_size x image_size
labels = []  # NOTE(review): not referenced by any other visible cell — confirm before removing
dataset = []  # accumulator of [image, label] pairs that create_dataset appends to
def create_dataset(image_category,label):
    """Load, resize and label the given image files, appending them to `dataset`.

    Each file name is joined onto the module-level `dataset_dir`, read with
    OpenCV and resized to `image_size` x `image_size`. Files that OpenCV cannot
    read are skipped and counted. Any other error (for example a missing
    import) now propagates instead of silently producing an empty dataset, as
    the previous bare ``except`` did.

    Args:
        image_category: Iterable of image file names relative to `dataset_dir`.
        label: Class label stored alongside every image of this category.

    Returns:
        list: The module-level `dataset` list (shuffled in place); every entry
        is ``[image_array, label_array]``. Images keep OpenCV's BGR channel order.
    """
    skipped = 0
    for img in tqdm(image_category):
        image_path = os.path.join(dataset_dir,img)
        image = cv2.imread(image_path,cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals a missing or undecodable file by returning None.
            skipped += 1
            continue
        image = cv2.resize(image,(image_size,image_size))

        dataset.append([np.array(image),np.array(label)])
    if skipped:
        print("Skipped {} unreadable image(s) for label {}".format(skipped, label))
    random.shuffle(dataset)
    return dataset
        

In [None]:
# create_dataset appends to the shared `dataset` list, so empty it first;
# otherwise re-running this cell would add every image a second time.
dataset.clear()
dataset = create_dataset(cataract,1)
dataset = create_dataset(normal,0)
# Fail here with a clear message rather than later in reshape / train_test_split.
assert len(dataset) > 0, "No images were loaded - check dataset_dir and the file names"

100%|██████████| 594/594 [00:00<00:00, 326614.65it/s]
100%|██████████| 500/500 [00:00<00:00, 433833.68it/s]


In [None]:
# Show ten randomly chosen images from the dataset with their class name.
plt.figure(figsize=(12,7))
for i in range(10):
    sample = random.choice(range(len(dataset)))
    image = dataset[sample][0]
    category = dataset[sample][1]
    label = "Normal" if category == 0 else "Cataract"
    plt.subplot(2,5,i+1)
    # The images were read with cv2.imread (BGR order) while matplotlib expects RGB,
    # so reverse the channel axis for display only; the stored arrays are unchanged.
    plt.imshow(image[..., ::-1])
    plt.xlabel(label)
plt.tight_layout()

In [None]:
# Split the [image, label] pairs into an image tensor and a label vector.
x = np.array([pair[0] for pair in dataset]).reshape(-1,image_size,image_size,3)
y = np.array([pair[1] for pair in dataset])

In [None]:
# One row per image: the flattened pixel vector and its 0/1 label.
# The previous version paired N*150528 pixel values with N labels, which pandas
# rejects with a length-mismatch ValueError, and hard-coded N = 1088.
# A separate name is used so the `cataract` file-name array is not overwritten
# and the dataset-building cell stays re-runnable.
flat_pixels = x.reshape(len(x), -1)
cataract_df = pd.DataFrame({
    "Image" : list(flat_pixels),
    "Outcome" : y
})

In [None]:
# 70 % train, 15 % validation, 15 % test. random_state pins both splits so metrics
# are comparable between runs (same seed as the sampling cells above).
x_train, x_val_and_test, y_train, y_val_and_test = train_test_split(x, y, test_size=0.3, random_state=42)
x_val, x_test, y_val, y_test = train_test_split(x_val_and_test, y_val_and_test, test_size=0.5, random_state=42)

In [None]:
from tensorflow.keras.applications.vgg19 import VGG19
# VGG19 convolutional base with ImageNet weights; include_top=False drops the original
# classifier layers so a new output layer can be attached for this task.
vgg = VGG19(weights="imagenet",include_top = False,input_shape=(image_size,image_size,3))

In [None]:
# Freeze every layer of the pretrained base so only the layers added afterwards are trained.
for layer in vgg.layers:
    layer.trainable = False

In [None]:
# Frozen VGG19 base -> flatten -> single sigmoid unit producing one probability per image.
model = Sequential([
    vgg,
    Flatten(),
    Dense(1, activation="sigmoid"),
])

In [None]:
# Print the layer stack with output shapes and parameter counts.
model.summary()

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit defined for this model.
model.compile(optimizer="adam",loss="binary_crossentropy",metrics=["accuracy"])

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
# With metrics=["accuracy"], TF2 Keras logs the validation metric as "val_accuracy".
# Monitoring "val_acc" finds no such key, so the checkpoint is never written and
# early stopping never evaluates its condition.
checkpoint = ModelCheckpoint("vgg19.h5",monitor="val_accuracy",verbose=1,save_best_only=True,
                             save_weights_only=False,save_freq='epoch')
earlystop = EarlyStopping(monitor="val_accuracy",patience=5,verbose=1)

In [None]:
# Train the model and keep the per-epoch metrics in `history`.
# NOTE(review): with epochs=5 and an EarlyStopping patience of 5, early stopping
# cannot trigger before training ends on its own — raise epochs or lower patience if it should.
history = model.fit(x_train,y_train,batch_size=32,epochs=5,validation_data=(x_val,y_val),
                    verbose=1,callbacks=[checkpoint,earlystop])

In [None]:
# Save to the runtime's working directory; create the folder first because
# writing the .h5 file fails if "models/" does not exist.
os.makedirs("models", exist_ok=True)
model.save("models/cataract_detection.h5")

In [None]:
# Save a copy under the Drive project folder; create the folder first because
# writing the .h5 file fails if it does not exist.
os.makedirs(filePath + "models", exist_ok=True)
model.save(filePath + "models/cataract_detection.h5")

In [None]:
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
# One sigmoid probability per test image, shape (n_samples, 1).
y_prob = model.predict(x_test)
# np.argmax over a single column always returns 0, i.e. every image would be labelled
# "Normal". Threshold the probability instead (values <= 0.5 map to class 0).
y_pred = (np.ravel(y_prob) > 0.5).astype(int)

In [None]:
# Turn each probability into a hard label: at most 0.5 -> 0 (Normal), above -> 1 (Cataract).
y_pred = [0 if prob <= 0.5 else 1 for prob in y_prob]

In [None]:
# Convert the list of labels to a NumPy array so it can be indexed like y_test.
y_pred = np.array(y_pred)

In [None]:
# Display the predicted labels for the test split.
y_pred

In [None]:
# Display the true labels for the test split, for comparison with y_pred.
y_test

In [None]:
# NOTE(review): accuracy_score was already imported in the prediction cell; this import is redundant.
from sklearn.metrics import accuracy_score
# Fraction of test images whose predicted label equals the true label.
score=accuracy_score(y_test, y_pred)
score

In [None]:
# Per-class precision, recall and F1 on the test split (class 0 = Normal, class 1 = Cataract).
print(classification_report(y_test,y_pred))

In [None]:
from mlxtend.plotting import plot_confusion_matrix
# Confusion matrix for the test split, computed from true labels (first argument) and predictions.
cm = confusion_matrix(y_test,y_pred)
# show_normed adds the normalised proportion next to each absolute count.
plot_confusion_matrix(conf_mat = cm,figsize=(8,7),
                      show_normed = True);

# Class order in the matrix: index 0 = "Normal", index 1 = "Cataract".

In [None]:
# Show ten randomly chosen test images with their true and predicted class.
plt.figure(figsize=(12,7))
for i in range(10):
    sample = random.choice(range(len(x_test)))
    image = x_test[sample]
    category = y_test[sample]
    pred_category = y_pred[sample]

    label = "Normal" if category == 0 else "Cataract"
    pred_label = "Normal" if pred_category == 0 else "Cataract"

    plt.subplot(2,5,i+1)
    # The images were read with cv2.imread (BGR order) while matplotlib expects RGB,
    # so reverse the channel axis for display only; x_test itself is unchanged.
    plt.imshow(image[..., ::-1])
    plt.xlabel("Actual:{}\nPrediction:{}".format(label,pred_label))
plt.tight_layout()

In [None]:
def predict(path, model):
  """Classify one image file as cataract (1) or normal (0).

  The image is read with OpenCV and resized to 224x224, the same way
  create_dataset prepares the training images, then passed to the model as a
  batch of one.

  Args:
    path: Path of the image file to classify.
    model: Object whose ``predict(batch)`` returns one sigmoid probability
      per image.

  Returns:
    tuple: ``(prediction, probability_percent)`` where prediction is 0 or 1
    and probability_percent is the model output multiplied by 100.

  Raises:
    OSError: If the file is missing or cannot be decoded as an image.
  """
  image = cv2.imread(path, cv2.IMREAD_COLOR)
  if image is None:
    # cv2.imread returns None instead of raising when it cannot read the file.
    raise OSError("Could not read image file: {!r}".format(path))
  image = cv2.resize(image, (224,224))

  matrix224224 = np.array([image])
  # Reduce the (1, 1) model output to a plain scalar before comparing it.
  probability = float(np.ravel(model.predict(matrix224224))[0])
  # Same rule as the test-set evaluation: at most 0.5 -> 0, above -> 1.
  prediction = 0 if probability <= 0.5 else 1
  return prediction, (probability*100)

In [None]:
# Classify a single image file from the working directory; `probability` is the model output in percent.
pred_cat, probability = predict("cataract.jpg", model)
pred_cat

In [None]:
# Classify a second image file; note this rebinds `probability`, replacing the value from the previous cell.
pred_norm, probability = predict("normal.jpg", model)
pred_norm