# Cataract detection 

In this notebook I attempt to detect cataracts in a human eye.

## Necessary libraries

In [None]:
import numpy as np
import cv2
import os 
import pandas as pd
from random import sample

import seaborn as sns
import matplotlib.pyplot as plt
from scikitplot.metrics import plot_confusion_matrix as plt_con_mat

from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split

from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, Dense, Dropout, MaxPooling2D, Flatten
from keras.utils import plot_model

## Loading the data

In [None]:
# Root directory of the dataset.
path = "../input/ocular-disease-recognition-odir5k"

# Read full_df.csv into a DataFrame and preview its first rows.
csv_path = os.path.join(path, "full_df.csv")
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Collect (image file, label) pairs for every eye that is either a cataract
# case (label 1) or a normal fundus (label 0). Left eyes are processed
# first, then right eyes.
file_names = []
labels = []

# (diagnostic-keywords column, image-file column) for each eye.
eye_columns = (
    ("Left-Diagnostic Keywords", "Left-Fundus"),
    ("Right-Diagnostic Keywords", "Right-Fundus"),
)

for keywords_column, fundus_column in eye_columns:
    rows = zip(df[keywords_column], df["C"], df[fundus_column])
    for text, label, file_name in rows:
        # The keyword text of this eye must agree with the "C" column value
        # of the row; anything else is skipped.
        if ("cataract" in text) and (label == 1):
            target = 1
        elif ("normal fundus" in text) and (label == 0):
            target = 0
        else:
            continue

        file_names.append(file_name)
        labels.append(target)

print(len(file_names), len(labels))

In [None]:
# Bar chart of how many samples were collected for each class.
cataract_count = labels.count(1)
normal_count = labels.count(0)

plt.bar([0, 1], [cataract_count, normal_count], color=['r', 'g'])
plt.xticks([0, 1], ['Cataract', 'Normal'])
plt.show()

## Extracting the data into train and test sets.

In [None]:
# Side lengths, in pixels, that every image is resized to.
ROW, COL = 224, 224

In [None]:
# Load every selected image, resize it, and keep `labels` (and `file_names`)
# aligned with the images that could actually be read.
#
# Labels of unreadable images are dropped by rebuilding the lists instead of
# deleting by index during iteration: each deletion shifts the remaining
# entries, so later deletions would remove the wrong labels.
image_data = []
kept_names = []
kept_labels = []

for image_name, label in zip(file_names, labels):
    img = cv2.imread(os.path.join(path, "preprocessed_images", image_name))

    # cv2.imread returns None (it does not raise) when a file is missing or
    # cannot be decoded; skip such entries.
    if img is None:
        continue

    # cv2.resize expects the target size as (width, height).
    image_data.append(cv2.resize(img, (COL, ROW)))
    kept_names.append(image_name)
    kept_labels.append(label)

# From here on, index i refers to the same sample in all three containers.
file_names = kept_names
labels = kept_labels
image_data = np.array(image_data)

print(image_data.shape)

In [None]:
# Build a class-balanced dataset: all cataract samples plus an equally sized
# random subset of the normal samples (normal samples come first).
normal_indices = [i for i, value in enumerate(labels) if value == 0]
cataract_indices = [i for i, value in enumerate(labels) if value == 1]

# Undersample the normal class, without replacement, to the cataract count.
normal_indices = sample(normal_indices, len(cataract_indices))

selected = normal_indices + cataract_indices

X_data = np.array([image_data[i] for i in selected])

# One-hot encode the labels.
y_data = np.array([labels[i] for i in selected])
y_data = to_categorical(np.expand_dims(y_data, axis=-1))

print(X_data.shape, y_data.shape)

In [None]:
# Hold out 20% of the samples as the test set; shuffling uses a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    shuffle=True,
    random_state=1,
)

for features, targets in ((X_train, y_train), (X_test, y_test)):
    print(features.shape, targets.shape)

## Examples of the images

In [None]:
# Show up to five example images per class, cataract on the left and normal
# on the right.
examples_per_class = 5

# y_data is one-hot encoded: argmax 0 is "normal"; the other class is
# "cataract". Each list is selected strictly by its own class so that no
# normal image can end up in the cataract column.
class_ids = np.argmax(y_data, axis=1)
normal_images = np.flatnonzero(class_ids == 0)[:examples_per_class].tolist()
cataract_images = np.flatnonzero(class_ids != 0)[:examples_per_class].tolist()

fig, ax = plt.subplots(examples_per_class, 2, figsize=(20, 20))

ax[0, 0].title.set_text("Cataract")
ax[0, 1].title.set_text("Normal")

for column, indices in enumerate((cataract_images, normal_images)):
    for row in range(examples_per_class):
        ax[row, column].axis('off')
        # Leave the cell empty if a class has fewer examples than rows.
        if row < len(indices):
            # The images were read with cv2.imread, which yields BGR channel
            # order; reverse the last axis so imshow receives RGB.
            ax[row, column].imshow(X_data[indices[row]][..., ::-1])

plt.show()

## CNN model using VGG19

### Transfer learning

In [None]:
# VGG19 with ImageNet weights and without its top classifier layers.
vgg = VGG19(
    include_top=False,
    weights="imagenet",
    input_shape=(ROW, COL, 3),
)

# Freeze every VGG19 layer so that only layers added on top are trained.
for base_layer in vgg.layers:
    base_layer.trainable = False

In [None]:
# Classifier: the VGG19 base, flattened, followed by a small dense head
# ending in a 2-way softmax.
model = Sequential([
    vgg,
    Flatten(),
    Dense(64, activation='relu'),
    Dense(2, activation="softmax"),
])

model.summary()

In [None]:
# Draw the model architecture, including layer names and tensor shapes.
plot_model(
    model,
    show_layer_names=True,
    show_shapes=True,
)

## Training the CNN model

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy / precision / recall as tracked metrics.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy', 'Precision', 'Recall'],
)

# Train for 15 epochs; the held-out split is evaluated after each epoch.
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=15,
    validation_data=(X_test, y_test),
)

## Model training performance

In [None]:
# Plot the per-epoch training and validation curves for accuracy and loss,
# side by side.
sns.set()
fig = plt.figure(0, (12, 4))

# (panel title, training-history key, validation-history key)
panels = (
    ('Accuracy', 'accuracy', 'val_accuracy'),
    ('Loss', 'loss', 'val_loss'),
)

for position, (title, train_key, val_key) in enumerate(panels, start=1):
    ax = plt.subplot(1, 2, position)
    # x and y are passed by keyword: recent seaborn releases no longer
    # accept them positionally.
    sns.lineplot(x=history.epoch, y=history.history[train_key],
                 label='train', ax=ax)
    sns.lineplot(x=history.epoch, y=history.history[val_key],
                 label='validation', ax=ax)
    ax.set_title(title)
    plt.tight_layout()

# To also save the figure, call plt.savefig('epoch_history.png') before show().
plt.show()

In [None]:
# Confusion matrix of the model's predictions on the test split.
#
# Sequential.predict_classes is no longer available in current TensorFlow
# releases; taking the argmax of the softmax output of predict() gives the
# same class indices.
probabilities = model.predict(X_test)
preds = np.argmax(probabilities, axis=1)

# y_test is one-hot encoded, so argmax recovers the integer class labels.
y_true = np.argmax(y_test, axis=1)

plt_con_mat(y_true, preds, figsize=(14, 14))
plt.show()