---

<h1 style="text-align: center;font-size: 40px;">Cataract Classification Model</h1>

---

In [None]:
# Importing libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import tensorflow as tf

In [None]:
# Read the ODIR-5K annotation table and preview its first three rows.
full_df_path = "/kaggle/input/ocular-disease-recognition-odir5k/full_df.csv"
df = pd.read_csv(full_df_path)
df.head(3)

> <h3> Extracting Cataract & Normal information from the Dataset </h3>

In [None]:
def has_cataract(text):
    """Return 1 if the diagnostic keyword string mentions "cataract", else 0.

    Parameters
    ----------
    text : str
        Diagnostic keywords for one eye.

    Returns
    -------
    int
        1 when the substring "cataract" occurs in ``text``, otherwise 0.
        Non-string values (for example NaN from a missing cell) yield 0
        instead of raising ``TypeError`` on the ``in`` test.
    """
    if not isinstance(text, str):
        return 0
    return 1 if "cataract" in text else 0

In [None]:
# Flag, separately for each eye, whether its diagnostic keywords mention a cataract (1) or not (0).
left_keywords = df["Left-Diagnostic Keywords"]
right_keywords = df["Right-Diagnostic Keywords"]
df["left_cataract"] = left_keywords.apply(has_cataract)
df["right_cataract"] = right_keywords.apply(has_cataract)

In [None]:
# "Left-Fundus" entries for rows with C == 1 whose left-eye keywords mention a cataract.
left_cataract_mask = (df["C"] == 1) & (df["left_cataract"] == 1)
left_cataract = df.loc[left_cataract_mask, "Left-Fundus"].values
left_cataract[:10]

In [None]:
# "Right-Fundus" entries for rows with C == 1 whose right-eye keywords mention a cataract.
right_cataract_mask = (df["C"] == 1) & (df["right_cataract"] == 1)
right_cataract = df.loc[right_cataract_mask, "Right-Fundus"].values
right_cataract[:15]

In [None]:
# Report how many cataract images were selected for each eye.
n_left_cataract = len(left_cataract)
n_right_cataract = len(right_cataract)
print("Number of images in left cataract: {}".format(n_left_cataract))
print("Number of images in right cataract: {}".format(n_right_cataract))

>Normal Images

In [None]:
# Draw 500 entries per eye (fixed seed 42) from rows with C == 0 whose keywords are exactly "normal fundus".
left_normal_mask = (df["C"] == 0) & (df["Left-Diagnostic Keywords"] == "normal fundus")
right_normal_mask = (df["C"] == 0) & (df["Right-Diagnostic Keywords"] == "normal fundus")
left_normal = df.loc[left_normal_mask, "Left-Fundus"].sample(500, random_state=42).values
right_normal = df.loc[right_normal_mask, "Right-Fundus"].sample(500, random_state=42).values
right_normal[:15]

In [None]:
# Merge both eyes per class. Left-eye entries come first in each concatenation,
# so the 600-item cut on the normal class keeps left-eye entries before right-eye ones.
cataract = np.concatenate([left_cataract, right_cataract], axis=0)
all_normal = np.concatenate([left_normal, right_normal], axis=0)
normal = all_normal[:600]

In [None]:
# Class sizes: number of cataract entries, then number of normal entries.
print(len(cataract),len(normal))

><h3>Creating Dataset from images</h3>

In [None]:
from tensorflow.keras.preprocessing.image import load_img,img_to_array
# Directory the image file names are joined onto, and the square edge length images are resized to.
dataset_dir = "/kaggle/input/ocular-disease-recognition-odir5k/preprocessed_images/"
image_size=224
labels = []  # not used by create_dataset
dataset = []  # module-level accumulator filled by create_dataset
def create_dataset(image_category,label):
    """Load, resize and label images, appending them to the module-level ``dataset``.

    Each readable image is read with OpenCV (which yields BGR channel order),
    resized to ``image_size`` x ``image_size`` and stored as
    ``[image_array, label_array]``. After the loop the whole ``dataset`` list
    is shuffled in place.

    NOTE: ``dataset`` is shared state. Every call appends to the same list, so
    re-running a cell that calls this function adds the same images again.

    Parameters
    ----------
    image_category : iterable of str
        Image file names relative to ``dataset_dir``.
    label : int
        Class label stored alongside every image from ``image_category``.

    Returns
    -------
    list
        The module-level ``dataset`` list (the same object on every call).
    """
    skipped = []
    for img in tqdm(image_category):
        image_path = os.path.join(dataset_dir,img)
        # cv2.imread reports a missing or unreadable file by returning None, not by raising.
        image = cv2.imread(image_path,cv2.IMREAD_COLOR)
        if image is None:
            skipped.append(img)
            continue
        try:
            image = cv2.resize(image,(image_size,image_size))
        except cv2.error:
            # Keep the loader best-effort, but only for OpenCV failures
            # (a bare except would also swallow KeyboardInterrupt).
            skipped.append(img)
            continue

        dataset.append([np.array(image),np.array(label)])
    if skipped:
        print("Skipped {} unreadable image(s), e.g. {}".format(len(skipped), skipped[:5]))
    random.shuffle(dataset)
    return dataset
        

In [None]:
# Add the cataract images with label 1; create_dataset appends to (and returns) the shared `dataset` list.
dataset = create_dataset(cataract,1)

In [None]:
# Number of samples accumulated in `dataset` so far.
len(dataset)

In [None]:
# Add the normal images with label 0 to the same shared `dataset` list (it is not reset between calls).
dataset = create_dataset(normal,0)

In [None]:
# Total number of samples in `dataset` after this step.
len(dataset)

><h3>Let's see some images</h3>

In [None]:
# Show 10 randomly chosen samples with their class names.
plt.figure(figsize=(12,7))
for i in range(10):
    image, category = random.choice(dataset)
    label = "Normal" if category == 0 else "Cataract"
    plt.subplot(2,5,i+1)
    # Images were read with cv2.imread (BGR channel order); matplotlib expects RGB,
    # so convert for display only. The stored array is left untouched.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.xlabel(label)
plt.tight_layout()

><h3>Dividing dataset into x(features) & y(target)</h3>

In [None]:
# Split the [image, label] pairs into a 4-D image array (N, image_size, image_size, 3)
# and a label array in the same order.
images = [pair[0] for pair in dataset]
targets = [pair[1] for pair in dataset]
x = np.array(images).reshape(-1, image_size, image_size, 3)
y = np.array(targets)

In [None]:
from sklearn.model_selection import train_test_split
# 80/20 split. A fixed seed makes the split reproducible across runs, and
# stratifying on y keeps the class ratio the same in the train and test parts.
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42,stratify=y)
x_train.shape

><h3>Creating Model</h3>

In [None]:
# Sequential CNN: five convolution blocks followed by a dense classifier with a 2-way softmax.
# Every Conv2D/Dense layer except the last uses ReLU and an L1 activity regularizer of 10e-10 (= 1e-9).
model2=tf.keras.Sequential([
    # Input layer: 224x224 images with 3 channels
    tf.keras.layers.Input(shape=(224,224,3)),
    
    # 1st block: two 3x3 convolutions with 32 filters (the first with stride 2), batch norm in between,
    # followed by a max-pool layer with stride 2 and pool-size of 2.
    tf.keras.layers.Conv2D(32,(3,3),strides=2,padding="valid", activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(32,(3,3),strides=1,padding="valid", activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.MaxPooling2D((2,2),strides=2),
    
    # 2nd block: a 1x1 convolution (stride 2) and a 3x3 convolution, each with 64 filters, batch norm in between,
    # followed by a max-pool layer with stride 2 and pool-size of 2.
    tf.keras.layers.Conv2D(64,(1,1),strides=2,padding="valid", activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(64,(3,3),strides=1,padding="valid", activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.MaxPooling2D((2,2),strides=2),
    
    # 3rd block: 1x1 -> 3x3 -> 1x1 convolutions with 96 filters each (batch norm after the first two),
    # followed by a max-pool layer with stride 2 and pool-size of 2, then 50% dropout.
    tf.keras.layers.Conv2D(96,(1,1),strides=1,padding="same", activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(96,(3,3),strides=1,padding="valid",activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(96,(1,1),strides=1,padding="same",activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.MaxPooling2D((2,2),strides=2),
    tf.keras.layers.Dropout(0.5),

    # 4th block: 1x1 -> 3x3 -> 1x1 convolutions with 128 filters each (batch norm after the first two),
    # followed by a max-pool layer with stride 2 and pool-size of 2.
    tf.keras.layers.Conv2D(128,(1,1),strides=1,padding="same", activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(128,(3,3),strides=1,padding="valid", activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(128,(1,1),strides=1,padding="same",activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.MaxPooling2D((2,2),strides=2),
    
    # 5th block: 1x1 -> 3x3 -> 1x1 convolutions with 128 filters each, all with "same" padding
    # (batch norm after the first two).
    # This block has no pooling or dropout layer; its output goes straight to Flatten.
    tf.keras.layers.Conv2D(128,(1,1),strides=1,padding="same", activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(128,(3,3),strides=1,padding="same", activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(128,(1,1),strides=1,padding="same",activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
   
    # Classifier head: flatten, two fully connected layers (1024 and 128 units), then a 2-class softmax output
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1024,activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.Dense(128,activation=tf.keras.layers.ReLU(), activity_regularizer=tf.keras.regularizers.l1(10e-10)),
    tf.keras.layers.Dense(2,activation='softmax')
])


In [None]:
# Print the layer-by-layer summary of the network.
model2.summary()

In [None]:
# Adam with a step size of 1e-6. The loss is sparse categorical cross-entropy
# on probabilities (from_logits=False), so targets are integer class ids rather than one-hot vectors.
opt = tf.keras.optimizers.Adam(learning_rate=1e-6)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
model2.compile(optimizer=opt, loss=loss_fn, metrics=['accuracy'])

In [None]:
# Shape of the training label array.
y_train.shape

In [None]:
# Train the network and measure the wall-clock duration of the fit call.
import time

start = time.time()
# The test split is passed as validation data, so the val_* curves are computed on x_test/y_test.
history = model2.fit(
    x_train,
    y_train,
    batch_size=16,
    epochs=1000,
    validation_data=(x_test, y_test),
    verbose=2,
)
end = time.time()
# Elapsed seconds between the two timestamps.
print("The time of execution of above program is :", end-start)

## Memory Requirement in GB

In [None]:
def get_model_memory_usage(batch_size, model):
    """Roughly estimate the memory (in GiB) needed for a model's activations and weights.

    The estimate is ``bytes_per_number * (batch_size * total_layer_output_elements
    + trainable_params + non_trainable_params)``, rounded to three decimals.
    Layers whose class name is ``'Model'`` are additionally measured recursively.

    Parameters
    ----------
    batch_size : int
        Number of samples per batch; scales the activation part of the estimate.
    model : object
        Anything exposing ``layers``, ``trainable_weights`` and
        ``non_trainable_weights``. Each layer must expose ``output_shape`` or,
        failing that, ``output.shape``; each weight must expose ``shape``.

    Returns
    -------
    float
        Estimated size in GiB (bytes divided by 1024**3).
    """
    import numpy as np
    # Only ImportError is expected here; a bare except would also hide unrelated failures.
    try:
        from keras import backend as K
    except ImportError:
        try:
            from tensorflow.keras import backend as K
        except ImportError:
            K = None  # no Keras backend importable: assume the float32 default below

    def _output_shape(layer):
        # Older Keras layers expose `output_shape`; newer ones only expose `output.shape`.
        shape = getattr(layer, "output_shape", None)
        if shape is None:
            out = layer.output
            if isinstance(out, (list, tuple)):
                out = out[0]
            shape = out.shape
        # Multi-output layers report a list of shapes; only the first is counted.
        if isinstance(shape, list):
            shape = shape[0]
        return shape

    def _param_count(weights):
        # Count elements from the weight shapes so this does not depend on backend helpers.
        return sum(int(np.prod(tuple(w.shape))) for w in weights)

    shapes_mem_count = 0
    internal_model_mem_count = 0
    for l in model.layers:
        layer_type = l.__class__.__name__
        if layer_type == 'Model':
            internal_model_mem_count += get_model_memory_usage(batch_size, l)
        single_layer_mem = 1
        for s in _output_shape(l):
            if s is None:
                # Unknown (batch) dimension: accounted for via batch_size below.
                continue
            single_layer_mem *= s
        shapes_mem_count += single_layer_mem

    trainable_count = _param_count(model.trainable_weights)
    non_trainable_count = _param_count(model.non_trainable_weights)

    # Bytes per number, based on the backend's default float type.
    floatx = K.floatx() if K is not None else 'float32'
    number_size = 4.0
    if floatx == 'float16':
        number_size = 2.0
    if floatx == 'float64':
        number_size = 8.0

    total_memory = number_size * (batch_size * shapes_mem_count + trainable_count + non_trainable_count)
    gbytes = np.round(total_memory / (1024.0 ** 3), 3) + internal_model_mem_count
    return gbytes
# Estimate for a single-sample batch.
memory_gb = get_model_memory_usage(1, model2)
print("Memory Requirement (in GB)", memory_gb)

In [None]:
# Loss and accuracy of the trained model on the held-out split.
test_metrics = model2.evaluate(x_test, y_test)
loss, accuracy = test_metrics
print("loss:", loss)
print("Accuracy:", accuracy)

In [None]:
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
# The model outputs one probability per class; the predicted class is the index of the largest one.
class_probabilities = model2.predict(x_test)
y_pred = np.argmax(class_probabilities, axis=1)

In [None]:
# Fraction of test samples whose predicted class equals the true class.
accuracy_score(y_test,y_pred)

In [None]:
# Per-class report for the test split.
print(classification_report(y_test,y_pred))

In [None]:
from mlxtend.plotting import plot_confusion_matrix
# Confusion matrix of true vs. predicted classes, drawn with counts and normalised values.
class_names = ["Normal", "Cataract"]
cm = confusion_matrix(y_test, y_pred)
plot_confusion_matrix(conf_mat=cm, figsize=(8, 7), class_names=class_names, show_normed=True);

In [None]:
# Training curves: accuracy (left panel) and loss (right panel), train vs. validation.
plt.style.use("ggplot")
fig = plt.figure(figsize=(12,6))
# Take the x-axis length from the recorded history instead of hard-coding the epoch
# count, so the plot still works if the number of epochs changes or training stops early.
epochs = range(1, len(history.history["loss"]) + 1)

panels = (
    ("accuracy", "val_accuracy", "Model Accuracy", "Accuracy"),
    ("loss", "val_loss", "Model Loss", "Loss"),
)
for position, (train_key, val_key, title, ylabel) in enumerate(panels, start=1):
    plt.subplot(1,2,position)
    plt.plot(epochs,history.history[train_key],"go-")
    plt.plot(epochs,history.history[val_key],"ro-")
    plt.title(title)
    plt.xlabel("Epochs")
    plt.ylabel(ylabel)
    plt.legend(["Train","val"],loc = "upper left")
plt.show()

><h3>Prediction:</h3>

In [None]:
# Show 10 randomly chosen test images with their true and predicted class names.
plt.figure(figsize=(12,12))
for i in range(10):
    sample = random.choice(range(len(x_test)))
    image = x_test[sample]
    category = y_test[sample]
    pred_category = y_pred[sample]

    label = "Normal" if category == 0 else "Cataract"
    pred_label = "Normal" if pred_category == 0 else "Cataract"

    plt.subplot(2,5,i+1)
    # Images were read with cv2.imread (BGR channel order); matplotlib expects RGB,
    # so convert for display only. x_test itself is left untouched.
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title("Actual:{}\nPrediction:{}".format(label,pred_label))
    plt.axis("off")
plt.tight_layout()