This set of images includes all conditions.

In [2]:
import os
import cv2 
import numpy as np
from numpy import expand_dims
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import glob
import shutil
from PIL import Image

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, MaxPooling2D, Activation, Flatten, Dropout, BatchNormalization
from tensorflow.keras import models, layers
#from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
#from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img
#from keras.preprocessing.image import load_img
#from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, decode_predictions #preprocess_input, 
from keras.applications.vgg19 import VGG19, preprocess_input
#from tensorflow.keras.applications import mobilenet_v2
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import ImageDataGenerator
#from matplotlib import pyplot
from tensorflow.keras import preprocessing
from tensorflow.keras.preprocessing import image # Keras own inbuild image class


# import the data

In [3]:
# Load the annotation spreadsheet, using its first column as the row index
df = pd.read_excel(
    'data.xlsx',
    index_col=0,
)

In [22]:
# Normalise the column labels: spaces become underscores, then everything is
# lower-cased, so later cells can use names such as 'left-diagnostic_keywords'.
df.columns = (
    df.columns
    .str.replace(' ', '_')
    .str.lower()
)

# Functions

In [6]:
def extract_conditions(dataframe, condition_name):
    """Select the rows flagged as positive for a single condition.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table holding one indicator column per condition.
    condition_name : str
        Name of the indicator column to filter on (n: normal, d: diabetic,
        g: glaucoma, c: cataract, a: age-related, h: hypertensive,
        m: myopia, o: other).

    Returns
    -------
    pandas.DataFrame
        The rows of ``dataframe`` whose ``condition_name`` value equals 1.
    """
    is_flagged = dataframe[condition_name].eq(1)
    return dataframe.loc[is_flagged]

In [7]:
def extract_eyes(dataframe, column, search_param):
    """Select the rows whose text column contains a search pattern.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table to filter.
    column : str
        Name of the text column to search in.
    search_param : str
        Pattern to look for; it is passed unchanged to
        ``Series.str.contains`` (which treats it as a regular expression
        by default).

    Returns
    -------
    pandas.DataFrame
        The rows of ``dataframe`` whose ``column`` value contains
        ``search_param``. Rows with a missing value in ``column`` are
        treated as non-matching.
    """
    # na=False: a missing keyword would otherwise yield NaN in the mask, and
    # boolean indexing with a mask containing NaN raises a ValueError.
    matches = dataframe[column].str.contains(search_param, na=False)
    return dataframe[matches]

In [8]:
def make_list(dataframe_1, dataframe_2):
    """Combine left-eye and right-eye image names into a single list.

    Parameters
    ----------
    dataframe_1 : pandas.DataFrame
        Left-eye table; its 'left-fundus' column is read.
    dataframe_2 : pandas.DataFrame
        Right-eye table; its 'right-fundus' column is read.

    Returns
    -------
    list
        All 'left-fundus' values from ``dataframe_1`` followed by all
        'right-fundus' values from ``dataframe_2``.
    """
    left_images = dataframe_1['left-fundus']
    right_images = dataframe_2['right-fundus']
    return [*left_images, *right_images]

In [9]:
def make_full_list(dataframe):
    """List every image name, from both eyes, of a single table.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table with a 'left-fundus' and a 'right-fundus' column.

    Returns
    -------
    list
        All 'left-fundus' values followed by all 'right-fundus' values.
    """
    left_images = dataframe['left-fundus']
    right_images = dataframe['right-fundus']
    return [*left_images, *right_images]

# Separate Images

## Normal Eye Sample

In [10]:
# Rows whose 'n' (normal) indicator column equals 1
normal_full = extract_conditions(dataframe=df, condition_name='n')

### Left Eye

In [12]:
# Normal-flagged rows whose left-eye keywords contain 'normal fundus'
normal_left = extract_eyes(
    dataframe=normal_full,
    column='left-diagnostic_keywords',
    search_param='normal fundus',
)

In [14]:
# (rows, columns) of the normal left-eye selection
normal_left.shape

(1136, 14)

In [15]:
# Number of distinct left-eye image names in the selection
normal_left['left-fundus'].nunique()

1136

In [24]:
# Group the normal left-eye rows by their diagnostic keyword string and sum the
# remaining columns per group; used to inspect which distinct keyword labels occur.
nl=normal_left.groupby(['left-diagnostic_keywords']).sum()

In [25]:
# Keep only the rows whose left-eye keywords do not mention 'dust',
# so the normal sample stays free of that extra label
normal_left2 = normal_left.loc[
    ~normal_left['left-diagnostic_keywords'].str.contains('dust')
]


### Right Eye

In [19]:
# Normal-flagged rows whose right-eye keywords contain 'normal fundus'
normal_right = extract_eyes(
    dataframe=normal_full,
    column='right-diagnostic_keywords',
    search_param='normal fundus',
)

In [26]:
# Group the normal right-eye rows by their diagnostic keyword string and sum the
# remaining columns per group; used to inspect which distinct keyword labels occur.
nr=normal_right.groupby(['right-diagnostic_keywords']).sum()

In [27]:
# Keep only the rows whose right-eye keywords do not mention 'dust',
# so the normal sample stays free of that extra label
normal_right2 = normal_right.loc[
    ~normal_right['right-diagnostic_keywords'].str.contains('dust')
]

In [28]:
# Image names for the normal sample: left-eye images first, then right-eye images
normal_list_images = make_list(
    dataframe_1=normal_left2,
    dataframe_2=normal_right2,
)
len(normal_list_images)

2058

## Diabetic Eyes

In [29]:
# Rows whose 'd' (diabetic) indicator column equals 1
diabetic_full = extract_conditions(dataframe=df, condition_name='d')

In [30]:
# (rows, columns) of the diabetic selection
diabetic_full.shape

(1128, 14)

In [31]:
# Left- and right-eye image names of every diabetic row
diabetic_list_images = make_full_list(dataframe=diabetic_full)
len(diabetic_list_images)

2256

## Glaucoma

In [32]:
# Rows whose 'g' (glaucoma) indicator column equals 1
glaucoma_full = extract_conditions(dataframe=df, condition_name='g')

In [33]:
# (rows, columns) of the glaucoma selection
glaucoma_full.shape

(215, 14)

In [34]:
# Left- and right-eye image names of every glaucoma row
glaucoma_list_images = make_full_list(dataframe=glaucoma_full)
len(glaucoma_list_images)

430

## Cataracts

In [35]:
# Rows whose 'c' (cataract) indicator column equals 1
cataracts_full = extract_conditions(dataframe=df, condition_name='c')

In [36]:
# (rows, columns) of the cataract selection
cataracts_full.shape

(212, 14)

In [37]:
# Left- and right-eye image names of every cataract row
cataracts_list_images = make_full_list(dataframe=cataracts_full)
len(cataracts_list_images)

424

## Age-Related

In [38]:
# Rows whose 'a' (age-related) indicator column equals 1, then their (rows, columns)
age_full = extract_conditions(dataframe=df, condition_name='a')
age_full.shape

(164, 14)

In [39]:
# Left- and right-eye image names of every age-related row
age_list_images = make_full_list(dataframe=age_full)
len(age_list_images)

328

## Hypertensive

In [40]:
# Rows whose 'h' (hypertensive) indicator column equals 1, then their (rows, columns)
hypertensive_full = extract_conditions(dataframe=df, condition_name='h')
hypertensive_full.shape

(103, 14)

In [41]:
# Left- and right-eye image names of every hypertensive row
hypertensive_list_images = make_full_list(dataframe=hypertensive_full)
len(hypertensive_list_images)

206

## Myopia

In [42]:
# Rows whose 'm' (myopia) indicator column equals 1, then their (rows, columns)
myopia_full = extract_conditions(dataframe=df, condition_name='m')
myopia_full.shape

(174, 14)

In [43]:
# Left- and right-eye image names of every myopia row
myopia_list_images = make_full_list(dataframe=myopia_full)
len(myopia_list_images)

348

## Other Diseases

In [44]:
# Rows whose 'o' (other) indicator column equals 1, then their (rows, columns)
other_full = extract_conditions(dataframe=df, condition_name='o')
other_full.shape

(979, 14)

In [45]:
# Left- and right-eye image names of every 'other' row
other_list_images = make_full_list(dataframe=other_full)
len(other_list_images)

1958

# Images

# Split test data

## Import Images

In [46]:
# Class names, in the order used for the one-hot label columns
classes = [
    'normal',
    'diabetic',
    'age_related',
    'glaucoma',
    'other',
    'hypertensive',
    'myopia',
    'cataract',
]
# Root folder passed to flow_from_directory
base_path = 'oversampling/'

In [52]:
# Define the image data generator used to read and preprocess the images.
# NOTE(review): both a preprocessing_function and a rescale factor are set, so
# every image goes through VGG19's preprocess_input and is also multiplied by
# 1/255. The Keras docs describe rescale as applied after all other
# transformations — confirm this combined scaling is intended.
data_gen = preprocessing.image.ImageDataGenerator(
    # preprocessing function applied to every image
    preprocessing_function=keras.applications.vgg19.preprocess_input,
    rescale=1/255,
    # augmentation options below are currently disabled:
    # fill_mode='nearest',
    # rotation_range=20,
    # width_shift_range=0.2,
    # height_shift_range=0.2,
    # horizontal_flip=True,
    # zoom_range=0.2,
    # shear_range=0.2
)

In [53]:
# Directory iterator that yields (X, y) batches with one-hot labels.
# batch_size equals the 4000 images found under base_path, so a single batch
# holds the entire dataset. Original author's note: this mostly only works
# for models with transfer learning.
train_data_gen = data_gen.flow_from_directory(
    directory=base_path,
    target_size=(224, 224),
    classes=classes,
    class_mode="categorical",
    batch_size=4000,
)

Found 4000 images belonging to 8 classes.


In [54]:
# Load all images at once: pull a single batch from the generator
# (its batch_size is 4000) and show the resulting array shapes.
xtrain, ytrain = next(train_data_gen)
xtrain.shape, ytrain.shape

((4000, 224, 224, 3), (4000, 8))

In [55]:
# Show the class list passed to flow_from_directory
classes

['normal',
 'diabetic',
 'age_related',
 'glaucoma',
 'other',
 'hypertensive',
 'myopia',
 'cataract']

In [56]:
# Element types of the image and label arrays
xtrain.dtype, ytrain.dtype

(dtype('float32'), dtype('float32'))

# VGG19 Pre-trained Model

tensorflow.keras.layers.Conv2D(filters, kernel_size, strides=(1, 1),
  padding='valid', data_format=None, dilation_rate=(1, 1),
  activation=None, use_bias=True, kernel_initializer='glorot_uniform',
  bias_initializer='zeros', kernel_regularizer=None,
  bias_regularizer=None, activity_regularizer=None,
  kernel_constraint=None, bias_constraint=None)

In [62]:
# Build the VGG19 architecture including its fully-connected top, with an
# 8-way softmax output (the `classes` list has 8 entries).
# NOTE(review): weights=None means the network starts from random
# initialisation rather than ImageNet weights, although the section heading
# says "Pre-trained" — confirm which is intended.
model_vgg=keras.applications.vgg19.VGG19(
    include_top=True,
    weights=None, # 'imagenet' is disabled here
    input_tensor=None,
    input_shape=(224, 224, 3),  # matches target_size=(224, 224) with 3 channels
    pooling=None,
    classes=8,
    classifier_activation='softmax'
)

In [63]:
# Print the layer-by-layer summary (output shapes and parameter counts)
model_vgg.summary()

Model: "vgg19"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [64]:
# Compile the model: Adam optimiser, categorical cross-entropy for the
# one-hot labels, accuracy reported during training
model_vgg.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [65]:
# Early stopping: halt training once validation accuracy stops improving.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_accuracy',  # quantity being watched
    mode='auto',             # direction (min/max) is inferred from the monitored name
    min_delta=0.005,         # smallest change that counts as an improvement
    patience=20,             # epochs without improvement before training stops
    verbose=1,               # 1 prints a message when something happens, 0 is silent
)

In [None]:
# Fit the model: up to 300 epochs in mini-batches of 10, holding out 20% of
# the samples for validation and stopping early via the callback
history = model_vgg.fit(
    xtrain,
    ytrain,
    batch_size=10,
    epochs=300,
    validation_split=0.2,
    callbacks=[early_stop],
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300

In [None]:
# Per-epoch metrics recorded by model_vgg.fit(...)
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# The early-stopping callback can end training before the 300 requested
# epochs, so the x-axis follows the number of epochs actually recorded;
# a fixed range(300) would not match the metric lists and plotting would fail.
epochs_range = range(len(acc))

plt.figure(figsize=(15, 15))

# Left panel: accuracy curves
plt.subplot(2, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves
plt.subplot(2, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

The training loss indicates how well the model is fitting the training data, while the validation loss indicates how well the model fits new data.

The model is overfitting.

# Image test

In [None]:
# Open one sample image from the test_file/hypertensive folder with PIL.
# NOTE(review): the name `image` is also imported from
# tensorflow.keras.preprocessing in the imports cell; this assignment rebinds
# it, so that module is no longer reachable under this name afterwards.
image=Image.open('test_file/hypertensive/left_gen1.jpg')

In [None]:
# Display the opened image as the cell output
image

In [None]:
# PIL exposes the image dimensions as a (width, height) tuple
width, height = image.size

In [None]:
# Show the image dimensions: width first, then height
print(width, height)