In [None]:
### Intro to AI Group 3 Final Project - Using CV to Predict Ocular Disease

In [None]:
import os

# Root folder of the ocular dataset. Set the OCULAR_DATASET_DIR environment
# variable to run the notebook on another machine without editing this cell;
# the default is the location used when the notebook was written.
dataset_root = os.environ.get(
    "OCULAR_DATASET_DIR",
    "C:\\Users\\elanw\\OneDrive\\Documents\\IntroToAI\\ocular_dataset",
)

# One sub-folder per split; each holds one sub-folder per disease category.
train_base_dir = os.path.join(dataset_root, "train")
valid_base_dir = os.path.join(dataset_root, "valid")
test_base_dir = os.path.join(dataset_root, "test")
# The trailing separator is intentional: the sorting cell builds source paths
# by appending the file name directly to image_dir.
image_dir = os.path.join(dataset_root, "preprocessed_images", "")

In [None]:
from __future__ import absolute_import, division, print_function

# Import TensorFlow v2.
import tensorflow as tf
from tensorflow.keras import Model, layers
import numpy as np

In [93]:
# Dataset shape.
num_classes = 8  # one class per category folder (the generators report 8 classes).
num_features = 224 * 224  # pixels in one flattened 224x224 grayscale image (50176).

# Training Parameters
learning_rate, training_steps = 0.001, 1000
batch_size, display_step = 32, 100

# Network Parameters
# Images are reshaped to (N, 224, 224) before training, so the LSTM reads each
# image as a sequence of 224 steps with 224 values per step.
timesteps = 224  # sequence length.
num_input = 224  # values fed to the LSTM at every step.
num_units = 32  # number of neurons for the LSTM layer.

In [80]:
# Build the training and validation iterators. Both read from train_base_dir;
# the generator holds out 20% of the images there as the validation subset.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    validation_split=0.2,
    horizontal_flip=False,
    vertical_flip=False,
)

# batch_size is set to the size of each subset so that one batch contains
# every image of that subset; shuffle=False keeps the directory order.
train_it = train_datagen.flow_from_directory(
    train_base_dir,
    subset='training',
    batch_size=5117,
    target_size=(224, 224),
    color_mode='grayscale',
    class_mode='categorical',
    shuffle=False,
)
valid_it = train_datagen.flow_from_directory(
    train_base_dir,
    subset='validation',
    batch_size=1275,
    target_size=(224, 224),
    color_mode='grayscale',
    class_mode='categorical',
    shuffle=False,
)

Found 5117 images belonging to 8 classes.
Found 1275 images belonging to 8 classes.


In [81]:
# Pull a single batch from each iterator. The iterators were created with a
# batch size equal to the subset size, so this loads every training and
# validation image into memory at once (X_train prints as (5117, 224, 224, 1)).
# The labels come back one-hot encoded because class_mode='categorical'.
X_train, y_train = next(train_it)
x_valid, y_valid = next(valid_it)

In [102]:


print(X_train.shape)
# Drop the trailing channel axis so each training image is a 224x224 matrix,
# then scale pixel values from [0, 255] down to [0, 1].
# NOTE(review): the validation images are flattened to num_features columns
# while the training images keep their 2-D shape -- confirm this asymmetry is
# intended before feeding x_valid_reshape to the same model.
x_train_reshape = X_train.reshape([-1, 224, 224])
x_valid_reshape = x_valid.reshape([-1, num_features])
x_train_reshape = x_train_reshape/255
x_valid_reshape = x_valid_reshape/255
print(x_train_reshape.shape)
# Sanity check of the scaling on one sample: smallest and largest pixel value.
print(x_train_reshape[4].min(), x_train_reshape[4].max(), sep="\n")

(5117, 224, 224, 1)
(5117, 224, 224)
0.0
0.85882354


In [110]:
# Replace the one-hot labels returned by next(train_it) with the iterator's
# integer class indices; the sparse cross-entropy loss used for training
# takes integer labels.
# NOTE(review): assumes train_it.classes is in the same order as the rows of
# the loaded batch (the iterator was created with shuffle=False) -- confirm.
y_train = train_it.classes
print(y_train)

[0 0 0 ... 7 7 7]


In [78]:
# NOTE(review): stale cell left over from the MNIST tutorial this notebook was
# adapted from -- consider deleting it. As written it stops on the first line
# (DirectoryIterator has no `data` attribute, see the recorded error). If that
# line were removed, the rest would overwrite x_train_reshape and y_train with
# MNIST digits and then read `x_test`, which is not assigned in the cells
# visible here.
x_train = train_it.data

# Prepare MNIST data (not the ocular dataset).
from tensorflow.keras.datasets import mnist
(x_train_reshape, y_train), (x_valid_, y_test) = mnist.load_data()
# Convert to float32.
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
# Reshape x_train to 28x28 images and x_test to rows of num_features columns
# (num_features is 50176 in this notebook, not the 784 of a 28x28 image).
x_train, x_test = x_train.reshape([-1, 28, 28]), x_test.reshape([-1, num_features])
# Normalize images value from [0, 255] to [0, 1].
x_train, x_test = x_train / 255., x_test / 255.

AttributeError: 'DirectoryIterator' object has no attribute 'data'

In [112]:
# Use tf.data API to shuffle and batch data.
train_data = tf.data.Dataset.from_tensor_slices((x_train_reshape, y_train))
# The samples arrive sorted by class (labels print as [0 0 0 ... 7 7 7]), so
# the shuffle buffer has to span the whole dataset to mix the classes
# uniformly; a smaller buffer keeps the tail of the last class out of the
# early batches. Shuffling before repeat() also keeps epochs separate: every
# sample is seen once before any sample is seen again.
train_data = (
    train_data
    .shuffle(buffer_size=len(x_train_reshape))
    .repeat()
    .batch(batch_size)
    .prefetch(1)
)

In [113]:
# LSTM classifier: one recurrent layer followed by a dense output layer.
class LSTM(Model):
    """Sequence classifier built from a single LSTM layer and a dense head.

    The layer sizes come from the module-level ``num_units`` and
    ``num_classes`` settings.
    """

    def __init__(self):
        """Create the recurrent layer and the output layer."""
        super().__init__()
        # Recurrent hidden layer.
        self.lstm_layer = layers.LSTM(units=num_units)
        # Linear projection onto one score per class.
        self.out = layers.Dense(num_classes)

    def call(self, x, is_training=False):
        """Run the forward pass.

        Args:
            x: batch of input sequences.
            is_training: when True the raw logits are returned, because the
                cross-entropy loss applies softmax itself; otherwise softmax
                is applied so the result is a probability distribution.

        Returns:
            Logits (training) or softmax probabilities (inference), one
            value per class for every sample.
        """
        logits = self.out(self.lstm_layer(x))
        if is_training:
            return logits
        return tf.nn.softmax(logits)

# Instantiate the network that the training cells below optimize.
lstm_net = LSTM()

In [114]:
# Cross-Entropy Loss.
# Softmax is applied inside the TensorFlow op, so pass raw logits.
def cross_entropy_loss(x, y):
    """Return the mean sparse softmax cross-entropy of a batch.

    Args:
        x: logits produced by the model (no softmax applied).
        y: integer class labels; cast to int64 for the TensorFlow op.

    Returns:
        Scalar tensor with the loss averaged over the batch.
    """
    per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.cast(y, tf.int64),
        logits=x,
    )
    return tf.reduce_mean(per_sample_loss)

# Accuracy metric.
def accuracy(y_pred, y_true):
    """Return the fraction of samples whose top-scoring class matches the label.

    Args:
        y_pred: per-class scores (logits or probabilities) for each sample.
        y_true: integer class labels.
    """
    predicted_class = tf.argmax(y_pred, 1)
    expected_class = tf.cast(y_true, tf.int64)
    hits = tf.cast(tf.equal(predicted_class, expected_class), tf.float32)
    return tf.reduce_mean(hits, axis=-1)

# Adam optimizer, using the step size from the training parameters.
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

In [115]:

# One optimization step.
def run_optimization(x, y):
    """Apply a single gradient update to ``lstm_net`` using one mini-batch.

    Args:
        x: batch of input sequences.
        y: integer class labels for the batch.
    """
    # Record the forward pass so the tape can differentiate the loss.
    with tf.GradientTape() as tape:
        logits = lstm_net(x, is_training=True)
        batch_loss = cross_entropy_loss(logits, y)

    # Differentiate with respect to every trainable weight, then let the
    # optimizer apply the update.
    weights = lstm_net.trainable_variables
    grads = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

    

In [116]:
# Train for `training_steps` mini-batches, reporting every `display_step` steps.
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), start=1):
    # One gradient update on the current mini-batch.
    run_optimization(batch_x, batch_y)

    if step % display_step != 0:
        continue

    # Re-evaluate the batch that was just trained on. is_training=True keeps
    # the output as logits, which is what the loss function expects.
    pred = lstm_net(batch_x, is_training=True)
    loss, acc = cross_entropy_loss(pred, batch_y), accuracy(pred, batch_y)
    print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

step: 100, loss: 1.490094, accuracy: 0.500000
step: 200, loss: 1.590143, accuracy: 0.437500
step: 300, loss: 1.441734, accuracy: 0.343750
step: 400, loss: 1.478670, accuracy: 0.468750
step: 500, loss: 1.702457, accuracy: 0.343750
step: 600, loss: 1.780766, accuracy: 0.187500
step: 700, loss: 1.537272, accuracy: 0.468750
step: 800, loss: 1.637721, accuracy: 0.406250
step: 900, loss: 1.559122, accuracy: 0.375000
step: 1000, loss: 1.462396, accuracy: 0.562500


In [None]:
# Only run this once if you have not already done so, not multiple times:
# a second run would move another share of the training images away.
# This takes the label table and the folder of images, and makes copies
# sorted into train, valid, and test folders, and then category
# subfolders within those folders.
# NOTE(review): `ocular_data` is not defined in the cells visible here; it is
# expected to be the label table (one row per patient, with an 'ID' column and
# one 0/1 column per category) loaded before this cell runs.

import os
import shutil

categs = ['N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']

# Create <split>/<category> for every split. makedirs also creates the split
# folder itself and does nothing when a folder already exists.
for base_dir in (train_base_dir, valid_base_dir, test_base_dir):
    for categ in categs:
        os.makedirs(os.path.join(base_dir, categ), exist_ok=True)

# Copy the left and right eye image of every row into the training folder of
# the first category (in `categs` order) flagged with 1.
for index, row in ocular_data.iterrows():
    label = next((categ for categ in categs if row[categ] == 1), None)
    if label is None:
        # No category flagged: skip the row instead of silently reusing the
        # destination chosen for the previous row.
        continue
    dest = os.path.join(train_base_dir, label)
    for side in ("left", "right"):
        file_name = str(row['ID']) + "_" + side + ".jpg"
        source = os.path.join(image_dir, file_name)
        # Some rows have no preprocessed image; those are skipped.
        if os.path.exists(source):
            shutil.copy(source, os.path.join(dest, file_name))

# Move the first 15% of each category to valid and the next 15% to test.
for categ in categs:
    train_categ_dir = os.path.join(train_base_dir, categ)
    # os.listdir returns names in arbitrary order; sorting makes the split
    # reproducible from one machine to the next.
    filenames = sorted(os.listdir(train_categ_dir))
    numb_files = len(filenames)
    fifteen_percent = numb_files * .15
    for idx, filename in enumerate(filenames):
        if idx < fifteen_percent:
            split_dir = valid_base_dir
        elif idx < fifteen_percent * 2:
            split_dir = test_base_dir
        else:
            # Everything that is left stays in the training folder.
            break
        shutil.move(
            os.path.join(train_categ_dir, filename),
            os.path.join(split_dir, categ, filename),
        )
     

# create directory for each category
# 

In [None]:
# This duplicates training data by creating a horizontally flipped version of each.
# Safe to re-run: images that are themselves flipped copies, or that already
# have a flipped copy on disk, are skipped.

import cv2
import os

categs = ['N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']
for category in categs:
    category_dir = os.path.join(train_base_dir, category)
    for file in os.listdir(category_dir):
        # splitext handles extensions of any length (e.g. ".jpeg").
        stem = os.path.splitext(file)[0]
        if stem.endswith("_flipped"):
            # Output of an earlier run; flipping it again would only
            # recreate the original image under a new name.
            continue
        flipped_path = os.path.join(category_dir, stem + "_flipped.jpg")
        if os.path.exists(flipped_path):
            continue
        img = cv2.imread(os.path.join(category_dir, file))
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            continue
        # flipCode 1 mirrors the image around the vertical axis.
        cv2.imwrite(flipped_path, cv2.flip(img, 1))

In [None]:
from sklearn.svm import SVC
clf = SVC(gamma = .01, C = 100)
# train using clf
# SVC.fit takes a 2-D feature matrix (n_samples, n_features) and a 1-D label
# vector, not Keras iterators: flatten every normalized image into one row and
# use the iterator's integer class indices as the targets.
clf.fit(x_train_reshape.reshape(len(x_train_reshape), -1), train_it.classes)

In [None]:
# Experiment: adam optimizer, grayscale, all images "facing" same way, normalized by dividing by 255
# NOTE(review): `cnn_model` is not defined in the cells visible here; it has
# to be built and compiled before this cell runs. The generator that produces
# train_it does not rescale pixels -- confirm where the /255 normalization
# mentioned above happens.
cnn_model.fit(train_it, epochs=11, validation_data=valid_it)

In [None]:
# Experiment: adam optimizer, colored, all images "facing" same way
# NOTE(review): `cnn_model` is not defined in the cells visible here, and
# train_it is created with color_mode='grayscale' in the generator cell --
# confirm the iterators were rebuilt in color for this run.
cnn_model.fit(train_it, epochs=6, validation_data=valid_it)

In [None]:
# Fit the model
# highest yet is 45% validation accuracy
# NOTE(review): `cnn_model` is not defined in the cells visible here; it has
# to be built and compiled before this cell runs.
cnn_model.fit(train_it, epochs=7, validation_data=valid_it)

In [None]:
# Overall accuracy of the transfer-learning model on the test iterator.
# NOTE(review): `xfer_vgg19_model` and `test_it` are not defined in the cells
# visible here; test_it presumably must not shuffle, otherwise test_it.classes
# would not line up with the prediction order -- confirm.

from sklearn.metrics import accuracy_score

# One row of class scores per test image; argmax picks the predicted class.
predictions = xfer_vgg19_model.predict(test_it)
score = accuracy_score(test_it.classes, predictions.argmax(axis=-1))
print(score)

In [None]:
# Per-category precision, recall, f1-score and support (number of test
# examples), computed from the predictions made in the accuracy cell.

import sklearn.metrics as metrics
print(metrics.classification_report(
    y_true=test_it.classes,
    y_pred=predictions.argmax(axis=-1),
))


In [None]:
# Small helper for eyeballing a single image file
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def show_image(image_path):
    """Read the image stored at ``image_path`` and draw it with matplotlib."""
    plt.imshow(mpimg.imread(image_path))

show_image('C:\\Users\\elanw\\OneDrive\\Pictures\\eye_square.jpg')

In [None]:
# Helper function to load image from path and preprocess it

from tensorflow.keras.preprocessing import image as image_utils
from tensorflow.keras.applications.vgg16 import preprocess_input

def load_and_process_image(image_path):
    """Load one image file as a preprocessed batch of size one.

    Args:
        image_path: path of the image file to load.

    Returns:
        Array of shape (1, 224, 224, 3) after the VGG16 ``preprocess_input``
        transformation.
    """
    # Resize while loading so every image matches the expected input size.
    pil_image = image_utils.load_img(image_path, target_size=(224,224))
    # Add the leading batch axis expected by the model.
    batch = image_utils.img_to_array(pil_image).reshape(1,224,224,3)
    return preprocess_input(batch)