In [None]:
import os
from utils import image_generators

In [None]:
# Base folder of the GDrive files (relative path)
drive = "drive/MyDrive"

# Split-level directories
train_dir, val_dir = (os.path.join(drive, split) for split in ("train", "validation"))

# Per-class directories inside each split
g_train_dir, n_train_dir = (os.path.join(train_dir, label) for label in ("glaucoma", "normal"))
g_val_dir, n_val_dir = (os.path.join(val_dir, label) for label in ("glaucoma", "normal"))

In [None]:
# Training and validation images use the same image size
train_image_size = val_image_size = (178, 178)

# Build the image generators for the training & validation data
train_gen, val_gen = image_generators(
    train_dir,
    val_dir,
    train_image_size,
    val_image_size,
)

#### 3.2.0. Hyperparameter tuning with GridSearchCV (discarded because of runtime issues)

In [None]:
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
def create_model(input_shape=(178, 178, 3), learning_rate=0.001):
  """Build and compile the two-convolution-layer binary classifier.

  Args:
    input_shape: Shape of a single input image, passed to the first
      convolution layer. Defaults to (178, 178, 3).
    learning_rate: Learning rate handed to the RMSprop optimizer.
      Defaults to 0.001.

  Returns:
    A compiled `tf.keras.Sequential` model with one sigmoid output unit,
    trained with binary cross-entropy and reporting accuracy.
  """
  two_layer_model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), input_shape=input_shape, activation='relu'),
    tf.keras.layers.MaxPool2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPool2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
  ])

  two_layer_model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=learning_rate),
    loss='binary_crossentropy',
    metrics=['accuracy'],
  )

  return two_layer_model

To use `GridSearchCV` with our image generators, we first need to unwrap the augmented images from the generators. To do so, we explore the structure of `train_gen`, a `keras.preprocessing.image.DirectoryIterator` object.

In [None]:
# Inspect the Python type found at each nesting level of the generator:
# the generator itself, its first element, and two further levels of indexing.
print("train_gen type:", type(train_gen))
print("train_gen[0] type", type(train_gen[0]))
print("train_gen[0][0] type", type(train_gen[0][0]))
print("train_gen[0][0][0] type", type(train_gen[0][0][0]))

train_gen type: <class 'keras.preprocessing.image.DirectoryIterator'>
train_gen[0] type <class 'tuple'>
train_gen[0][0] type <class 'numpy.ndarray'>
train_gen[0][0][0] type <class 'numpy.ndarray'>


In [None]:
# Inspect the size of each nesting level: the number of elements in the
# generator, the length of its first element, and the array shapes found
# one and two indexing levels further down.
print("train_gen length:", len(train_gen))
print("train_gen[0] length", len(train_gen[0]))
print("train_gen[0][0] shape", train_gen[0][0].shape)
print("train_gen[0][0][0] shape", train_gen[0][0][0].shape)

train_gen length: 29
train_gen[0] length 2
train_gen[0][0] shape (20, 178, 178, 3)
train_gen[0][0][0] shape (178, 178, 3)


We notice that each element of `train_gen` corresponds to one image batch. Each `train_gen[x]` is a tuple of two elements: the first holds the images of the batch and the second holds their labels. Therefore, we need to iterate through every sample of every batch to collect the augmented input images and their labels for `GridSearchCV`.

In [None]:
def extract_images(generator, classes=None):
  """Flatten one pass over a batch generator into per-sample lists.

  Args:
    generator: Sized, indexable collection of batches. `generator[i]` must
      return a sequence whose first item holds the images of batch `i` and
      whose second item holds the matching labels.
    classes: Mapping from each label value found in the batches to the label
      that should be stored. Defaults to `{0: "0", 1: "1"}`.

  Returns:
    A tuple `(X, y)` of two lists: `X` contains every image and `y` the mapped
    label of the image at the same position.

  Raises:
    KeyError: If a batch contains a label that is not a key of `classes`.
  """
  # Build the default per call rather than sharing one mutable dict
  if classes is None:
    classes = {0: "0", 1: "1"}

  no_of_batches = len(generator)

  X = []
  y = []

  # Index the batches instead of looping over the generator itself: iterating
  # a Keras generator never terminates, whereas indexing reads exactly
  # len(generator) batches (and nothing at all for an empty generator).
  for no_batch in tqdm(range(no_of_batches)):
    batch = generator[no_batch]

    for image, label in zip(batch[0], batch[1]):
      X.append(image)
      y.append(classes[label])

  return X, y

In [None]:
# Figure out which class is 0 and which is 1
# (the attribute maps each class name to its integer label)
train_gen.class_indices

{'glaucoma': 0, 'normal': 1}

In [None]:
# Invert the generator's class-name -> index mapping instead of hard-coding it,
# so the label names cannot drift from what the generator actually assigned
classes = {index: name for name, index in train_gen.class_indices.items()}

X_train, y_train = extract_images(train_gen, classes)
X_val, y_val = extract_images(val_gen, classes)

29it [00:04,  6.30it/s]
8it [00:01,  6.17it/s]


In [None]:
# Pool the training and validation samples into a single dataset
X = [*X_train, *X_val]
y = [*y_train, *y_val]

# Sanity-check the number of samples and the format of the first one
print(len(X), X[0].shape, len(y), y[0], sep="\n")

705
(178, 178, 3)
705
glaucoma


In [None]:
# Choose a random seed for reproducibility
seed = 7
tf.random.set_seed(seed)

# scikit-learn indexes X and y to build the cross-validation folds, so stack
# the per-sample lists into arrays first
X_grid = np.asarray(X)
y_grid = np.asarray(y)

# Define the grid search parameters
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 20, 30, 40, 50]
param_grid = dict(batch_size=batch_size, epochs=epochs)

# GridSearchCV needs a scikit-learn compatible estimator that it can clone and
# re-parameterise for every candidate. A compiled Keras model is not one, so
# wrap the model-building function in KerasClassifier, which also exposes the
# batch_size / epochs parameters being searched.
estimator = KerasClassifier(model=create_model, verbose=0)

# NOTE(review): n_jobs=-1 trains several models in parallel worker processes;
# if memory or GPU contention becomes a problem, try n_jobs=1 -- confirm on the
# target hardware.
grid = GridSearchCV(estimator=estimator, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_grid, y_grid)

# Summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
