In [1]:
import os
#import cv2
import glob
import pathlib
import PIL, PIL.Image
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import layers
import matplotlib.pyplot as plt

In [22]:
base_dir = os.getcwd()
print(f'current directory: {base_dir}')

dataset_dir = pathlib.Path(os.path.join(base_dir, 'datasets/exp1'))
print(f'datasets directory: {dataset_dir}')

current directory: /Users/alicialarsen/Documents/TUe/Honors/MedAI/TUe-Honors-MedAI/replicate-BSc-Thesis
datasets directory: /Users/alicialarsen/Documents/TUe/Honors/MedAI/TUe-Honors-MedAI/replicate-BSc-Thesis/datasets/exp1


In [23]:
image_count = len(list(dataset_dir.glob('**/*.png')))
print('number of total images:', image_count, '\n\n')

number of total images: 1284 




In [24]:
# Open an arbitrary image file and check it's size
image = Image.open(f'{dataset_dir}/state0/img0.png')
img_width, img_height = image.size
print(f"Image resolution: {img_width}x{img_height} pixels")

Image resolution: 270x270 pixels


### **Set up tensor data**

In [25]:
batch_size = 5

train_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_dir,
    validation_split=0.3,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

val_ds = tf.keras.utils.image_dataset_from_directory(
  dataset_dir,
  validation_split=0.3,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

class_names = train_ds.class_names
print(class_names)

Found 1284 files belonging to 2 classes.
Using 899 files for training.
Found 1284 files belonging to 2 classes.
Using 385 files for validation.
['state0', 'state1']


In [26]:
for image_batch, labels_batch in val_ds:
  print(image_batch.shape)
  print(labels_batch.shape)
  break

(5, 270, 270, 3)
(5,)


In [27]:
normalization_layer = tf.keras.layers.Rescaling(1./255)
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))

In [28]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

### **CNN classification with a VGG-16 model**

In [57]:
def VGG16model(class_names, img_height, img_width):
    pass

    num_classes = len(class_names)

    vgg16_model = keras.Sequential()
    vgg16_model.add(layers.Conv2D(input_shape=(img_height, img_width, 3),filters=64,kernel_size=(3,3),padding="same", activation="relu"))
    vgg16_model.add(layers.Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu"))
    vgg16_model.add(layers.MaxPool2D(pool_size=(2,2),strides=(2,2)))
    vgg16_model.add(layers.Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
    vgg16_model.add(layers.Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))
    vgg16_model.add(layers.MaxPooling2D(pool_size=(2,2),strides=(2,2)))
    vgg16_model.add(layers.Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
    vgg16_model.add(layers.Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
    vgg16_model.add(layers.Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))
    vgg16_model.add(layers.MaxPooling2D(pool_size=(2,2),strides=(2,2)))
    vgg16_model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
    vgg16_model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
    vgg16_model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
    vgg16_model.add(layers.MaxPooling2D(pool_size=(2,2),strides=(2,2)))
    vgg16_model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
    vgg16_model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
    vgg16_model.add(layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))
    vgg16_model.add(layers.MaxPooling2D(pool_size=(2,2),strides=(2,2)))
    vgg16_model.add(layers.Flatten())
    vgg16_model.add(layers.Dense(4096,activation="relu"))
    vgg16_model.add(layers.Dense(4096,activation="relu"))
    vgg16_model.add(layers.Dense(num_classes, activation="softmax"))
    vgg16_model.summary()

    opt = tf.keras.optimizers.legacy.Adam(learning_rate=0.0001)
    vgg16_model.compile(optimizer=opt, loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])
    return vgg16_model

In [None]:
vgg16_model = VGG16model(class_names, img_height, img_width)
vgg16_model.fit(train_ds, validation_data=val_ds, epochs=1)

In [32]:
models_path = os.path.join(base_dir, 'models')
vgg16_model.save(os.path.join(models_path, 'vgg16_model.h5'))

use `model = keras.models.load_model('path/to/location')` to load the models

In [30]:
vgg16_model = keras.models.load_model(os.path.join(models_path, 'vgg16_model.h5')) 



In [34]:
tf.keras.utils.plot_model(vgg16_model)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


### **Evaluations**

In [35]:
def evaluate(model, name='model'):
  loss, acc = model.evaluate(val_ds, batch_size= batch_size)
  print(f"{name}'s accuracy: {round((acc * 100), 2)}%")

evaluate(vgg16_model, 'VGG-16')

simple CNN's accuracy: 51.95%
VGG-16's accuracy: 51.95%


In [36]:
def predict(model):
  return np.argmax(model.predict(val_ds), axis=-1)

def get_labels(dataset):
  all_val_labels = []
  for _, labels in dataset:
      all_val_labels.extend(labels.numpy())
  all_labels = np.array(all_val_labels)
  return all_labels

vgg16_predicted = predict(vgg16_model)
vgg16_labels = get_labels(val_ds)

vgg16_comparison_list = list(zip(predict(vgg16_model), get_labels(val_ds)))
print("VGG-16 (predictions, true labels): ", vgg16_comparison_list)

VGG-16 (predictions, true labels):  [(1, 1), (1, 0), (1, 1), (1, 0), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 0)]
simple model (predictions, true labels):  [(1, 1), (1, 0), (1, 1), (1, 0), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 0)]


In [50]:
def AUC_curve(curve_type: str):
    AUC_CURVE = tf.keras.metrics.AUC(
        num_thresholds=200,
        curve=curve_type,
        summation_method='interpolation',
        name=None,
        dtype=None,
        thresholds=None,
        multi_label=False,
        num_labels=None,
        label_weights=None,
        from_logits=False
    )
    AUC_CURVE.update_state(vgg16_labels, predict(vgg16_model))
    AUC_ROC_result = AUC_CURVE.result()
    return AUC_CURVE, AUC_ROC_result


AUC_ROC, AUC_ROC_result = AUC_curve('ROC')
print("AUC ROC score:", AUC_ROC_result.numpy())

AUC_PR, AUC_PR_result = AUC_curve('PR')
print("AUC PR score:", AUC_PR_result.numpy())

AUC ROC score: 0.5


Notebook by Alicia HH Larsen

MedAI, Artificial Intelligence Track, TU/e Honors

2024-04-28