In [1]:
import os 
import numpy as np 
import pandas as pd 

import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras import regularizers, initializers, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
# Root folder of the image archive. The "Training" and "Testing" folders are
# looked up directly beneath it.
# NOTE(review): this is a machine-specific absolute path - point it at your own
# copy of the data before running the notebook.
base_dir = "C:/Users/kzhan/Desktop/archive"

# Build the sub-folder paths by joining components. The earlier form called
# os.path.join with a single, already concatenated string, which is a no-op.
training_dir = os.path.join(base_dir, "Training")
testing_dir = os.path.join(base_dir, "Testing")

In [9]:
# Batch size and target image resolution used by both dataset splits.
batch_size = 64
img_height = 180
img_width = 180

# Training subset: read the images under training_dir into a tf.data.Dataset,
# holding back a 0.2 fraction for validation.
training_data = tf.keras.preprocessing.image_dataset_from_directory(
    training_dir,
    batch_size=batch_size,
    image_size=(img_height, img_width),
    seed=123,
    validation_split=0.2,
    subset="training",
)

# Validation subset: same directory, same seed and same validation_split as
# the call above; only the requested subset differs.
validation_data = tf.keras.preprocessing.image_dataset_from_directory(
    training_dir,
    batch_size=batch_size,
    image_size=(img_height, img_width),
    seed=123,
    validation_split=0.2,
    subset="validation",
)

Found 2870 files belonging to 4 classes.
Using 2296 files for training.
Found 2870 files belonging to 4 classes.
Using 574 files for validation.


In [5]:
def get_training_dir(directory, batch_size, img_height, img_width, validation_split, seed):
    """Load the "training" subset of an image folder as a dataset.

    Args:
        directory: Folder handed to ``image_dataset_from_directory``.
        batch_size: Batch size forwarded to the loader.
        img_height: Height component of the ``image_size`` passed to the loader.
        img_width: Width component of the ``image_size`` passed to the loader.
        validation_split: Fraction forwarded as ``validation_split``.
        seed: Seed forwarded to the loader.

    Returns:
        Whatever ``tf.keras.preprocessing.image_dataset_from_directory``
        returns for ``subset="training"``.
    """
    # Read from the `directory` argument. The previous version ignored it and
    # always loaded from the notebook-level `training_dir` global.
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory=directory,
        validation_split=validation_split,
        image_size=(img_height, img_width),
        subset="training",
        seed=seed,
        batch_size=batch_size,
    )

def get_validation_dir(directory, batch_size, img_height, img_width, validation_split, seed):
    """Load the "validation" subset of an image folder as a dataset.

    Args:
        directory: Folder handed to ``image_dataset_from_directory``.
        batch_size: Batch size forwarded to the loader.
        img_height: Height component of the ``image_size`` passed to the loader.
        img_width: Width component of the ``image_size`` passed to the loader.
        validation_split: Fraction forwarded as ``validation_split``.
        seed: Seed forwarded to the loader.

    Returns:
        Whatever ``tf.keras.preprocessing.image_dataset_from_directory``
        returns for ``subset="validation"``.
    """
    # Read from the `directory` argument. The previous version ignored it and
    # always loaded from the notebook-level `training_dir` global.
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory=directory,
        validation_split=validation_split,
        image_size=(img_height, img_width),
        batch_size=batch_size,
        seed=seed,
        subset="validation",
    )

# Build both splits through the helpers, spelling out each argument by name.
training_data = get_training_dir(
    directory=training_dir,
    batch_size=64,
    img_height=180,
    img_width=180,
    validation_split=0.2,
    seed=123,
)
validation_data = get_validation_dir(
    directory=training_dir,
    batch_size=64,
    img_height=180,
    img_width=180,
    validation_split=0.2,
    seed=123,
)

Found 2870 files belonging to 4 classes.
Using 2296 files for training.
Found 2870 files belonging to 4 classes.
Using 574 files for validation.


In [8]:
# Class names recorded on the training dataset.
class_names = training_data.class_names
print("There are:", len(class_names), "classes \nand their names are: ", class_names)

# Show the image and label tensor shapes of the first batch only.
for images, labels in training_data.take(1):
    print(images.shape)
    print(labels.shape)

There are: 4 classes 
and their names are:  ['glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor']
(64, 180, 180, 3)
(64,)


In [11]:
# Input-pipeline tuning.
#   cache()    - keeps the images in memory after they are loaded off disk
#                during the first epoch, so the dataset does not become a
#                bottleneck while training. If the dataset is too large to fit
#                into memory, cache() can also create an on-disk cache.
#   prefetch() - overlaps data preprocessing and model execution while training.
AUTOTUNE = tf.data.experimental.AUTOTUNE

training_data = (
    training_data
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)
validation_data = (
    validation_data
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

# Preview of rescaling by 1/255 on a single batch of the training data.
normalizing_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)
normalized_ds = training_data.map(
    lambda batch_images, batch_labels: (normalizing_layer(batch_images), batch_labels)
)

image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]

# The pixel values of the rescaled image now lie in `[0,1]`.
print(np.min(first_image), np.max(first_image))

0.093149036 0.9981472


In [13]:
# One output unit per class name.
num_classes = len(class_names)

# Augmentation stage: random horizontal flip, rotation and zoom. The input
# shape of the whole network is declared on its first layer.
data_aug = tf.keras.Sequential(
    [
        tf.keras.layers.experimental.preprocessing.RandomFlip(
            "horizontal",
            input_shape=(img_height, img_width, 3),
        ),
        tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
        tf.keras.layers.experimental.preprocessing.RandomZoom(0.1),
    ]
)

# Four identical Conv2D(64, 3x3, relu) -> MaxPooling2D(2, 2) pairs, created in
# order. If data_aug were not used, input_shape would have to be given on the
# first Conv2D instead.
conv_pool_blocks = [
    layer
    for _ in range(4)
    for layer in (
        tf.keras.layers.Conv2D(64, (3, 3), activation=tf.nn.relu),
        tf.keras.layers.MaxPooling2D(2, 2),
    )
]

# Full Sequential model: augmentation, rescaling to 1/255, the convolutional
# stack, then dropout and the dense classification head with softmax output.
model = tf.keras.Sequential(
    [
        data_aug,
        tf.keras.layers.experimental.preprocessing.Rescaling(1./255),
        *conv_pool_blocks,
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation=tf.nn.relu),
        tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax),
    ]
)

In [14]:
# Print the per-layer summary of the model (layer type, output shape, parameter count).
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_1 (Sequential)    (None, 180, 180, 3)       0         
_________________________________________________________________
rescaling_2 (Rescaling)      (None, 180, 180, 3)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 178, 178, 64)      1792      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 89, 89, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 87, 87, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 43, 43, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 41, 41, 64)       

In [15]:
# The model's final Dense layer applies softmax, so the loss receives class
# probabilities rather than raw logits. from_logits must therefore be False;
# with from_logits=True the loss would treat those probabilities as logits.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

In [17]:
# Number of passes over the training dataset.
epochs = 10

# Train the model, evaluating on validation_data after every epoch; the
# returned object is kept in `history`.
history = model.fit(
    x=training_data,
    epochs=epochs,
    validation_data=validation_data,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
