## Data prep and modeling

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer
from tensorflow.keras.layers import InputLayer, Conv2D, MaxPool2D, Flatten, Dense, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy, FalsePositives, FalseNegatives, TruePositives, TrueNegatives, Precision, Recall, AUC
import sklearn
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [2]:
# Load the single 'train' split as (image, label) pairs plus the dataset metadata.
# shuffle_files=False keeps the record order stable across passes over the data.
# The train/val/test split in this notebook is positional (take/skip), so a
# file order that changes between iterations could move examples from one
# subset to another and leak validation/test data into training.
dataset, dataset_info = tfds.load(
    'malaria',
    split=['train'],
    as_supervised=True,
    with_info=True,
    shuffle_files=False,
)

Downloading and preparing dataset 337.08 MiB (download: 337.08 MiB, generated: Unknown size, total: 337.08 MiB) to /root/tensorflow_datasets/malaria/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Shuffling /root/tensorflow_datasets/malaria/1.0.0.incompleteN3RIFY/malaria-train.tfrecord*...:   0%|          …

Dataset malaria downloaded and prepared to /root/tensorflow_datasets/malaria/1.0.0. Subsequent calls will reuse this data.


In [3]:
# Total number of examples in the loaded 'train' split (the only split
# requested from tfds.load, hence index 0 of the returned list).
DATASET_SIZE = len(dataset[0])

In [4]:
# Fractions of the full dataset assigned to the train / validation / test subsets.
TRAIN_RATIO, VAL_RATIO, TEST_RATIO = 0.6, 0.2, 0.2

def splits(dataset, TRAIN_RATIO, VAL_RATIO, TEST_RATIO):
  """Split `dataset` positionally into train, validation and test subsets.

  The first `TRAIN_RATIO` share of the examples becomes the training set, the
  next `VAL_RATIO` share the validation set, and everything that remains the
  test set.

  Args:
    dataset: object exposing `take(n)` and `skip(n)`, and ideally `len()`.
    TRAIN_RATIO: fraction of examples for training, in [0, 1].
    VAL_RATIO: fraction of examples for validation, in [0, 1].
    TEST_RATIO: accepted for interface symmetry only; the test subset always
      receives the examples left after the train and validation subsets.

  Returns:
    A `(train_dataset, val_dataset, test_dataset)` tuple.

  Raises:
    ValueError: if a ratio is outside [0, 1] or TRAIN_RATIO + VAL_RATIO > 1.
  """
  for ratio_name, ratio in (("TRAIN_RATIO", TRAIN_RATIO),
                            ("VAL_RATIO", VAL_RATIO),
                            ("TEST_RATIO", TEST_RATIO)):
    if not 0 <= ratio <= 1:
      raise ValueError(f"{ratio_name} must be within [0, 1], got {ratio}")
  # Small tolerance so ratios that sum to 1 only up to float rounding still pass.
  if TRAIN_RATIO + VAL_RATIO > 1 + 1e-9:
    raise ValueError("TRAIN_RATIO + VAL_RATIO must not exceed 1")

  # Size the split from the dataset that was actually passed in, so the
  # function also works for datasets other than the one DATASET_SIZE describes.
  try:
    dataset_size = len(dataset)
  except TypeError:
    # len() is unavailable (e.g. unknown cardinality): fall back to the
    # notebook-level constant, which is what this function always used before.
    dataset_size = DATASET_SIZE

  train_size = int(TRAIN_RATIO*dataset_size)
  val_size = int(VAL_RATIO*dataset_size)

  train_dataset = dataset.take(train_size)

  val_test_dataset = dataset.skip(train_size)
  val_dataset = val_test_dataset.take(val_size)

  # The test subset is the remainder, so no example is lost to int() truncation.
  test_dataset = val_test_dataset.skip(val_size)
  return train_dataset, val_dataset, test_dataset

# Carve the single loaded split into train / validation / test subsets.
train_dataset, val_dataset, test_dataset = splits(
    dataset=dataset[0],
    TRAIN_RATIO=TRAIN_RATIO,
    VAL_RATIO=VAL_RATIO,
    TEST_RATIO=TEST_RATIO,
)

# Edge length (in pixels) of the square images fed to the model.
IM_SIZE = 224


def resize_rescale(image, label):
  """Resize `image` to IM_SIZE x IM_SIZE and divide its values by 255.0.

  Args:
    image: image tensor accepted by `tf.image.resize`.
    label: passed through unchanged.

  Returns:
    A `(resized_and_scaled_image, label)` tuple.
  """
  target_size = (IM_SIZE, IM_SIZE)
  resized_image = tf.image.resize(image, target_size)
  return resized_image/255.0, label

# NOTE: this cell reassigns train/val/test_dataset, so re-running it without
# re-running the split above would apply the preprocessing a second time.
BATCH_SIZE = 32
# A shuffle buffer of only a handful of elements barely changes the order;
# use a buffer large enough to mix examples across many batches.
SHUFFLE_BUFFER_SIZE = 1024

# Training pipeline: shuffle -> preprocess -> batch -> prefetch.
# Shuffling before the map keeps un-resized examples in the shuffle buffer
# (presumably smaller than IM_SIZE x IM_SIZE float tensors - TODO confirm).
train_dataset = (
    train_dataset
    .shuffle(buffer_size=SHUFFLE_BUFFER_SIZE, reshuffle_each_iteration=True)
    .map(resize_rescale, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation pipeline: order does not affect the aggregated metrics, so no
# shuffle is needed; prefetch so evaluation overlaps with preprocessing.
val_dataset = (
    val_dataset
    .map(resize_rescale, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# The test set is preprocessed but intentionally left unbatched here, as in the
# original cell; batch it before passing it to model.evaluate / model.predict.
test_dataset = test_dataset.map(resize_rescale, num_parallel_calls=tf.data.AUTOTUNE)

In [7]:
# Small LeNet-style CNN: two conv/batch-norm/max-pool stages followed by a
# dense classifier head that ends in a single sigmoid unit.
model = tf.keras.Sequential([
    InputLayer(input_shape=(IM_SIZE, IM_SIZE, 3)),

    # Feature extractor, stage 1.
    Conv2D(filters=6, kernel_size=3, strides=1, padding='valid', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=2, strides=2),

    # Feature extractor, stage 2.
    Conv2D(filters=16, kernel_size=3, strides=1, padding='valid', activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=2, strides=2),

    Flatten(),

    # Classifier head.
    Dense(100, activation='sigmoid'),
    BatchNormalization(),
    Dense(10, activation='sigmoid'),
    BatchNormalization(),
    Dense(1, activation='sigmoid'),  # one output in (0, 1) for the binary label
])
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line after the table.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 6)       168       
                                                                 
 batch_normalization (BatchN  (None, 222, 222, 6)      24        
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 111, 111, 6)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 16)      880       
                                                                 
 batch_normalization_1 (Batc  (None, 109, 109, 16)     64        
 hNormalization)                                                 
                                                        

In [8]:
# Metrics reported for both the training and the validation data.
metrics = [
    metric_cls(name=metric_name)
    for metric_cls, metric_name in (
        (BinaryAccuracy, 'accuracy'),
        (Precision, 'precision'),
        (Recall, 'recall'),
        (AUC, 'auc'),
    )
]

# Binary cross-entropy pairs with the model's single sigmoid output.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=BinaryCrossentropy(),
    metrics=metrics,
)

# Baseline run without any callbacks.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=3,
    verbose=1,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


## Callbacks

Utilities called at certain points during model training.

https://www.tensorflow.org/api_docs/python/tf/keras/callbacks

In [15]:
from tensorflow.keras.callbacks import Callback, CSVLogger

In [13]:
class LossCallback(Callback):
  '''Print the loss found in the Keras `logs` dict after each batch and each epoch.'''

  @staticmethod
  def _reported_loss(logs):
    '''Return logs["loss"], or None when no logs or no "loss" entry were supplied.'''
    return (logs or {}).get("loss")

  def on_epoch_end(self, epoch, logs=None):
    '''Print out the loss values at the end of the epoch'''
    # `logs` defaults to None to match the base Callback hook signature.
    loss = self._reported_loss(logs)
    if loss is None:
      return  # nothing to report for this epoch
    # `epoch` is zero-based; print it one-based for readability.
    print(f"\nEpoch Number {epoch+1} the model has a loss of {loss}")

  def on_batch_end(self, batch, logs=None):
    '''Print out the loss value at the end of each training batch.

    NOTE(review): `on_batch_end` is the legacy hook name; per the Keras docs
    it is a backwards-compatibility alias for `on_train_batch_end` - confirm
    against the installed version before renaming.
    '''
    loss = self._reported_loss(logs)
    if loss is None:
      return  # nothing to report for this batch
    print(f"\nBatch Number {batch+1} the model has a loss of {loss}")

In [14]:
# Fresh metric objects for this compile() call.
metrics = [
    BinaryAccuracy(name='accuracy'),
    Precision(name='precision'),
    Recall(name='recall'),
    AUC(name='auc'),
]
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=BinaryCrossentropy(),
    metrics=metrics,
)

# One epoch is enough to see the custom callback print its per-batch and
# per-epoch loss lines alongside the regular progress bar.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=1,
    verbose=1,
    callbacks=[LossCallback()],
)


Batch Number 1 the model has a loss of 0.2658993601799011
  1/517 [..............................] - ETA: 37:14 - loss: 0.2659 - accuracy: 0.9062 - precision: 0.8824 - recall: 0.9375 - auc: 0.9375
Batch Number 2 the model has a loss of 0.24996784329414368

Batch Number 3 the model has a loss of 0.29177364706993103
  3/517 [..............................] - ETA: 23s - loss: 0.2918 - accuracy: 0.8854 - precision: 0.8235 - recall: 0.9545 - auc: 0.9417  
Batch Number 4 the model has a loss of 0.31359952688217163

Batch Number 5 the model has a loss of 0.2864079475402832
  5/517 [..............................] - ETA: 23s - loss: 0.2864 - accuracy: 0.9062 - precision: 0.8791 - recall: 0.9524 - auc: 0.9397
Batch Number 6 the model has a loss of 0.2726671099662781

Batch Number 7 the model has a loss of 0.2761997878551483
  7/517 [..............................] - ETA: 22s - loss: 0.2762 - accuracy: 0.9152 - precision: 0.8992 - recall: 0.9508 - auc: 0.9427
Batch Number 8 the model has a loss

### CSVLogger
Streams epoch results to a CSV file.

In [16]:
# Log the results of every epoch as one row of logs.csv.
csv_callback = tf.keras.callbacks.CSVLogger(
    filename='logs.csv',
    separator=',',
    append=False,  # overwrite an existing file; use True to append to it instead
)

In [17]:
# Fresh metric objects for this compile() call.
metrics = [
    metric_cls(name=metric_name)
    for metric_cls, metric_name in (
        (BinaryAccuracy, 'accuracy'),
        (Precision, 'precision'),
        (Recall, 'recall'),
        (AUC, 'auc'),
    )
]
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=BinaryCrossentropy(),
    metrics=metrics,
)

# Train for one epoch with the CSV logger attached.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=1,
    verbose=1,
    callbacks=[csv_callback],
)



### EarlyStopping

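- At a certain point the model stops generalizing: the training loss keeps improving while validation performance does not.
- Stop training when overfitting occurs (i.e. when the monitored validation metric, here `val_loss`, stops improving).
- Training is stopped automatically!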

In [18]:
es_callback = tf.keras.callbacks.EarlyStopping(
    # Quantity to watch: training stops once the validation loss stops improving.
    monitor='val_loss',
    # Direction of "improvement". 'min' means lower is better, 'max' means
    # higher is better, 'auto' infers the direction from the monitored name.
    mode='auto',
    # Minimum change that counts as an improvement; 0 accepts any improvement,
    # however small.
    min_delta=0,
    # Number of epochs without improvement to tolerate before stopping.
    # 0 stops at the first epoch that fails to improve; 5 would wait for five
    # such epochs.
    patience=0,
    # No baseline value that the monitored quantity has to reach.
    baseline=None,
    # Keep the weights of the last epoch rather than restoring the best ones.
    restore_best_weights=False,
    # Start monitoring from the very first epoch (no warm-up period).
    start_from_epoch=0,
    # Do not print a message when training is stopped early.
    verbose=0,
)

In [None]:
# Fresh metric objects for this compile() call.
metrics = [
    BinaryAccuracy(name='accuracy'),
    Precision(name='precision'),
    Recall(name='recall'),
    AUC(name='auc'),
]
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=BinaryCrossentropy(),
    metrics=metrics,
)

# EarlyStopping can only end training early when more than one epoch is
# requested, since a single epoch has nothing to compare against. `epochs` is
# therefore an upper bound here and the callback decides when to stop.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=20,
    verbose=1,
    callbacks=[es_callback],
)