## Data Prep and Modeling

In [22]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer
from tensorflow.keras.layers import InputLayer, Conv2D, MaxPool2D, Flatten, Dense, BatchNormalization, Input, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import L2
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy, FalsePositives, FalseNegatives, TruePositives, TrueNegatives, Precision, Recall, AUC, binary_accuracy
from tensorflow.keras.callbacks import LearningRateScheduler
import sklearn
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [3]:
# Load the TFDS 'malaria' dataset as (image, label) pairs. Because `split` is given as a
# list, `dataset` is a list as well; its only element is accessed as dataset[0] below.
dataset, dataset_info = tfds.load('malaria', with_info=True, as_supervised=True, split=['train'], shuffle_files=True)

# Fractions of the examples intended for the training / validation / test subsets.
TRAIN_RATIO = 0.6
VAL_RATIO = 0.2
TEST_RATIO = 0.2
# Number of examples in the loaded split.
DATASET_SIZE = len(dataset[0])

def splits(dataset, TRAIN_RATIO, VAL_RATIO, TEST_RATIO):
  """Split `dataset` sequentially into train / validation / test subsets.

  The subset sizes are derived from the length of the dataset that is passed
  in, so the function also works for datasets other than the one that
  DATASET_SIZE was measured on.

  Args:
    dataset: object exposing `take(n)` and `skip(n)` (e.g. a tf.data.Dataset).
    TRAIN_RATIO: fraction of the examples placed in the training subset.
    VAL_RATIO: fraction of the examples placed in the validation subset.
    TEST_RATIO: accepted for call-site symmetry only; the test subset is
      whatever remains after the training and validation examples are removed.

  Returns:
    Tuple `(train_dataset, val_dataset, test_dataset)`.
  """
  try:
    dataset_size = len(dataset)
  except TypeError:
    # len() is not available for this dataset; keep the previous behaviour of
    # relying on the module-level DATASET_SIZE.
    dataset_size = DATASET_SIZE

  train_count = int(TRAIN_RATIO * dataset_size)
  val_count = int(VAL_RATIO * dataset_size)

  train_dataset = dataset.take(train_count)

  # Everything after the training examples is shared by validation and test.
  val_test_dataset = dataset.skip(train_count)
  val_dataset = val_test_dataset.take(val_count)
  test_dataset = val_test_dataset.skip(val_count)
  return train_dataset, val_dataset, test_dataset

# Partition the single loaded split into training / validation / test subsets.
train_dataset, val_dataset, test_dataset = splits(dataset[0], TRAIN_RATIO, VAL_RATIO, TEST_RATIO)

# Side length used for both dimensions when images are resized; also the model's input size.
IM_SIZE = 224
def resize_rescale(image, label):
  """Resize `image` to IM_SIZE x IM_SIZE and divide it by 255.0; `label` is returned unchanged."""
  target_size = (IM_SIZE, IM_SIZE)
  resized = tf.image.resize(image, target_size)
  return resized / 255.0, label

# Apply the resize/rescale preprocessing to every subset.
train_dataset = train_dataset.map(resize_rescale)
val_dataset = val_dataset.map(resize_rescale)
test_dataset = test_dataset.map(resize_rescale)

# Number of examples per batch for the training and validation pipelines.
BATCH_SIZE = 32
# NOTE(review): a shuffle buffer of 8 only mixes elements inside a very small window — confirm this is intended.
train_dataset = train_dataset.shuffle(buffer_size=8, reshuffle_each_iteration=True).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.shuffle(buffer_size=8).batch(BATCH_SIZE)
# NOTE(review): the batching line for test_dataset is disabled, so test_dataset stays unbatched here.
# test_dataset = test_dataset.shuffle(buffer_size=8).batch(BATCH_SIZE)

Downloading and preparing dataset 337.08 MiB (download: 337.08 MiB, generated: Unknown size, total: 337.08 MiB) to /root/tensorflow_datasets/malaria/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Shuffling /root/tensorflow_datasets/malaria/1.0.0.incompleteDVTANI/malaria-train.tfrecord*...:   0%|          …

Dataset malaria downloaded and prepared to /root/tensorflow_datasets/malaria/1.0.0. Subsequent calls will reuse this data.


In [6]:
# Regularisation hyper-parameters shared by the layers below.
dropout_rate = 0.2
regularization_rate = 0.01

# Two convolutional blocks followed by two dense layers and a single sigmoid output unit.
model = tf.keras.Sequential([
    InputLayer(input_shape=(IM_SIZE, IM_SIZE, 3)),

    # Convolutional block 1
    Conv2D(filters=6, kernel_size=3, strides=1, padding='valid', activation='relu', kernel_regularizer=L2(regularization_rate)),
    BatchNormalization(),
    MaxPool2D(pool_size=2, strides=2),
    Dropout(rate=dropout_rate),

    # Convolutional block 2
    Conv2D(filters=16, kernel_size=3, strides=1, padding='valid', activation='relu', kernel_regularizer=L2(regularization_rate)),
    BatchNormalization(),
    MaxPool2D(pool_size=2, strides=2),

    Flatten(),

    # Dense head
    Dense(100, activation='sigmoid', kernel_regularizer=L2(regularization_rate)),
    BatchNormalization(),
    Dropout(rate=dropout_rate),

    Dense(10, activation='sigmoid', kernel_regularizer=L2(regularization_rate)),
    BatchNormalization(),
    Dense(1, activation='sigmoid'),  # 0 or 1
])
# summary() prints the table itself and returns None, so wrapping it in print()
# would append a stray "None" to the cell output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 6)       168       
                                                                 
 batch_normalization (BatchN  (None, 222, 222, 6)      24        
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 111, 111, 6)      0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 111, 111, 6)       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 16)      880       
                                                                 
 batch_normalization_1 (Batc  (None, 109, 109, 16)     6

## Custom Loss

### How binary cross-entropy (BCE) works

https://www.tensorflow.org/api_docs/python/tf/keras/losses/BinaryCrossentropy

In [7]:
# Example 1: (batch_size = 1, number of samples = 4)
# y_pred holds raw logits rather than probabilities, which is why the loss is
# built with from_logits=True.
y_true = [0, 1, 0, 0]
y_pred = [-18.6, 0.51, 2.94, -12.8]
bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
# Calling the loss object returns a single scalar for the four samples.
bce(y_true, y_pred).numpy()

0.865458

### Custom Loss method (without parameters)

In [8]:
def custom_bce(y_true, y_pred):
  """Loss function that evaluates a default-configured BinaryCrossentropy on the given batch."""
  return BinaryCrossentropy()(y_true, y_pred)

In [9]:
# Metrics reported for both the training and the validation data.
metrics = [
    BinaryAccuracy(name='accuracy'),
    Precision(name='precision'),
    Recall(name='recall'),
    AUC(name='auc'),
]
# Compile with the parameter-less custom loss function and train for one epoch.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=custom_bce,
    metrics=metrics,
)
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=1,
    verbose=1,
)



### Custom Loss method (with parameters)

In [10]:
# Multiplier applied to the loss value.
FACTOR = 1
def custom_bce(FACTOR):
  """Build a loss function that returns binary cross-entropy multiplied by FACTOR."""
  def loss(y_true, y_pred):
    unscaled = BinaryCrossentropy()(y_true, y_pred)
    return unscaled * FACTOR
  return loss

In [12]:
# Fresh metric objects for this compile() call.
metrics = [
    BinaryAccuracy(name='accuracy'),
    Precision(name='precision'),
    Recall(name='recall'),
    AUC(name='auc'),
]
# Compile with the loss function produced by the factory and train for one epoch.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=custom_bce(FACTOR),
    metrics=metrics,
)
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=1,
    verbose=1,
)



### Custom Loss Class

In [17]:
class CustomBCE(tf.keras.losses.Loss):
  """Binary cross-entropy multiplied by a constant factor.

  Args:
    FACTOR: multiplier applied to the per-sample binary cross-entropy.
    name: optional name forwarded to `tf.keras.losses.Loss`.
    **kwargs: further base-class arguments (e.g. `reduction`).
  """
  def __init__(self, FACTOR, name=None, **kwargs):
    super(CustomBCE, self).__init__(name=name, **kwargs)
    self.FACTOR = FACTOR

  def call(self, y_true, y_pred):
    """Return the per-sample loss values scaled by FACTOR.

    The values are deliberately left unreduced: the base class applies
    `sample_weight` and the configured reduction in `__call__`. Reducing to a
    scalar here would make those settings act on a single number instead of on
    each sample.
    """
    per_sample_loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    return per_sample_loss * self.FACTOR

In [18]:
# Fresh metric objects for this compile() call.
metrics = [
    BinaryAccuracy(name='accuracy'),
    Precision(name='precision'),
    Recall(name='recall'),
    AUC(name='auc'),
]
# Compile with the class-based custom loss and train for one epoch.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=CustomBCE(FACTOR),
    metrics=metrics,
)
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=1,
    verbose=1,
)



## Custom Metrics

### How binary accuracy works

https://www.tensorflow.org/api_docs/python/tf/keras/metrics/binary_accuracy

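(There are two APIs: the `binary_accuracy` function, which returns one accuracy value per sample, and the `BinaryAccuracy` class, a stateful metric that accumulates a single accuracy value across calls to `update_state`.)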

In [20]:
# Stateful metric object: update_state() accumulates, result() reads the current value.
m = tf.keras.metrics.BinaryAccuracy()
# Arguments are (y_true, y_pred); the displayed result for these four samples is 0.75.
m.update_state([[1], [1], [0], [0]], [[0.98], [1], [0], [0.6]])
m.result().numpy()

0.75

In [21]:
y_true = [[1], [1], [0], [0]]
y_pred = [[1], [1], [0], [0]]
# The functional form returns one value per sample instead of a single aggregate.
# Note that `m` now refers to that tensor, no longer to the metric object created earlier.
m = tf.keras.metrics.binary_accuracy(y_true, y_pred)
assert m.shape == (4,)
m.numpy()

array([1., 1., 1., 1.], dtype=float32)

### Custom Metric Method (without parameters)

In [23]:
def custom_accuracy(y_true, y_pred):
  """Metric function that delegates directly to Keras' `binary_accuracy`."""
  per_sample_accuracy = binary_accuracy(y_true, y_pred)
  return per_sample_accuracy

In [24]:
# Keras documents `metrics` as a list, so the single metric function is wrapped in one
# instead of being passed as a bare callable.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss=CustomBCE(FACTOR),
              metrics=[custom_accuracy])
history = model.fit(train_dataset, validation_data=val_dataset, epochs=1, verbose=1)



### Custom Metric Method (with parameters)

In [27]:
# Multiplier applied to the metric values.
FACTOR = 1
def custom_accuracy(FACTOR):
  """Build a metric function that returns `binary_accuracy` multiplied by FACTOR."""
  def accuracy(y_true, y_pred):
    per_sample_accuracy = binary_accuracy(y_true, y_pred)
    return per_sample_accuracy * FACTOR
  return accuracy

In [28]:
# Keras documents `metrics` as a list, so the metric function returned by the factory
# is wrapped in one instead of being passed as a bare callable.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss=CustomBCE(FACTOR),
              metrics=[custom_accuracy(FACTOR)])
history = model.fit(train_dataset, validation_data=val_dataset, epochs=1, verbose=1)



### Custom Metric Class

https://www.tensorflow.org/api_docs/python/tf/keras/metrics/Metric

In [41]:
class CustomBinaryAccuracy(tf.keras.metrics.Metric):
  """Running binary accuracy, multiplied by a constant factor.

  The metric keeps the sum of the per-sample accuracies and the number of
  samples seen since the last reset, so `result()` covers every batch since
  then rather than only the most recent one.

  Args:
    FACTOR: multiplier applied to the accuracy returned by `result()`.
    name: metric name, forwarded to the base class so it is the name Keras reports.
    **kwargs: further base-class arguments (e.g. `dtype`).
  """
  def __init__(self, FACTOR, name="custom_accuracy", **kwargs):
    super(CustomBinaryAccuracy, self).__init__(name=name, **kwargs)
    self.FACTOR = FACTOR
    # Accumulators: sum of per-sample accuracies and number of samples seen.
    self.total = self.add_weight(name="total", initializer='zeros')
    self.count = self.add_weight(name="count", initializer='zeros')

  # Both update_state() and result() must be implemented, otherwise the class
  # cannot be instantiated; update_state() must also accept `sample_weight`
  # because Keras passes it as a keyword argument.
  def update_state(self, y_true, y_pred, sample_weight=None):
    '''Accumulate the statistics of one batch. `sample_weight` is accepted but not used.'''
    per_sample = binary_accuracy(tf.cast(y_true, dtype=tf.float32),
                                 tf.cast(y_pred, dtype=tf.float32))
    self.total.assign_add(tf.reduce_sum(per_sample))
    self.count.assign_add(tf.cast(tf.size(per_sample), dtype=tf.float32))

  def result(self):
    '''Return FACTOR times the accuracy over all samples seen since the last reset.'''
    # divide_no_nan yields 0 instead of NaN before the first update.
    return tf.math.divide_no_nan(self.total, self.count) * self.FACTOR

  def reset_state(self):
    '''Reset the accumulators (Keras does this at the start of every epoch).'''
    self.total.assign(0.)
    self.count.assign(0.)

  def reset_states(self):
    '''Older spelling of reset_state(), kept for backward compatibility.'''
    self.reset_state()

In [43]:
# Keras documents `metrics` as a list, so the single metric instance is wrapped in one.
# The metric is updated in graph mode by default; pass run_eagerly=True to compile()
# to step through update_state() eagerly while debugging.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss=CustomBCE(FACTOR),
              metrics=[CustomBinaryAccuracy(FACTOR)])
history = model.fit(train_dataset, validation_data=val_dataset, epochs=1, verbose=1)



  # NOTE(review): `m` was last bound (In [21]) to the tensor returned by binary_accuracy, not to
  # a BinaryAccuracy metric object — confirm this call targets the intended object.
  m.reset_state()


