In [1]:
# Clear any existing Keras session so that state left over from earlier
# runs in this kernel does not carry over into this one.
from tensorflow.keras import backend as K
K.clear_session()

In [2]:
# imports
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from wandb.keras import WandbCallback
from utils import data_utils
import tensorflow as tf
import numpy as np
import wandb
import time

In [3]:
# Fix the random seeds for better reproducibility. TensorFlow and NumPy keep
# separate global generators, so each one is seeded explicitly.
SEED = 666
tf.random.set_seed(SEED)
np.random.seed(SEED)

In [4]:
# Enable XLA: turn on just-in-time compilation in TensorFlow's graph optimizer
tf.config.optimizer.set_jit(True)

# Enable AMP: switch on the graph optimizer's automatic mixed precision rewrite
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})

A brief introduction to XLA is available [here](https://docs.google.com/presentation/d/1F7hBey7m7bKSmLB4-Ipe9KvZl--TkaJGi69wRzzpAGM/edit#slide=id.p1). XLA fuses certain operations used in a deep learning model (such as addition, division, and sqrt), thereby speeding up computation.

In [5]:
# Initialize a Weights & Biases run. The project name is passed by keyword:
# the first positional parameter of `wandb.init` is not `project`, so a bare
# string would not select the project the run is logged to.
wandb.init(project="ml-bootcamp")

W&B Run: https://app.wandb.ai/sayakpaul/ML-Bootcamp-Launchpad/runs/436jaav7

In [12]:
# don't change this
# Class names, stored as byte strings; handed to WandbCallback as `labels`
# in the training cell.
# NOTE(review): presumably the order matches the integer labels stored in the
# TFRecords -- confirm against utils/data_utils.
CLASSES = [b'daisy', b'dandelion', b'roses', b'sunflowers', b'tulips']

In [6]:
# define the constants
BATCH_SIZE = 80  # batch size passed to data_utils.batch_dataset for both datasets
EPOCHS = 20  # number of epochs requested from the training call

In [7]:
# Collect the TFRecord shards for each split. The lists are sorted so that the
# file order does not depend on whatever order the glob happens to return, and
# an empty match fails fast here rather than surfacing later as an obscure
# error in the input pipeline.
tfr_pattern_train = "train_tfr/*.tfrec"
train_filenames = sorted(tf.io.gfile.glob(tfr_pattern_train))
tfr_pattern_test = "test_tfr/*.tfrec"
test_filenames = sorted(tf.io.gfile.glob(tfr_pattern_test))

for _pattern, _matches in ((tfr_pattern_train, train_filenames),
                           (tfr_pattern_test, test_filenames)):
    if not _matches:
        raise FileNotFoundError(
            "No TFRecord files match {!r}".format(_pattern))

In [8]:
# create the train and test dataset
# Each call's result is unpacked into a dataset and the step count that is
# later passed to the training call.
# NOTE(review): the third positional argument (True for train, False for test)
# presumably toggles training-only behaviour such as shuffling -- confirm in
# utils/data_utils.
training_dataset, steps_per_epoch = data_utils.batch_dataset(train_filenames, BATCH_SIZE, True)
validation_dataset, validation_steps = data_utils.batch_dataset(test_filenames, BATCH_SIZE, False)

Let's create a utility function that returns a ResNet50 model with a new classification head.

In [9]:
def create_model(img_size=(224,224), num_class=5, train_base=True):
    """Build a ResNet50-based image classifier.

    An ImageNet-initialised ResNet50 without its top layers is attached to a
    fresh input tensor, followed by global average pooling and a softmax
    `Dense` layer.

    Args:
        img_size: Indexable whose first two entries give the spatial
            dimensions of the input; a channel dimension of 3 is appended.
        num_class: Number of units in the final softmax layer.
        train_base: Value assigned to the ResNet50 base's `trainable`
            attribute.

    Returns:
        A `Model` mapping the input tensor to the softmax predictions.
    """
    input_shape = (img_size[0], img_size[1], 3)
    image_input = Input(shape=input_shape)

    # Pre-trained backbone without its original classification layers.
    backbone = ResNet50(weights="imagenet",
                        include_top=False,
                        input_tensor=image_input)
    backbone.trainable = train_base

    # New head: pool the backbone's feature maps, then classify.
    pooled_features = GlobalAveragePooling2D()(backbone.output)
    class_probs = Dense(num_class, activation="softmax")(pooled_features)

    return Model(inputs=image_input, outputs=class_probs)

In [10]:
# Build the model, wrap Adam in a dynamic loss-scaling optimizer (used
# together with the mixed precision setting enabled earlier), and compile.
model = create_model()
opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
    Adam(learning_rate=1e-4),
    "dynamic",
)
model.compile(
    optimizer=opt,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train the model and log the elapsed wall-clock training time to W&B.
# `Model.fit` consumes the datasets directly; `fit_generator` is deprecated
# in favour of it and takes the same arguments used here.
start = time.time()
model.fit(training_dataset,
    steps_per_epoch=steps_per_epoch,
    validation_data=validation_dataset,
    validation_steps=validation_steps,
    epochs=EPOCHS,
    callbacks=[WandbCallback(data_type="image", labels=CLASSES)])
wandb.log({"training_time": time.time() - start})

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


A comparative study on mixed precision training is available [here](https://github.com/sayakpaul/Mixed-Precision-Training-in-tf.keras-2.0).