Google Colab 提供tesla t4可以进行混合精度训练，以提高速度。https://www.tensorflow.org/guide/mixed_precision?hl=zh-cn

Note: the TensorFlow version needs to be below 2.5.0.

In [None]:
# get helper functions
from helper_functions import create_tensorboard_callback, plot_loss_curves, compare_historys

In [None]:
# Use TensorFlow Datasets (TFDS) to download the data
import tensorflow_datasets as tfds

# List the names of all dataset builders TFDS provides
datasets_list = tfds.list_builders()
# Check that the dataset loaded later in this notebook ("food101") is available
print("food101" in datasets_list) # should be True

In [None]:
# Load the Food101 dataset together with its metadata.
# Two splits are requested and with_info=True, so the result unpacks into
# (train_data, test_data) plus a ds_info metadata object.
(train_data, test_data), ds_info = tfds.load(
    name="food101",
    split=["train", "validation"],
    shuffle_files=True,
    as_supervised=True,  # data gets returned in tuple format (data, label)
    with_info=True  # also return the dataset metadata (ds_info)
    )

### Explore data
* class names
* shape of input data
* datatype of input data
* what the labels look like

In [None]:
# Show the feature description of Food101 stored in the TFDS metadata
ds_info.features

In [None]:
# Read the class names from the "label" feature and preview the first 10
class_names = ds_info.features["label"].names
class_names[:10]

In [None]:
# Take a single sample from the training data (kept as a one-element dataset)
train_one_sample = train_data.take(1)

In [None]:
# Display the dataset object returned by take(1)
train_one_sample

In [None]:
# Print the shape, datatype and label (tensor and class-name form) of the training sample.
# Note: `image` and `label` stay defined after the loop and are reused by later cells.
for image, label in train_one_sample:
    print(f"""
    Image shape: {image.shape}
    Image datatype: {image.dtype}
    Target class from Food101 (tensor form): {label}
    Class name (str form): {class_names[label.numpy()]}
    """)

In [None]:
# To look at the raw image tensor, uncomment the next line:
# image
# Check the minimum and maximum values of the sample image tensor
import tensorflow as tf
tf.reduce_min(image), tf.reduce_max(image)
# expected result: (0, 255)

In [None]:
# Plot the sample image with its class name as the title
import matplotlib.pyplot as plt
plt.imshow(image)
plt.title(class_names[label.numpy()])
plt.axis(False)  # hide the axes

### Preprocessing
```python
tf.keras.preprocessing.image_dataset_from_directory(
    directory, 
    labels='inferred', 
    label_mode='int',
    class_names=None, 
    color_mode='rgb', 
    batch_size=32, 
    image_size=(256, 256), 
    shuffle=True, 
    seed=None, 
    validation_split=None, 
    subset=None, 
    interpolation='bilinear', 
    follow_links=False
)
```

what should be done:
1. convert the image dtype to float32 (mixed precision then uses float16 where it can)
2. reshape and batch data
3. normalize, scale to 0-1

In [None]:
# create preprocessing functions for data
def preprocess_img(image, label, img_shape=224):
    """
    Resize an image and convert it to float32, passing its label through.

    Args:
        image: image tensor to preprocess (the Food101 samples in this
            notebook are uint8).
        label: label that belongs to `image`; returned unchanged.
        img_shape: target height and width in pixels (default 224).

    Returns:
        A `(float32_image, label)` tuple, where `float32_image` has shape
        (img_shape, img_shape, color_channels).
    """
    image = tf.image.resize(image, [img_shape, img_shape])
    # No 1/255 rescaling here: it is not required with the EfficientNetBX
    # models from tf.keras.applications.
    # The label must be returned too, otherwise Dataset.map() would drop it
    # and the dataset could no longer be used for supervised training.
    return tf.cast(image, tf.float32), label

In [None]:
# Preprocess the single sample image and compare it before/after.
# NOTE(review): the [0] is meant to select the image from the (image, label)
# pair described in preprocess_img's docstring - confirm the function returns that pair.
preprocessed_img = preprocess_img(image, label)[0]
print(f"Image before preprocessing:\n {image[:2]}..., \nShape: {image.shape},\nDatatype: {image.dtype}\n")
print(f"Image after preprocessing:\n {preprocessed_img[:2]}..., \nShape: {preprocessed_img.shape},\nDatatype: {preprocessed_img.dtype}")

In [None]:
# Build the tf.data input pipelines, one step per line.

# Training data: preprocess in parallel -> shuffle -> batch -> prefetch
train_data = train_data.map(preprocess_img, num_parallel_calls=tf.data.AUTOTUNE)
train_data = train_data.shuffle(buffer_size=1000)
train_data = train_data.batch(batch_size=32)
train_data = train_data.prefetch(buffer_size=tf.data.AUTOTUNE)

# Test data: preprocess in parallel -> batch -> prefetch (no shuffling)
test_data = test_data.map(preprocess_img, num_parallel_calls=tf.data.AUTOTUNE)
test_data = test_data.batch(32)
test_data = test_data.prefetch(tf.data.AUTOTUNE)

以上所有autotune，prefetch使得内部可以进行并行计算，他们都是为了训练提速的方案。

`num_parallel_calls=tf.data.AUTOTUNE` lets tf.data decide at runtime how many elements to preprocess in parallel, based on the resources available.

`prefetch` prepares the next batches in the background while the model is still training on the current one.

In [None]:
# Display both datasets to check their element shapes and datatypes
train_data, test_data

### Create modelling callbacks
* tensorboard
* early stopping
* checkpoint

In [None]:
from helper_functions import create_tensorboard_callback
# Checkpoint callback: keep only the weights of the best epoch,
# judged by accuracy on the validation data.
checkpoint_path = "model_checkpoints/cp.ckpt"
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor="val_accuracy",  # key logged for metrics=["accuracy"] on the validation data
    save_best_only=True,  # only overwrite the checkpoint when the monitored value improves
    save_weights_only=True,  # store the weights only, not the whole model
    verbose=0
)

### Set up mixed precision

In [None]:
# Turn on mixed precision globally by selecting the "mixed_float16" policy
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy("mixed_float16")

### Build feature extraction model


In [None]:
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

# Create the base model: EfficientNetB0 without its top layers, frozen
input_shape = (224, 224, 3)
base_model = tf.keras.applications.EfficientNetB0(include_top=False)
base_model.trainable = False

# Create the functional model on top of the frozen base model
inputs = layers.Input(shape=input_shape, name="input_layer")
# Note: EfficientNetBX models have rescaling built in; other architectures would
# need a rescaling layer here, e.g. x = preprocessing.Rescaling(1./255)(inputs)
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(len(class_names))(x)  # one output unit per class
# Softmax is a separate layer with dtype float32 so the model outputs float32 activations
outputs = layers.Activation("softmax", dtype=tf.float32, name="softmax_float32")(x)
model = tf.keras.Model(inputs, outputs) 

# Compile the model
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"]
)

In [None]:
# Print a summary of the feature extraction model
model.summary()

In [None]:
# Check the dtype policy of each top-level layer (are we using mixed precision?)
# Printed per layer: name, trainable flag, dtype, dtype policy
for layer in model.layers:
    print(layer.name, layer.trainable, layer.dtype, layer.dtype_policy)

In [None]:
# Check the dtype policy of the layers inside the base model (model.layers[1]);
# only the first 20 layers are printed
for layer in model.layers[1].layers[:20]:
    print(layer.name, layer.trainable, layer.dtype, layer.dtype_policy)

In [None]:
# Train the feature extraction model.
# Only log errors from TensorFlow (hide warnings).
tf.get_logger().setLevel("ERROR")

# Callbacks: TensorBoard logging plus the checkpoint callback created earlier
feature_extract_callbacks = [
    create_tensorboard_callback(
        "training_logs",
        "efficientnetb0_101_classes_all_data_feature_extract",
    ),
    model_checkpoint,
]

# 3 epochs over the whole training set; each validation pass uses 15% of the test batches
history_101_food_classes_feature_extract = model.fit(
    train_data,
    validation_data=test_data,
    epochs=3,
    steps_per_epoch=len(train_data),
    validation_steps=int(0.15 * len(test_data)),
    callbacks=feature_extract_callbacks,
)

In [None]:
# Evaluate the model on the full test dataset and display the results
results_feature_extract_model = model.evaluate(test_data)
results_feature_extract_model

### Load and evaluate checkpoint weights

1. recreate a new instance of model called created_model by turning our original model creation code into a function called create_model()
2. compiling created_model with the same loss, optimizer and metrics as the original model
3. call the load_weights() method on created_model and passing it the path to where checkpointed weights are stored
4. call evaluate() method on created_model with loaded weights and saving the results
5. compare the created_model results to previous model results (should be the exact same, if not very close)

In [None]:
# 1. create a function to recreate the original model
def create_model():
    """
    Build a new, uncompiled copy of the feature extraction model.

    The model is a frozen EfficientNetB0 (without top layers), followed by
    global average pooling, a dense layer with one unit per entry in the
    module-level `class_names`, and a float32 softmax activation.

    Returns:
        The assembled tf.keras.Model, expecting inputs of shape (224, 224, 3).
    """
    # Frozen backbone without its classification head
    backbone = tf.keras.applications.efficientnet.EfficientNetB0(include_top=False)
    backbone.trainable = False

    # No rescaling layer is added: EfficientNetBX models do not need one
    image_input = layers.Input(shape=(224, 224, 3), name="input_layer")
    features = backbone(image_input, training=False)
    pooled = layers.GlobalAveragePooling2D(name="pooling_layer")(features)
    logits = layers.Dense(len(class_names))(pooled)
    # Softmax lives in its own float32 layer so the outputs are float32
    probabilities = layers.Activation("softmax", dtype=tf.float32, name="softmax_float32")(logits)

    return tf.keras.Model(image_input, probabilities)

# 2. Build a fresh instance of the model and compile it like the original
created_model = create_model()
adam_optimizer = tf.keras.optimizers.legacy.Adam()
created_model.compile(
    optimizer=adam_optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# 3. Restore the checkpointed weights into the new instance
created_model.load_weights(checkpoint_path)

# 4. Evaluate the restored model on the test data
results_created_model_with_loaded_weights = created_model.evaluate(test_data)

In [None]:
# 5. The model with loaded checkpoint weights should give very similar results
#    to the original model
import numpy as np  # must be bound as `np`, the name the assertion uses
assert np.isclose(results_feature_extract_model, results_created_model_with_loaded_weights).all(), "Loaded weights results are not close to original model."

In [None]:
# Check the dtype policy of the layers inside created_model's base model
# (created_model.layers[1]); only the first 20 layers are printed
for layer in created_model.layers[1].layers[:20]:
    print(layer.name, layer.trainable, layer.dtype, layer.dtype_policy)

### Save the whole model to file
`save()`

In [None]:
# ## Saving model to Google Drive (optional)

# # Create save path to drive 
# save_dir = "drive/MyDrive/tensorflow_course/food_vision/07_efficientnetb0_feature_extract_model_mixed_precision/"
# # os.makedirs(save_dir) # Make directory if it doesn't exist

# # Save model
# model.save(save_dir)

In [None]:
# Save the whole model locally under save_dir
save_dir = "efficientnetb0_feature_extract_model_mixed_precision"
model.save(save_dir)

In [None]:
# Load the model that was saved to save_dir above
loaded_saved_model = tf.keras.models.load_model(save_dir)

In [None]:
# Check the dtype policy of the layers inside the loaded model's base model
# (loaded_saved_model.layers[1])
for layer in loaded_saved_model.layers[1].layers[:20]: # check only the first 20 layers to save output space
    print(layer.name, layer.trainable, layer.dtype, layer.dtype_policy)

In [None]:
# Evaluate the loaded model on the test data and display the results
# (these should match results_feature_extract_model)
results_loaded_saved_model = loaded_saved_model.evaluate(test_data)
results_loaded_saved_model

In [None]:
# The reloaded model should score the same (or very nearly the same) as the
# model did before it was saved.
# Note: both results variables must already exist, so run the evaluation cells first.
import numpy as np
assert np.all(np.isclose(results_feature_extract_model, results_loaded_saved_model))

### Preparing model layers for fine-tuning

In [None]:
# Download the zipped saved model from Google Storage (shell command via `!`)
!wget https://storage.googleapis.com/ztm_tf_course/food_vision/07_efficientnetb0_feature_extract_model_mixed_precision.zip 

In [None]:
# Unzip the SavedModel downloaded from Google Storage into its own directory
!mkdir downloaded_gs_model # create new dir to store downloaded feature extraction model
!unzip 07_efficientnetb0_feature_extract_model_mixed_precision.zip -d downloaded_gs_model

In [None]:
# Load the downloaded Google Storage (GS) model from the unzipped directory
loaded_gs_model = tf.keras.models.load_model("downloaded_gs_model/07_efficientnetb0_feature_extract_model_mixed_precision")

In [None]:
# Print a summary of the downloaded GS model
loaded_gs_model.summary()

In [None]:
# Evaluate the downloaded model on the test data and display the results
results_loaded_gs_model = loaded_gs_model.evaluate(test_data)
results_loaded_gs_model

In [None]:
# Mark every top-level layer of the loaded model as trainable (for fine-tuning)
# and print each layer's name, trainable flag, dtype and dtype policy
for layer in loaded_gs_model.layers:
    layer.trainable = True
    print(layer.name, layer.trainable, layer.dtype, layer.dtype_policy)

model layers:

0. the input layer
1. the pre-trained base model layer: tf.keras.applications.efficientnet.EfficientNetB0
2. the pooling layer
3. the fully-connected dense layer
4. the output softmax activation with float32 dtype

In [None]:
# Check the trainable flag and dtype policy of the layers inside the base model
# (loaded_gs_model.layers[1]); only the first 20 layers are printed
for layer in loaded_gs_model.layers[1].layers[:20]:
    print(layer.name, layer.trainable, layer.dtype, layer.dtype_policy)

In [None]:
# EarlyStopping callback: stop training when val_loss stops improving (patience of 3)
early_stopping = tf.keras.callbacks.EarlyStopping(patience=3, monitor="val_loss")

# ModelCheckpoint callback: keep the best model (by val_loss) seen during fine-tuning
checkpoint_path = "fine_tune_checkpoints/"
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_loss",
    save_best_only=True,
)

# ReduceLROnPlateau callback: shrink the learning rate when val_loss plateaus
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    patience=2,
    factor=0.2,  # new learning rate = old learning rate * 0.2
    min_lr=1e-7,  # lower bound for the learning rate
    verbose=1,
)

In [None]:
# Compile the model for fine-tuning with a small learning rate (0.0001).
# The optimizer module is `tf.keras.optimizers` (plural).
loaded_gs_model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=tf.keras.optimizers.legacy.Adam(0.0001),
    metrics=["accuracy"]
)

In [None]:
# Fine-tune all layers of the loaded model.
# Callbacks: TensorBoard logging, best-model checkpointing, early stopping
# and learning-rate reduction.
fine_tune_callbacks = [
    create_tensorboard_callback("training_logs", "efficientb0_101_classes_all_data_fine_tuning"),
    model_checkpoint,
    early_stopping,
    reduce_lr,
]

# Up to 100 epochs (early stopping may end training sooner);
# each validation pass uses 15% of the test batches
history_101_food_classes_all_data_fine_tune = loaded_gs_model.fit(
    train_data,
    validation_data=test_data,
    epochs=100,
    steps_per_epoch=len(train_data),
    validation_steps=int(0.15 * len(test_data)),
    callbacks=fine_tune_callbacks,
)

In [None]:
# Save the fine-tuned model locally
loaded_gs_model.save("efficientnetb0_fine_tuned_101_classes_mixed_precision")