In [1]:
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-eaaf37bd-01d7-cbcf-de02-e9ea12b086f2)


In [None]:
!pip install tensorflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  1


In [3]:
import tensorflow as tf
from keras.models import Sequential, load_model
from keras import Model
from tensorflow.keras import mixed_precision
from tensorflow.keras.applications import EfficientNetB0
from keras.layers import Dense, Input, GlobalAveragePooling2D
from keras.layers import RandomFlip, RandomRotation, RandomZoom, RandomHeight, RandomWidth
from tensorflow.keras.layers import Activation
from tensorflow.keras.optimizers import Adam


In [4]:
# Get helper functions file
!wget https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py
# Import series of helper functions for the notebook (we've created/used these in previous notebooks)
from helper_functions import create_tensorboard_callback, plot_loss_curves, unzip_data, compare_historys

--2022-07-23 15:29:55--  https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10246 (10K) [text/plain]
Saving to: ‘helper_functions.py’


2022-07-23 15:29:55 (114 MB/s) - ‘helper_functions.py’ saved [10246/10246]



In [5]:
# Download data from Google Storage (already preformatted)
# !wget https://storage.googleapis.com/ztm_tf_course/food_vision/101_food_classes_10_percent.zip 

# unzip_data("101_food_classes_10_percent.zip")

train_dir = "101_food_classes_10_percent/train/"
test_dir = "101_food_classes_10_percent/test/"

IMG_SIZE = (224, 224)
train_data = tf.keras.preprocessing.image_dataset_from_directory(train_dir,
                                                                                label_mode="categorical",
                                                                                image_size=IMG_SIZE)
                                                                                
test_data = tf.keras.preprocessing.image_dataset_from_directory(test_dir,
                                                                label_mode="categorical",
                                                                image_size=IMG_SIZE,
                                                                shuffle=False)

--2022-07-23 15:29:57--  https://storage.googleapis.com/ztm_tf_course/food_vision/101_food_classes_10_percent.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.68.128, 142.250.4.128, 74.125.24.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.68.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1625420029 (1.5G) [application/zip]
Saving to: ‘101_food_classes_10_percent.zip’


2022-07-23 15:30:19 (72.5 MB/s) - ‘101_food_classes_10_percent.zip’ saved [1625420029/1625420029]

Found 7575 files belonging to 101 classes.
Found 25250 files belonging to 101 classes.


In [11]:
# === Build dataaugmentation layer ===
data_augmentation = Sequential([
    RandomFlip('horizontal'),  # randomly flip images on horizontal edge
    RandomRotation(0.2),  # randomly rotate images by a specific amount
    RandomHeight(0.2),  # randomly adjust the height of an image by a specific  amount
    RandomWidth(0.2),  # randomly adjust the width of an image by a specific amount
    RandomZoom(0.2),  # randomly zoom into an image
    # Rescaling(1./255)  # keep for models like ResNet50V2, remove for EfficientNet
], name='data_augmentation')

base_model = EfficientNetB0(include_top=False)
base_model.trainable = False

inputs = Input(shape=(224, 224, 3), name='input_layer')
x = data_augmentation(inputs)
x = base_model(x, training=False)
x = GlobalAveragePooling2D(name='global_average_pooling')(x)
outputs = Dense(101, activation='softmax', name='dense_output')(x)

mixed_model = Model(inputs, outputs)

In [12]:
mixed_model.compile(loss='categorical_crossentropy',
                    optimizer=Adam(),
                    metrics=['accuracy'])

In [13]:
mixed_history = mixed_model.fit(train_data,
                                epochs=10,
                                validation_data=test_data,
                                validation_steps=int(0.15 * len(test_data)))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
    # === Build Feature-Extraction model with mixed_precision = True ===

# === Build dataaugmentation layer ===
data_augmentation = Sequential([
    RandomFlip('horizontal'),  # randomly flip images on horizontal edge
    RandomRotation(0.2),  # randomly rotate images by a specific amount
    RandomHeight(0.2),  # randomly adjust the height of an image by a specific  amount
    RandomWidth(0.2),  # randomly adjust the width of an image by a specific amount
    RandomZoom(0.2),  # randomly zoom into an image
    # Rescaling(1./255)  # keep for models like ResNet50V2, remove for EfficientNet
], name='data_augmentation')

# Check policy in feature extraction model
output = Dense(10, activation='softmax', name='output')
print(f"Before outputs dtype: %s\n" % output.dtype)
print(f"Before outputs dtype_policy: %s\n" % output.dtype_policy)

policy = mixed_precision.Policy('mixed_float16')
print(mixed_precision.set_global_policy('mixed_float16'))
print('Compute dtype: %s' % policy.compute_dtype)
print('Variable dtype: %s' % policy.variable_dtype)

base_model = EfficientNetB0(include_top=False)
base_model.trainable = False

inputs = Input(shape=(224, 224, 3), name='input_layer')
x = data_augmentation(inputs)
x = base_model(x, training=False)
x = GlobalAveragePooling2D(name='global_average_pooling')(x)
x = Dense(101, name='dense_output')(x)
outputs = Activation('softmax', dtype='float32', name='softmax_output')(x)

mixed_model = Model(inputs, outputs)

print("After outputs dtype: %s" % outputs.dtype)
# print("After outputs dtype_policy: %s" % outputs.dtype_policy)


Before outputs dtype: float32

Before outputs dtype_policy: <Policy "mixed_float16">

None
Compute dtype: float16
Variable dtype: float32
After outputs dtype: <dtype: 'float32'>


In [15]:
mixed_model.compile(loss='categorical_crossentropy',
                    optimizer=Adam(),
                    metrics=['accuracy'])

In [16]:
mixed_history = mixed_model.fit(train_data,
                                epochs=10,
                                validation_data=test_data,
                                validation_steps=int(0.15 * len(test_data)))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


As a result, I can say that:
- Does it make the model train little faster, in my case: **Before**(27s 108ms) versus **After**(25s 105ms)
- Does it have little effect the accuracy or performance of our model, in my case:
**Before**(accuracy-0.7123, val_accuracy-0.5230) versus **After**(accuracy-0.7185, val_accuracy-0.5257)
- Despite the fact that the documentation says about 3 times faster learning on **GPU** and 60% on **TPU**, I don't catch any advantages of using mixed_precision training.