<a href="https://colab.research.google.com/github/menon92/DL-Sneak-Peek/blob/master/mixed_precesion_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image

In [2]:
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-fddc28be-8e4d-02c9-b105-0075b2347b2f)


In [3]:
print(f"Tensorflwo verison: {tf.__version__}")
print(f"GPU device name   : {tf.config.list_physical_devices('GPU')}")

Tensorflwo verison: 2.4.1
GPU device name   : [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
data_dir = tf.keras.utils.get_file(
    'flower_photos',
    'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
    untar=True)
print("Data dir:", data_dir)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
Data dir: /root/.keras/datasets/flower_photos


In [5]:
BATCH_SIZE = 128
IMAGE_SIZE = (128, 128)

In [6]:
valid_datagen = image.ImageDataGenerator(
    rescale=1./255,
    validation_split=.20
)
train_datagen = image.ImageDataGenerator(
    rotation_range=40,
    horizontal_flip=True,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    rescale=1./255,
    validation_split=.20
)

print('Validation data:')
valid_generator = valid_datagen.flow_from_directory(
    data_dir,
    subset="validation",
    shuffle=False, 
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    interpolation="bilinear"
)
print('Training data:')
train_generator = train_datagen.flow_from_directory(
    data_dir,
    subset="training",
    shuffle=True, 
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    interpolation="bilinear"
)

Validation data:
Found 731 images belonging to 5 classes.
Training data:
Found 2939 images belonging to 5 classes.


In [47]:
def create_model(img_size=(256, 256), num_class=5, train_base=True):
    '''Create model for training
    '''
    # init input layer 
    input_layer = layers.Input(shape=(img_size[0], img_size[1], 3), name='input')
    
    # init VGG16 model
    base_model = VGG16(
        input_tensor=input_layer, include_top=False, weights="imagenet")

    # set all layer of base model trainable statue
    base_model.trainable = train_base
    
    # take output from base model
    x = base_model.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(num_class, name='logits')(x)

    # for final softmax activation explicit setting of dtype=float32 is important 
    # because for mixed precesion, softmax will be in float 16 which is not 
    # numerically stable computation
    preds = layers.Activation('softmax', dtype='float32', name='predictions')(x)

    # create a Model usilng input and output
    model = Model(inputs=input_layer, outputs=preds)

    return model


def print_model_data_type_policy(model):
    '''Print model dtype policy and and layer dtype of given model'''
    for i, layer in enumerate(model.layers):
        print(
            f"[ {i+1:02d} ] layer.dtype_policy: {layer.dtype_policy}, "
            f"dtype.name: {layer.dtype} layer.name: {layer.name}"
        )

In [50]:
model = create_model(img_size=IMAGE_SIZE)
print_model_data_type_policy(model)

# store the initial weights of the model
initial_weights = model.get_weights()

[ 01 ] layer.dtype_policy: <Policy "float32">, dtype.name: float32 layer.name: input
[ 02 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block1_conv1
[ 03 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block1_conv2
[ 04 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block1_pool
[ 05 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block2_conv1
[ 06 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block2_conv2
[ 07 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block2_pool
[ 08 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block3_conv1
[ 09 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block3_conv2
[ 10 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block3_conv3
[ 11 ] layer.dtype_policy: <Policy 

In [9]:
# compile model 
model.compile(
    loss="categorical_crossentropy",
    optimizer='adam',
    metrics=["accuracy"]
)

In [10]:
# number of times the model will training
epochs = 3

# calculate trian and validation steps per epochs 
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size

In [11]:
%time
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps
)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 6.2 µs
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [44]:
# clear previous session
keras.backend.clear_session()

In [13]:
# Enable XLA
tf.config.optimizer.set_jit(True)

# Enable AMP
keras.mixed_precision.set_global_policy('mixed_float16')

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: Tesla T4, compute capability 7.5


In [51]:
model = create_model(img_size=IMAGE_SIZE)
print_model_data_type_policy(model)

# init model with initial weights
model.set_weights(initial_weights)

[ 01 ] layer.dtype_policy: <Policy "float32">, dtype.name: float32 layer.name: input
[ 02 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block1_conv1
[ 03 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block1_conv2
[ 04 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block1_pool
[ 05 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block2_conv1
[ 06 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block2_conv2
[ 07 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block2_pool
[ 08 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block3_conv1
[ 09 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block3_conv2
[ 10 ] layer.dtype_policy: <Policy "mixed_float16">, dtype.name: float32 layer.name: block3_conv3
[ 11 ] layer.dtype_policy: <Policy 

In [15]:
model.compile(
    loss="categorical_crossentropy",
    optimizer='adam',
    metrics=["accuracy"]
)

In [16]:
%time
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps
)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 7.63 µs
Epoch 1/3
Epoch 2/3
Epoch 3/3


## Resources
- [Accelerated Linear Algebra (XLA)](https://www.tensorflow.org/xla)
- [Mixed precision in tensorflow](https://www.tensorflow.org/guide/mixed_precision)
- [Mixed precision training](https://github.com/sayakpaul/Mixed-Precision-Training-in-tf.keras-2.0)
- [Just in time compilation (JIT)](https://en.wikipedia.org/wiki/Just-in-time_compilation)