# Mixed precision

In [None]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import mixed_precision

In [None]:
# Shell escape: ask `nvidia-smi` to list the GPUs on this machine.
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-d84a66c5-d566-712b-aaf7-8c980657c607)


## Setting the dtype policy

In [None]:
# Name of the Keras dtype policy used by the cells below.
# The commented-out line keeps the plain float32 alternative at hand.
policy_str = "mixed_float16"
# policy_str = "float32"

In [None]:
# Build a Policy object from the chosen name and install it as the global policy.
# The `policy` object itself is kept because a later cell prints its dtypes.
policy = mixed_precision.Policy(policy_str)
mixed_precision.set_global_policy(policy)

In [None]:
# Same as the cell above, just shorter: the policy name is passed directly.
mixed_precision.set_global_policy(policy_str)

 `policy` определяет две составляющие слоя:
 * `dtype` вычислений, который будет `float16`, чтобы вычисления были быстрее;
 * `dtype` переменных, который будет `float32`, чтобы результаты были стабильнее.

 Все это можно проверить:

In [None]:
# Show the two dtypes carried by the policy: one for computations, one for variables.
print(f'Compute dtype: {policy.compute_dtype}')
print(f'Variable dtype: {policy.variable_dtype}')

Compute dtype: float16
Variable dtype: float32


## Building the model

Соберем небольшую модельку, чтобы на деле посмотреть, как изменится производительность.

In [None]:
# Flat 784-feature input for the digit classifier.
inputs = keras.Input(shape=(784,), name='digits')

# Pick the layer width from the available hardware: with a GPU use the wide
# model, otherwise fall back to a much smaller one.
if tf.config.list_physical_devices('GPU'):
  num_units = 4096
  print('The model will run with 4096 units on a GPU')
else:
  num_units = 64
  print('The model will run with 64 units on a CPU')

# Two ReLU dense layers of equal width; the layer objects keep their own
# names because a later cell inspects `dense1`.
dense1 = layers.Dense(num_units, activation='relu', name='dense_1')
dense2 = layers.Dense(num_units, activation='relu', name='dense_2')
x = dense2(dense1(inputs))

The model will run with 4096 units on a GPU


In [None]:
# Inspect the policy picked up by the layers: the layer's policy object,
# the dtype of its output tensor, and the dtype of its kernel variable.
print(dense1.dtype_policy)
print(f'x.dtype: {x.dtype.name}')
print(f'dense1.kernel.dtype: {dense1.kernel.dtype.name}')

<Policy "mixed_float16">
x.dtype: float16
dense1.kernel.dtype: float32


In [None]:
# INCORRECT: the softmax lives inside a layer that follows the global policy,
# so both the softmax and the model output are float16 instead of float32.
outputs = layers.Dense(10, activation='softmax', name='predictions')(x)
print(f'Outputs dtype: {outputs.dtype.name}')

Outputs dtype: float16


Для конкретного слоя можно переопределить его политику, для этого надо явно указать `dtype`.

Для выходного слоя и выходной функции активации лучше явно указывать `float32` для стабильности (но это необязательно)

In [None]:
# CORRECT: softmax and model output are float32.
# The logits layer follows the global policy; the softmax runs in a separate
# Activation layer whose dtype is pinned to float32.
# The logits get their own name instead of rebinding `x`, so re-running this
# cell does not stack another Dense(10) on top of the previous run's logits.
logits = layers.Dense(10, name='dense_logits')(x)
outputs = layers.Activation('softmax', dtype='float32', name='predictions')(logits)
print('Outputs dtype: %s' % outputs.dtype.name)

Outputs dtype: float32


In [None]:
# With a linear activation, dtype float32 simply means
# a cast from float16 to float32.
# outputs = layers.Activation('linear', dtype='float32')(outputs)

In [None]:
# Load MNIST, flatten each image to 784 features, and convert to float32
# divided by 255.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape((60000, 784)).astype('float32') / 255
x_test = x_test.reshape((10000, 784)).astype('float32') / 255

# Wrap the layers built above into a Model and configure it for training.
model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Snapshot the untrained weights; a later cell restores them with
# model.set_weights(initial_weights) before the custom training loop.
initial_weights = model.get_weights()

## Training the model with Model.fit

Next, train the model:

In [None]:
# Train for 5 epochs with a batch size of 8192, holding out 20% of the
# training data for validation.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=5,
    batch_size=8192,
    validation_split=0.2,
)

# Score the trained model on the test split; index 0 is reported as the loss
# and index 1 as the accuracy.
test_scores = model.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 0.2794 - accuracy: 0.9156 - 768ms/epoch - 2ms/step
Test loss: 0.2794015109539032
Test accuracy: 0.9156000018119812


In [None]:
# Repeat of the previous fit/evaluate cell. No weights are reset in between,
# so this call trains the same `model` object again and overwrites `history`
# and `test_scores`.
# NOTE(review): presumably kept to re-run training under the other policy for
# comparison -- confirm, or restore `initial_weights` first.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=5,
    batch_size=8192,
    validation_split=0.2,
)

test_scores = model.evaluate(x_test, y_test, verbose=2)
print('Test loss:', test_scores[0])
print('Test accuracy:', test_scores[1])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 0.5211 - accuracy: 0.8294 - 717ms/epoch - 2ms/step
Test loss: 0.5210518836975098
Test accuracy: 0.8294000029563904


## Loss scaling

При использовании `tf.keras.Model.fit` с `mixed_float16` масштабирование функции потерь (`loss scaling`) выполняется автоматически.

### Underflow and Overflow


In [None]:
# Overflow demo: square 256 in float16 (printed) and in float32 (cell output).
# 256 squared is 65536, which is above float16's largest finite value (65504).
x = tf.constant(256, dtype=tf.float16)
print(tf.pow(x, 2).numpy())

x = tf.constant(256, dtype=tf.float32)
tf.pow(x, 2).numpy()

inf


65535.996

In [None]:
# Underflow demo: square 1e-5 in float16 (printed) and in float32 (cell output).
# The exact result, 1e-10, is too small for float16 and prints as 0.0.
x = tf.constant(1e-5, dtype=tf.float16)
print(tf.pow(x, 2).numpy())

x = tf.constant(1e-5, dtype=tf.float32)
tf.pow(x, 2).numpy()

0.0


9.9999994e-11

### Loss scaling overview

```
loss_scale = 1024
loss = model(inputs)
loss *= loss_scale
# Assume `grads` are float32. You do not want to divide float16 gradients.
grads = compute_gradient(loss, model.trainable_variables)
grads /= loss_scale
```

* большой `loss scale` может привести к `overflow`
* маленький `loss scale` может привести к `underflow`

По дефолту он динамический


## Training the model with a custom training loop

Если хотим тренироваться с `float16`, то надо сделать два изменения по сравнению с `float32`:

1. Build the model with mixed precision (you already did this)
2. Explicitly use loss scaling if `mixed_float16` is used.


Для 2-го шага можно использовать `tf.keras.mixed_precision.LossScaleOptimizer`.

In [None]:
# RMSprop wrapped in a LossScaleOptimizer, which provides the loss-scaling
# helpers used by the custom training step.
optimizer = mixed_precision.LossScaleOptimizer(keras.optimizers.RMSprop())

In [None]:
# Loss used by the custom training step.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

# Training batches: shuffled with a 10000-element buffer, 8192 examples each.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(10000).batch(8192)

# Test batches: same batch size, original order.
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.batch(8192)

In [None]:
@tf.function
def train_step(x, y):
  """Run one optimization step on a single batch and return its loss.

  The loss is scaled before differentiation and the resulting gradients are
  unscaled before being applied, both through the module-level `optimizer`.

  Args:
    x: Batch of model inputs.
    y: Batch of labels matching `x`.

  Returns:
    The unscaled loss computed by `loss_object` for this batch.
  """
  with tf.GradientTape() as tape:
    # Call the model in training mode explicitly, mirroring the explicit
    # `training=False` used by `test_step`.
    predictions = model(x, training=True)
    loss = loss_object(y, predictions)
    # Differentiate the scaled loss, not the raw one.
    scaled_loss = optimizer.get_scaled_loss(loss)
  scaled_gradients = tape.gradient(scaled_loss, model.trainable_variables)
  # Undo the scaling before the update so the step uses true gradient values.
  gradients = optimizer.get_unscaled_gradients(scaled_gradients)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))
  return loss

In [None]:
@tf.function
def test_step(x):
  """Return the model's predictions for batch `x`, called with training=False."""
  predictions = model(x, training=False)
  return predictions

In [None]:
# Restore the weights captured in `initial_weights` before the earlier
# Model.fit runs, so the custom loop below does not continue from them.
model.set_weights(initial_weights)

In [None]:
for epoch in range(5):
  # Fresh metric objects every epoch, so each printed line covers one epoch only.
  epoch_loss_avg = tf.keras.metrics.Mean()
  test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

  # One pass over the training batches, accumulating the mean batch loss.
  for x, y in train_dataset:
    loss = train_step(x, y)
    epoch_loss_avg.update_state(loss)

  # One pass over the test batches, accumulating accuracy.
  for x, y in test_dataset:
    predictions = test_step(x)
    test_accuracy.update_state(y, predictions)

  print(f'Epoch {epoch}: loss={epoch_loss_avg.result()}, test accuracy={test_accuracy.result()}')

Epoch 0: loss=1.7596639394760132, test accuracy=0.48240000009536743
Epoch 1: loss=0.8342849016189575, test accuracy=0.7926999926567078
Epoch 2: loss=0.4011383056640625, test accuracy=0.7412999868392944
Epoch 3: loss=0.40254899859428406, test accuracy=0.9114000201225281
Epoch 4: loss=0.3263859450817108, test accuracy=0.9381999969482422
