# TensorFlow Profiler: Profile Model Performance

In [1]:
from datetime import datetime
from packaging import version

import os 

import tensorflow as tf
from tensorflow.keras import (
    layers,
    Sequential, 
    optimizers,
    callbacks
)
import tensorflow_datasets as tfds

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Ask TensorFlow for the name of a GPU device. An empty string means none was
# found, in which case the notebook stops here instead of continuing.
device_name = tf.test.gpu_device_name()
if device_name:
    print(f'Found GPU at: {device_name}')
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0
Metal device set to: Apple M1 Pro

systemMemory: 32.00 GB
maxCacheSize: 10.67 GB



## Train an Image Classification Model with TensorBoard Callbacks

In [3]:
# Turn off TFDS progress bars before loading the dataset.
tfds.disable_progress_bar()

# Load the MNIST 'train' and 'test' splits as (image, label) pairs
# (as_supervised=True) together with the dataset metadata (with_info=True).
mnist_splits, ds_info = tfds.load(
    name='mnist',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True,
    shuffle_files=True,
)
ds_train, ds_test = mnist_splits

In [4]:
def normalize_img(image, label):
    """Cast `image` to float32 and divide it by 255; pass `label` through.

    Args:
        image: Image tensor to rescale.
        label: Label paired with the image; returned unchanged.

    Returns:
        A `(scaled_image, label)` tuple.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

# Training input pipeline: rescale every example, then group into batches of 128.
# NOTE: this reassigns `ds_train`, so run the cell once per freshly loaded dataset.
ds_train = (
    ds_train
    .map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(128)
)


In [5]:
# Evaluation input pipeline: same rescaling and batch size as the training data.
# NOTE: this reassigns `ds_test`, so run the cell once per freshly loaded dataset.
ds_test = (
    ds_test
    .map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(128)
)

In [6]:
# Build the classifier one layer at a time: flatten the (28, 28, 1) input,
# apply a 128-unit ReLU hidden layer, and finish with a 10-unit softmax output.
model = Sequential()
model.add(layers.Flatten(input_shape=(28, 28, 1)))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

# Integer labels -> sparse categorical cross-entropy; Adam with learning rate
# 0.001; accuracy is tracked as the reported metric.
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


# Per-run log directory: 'logs/' followed by the current local time as
# YYYYMMDD-HHMMSS, so each run writes to its own folder.
logs = f'logs/{datetime.now():%Y%m%d-%H%M%S}'
# TensorBoard callback writing to this run's log directory: histograms are
# computed every epoch (histogram_freq=1) and the profiler samples batches
# 500 through 520 (profile_batch='500,520').
tensorboard_cb = callbacks.TensorBoard(
    log_dir=logs,
    histogram_freq=1,
    profile_batch='500,520',
)

In [7]:
print('The training has begun. Go for a coffee!!')

# Train for 20 epochs, validating on `ds_test` and logging through the
# TensorBoard callback; verbose=2 prints one summary line per epoch.
history = model.fit(
    x=ds_train,
    epochs=20,
    verbose=2,
    validation_data=ds_test,
    callbacks=[tensorboard_cb],
)

print('The training has finished.')
# The value reported is the training-set accuracy recorded for the last epoch.
print(f'The accuracy of the model is {round(history.history["accuracy"][-1], 4)}')

The training has begun. Go for a coffee!!
Epoch 1/20


InvalidArgumentError: Multiple Default OpKernel registrations match NodeDef '{{node ZerosLike}}': 'op: "ZerosLike" device_type: "DEFAULT" constraint { name: "T" allowed_values { list { type: DT_INT32 } } } host_memory_arg: "y"' and 'op: "ZerosLike" device_type: "DEFAULT" constraint { name: "T" allowed_values { list { type: DT_INT32 } } } host_memory_arg: "y"' [Op:ZerosLike]

## Use the TensorFlow Profiler to Profile Model Training Performance

In [None]:
%load_ext tensorboard
%tensorboard --logdir=logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 13680), started 0:03:19 ago. (Use '!kill 13680' to kill it.)