## Guide to tf.keras for TPUs on Colabs

Here is a very quick implementation and walkthrough showing how to use TPUs with Keras in Colab

License: Apache 2.0


In [None]:
import numpy as np

import tensorflow as tf
import time
import os

import tensorflow.keras
from tensorflow.keras.datasets import mnist, fashion_mnist
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten,Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D


In [None]:
# Report which TensorFlow version this runtime has installed.
print("Running TensorFlow: %s" % tf.__version__)

## Check for TPU

To test whether you have a TPU set up, run the cell below

If no TPU is found, click on Runtime (in the menu at the top)
    - Choose "Change Runtime Type" and select TPU from the dropdown menu

The TPU_ADDRESS variable will be passed into the distribution strategy in the code later

In [None]:
# Look up the TPU address in the COLAB_TPU_ADDR environment variable.
# If the variable is missing, only a message is printed and TPU_ADDRESS is
# left undefined - change the Runtime type to TPU and run this cell again.
try:
    device_name = os.environ['COLAB_TPU_ADDR']
except KeyError:
    print('TPU not found')
else:
    TPU_ADDRESS = 'grpc://' + device_name
    print('Found TPU at: {}'.format(TPU_ADDRESS))

### Hello World in Machine Learning

In [None]:
# Training hyperparameters.
# Notice the batch size: it is a multiple of 128 (2048 or 4096 is also fine).
batch_size = 1024
num_classes = 10
epochs = 5  # Test fast, fail fast
learning_rate = 0.001

# Input image dimensions - typical of (Fashion) MNIST; the images are square
img_rows = img_cols = 28

In [None]:
# Image and label data, shuffled and split between train and test sets.
# load_data() hands back two (images, labels) pairs: training first, test second.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [None]:
# Give every image an explicit trailing channel axis of size 1, so that each
# example has shape (img_rows, img_cols, 1). The same per-example shape is
# kept in input_shape for the model's input layer.
input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape(x_train.shape[0], *input_shape)
x_test = x_test.reshape(x_test.shape[0], *input_shape)

In [None]:
# Normalize the input images: cast to float32, then divide every value by 255
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Quick sanity check of the resulting array sizes
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

In [None]:
# One-hot encode the class labels into num_classes columns, as expected by
# the categorical cross-entropy loss used when compiling the model.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)

## Using tf.data

Make sure you set `drop_remainder=True`, as TPUs need batches with a fixed shape

In [None]:
def train_input_fn(batch_size=1024):
    """Build the training input pipeline as a ``tf.data.Dataset``.

    Args:
        batch_size: Number of examples in every batch.

    Returns:
        A dataset over ``(x_train, y_train)`` that is cached, shuffled
        through a 1000-element buffer (reshuffled on each iteration),
        repeated, and grouped into batches of exactly ``batch_size``.
    """
    return (
        tf.data.Dataset.from_tensor_slices((x_train, y_train))
        # Loads the data into memory since it is such a small dataset
        .cache()
        .shuffle(1000, reshuffle_each_iteration=True)
        .repeat()
        # TPUs need to have a fixed shape, so a short final batch is dropped
        .batch(batch_size, drop_remainder=True)
    )

In [None]:
def test_input_fn(batch_size=1024):
    """Build the evaluation input pipeline as a ``tf.data.Dataset``.

    Unlike the training pipeline, the examples are NOT shuffled: shuffling
    does nothing useful for evaluation and, because batches are cut from the
    repeated stream, it makes the reported metrics vary from run to run.

    Args:
        batch_size: Number of examples in every batch.

    Returns:
        A dataset over ``(x_test, y_test)`` that is cached, repeated, and
        grouped into batches of exactly ``batch_size``.
    """
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))

    # Cache, repeat, and batch the examples (deterministic order).
    dataset = dataset.cache()
    # Keep repeating so callers that request a fixed number of steps never
    # run out of data.
    dataset = dataset.repeat()
    dataset = dataset.batch(batch_size, drop_remainder=True)  # TPUs need to have a fixed shape

    # Return the dataset.
    return dataset

## The Convolutional NN Model

Pass in an input shape and batch size as TPUs (and XLA) require fixed shapes 

Rest of the model definition is a typical Convolutional NN 

In [None]:
# Functional-API definition of the network. The input layer fixes both the
# per-example shape and the batch size, since TPUs (and XLA) require fixed shapes.
Input_layer = tf.keras.Input(
    shape=input_shape, batch_size=batch_size, dtype=tf.float32, name='input')

# Convolution / max-pooling stack
x = Conv2D(32, kernel_size=(3, 3), activation='relu', name='Conv_01')(Input_layer)
x = MaxPooling2D(pool_size=(2, 2), name='MaxPool_01')(x)
x = Conv2D(64, kernel_size=(3, 3), activation='relu', name='Conv_02')(x)
x = MaxPooling2D(pool_size=(2, 2), name='MaxPool_02')(x)
x = Conv2D(64, kernel_size=(3, 3), activation='relu', name='Conv_03')(x)

# Classifier head: flatten, one hidden dense layer with dropout, then a
# softmax over the num_classes outputs
x = Flatten(name='Flatten_01')(x)
x = Dense(64, activation='relu', name='Dense_01')(x)
x = Dropout(0.5, name='Dropout_02')(x)
Output_layer = Dense(num_classes, activation='softmax', name='Dense_02')(x)

In [None]:
# Wrap the layer graph defined above (input tensor -> softmax output) in a
# Keras Model object.
model = Model(inputs=[Input_layer], outputs=[Output_layer])

In [None]:
# Debug the model definition to check if everything looks good:
# summary() prints an overview of the model's layers.
model.summary()

In [None]:
# Use a TensorFlow optimizer rather than a Keras one for now
opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

# Categorical cross-entropy loss, with accuracy reported under the name 'acc'
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['acc'])

## Creating the TPU from a Keras Model

`tf.contrib.tpu.keras_to_tpu_model` will eventually go away, and you will instead pass a distribution strategy into `model.compile`; for TensorFlow 1.13.1, however, this approach works.

We can see this is a TPUv2 with 8 cores  

For batching you want to have a batch of 128 per core so 1024 overall  

You could also use 128, 256, 512 etc 

In [None]:
# Build the TPU version of the Keras model step by step: resolve the TPU
# found earlier (TPU_ADDRESS), wrap it in a TPU distribution strategy, then
# convert the compiled Keras model with that strategy.
tpu_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)
tpu_strategy = tf.contrib.tpu.TPUDistributionStrategy(tpu_resolver)
tpu_model = tf.contrib.tpu.keras_to_tpu_model(model, strategy=tpu_strategy)

In [None]:
# Print the summary of the converted TPU model.
tpu_model.summary()

## Training using tf.data pipeline 

Training (Fashion) MNIST on a TPU is a bit overkill and the TPU barely gets a chance to warm up

In [None]:
# Train on the TPU. fit() is handed a callable that builds the dataset rather
# than a dataset object. Both the dataset's batch size and the length of an
# epoch are derived from the `batch_size` hyperparameter, so changing that one
# value (e.g. to 2048 or 4096) keeps the input pipeline, the model's fixed
# input batch size and the epoch length consistent with each other.
tpu_model.fit(
    lambda: train_input_fn(batch_size),
    # One epoch = one pass over the training set in full batches
    steps_per_epoch=x_train.shape[0] // batch_size,
    epochs=epochs,
)

In [None]:
# Save the model weights to a .h5 file in the working directory; the file
# name embeds the batch size, and overwrite=True is passed explicitly.
tpu_model.save_weights('./Fash_MNIST_TPU_%d.h5' % batch_size, overwrite=True)

In [None]:
# Evaluate on the test pipeline. The input function is passed uncalled, so
# its default batch size (1024) applies, and exactly 100 batches are consumed.
# NOTE(review): that is 102,400 examples, presumably several passes over the
# test set, which works only because test_input_fn repeats its data - confirm
# that this is intended.
tpu_model.evaluate(test_input_fn,
    steps = 100)

### Converting the model back to a CPU model

In [None]:
# Convert the TPU model back to a CPU model and keep a handle to the result.
# NOTE(review): presumably the returned model carries the trained weights -
# confirm against the tf.contrib.tpu documentation.
cpu_model = tpu_model.sync_to_cpu()