# CPU/GPU WORK

In [0]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
import time

Load Dataset

In [0]:
# Fetch the CIFAR-10 train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
# Display the array shapes as a quick check of what was loaded.
tuple(split.shape for split in (x_train, y_train, x_test))

((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3))

In [0]:
# Convert the images to float32 and scale them by 1/255.
# The integer-dtype guard keeps this cell idempotent: without it, running the
# cell a second time would divide the already-scaled floats by 255 again.
if np.issubdtype(x_train.dtype, np.integer):
  x_train = x_train.astype('float32') / 255.
if np.issubdtype(x_test.dtype, np.integer):
  x_test = x_test.astype('float32') / 255.

In [0]:
# One-hot encode the class labels into 10 columns.
# Only encode while the labels are still a single column (the loader returns
# shape (N, 1)); re-running the cell on an already one-hot matrix would feed
# the 0/1 entries back into to_categorical and produce a wrongly shaped array.
if y_train.shape[-1] == 1:
  y_train = keras.utils.to_categorical(y_train, num_classes=10)
if y_test.shape[-1] == 1:
  y_test = keras.utils.to_categorical(y_test, num_classes=10)
y_train.shape

(50000, 10)

Create Model

In [0]:
def get_model():
  """Build and compile the small CNN used for the timing runs.

  Returns:
    A compiled `keras.Sequential` model that takes 32x32x3 inputs and ends
    in a 10-way softmax. It is compiled with the 'rmsprop' optimizer,
    categorical cross-entropy loss and the 'acc' metric.
  """
  stack = [keras.Input(shape=(32, 32, 3))]

  # Two conv + max-pool stages, followed by an unpadded 5x5 convolution.
  stack += [
      layers.Conv2D(32, 3, padding='same', activation='relu'),
      layers.MaxPooling2D(2),
      layers.Conv2D(64, 3, padding='same', activation='relu'),
      layers.MaxPooling2D(2),
      layers.Conv2D(128, 5, activation='relu'),
  ]

  # Dense classifier head.
  stack += [
      layers.Flatten(),
      layers.Dense(512, activation='relu'),
      layers.Dense(10, activation='softmax'),
  ]

  model = keras.Sequential(stack)
  model.compile(
      optimizer='rmsprop',
      loss='categorical_crossentropy',
      metrics=['acc'],
  )
  return model

In [0]:
# Build the compiled CNN and print its layer-by-layer summary.
model = get_model()
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 32, 32, 32)        896       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 16, 16, 64)        18496     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 8, 8, 64)          0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 4, 4, 128)         204928    
_________________________________________________________________
flatten_1 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)              

Set Training Method

In [0]:
class TimeHistory(keras.callbacks.Callback):
    """Keras callback that records how long each training epoch takes.

    After training, `times` holds one elapsed-seconds float per epoch, in
    the order the epochs ran.
    """

    def on_train_begin(self, logs=None):
        # Reset on every fit so a reused callback does not accumulate old runs.
        self.times = []

    def on_epoch_begin(self, batch, logs=None):
        # The first positional argument keeps its original name `batch`;
        # Keras passes the epoch index in this slot. It is unused here.
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        self.epoch_time_start = time.perf_counter()

    def on_epoch_end(self, batch, logs=None):
        # Elapsed seconds since the matching on_epoch_begin call.
        self.times.append(time.perf_counter() - self.epoch_time_start)

In [0]:
def train_model(model, data, batch_size, epochs=1):
  """Fit `model` on `data` and return the per-epoch durations.

  Args:
    model: Compiled Keras model to train.
    data: Indexable pair; `data[0]` is passed to `fit` as inputs and
      `data[1]` as targets.
    batch_size: Batch size forwarded to `fit`.
    epochs: Number of epochs to run (default 1).

  Returns:
    The list of elapsed seconds collected by a `TimeHistory` callback,
    one entry per epoch.
  """
  inputs, targets = data[0], data[1]
  timer = TimeHistory()
  model.fit(
      inputs,
      targets,
      batch_size=batch_size,
      epochs=epochs,
      callbacks=[timer],
  )
  return timer.times

In [0]:
# Base batch sizes to benchmark: the powers of two 8, 16, 32, 64 and 128.
batch_list = [2 ** exponent for exponent in range(3, 8)]

In [0]:
# Whole batches per epoch for each base batch size (floor division, so a
# trailing partial batch is not counted).
steps_per_epochs = [len(x_train) // size for size in batch_list]
steps_per_epochs

[6250, 3125, 1562, 781, 390]

Train and Check times

In [0]:
# One timing run per base batch size. The batch size handed to `fit` is eight
# times the listed value, and the same `model` instance is reused for every run.
timelists = []
for base_size in batch_list:
  epoch_times = train_model(model, (x_train, y_train), base_size * 8)
  timelists.append(epoch_times)



In [0]:
# Persist the timing results together with the configuration that was actually
# trained. The training cell passes `bs*8` to `fit`, so logging only the base
# batch size and the floor-divided step counts for that base size would
# describe runs that never happened.
GPU_BATCH_MULTIPLIER = 8  # must match the factor applied in the training cell
num_examples = x_train.shape[0]
with open('gpu_vals4.txt', 'w') as f:
    for base_bs, epoch_times in zip(batch_list, timelists):
        trained_bs = base_bs * GPU_BATCH_MULTIPLIER
        # Ceiling division: with array inputs `fit` also runs the final,
        # smaller batch, so it counts as a step.
        trained_steps = -(-num_examples // trained_bs)
        f.write(
            f"batch_size {base_bs} trained_batch_size {trained_bs} "
            f"steps/epoch {trained_steps} seconds_consumed {epoch_times}\n"
        )

# TPU WORK

In [0]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
import time
import os

Load Dataset

In [0]:
# Load CIFAR-10 and preprocess it in a single cell, so re-running always
# starts again from the freshly loaded arrays.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Images: cast to float32 and scale by 1/255.
x_train, x_test = (
    images.astype('float32') / 255. for images in (x_train, x_test)
)

# Labels: one-hot encode into 10 classes.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes=10)
    for labels in (y_train, y_test)
)

Dataset Function

In [0]:
def train_input_fn(batch_size=1024):
    """Build the training `tf.data.Dataset` from the module-level arrays.

    Reads the global `x_train` / `y_train`. The pipeline caches the
    elements, shuffles with a 1000-element buffer (reshuffled on every
    iteration), repeats indefinitely, and emits fixed-size batches with any
    incomplete batch dropped.

    Args:
        batch_size: Number of examples per emitted batch (default 1024).

    Returns:
        The batched, endlessly repeating dataset.
    """
    return (
        tf.data.Dataset.from_tensor_slices((x_train, y_train))
        .cache()
        .shuffle(1000, reshuffle_each_iteration=True)
        .repeat()
        .batch(batch_size, drop_remainder=True)
    )

Get TPU Address

In [0]:
# Look up the TPU address from the environment and report the outcome.
# TPU_ADDRESS is only defined when the variable is present.
try:
  device_name = os.environ['COLAB_TPU_ADDR']
except KeyError:
  print('TPU not found')
else:
  TPU_ADDRESS = 'grpc://' + device_name
  print('Found TPU at: {}'.format(TPU_ADDRESS))

Found TPU at: grpc://10.89.76.218:8470


Create Model using Tensorflow optimizer

In [0]:
def get_model():
  """Build and compile the CNN used for the TPU timing runs.

  Returns:
    A compiled `keras.Sequential` model that takes 32x32x3 inputs and ends
    in a 10-way softmax. It is compiled with a `tf.optimizers.RMSprop`
    instance, categorical cross-entropy loss and the 'acc' metric.
  """
  stack = [keras.Input(shape=(32, 32, 3))]

  # Two conv + max-pool stages, followed by an unpadded 5x5 convolution.
  stack += [
      layers.Conv2D(32, 3, padding='same', activation='relu'),
      layers.MaxPooling2D(2),
      layers.Conv2D(64, 3, padding='same', activation='relu'),
      layers.MaxPooling2D(2),
      layers.Conv2D(128, 5, activation='relu'),
  ]

  # Dense classifier head.
  stack += [
      layers.Flatten(),
      layers.Dense(512, activation='relu'),
      layers.Dense(10, activation='softmax'),
  ]

  model = keras.Sequential(stack)

  # An optimizer object from the tf namespace is passed here instead of a
  # Keras optimizer name string.
  rmsprop = tf.optimizers.RMSprop()
  model.compile(
      optimizer=rmsprop,
      loss='categorical_crossentropy',
      metrics=['acc'],
  )
  return model

Move Model to TPU

In [0]:
# Earlier variant that connected using the explicit TPU_ADDRESS; kept for
# reference only and not executed:
# tf.config.experimental_connect_to_host(TPU_ADDRESS)
# resolver = tf.distribute.cluster_resolver.TPUClusterResolver(TPU_ADDRESS)
# tf.tpu.experimental.initialize_tpu_system(resolver)
# strategy = tf.distribute.experimental.TPUStrategy(resolver)

# Resolve the TPU cluster without passing an address, then print its workers.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])

# Connect to the resolved cluster, initialise the TPU system, then build the
# distribution strategy from the same resolver.
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
strategy = tf.distribute.experimental.TPUStrategy(tpu)

# Report how many replicas the strategy keeps in sync.
print("REPLICAS: ", strategy.num_replicas_in_sync)

Running on TPU  ['10.89.76.218:8470']




INFO:tensorflow:Initializing the TPU system: grpc://10.89.76.218:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.89.76.218:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


REPLICAS:  8


In [0]:
# Build and compile the model inside the TPU strategy's scope.
# NOTE(review): presumably this is what places the model's variables on the
# TPU replicas - confirm against the tf.distribute documentation.
with strategy.scope():
  tpu_model = get_model()

Set Training Method

In [0]:
class TimeHistory(keras.callbacks.Callback):
    """Keras callback that records how long each training epoch takes.

    After training, `times` holds one elapsed-seconds float per epoch, in
    the order the epochs ran.
    """

    def on_train_begin(self, logs=None):
        # Reset on every fit so a reused callback does not accumulate old runs.
        self.times = []

    def on_epoch_begin(self, batch, logs=None):
        # The first positional argument keeps its original name `batch`;
        # Keras passes the epoch index in this slot. It is unused here.
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        self.epoch_time_start = time.perf_counter()

    def on_epoch_end(self, batch, logs=None):
        # Elapsed seconds since the matching on_epoch_begin call.
        self.times.append(time.perf_counter() - self.epoch_time_start)

In [0]:
def train_model(model, bs, epochs=1):
  """Fit `model` on the `train_input_fn` dataset and return epoch durations.

  Args:
    model: Compiled Keras model to train.
    bs: Batch size passed to `train_input_fn`; also used to derive the
      number of steps per epoch from the size of the global `x_train`.
    epochs: Number of epochs to run (default 1).

  Returns:
    The list of elapsed seconds collected by a `TimeHistory` callback,
    one entry per epoch.
  """
  # The dataset repeats indefinitely, so the epoch length is given
  # explicitly as the number of whole batches in the training set.
  steps = x_train.shape[0] // bs
  timer = TimeHistory()
  model.fit(
      train_input_fn(batch_size=bs),
      epochs=epochs,
      steps_per_epoch=steps,
      callbacks=[timer],
  )
  return timer.times

Run on multiple batches

In [0]:
# Base batch sizes to benchmark: the powers of two 8, 16, 32, 64 and 128.
batch_list = [2 ** exponent for exponent in range(3, 8)]

In [0]:
# Multiplier applied to each base batch size in the timing loop. The value is
# hard-coded; it equals the "REPLICAS: 8" figure printed during TPU setup.
num_tpus = 8

In [0]:
# One timing run per base batch size. The batch size handed to the training
# helper is the base size multiplied by `num_tpus`, and the same `tpu_model`
# instance is reused for every run.
timelists = []
for base_size in batch_list:
  epoch_times = train_model(tpu_model, base_size * num_tpus)
  timelists.append(epoch_times)



In [0]:
# Persist one line per run: base batch size, scaled batch size, epoch times.
with open('tpu_vals3.txt', 'w') as out:
    for base_bs, epoch_times in zip(batch_list, timelists):
        out.write(f"bs {base_bs} bs*num_tpus(8) {base_bs*num_tpus} time {epoch_times}\n")

TPU performance is heavily dependent on batch size: as TensorFlow's documentation says, you need a batch size of at least 128 for the TPU to divide the work properly.