# Requirements

- You have a GPU!
- You have CUDA installed 
  - conda install -c anaconda cudatoolkit
- Install TensorFlow (Keras is buid on TF)
  - Install instructions https://www.tensorflow.org/install/gpu
  - conda install -c anaconda tensorflow-gpu

## Enabling GPU in Colab

Edit→Notebook Settings


In [13]:
%tensorflow_version 2.x
import tensorflow as tf

In [2]:
from tensorflow.python.client import device_lib

In [3]:
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 6298714310650341784
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14638920512
locality {
  bus_id: 1
  links {
  }
}
incarnation: 4895616935981846626
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
]


## Test GPU 


In [14]:
%tensorflow_version 2.x
import tensorflow as tf
import timeit


with tf.device('/cpu:0'):
  random_image_cpu = tf.random.normal((100, 100, 100, 3))
  net_cpu = tf.keras.layers.Conv2D(32, 7)(random_image_cpu)

with tf.device('/device:GPU:0'):
  random_image_gpu = tf.random.normal((100, 100, 100, 3))
  net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
  


In [27]:
import numpy as np
import keras 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

mnist = tf.keras.datasets.mnist

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()


# Normalize the images.
train_images = (train_images / 255) - 0.5
test_images = (test_images / 255) - 0.5

# Flatten the images.
train_images = train_images.reshape((-1, 784))
test_images = test_images.reshape((-1, 784))



In [33]:

with tf.device('/cpu:0'):

  # Build the model.
  model = Sequential([
    Dense(64, activation='relu', input_shape=(784,)),
    Dense(100, activation='relu'),
    Dense(10, activation='softmax'),
  ])

  # Compile the model.
  model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
  )

  # Train the model.
  model.fit(
    train_images,
    to_categorical(train_labels),
    epochs=5,
    batch_size=32,
  )

  # Evaluate the model.
  model.evaluate(
    test_images,
    to_categorical(test_labels)
  )



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [34]:
with tf.device('/device:GPU:0'):

  # Build the model.
  model = Sequential([
    Dense(64, activation='relu', input_shape=(784,)),
    Dense(100, activation='relu'),
    Dense(10, activation='softmax'),
  ])

  # Compile the model.
  model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
  )

  # Train the model.
  model.fit(
    train_images,
    to_categorical(train_labels),
    epochs=5,
    batch_size=32,
  )

  # Evaluate the model.
  model.evaluate(
    test_images,
    to_categorical(test_labels)
  )



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [57]:
# python 3.6
import numpy as np
import tensorflow as tf
from tensorflow.python.client import device_lib
from timeit import default_timer as timer

model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

model.compile(optimizer=tf.compat.v1.train.AdamOptimizer(0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])


def random_one_hot_labels(shape):
    n, n_class = shape
    classes = np.random.randint(0, n_class, n)
    tmp_labels = np.zeros((n, n_class))
    tmp_labels[np.arange(n), classes] = 1
    return tmp_labels


data = np.random.random((800, 32))
labels = random_one_hot_labels((800, 10))

devices=device_lib.list_local_devices()
dev = [x.name for x in devices]

for d in dev:
  durations = []
  for i in range(5):  # run N times
    with tf.device(d):
      start = timer()
      model.fit(data, labels, epochs=50, batch_size=32,verbose=0)
      durations.append(timer() - start)

  print(d)
  print(f"model.fit durations: {durations}")

/device:CPU:0
model.fit durations: [1.8927598139998736, 1.6335957519995645, 1.6406057089998285, 1.6244155849999515, 1.6224856520002504]
/device:GPU:0
model.fit durations: [2.6461376830002337, 2.571619790999648, 2.533031289999599, 2.557901645000129, 2.529399165000541]
