In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.client import device_lib
import numpy as np
import matplotlib.pyplot as plt

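# Source
This notebook follows the Analytics Vidhya article on benchmarking CPU and GPU performance with TensorFlow:
https://www.analyticsvidhya.com/blog/2021/11/benchmarking-cpu-and-gpu-performance-with-tensorflow/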

## Checking Hardware

In [3]:
# Show every GPU device TensorFlow can see on this machine
print(tf.config.list_physical_devices(device_type='GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [4]:
# Report whether TensorFlow detected at least one GPU
# (an empty device list is falsy, so it selects the "not available" message)
print('GPU is available' if tf.config.list_physical_devices('GPU') else 'GPU is not available')

GPU is available


In [13]:
# Show every CPU device TensorFlow can see on this machine
print(tf.config.list_physical_devices(device_type='CPU'))

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [15]:
# Report whether TensorFlow detected at least one CPU device
# (an empty device list is falsy, so it selects the "not available" message)
print('CPU is available' if tf.config.list_physical_devices('CPU') else 'CPU is not available')

CPU is available


In [28]:
# Print the details TensorFlow reports for every detected GPU.
# Iterating over the list (rather than indexing [0] and [1]) keeps this cell
# working on machines with zero, one, or more than two GPUs; with exactly two
# GPUs the printed output is the same as before.
gpu_devices = tf.config.list_physical_devices('GPU')
for gpu_device in gpu_devices:
    print(tf.config.experimental.get_device_details(gpu_device))

{'device_name': 'DML'}
{'device_name': 'DML'}


In [29]:

# List all local devices (CPU and GPUs) with their memory limits.
# NOTE(review): device_lib comes from tensorflow.python.client, which looks like
# an internal module — confirm it is still available when upgrading TensorFlow.
device_lib.list_local_devices(session_config=None)

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 7114795605828337976
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 46451316736
 locality {
   bus_id: 1
 }
 incarnation: 14662518364814417620
 physical_device_desc: "device: 0, name: DML, pci bus id: <undefined>"
 xla_global_id: -1,
 name: "/device:GPU:1"
 device_type: "GPU"
 memory_limit: 31584231040
 locality {
   bus_id: 1
 }
 incarnation: 2048875632983690336
 physical_device_desc: "device: 1, name: DML, pci bus id: <undefined>"
 xla_global_id: -1]

## Exploratory Data Analysis (EDA)

In [5]:
# Load CIFAR-10 via Keras (downloaded on first use) and unpack it into
# train/test image arrays (X_*) and their label arrays (y_*).
(X_train, y_train), (X_test, y_test) = keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [6]:
# Shapes of the training and test image arrays, in that order
tuple(split.shape for split in (X_train, X_test))

((50000, 32, 32, 3), (10000, 32, 32, 3))

In [8]:
# Shape of a single training image (all dimensions after the sample axis)
X_train.shape[1:]

(32, 32, 3)

In [10]:
# Peek at the first five training labels (one label per row)
y_train[:5, :]

array([[6],
       [9],
       [9],
       [4],
       [1]], dtype=uint8)

## Pre-Processing

In [11]:
# Scale image values into the 0-1 range.
# Cast to float32 before dividing: a plain `X_train/255` on the integer image
# arrays promotes to float64, doubling memory for data that the one-hot labels
# below (and the model) handle as float32 anyway.
X_train_scaled = X_train.astype('float32') / 255.0
X_test_scaled = X_test.astype('float32') / 255.0
# One-hot encode the integer class labels into 10-column float32 vectors
y_train_encoded = keras.utils.to_categorical(y_train, num_classes=10, dtype='float32')
y_test_encoded = keras.utils.to_categorical(y_test, num_classes=10, dtype='float32')

## Model

In [12]:
def get_model(input_shape=(32, 32, 3), num_classes=10):
    """Build and compile the dense benchmark classifier.

    The network flattens each image and passes it through two ReLU dense
    layers (3000 and 1000 units) before a ``num_classes``-way output layer.

    Args:
        input_shape: Shape of one input image. Defaults to ``(32, 32, 3)``,
            matching the images loaded earlier in this notebook.
        num_classes: Number of output classes. Defaults to 10, matching the
            one-hot label width used in this notebook.

    Returns:
        A compiled ``keras.Sequential`` model using the SGD optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=input_shape),
        keras.layers.Dense(3000, activation='relu'),
        keras.layers.Dense(1000, activation='relu'),
        # softmax (not sigmoid): categorical_crossentropy on one-hot,
        # single-label targets expects the outputs to form a probability
        # distribution over the classes.
        keras.layers.Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        optimizer='SGD',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [30]:
%%timeit -n1 -r1
with tf.device('/CPU:0'):
    model_cpu = get_model()
    model_cpu.fit(X_train_scaled, y_train_encoded, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [31]:
%%timeit -n1 -r1
# GPU
with tf.device('/GPU:0'):
    model_gpu = get_model()
    model_gpu.fit(X_train_scaled, y_train_encoded, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
40.6 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [32]:
%%timeit -n1 -r1
# GPU
with tf.device('/GPU:1'):
    model_gpu = get_model()
    model_gpu.fit(X_train_scaled, y_train_encoded, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
4min 7s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
