# 1. Check system environment

In [1]:
import tensorflow as tf

2023-01-05 09:57:05.152295: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-05 09:57:06.236580: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-01-05 09:57:06.236675: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64


In [2]:
# Check tensor flow version
print("Tensor flow version : {}".format(tf.__version__))

Tensor flow version : 2.11.0


In [3]:
# Get the list of all logical GPU device on your notebook
gpus = tf.config.list_logical_devices('GPU')
gpu_nb = len(gpus)
if gpus:
    print(f"We have {gpu_nb} gpu available")
for gpu in gpus:
    print("Device Name:", gpu.name, "  Device Type:", gpu.device_type)

if gpu_nb == 0:
    raise SystemError('No GPU device found') 

We have 2 gpu available
Device Name: /device:GPU:0   Device Type: GPU
Device Name: /device:GPU:1   Device Type: GPU


2023-01-05 09:58:07.729014: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-05 09:58:09.408025: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13582 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:04:00.0, compute capability: 7.5
2023-01-05 09:58:09.409952: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13582 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:82:00.0, compute capability: 7.5


In [5]:
# Get the list of all logical CPU device on your notebook
cpus = tf.config.list_logical_devices('CPU')
cpu_nb = len(cpus)
if cpus:
    print(f"We have {cpu_nb} cpu available")
for cpu in cpus:
    print("Device Name:", cpu.name, "  Device Type:", cpu.device_type)

if cpu_nb == 0:
    raise SystemError('No CPU device found') 

We have 1 cpu available
Device Name: /device:CPU:0   Device Type: CPU


In [6]:
# Build a list for available GPU devices names
gpu_names = [x.name for x in gpus]
# Build a list for available CPU devices names
cpu_names = [x.name for x in cpus]

In [7]:
print(gpu_names[0])
print(cpu_names[0])

/device:GPU:0
/device:CPU:0


# 2. Define the operation to benchmark

To avoid downloading data, here we choose to define a simple function that multiply 2 random vectors of the given length. This is the function that we are going to benchmark over available devices (GPU and CPU).

In [8]:
def random_multiply(vector_length):
    vector_1 = tf.random.normal(vector_length)
    vector_2 = tf.random.normal(vector_length)
    return vector_1 * vector_2

In [9]:
def gpu_operation(vector_length):
    # If you have several GPU you can select the one to use by changing the used index
    with tf.device(gpu_names[0]):
        random_multiply(vector_length)

In [10]:
def cpu_operation(vector_length):
    # If you have several CPU you can select the one to use by changing the used index
    with tf.device(cpu_names[0]):
        random_multiply(vector_length)

In [None]:
# 3. Launch the benchmark of each device over several vectors of different lengths

In [11]:
import timeit

# We run each op once to warm up; see: https://stackoverflow.com/a/45067900
cpu_operation([1])
gpu_operation([1])

for i in range(8):
    vector_length = pow(10, i)
    cpu_time = timeit.timeit(f'cpu_operation([{vector_length}])', number=20, setup="from __main__ import cpu_operation")
    gpu_time = timeit.timeit(f'gpu_operation([{vector_length}])', number=20, setup="from __main__ import gpu_operation")
    print(f'Operations on vector of length {vector_length} are {cpu_time/gpu_time}x faster on GPU than CPU')

Operations on vector of length 1 are 0.629850086262482x faster on GPU than CPU
Operations on vector of length 10 are 0.6582134305981338x faster on GPU than CPU
Operations on vector of length 100 are 0.6801479576564236x faster on GPU than CPU
Operations on vector of length 1000 are 0.7547438530466873x faster on GPU than CPU
Operations on vector of length 10000 are 1.5366686471375353x faster on GPU than CPU
Operations on vector of length 100000 are 1.9995076976573303x faster on GPU than CPU
Operations on vector of length 1000000 are 5.044044423726084x faster on GPU than CPU
Operations on vector of length 10000000 are 59.587983243386155x faster on GPU than CPU
