In [13]:
import tensorflow as tf

# Ask TensorFlow which physical GPUs it can see; an empty list means no GPU
# is visible to this kernel.
# `tf.config.list_physical_devices` is the stable spelling of the older
# `tf.config.experimental.list_physical_devices` alias.
gpu = tf.config.list_physical_devices("GPU")
print(gpu)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [14]:
# List every local device TensorFlow reports (CPU and GPU) with its details;
# the bare expression on the last line is what the cell displays.
# NOTE(review): `tensorflow.python.*` is an internal module path rather than
# documented public API, so this import may break between TensorFlow releases.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

2022-02-01 20:18:26.635814: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 11618285609678135448
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 14238613504
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 16291868757930179493
 physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3080 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6"
 xla_global_id: 416903419]

2022-02-01 20:18:26.636324: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-02-01 20:18:26.636516: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-02-01 20:18:26.637309: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-02-01 20:18:26.637321: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1609] Could not identify NUMA node of platform GPU id 0, defaulting to 0.  Your kernel may not have been built with NUMA support.
2022-02-01 20:18:26.637865: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read 

# Model 1: CIFAR10

In [2]:
from tensorflow import keras
from tensorflow.keras.layers import Flatten, Dense
import numpy as np
import matplotlib.pyplot as plt

# Load the CIFAR-10 train/test splits through Keras.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Scale the pixel values by 1/255. Casting to float32 first keeps the arrays
# at 4 bytes per value; dividing the raw integer arrays by a Python float
# promotes them to float64 and doubles the memory held by the kernel.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# One-hot encode the integer class labels (10 classes) for use with the
# "categorical_crossentropy" loss.
y_train_ohc = tf.keras.utils.to_categorical(y_train, num_classes=10, dtype='float32')
y_test_ohc = tf.keras.utils.to_categorical(y_test, num_classes=10, dtype='float32')

def get_model_1():
    """Build and compile a fully connected classifier for 32x32x3 inputs.

    Architecture: Flatten -> Dense(3000, relu) -> Dense(1000, relu)
    -> Dense(10, softmax).

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer, the
        "categorical_crossentropy" loss (so labels must be one-hot encoded)
        and the "accuracy" metric.
    """
    model = tf.keras.models.Sequential([
        Flatten(input_shape=(32, 32, 3)),
        Dense(3000, activation='relu'),
        Dense(1000, activation='relu'),
        # softmax rather than sigmoid: the ten classes are mutually exclusive
        # and categorical cross-entropy expects the outputs to form a single
        # probability distribution over them.
        Dense(10, activation='softmax'),
    ])
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [3]:
# CPU vs GPU testing
def test_cpu_1(epochs=5):
    """Build a fresh ``get_model_1`` network and fit it under the CPU device scope.

    Args:
        epochs: Number of passes over ``x_train`` / ``y_train_ohc``.
    """
    with tf.device("/CPU:0"):
        # Model creation and training both happen inside the device scope.
        get_model_1().fit(x_train, y_train_ohc, epochs=epochs)

def test_gpu_1(epochs=5):
    """Build a fresh ``get_model_1`` network and fit it under the GPU device scope.

    Args:
        epochs: Number of passes over ``x_train`` / ``y_train_ohc``.
    """
    with tf.device("/GPU:0"):
        # Model creation and training both happen inside the device scope.
        get_model_1().fit(x_train, y_train_ohc, epochs=epochs)


In [10]:

# Time a single CPU training run (-n1: one loop, -r1: one repeat). The figure
# covers the whole call, including building and compiling the model.
%timeit -n1 -r1 test_cpu_1(epochs=5)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
4min 21s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [11]:
# Time a single GPU training run (-n1: one loop, -r1: one repeat). The figure
# covers the whole call, including building and compiling the model.
%timeit -n1 -r1 test_gpu_1(epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
56.8 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Model 2: Fashion MNIST

In [7]:
# Load the Fashion-MNIST train/test splits through Keras.
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Scale the pixel values by 1/255. Casting to float32 first keeps the scaled
# copies at 4 bytes per value; dividing the raw integer arrays by a Python
# float promotes them to float64 and doubles their memory footprint.
train_images_scaled = train_images.astype("float32") / 255.0
test_images_scaled = test_images.astype("float32") / 255.0

def get_model_2(hidden_layers=1):
    """Build and compile a fully connected classifier for 28x28 inputs.

    Args:
        hidden_layers: Number of Dense(500, relu) layers placed between the
            Flatten input layer and the 10-unit output layer.

    Returns:
        A compiled ``keras.Sequential`` model using the Adam optimizer, the
        "sparse_categorical_crossentropy" loss (so labels are integer class
        ids, not one-hot vectors) and the "accuracy" metric.
    """
    # Flatten layer for input
    layers = [keras.layers.Flatten(input_shape=(28, 28))]
    # hidden layers
    layers.extend(
        keras.layers.Dense(500, activation='relu') for _ in range(hidden_layers)
    )
    # output layer: softmax rather than sigmoid, because the ten classes are
    # mutually exclusive and the cross-entropy loss expects the outputs to
    # form a single probability distribution over them.
    layers.append(keras.layers.Dense(10, activation='softmax'))
    model = keras.Sequential(layers)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

def test_cpu_2(epochs=5):
    """Build a 5-hidden-layer ``get_model_2`` network and fit it under the CPU device scope.

    Args:
        epochs: Number of passes over ``train_images_scaled`` /
            ``train_labels``. Defaults to 5, the value that was previously
            hard-coded, so existing ``test_cpu_2()`` calls behave the same.
    """
    with tf.device('/CPU:0'):
        cpu_model = get_model_2(hidden_layers=5)
        cpu_model.fit(train_images_scaled, train_labels, epochs=epochs)

def test_gpu_2(epochs=5):
    """Build a 5-hidden-layer ``get_model_2`` network and fit it under the GPU device scope.

    Args:
        epochs: Number of passes over ``train_images_scaled`` /
            ``train_labels``. Defaults to 5, the value that was previously
            hard-coded, so existing ``test_gpu_2()`` calls behave the same.
    """
    with tf.device('/GPU:0'):
        # Named gpu_model: this is the GPU run (the local was previously
        # called cpu_model, a copy-paste leftover).
        gpu_model = get_model_2(hidden_layers=5)
        gpu_model.fit(train_images_scaled, train_labels, epochs=epochs)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [8]:
%timeit -n1 -r1 test_cpu_2()

2022-02-01 20:02:15.234683: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 188160000 exceeds 10% of free system memory.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
44.2 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [9]:
%timeit -n1 -r1 test_gpu_2()

2022-02-01 20:03:06.459535: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 188160000 exceeds 10% of free system memory.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
42.8 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
