In [12]:
import tensorflow as tf
import numpy as np
import time
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Check GPU availability
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  0


In [13]:
# Load MNIST data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Preprocess data
x_train = x_train.reshape((x_train.shape[0], 28, 28, 1)).astype('float32') / 255
x_test = x_test.reshape((x_test.shape[0], 28, 28, 1)).astype('float32') / 255
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)


In [14]:
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [15]:
# Train and evaluate model on CPU
with tf.device('/CPU:0'):
    model_cpu = create_model()
    print("Training on CPU...")

    start_time = time.time()
    history_cpu = model_cpu.fit(x_train, y_train, epochs=1, batch_size=64, validation_split=0.1, verbose=2)
    end_time = time.time()
    cpu_training_time = end_time - start_time

    # Evaluate model
    start_time = time.time()
    test_loss, test_acc = model_cpu.evaluate(x_test, y_test, verbose=2)
    end_time = time.time()
    cpu_inference_time = end_time - start_time

print(f"CPU Training Time: {cpu_training_time:.2f} seconds")
print(f"CPU Inference Time: {cpu_inference_time:.2f} seconds")


Training on CPU...
844/844 - 47s - 56ms/step - accuracy: 0.9426 - loss: 0.1909 - val_accuracy: 0.9850 - val_loss: 0.0534
313/313 - 2s - 7ms/step - accuracy: 0.9833 - loss: 0.0529
CPU Training Time: 84.55 seconds
CPU Inference Time: 2.62 seconds


In [16]:
# Define the custom CNN model using cuDNN
class CustomConv2D(tf.keras.layers.Layer):
    def __init__(self, filters, kernel_size, strides=(1, 1), padding='SAME', **kwargs):
        super(CustomConv2D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.padding = padding

    def build(self, input_shape):
        self.kernel = self.add_weight(
            shape=(*self.kernel_size, input_shape[-1], self.filters),
            initializer='glorot_uniform',
            trainable=True
        )

    def call(self, inputs):
        return tf.nn.conv2d(
            inputs,
            self.kernel,
            strides=[1, *self.strides, 1],
            padding=self.padding
        )

def build_custom_cnn():
    inputs = tf.keras.Input(shape=(28, 28, 1))
    x = CustomConv2D(32, (3, 3))(inputs)
    x = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = CustomConv2D(64, (3, 3))(x)
    x = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = CustomConv2D(64, (3, 3))(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    outputs = tf.keras.layers.Dense(10, activation='softmax')(x)
    model = tf.keras.Model(inputs, outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [17]:
# Train and evaluate the custom model on GPU
with tf.device('/GPU:0'):
    model_custom_gpu = build_custom_cnn()
    print("Training Custom GPU Model with cuDNN...")

    start_time = time.time()
    history_custom_gpu = model_custom_gpu.fit(x_train, y_train, epochs=1, batch_size=64, validation_split=0.1, verbose=2)
    end_time = time.time()
    custom_gpu_training_time = end_time - start_time

    # Evaluate model
    start_time = time.time()
    test_loss, test_acc = model_custom_gpu.evaluate(x_test, y_test, verbose=2)
    end_time = time.time()
    custom_gpu_inference_time = end_time - start_time

print(f"Custom GPU Training Time: {custom_gpu_training_time:.2f} seconds")
print(f"Custom GPU Inference Time: {custom_gpu_inference_time:.2f} seconds")


Training Custom GPU Model with cuDNN...
844/844 - 65s - 77ms/step - accuracy: 0.9512 - loss: 0.1592 - val_accuracy: 0.9827 - val_loss: 0.0629
313/313 - 3s - 10ms/step - accuracy: 0.9802 - loss: 0.0600
Custom GPU Training Time: 65.61 seconds
Custom GPU Inference Time: 5.19 seconds
