In [1]:
import os
# These environment variables are set before TensorFlow is imported; presumably
# they have to be in place at import time to take effect — TODO confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # presumably silences TensorFlow's C++ logging
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # presumably restricts the process to GPU index 0
# Alternative kept for reference (presumably hides every GPU, i.e. a CPU-only run):
#os.environ['CUDA_VISIBLE_DEVICES'] = ''

import tensorflow as tf
        
# Sanity check: report how many GPU devices TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

from tensorflow import keras
from tensorflow.keras import layers
import time

Num GPUs Available:  1


In [2]:
#os.environ['CUDA_VISIBLE_DEVICES']

In [3]:

class AdvancedCNN(tf.keras.Model):
    """
    Deeper convolutional neural network (CNN) for image classification.

    Architecture, in call order:
    - Reshape to `cnn_input_reshape`.
    - Three blocks of two 3x3, 'same'-padded ReLU convolutions (64, 128 and 256 filters),
      each block followed by 2x2 max pooling.
    - Flatten, then two Dense(512, relu) layers, each followed by Dropout(0.5).
    - Dense(num_classes, softmax) output.

    Methods:
    - call: Forward pass for the model.
    - step: Compute and apply gradients for one training batch; returns the batch loss.
    - train: Train the model on a dataset (one pass).
    - set_trainable_variables: Overwrite the trainable variables of the model.
    - trainable_vars_as_vector: Return the trainable variables as a 1D tensor.

    `step` and `train` read `self.loss` and `self.optimizer`, which are expected to be
    provided through `compile()` before they are used.
    """

    def __init__(self, cnn_input_reshape, num_classes):
        """
        Create all layers of the network.

        Args:
        - cnn_input_reshape (tuple): Per-sample shape the input is reshaped to, e.g. (28, 28, 1).
        - num_classes (int): Number of output classes.
        """
        super().__init__()

        # NOTE: the attribute assignment order below is kept stable on purpose; the order of
        # `self.trainable_variables` (used by the two helper methods at the bottom) is
        # presumably derived from it.
        self.reshape = tf.keras.layers.Reshape(cnn_input_reshape)

        # Convolutional block 1: 64 filters
        self.conv1 = tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu', padding='same')
        self.conv2 = tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu', padding='same')
        self.max_pool1 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))

        # Convolutional block 2: 128 filters
        self.conv3 = tf.keras.layers.Conv2D(128, kernel_size=3, activation='relu', padding='same')
        self.conv4 = tf.keras.layers.Conv2D(128, kernel_size=3, activation='relu', padding='same')
        self.max_pool2 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))

        # Convolutional block 3: 256 filters
        self.conv5 = tf.keras.layers.Conv2D(256, kernel_size=3, activation='relu', padding='same')
        self.conv6 = tf.keras.layers.Conv2D(256, kernel_size=3, activation='relu', padding='same')
        self.max_pool3 = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))

        # Classifier head
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(512, activation='relu')
        self.dropout1 = tf.keras.layers.Dropout(0.5)
        self.dense2 = tf.keras.layers.Dense(512, activation='relu')
        self.dropout2 = tf.keras.layers.Dropout(0.5)
        self.dense3 = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=None):
        """
        Forward pass for the model.

        Args:
        - inputs (tf.Tensor): Input tensor (batch of images).
        - training (bool, optional): Forwarded to the dropout layers only.

        Returns:
        - tf.Tensor: Output of the final softmax layer (one row of class probabilities per sample).
        """
        x = self.reshape(inputs)  # e.g. adds the channel dimension for (28, 28) -> (28, 28, 1)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.max_pool1(x)

        x = self.conv3(x)
        x = self.conv4(x)
        x = self.max_pool2(x)

        x = self.conv5(x)
        x = self.conv6(x)
        x = self.max_pool3(x)

        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dropout1(x, training=training)
        x = self.dense2(x)
        x = self.dropout2(x, training=training)
        x = self.dense3(x)
        return x

    @tf.function
    def step(self, batch):
        """
        Perform one training step on a given batch of data.

        Args:
        - batch (tuple): A tuple containing two elements:
            - x_batch (tf.Tensor): A batch of input data.
            - y_batch (tf.Tensor): A batch of labels.

        Returns:
        - tf.Tensor: The loss computed for this batch (before the weight update).

        The gradients of `self.loss` with respect to the trainable variables are computed
        with a gradient tape and applied through `self.optimizer`.
        """
        x_batch, y_batch = batch

        with tf.GradientTape() as tape:
            # Forward pass in training mode (dropout active)
            y_batch_pred = self(x_batch, training=True)

            # Compute the loss value
            loss = self.loss(y_batch, y_batch_pred)

        # Compute gradients
        gradients = tape.gradient(loss, self.trainable_variables)

        # Apply gradients to the model's trainable variables (update weights)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        # Returned so callers can monitor training; existing callers may ignore it.
        return loss

    def train(self, dataset):
        """
        Train the model for one pass over a dataset.

        Args:
        - dataset (tf.data.Dataset): Iterable of (x_batch, y_batch) tuples.
        """
        for batch in dataset:
            self.step(batch)

    def set_trainable_variables(self, trainable_vars):
        """
        Overwrite the model's trainable variables.

        Args:
        - trainable_vars (list of tf.Tensor): One tensor per trainable variable, in the
          same order as `self.trainable_variables`.

        Raises:
        - ValueError: If the number of tensors differs from the number of trainable
          variables. A plain `zip` would silently truncate and leave some variables
          untouched (or do nothing at all when the model has no variables yet).
        """
        new_values = list(trainable_vars)
        model_vars = self.trainable_variables

        if len(new_values) != len(model_vars):
            raise ValueError(
                f"Expected {len(model_vars)} tensors but received {len(new_values)}. "
                "A count of 0 usually means the model has not been called on data yet."
            )

        for model_var, value in zip(model_vars, new_values):
            model_var.assign(value)

    def trainable_vars_as_vector(self):
        """
        Get the model's trainable variables as a single vector.

        Returns:
        - tf.Tensor: A 1D tensor with every trainable variable flattened and concatenated
          in the order of `self.trainable_variables`.
        """
        return tf.concat([tf.reshape(var, [-1]) for var in self.trainable_variables], axis=0)




class LeNet5(tf.keras.Model):
    """
    LeNet-5 style model for image classification.

    Architecture, in call order:
    - Reshape to `cnn_input_reshape`.
    - Conv2D(6 filters, 5x5, tanh, 'same') followed by 2x2 average pooling (stride 2).
    - Conv2D(16 filters, 5x5, tanh, 'valid') followed by 2x2 average pooling (stride 2).
    - Flatten, Dense(120, tanh), Dense(84, tanh).
    - Dense(num_classes, softmax) output.

    Methods:
    - call: Forward pass for the model.
    - step: Compute and apply gradients for one training batch; returns the batch loss.
    - train: Train the model on a dataset (one pass).
    - set_trainable_variables: Overwrite the trainable variables of the model.
    - trainable_vars_as_vector: Return the trainable variables as a 1D tensor.

    `step` and `train` read `self.loss` and `self.optimizer`, which are expected to be
    provided through `compile()` before they are used.
    """

    def __init__(self, cnn_input_reshape, num_classes):
        """
        Create all layers of the network.

        Args:
        - cnn_input_reshape (tuple): Per-sample shape the input is reshaped to, e.g. (28, 28, 1).
        - num_classes (int): Number of output classes.
        """
        super().__init__()

        # NOTE: the attribute assignment order below is kept stable on purpose; the order of
        # `self.trainable_variables` (used by the two helper methods at the bottom) is
        # presumably derived from it.
        self.reshape = tf.keras.layers.Reshape(cnn_input_reshape)

        # Layer 1 Conv2D
        self.conv1 = tf.keras.layers.Conv2D(filters=6, kernel_size=(5, 5), strides=(1, 1), activation='tanh', padding='same')
        # Layer 2 Pooling Layer
        self.avgpool1 = tf.keras.layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid')
        # Layer 3 Conv2D
        self.conv2 = tf.keras.layers.Conv2D(filters=16, kernel_size=(5, 5), strides=(1, 1), activation='tanh', padding='valid')
        # Layer 4 Pooling Layer
        self.avgpool2 = tf.keras.layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid')
        # Fully connected head
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(units=120, activation='tanh')
        self.dense2 = tf.keras.layers.Dense(units=84, activation='tanh')
        self.dense3 = tf.keras.layers.Dense(units=num_classes, activation='softmax')

    def call(self, inputs, training=None):
        """
        Forward pass for the model.

        Args:
        - inputs (tf.Tensor): Input tensor (batch of images).
        - training (bool, optional): Accepted for API compatibility; no layer in this
          model uses it.

        Returns:
        - x (tf.Tensor): Output of the final softmax layer (one row of class probabilities per sample).
        """
        x = self.reshape(inputs)
        x = self.conv1(x)
        x = self.avgpool1(x)
        x = self.conv2(x)
        x = self.avgpool2(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)

        return x

    @tf.function
    def step(self, batch):
        """
        Perform one training step on a given batch of data.

        Args:
        - batch (tuple): A tuple containing two elements:
            - x_batch (tf.Tensor): A batch of input data.
            - y_batch (tf.Tensor): A batch of labels.

        Returns:
        - tf.Tensor: The loss computed for this batch (before the weight update).

        The gradients of `self.loss` with respect to the trainable variables are computed
        with a gradient tape and applied through `self.optimizer`.
        """
        x_batch, y_batch = batch

        with tf.GradientTape() as tape:
            # Forward pass: Compute predictions
            y_batch_pred = self(x_batch, training=True)

            # Compute the loss value
            loss = self.loss(y_batch, y_batch_pred)

        # Compute gradients
        gradients = tape.gradient(loss, self.trainable_variables)

        # Apply gradients to the model's trainable variables (update weights)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        # Returned so callers can monitor training; existing callers may ignore it.
        return loss

    def train(self, dataset):
        """
        Train the model for one pass over a dataset.

        Args:
        - dataset (tf.data.Dataset): Iterable of (x_batch, y_batch) tuples.
        """
        for batch in dataset:
            self.step(batch)

    def set_trainable_variables(self, trainable_vars):
        """
        Overwrite the model's trainable variables.

        Args:
        - trainable_vars (list of tf.Tensor): One tensor per trainable variable, in the
          same order as `self.trainable_variables`.

        Raises:
        - ValueError: If the number of tensors differs from the number of trainable
          variables. A plain `zip` would silently truncate and leave some variables
          untouched (or do nothing at all when the model has no variables yet).
        """
        new_values = list(trainable_vars)
        model_vars = self.trainable_variables

        if len(new_values) != len(model_vars):
            raise ValueError(
                f"Expected {len(model_vars)} tensors but received {len(new_values)}. "
                "A count of 0 usually means the model has not been called on data yet."
            )

        for model_var, value in zip(model_vars, new_values):
            model_var.assign(value)

    def trainable_vars_as_vector(self):
        """
        Get the model's trainable variables as a single vector.

        Returns:
        - tf.Tensor: A 1D tensor with every trainable variable flattened and concatenated
          in the order of `self.trainable_variables`.
        """
        return tf.concat([tf.reshape(var, [-1]) for var in self.trainable_variables], axis=0)

In [4]:
# Shape of a batch of raw MNIST images.
MNIST_CNN_BATCH_INPUT = (None, 28, 28)  # MNIST dataset (None is used for batch size, as it varies)
# Per-sample shape with an explicit trailing channel axis (the form the models' Reshape layer takes).
MNIST_CNN_INPUT_RESHAPE = (28, 28, 1)
# Presumably the number of MNIST training samples; equals the default n_train of norma().
MNIST_N_TRAIN = 60_000


def mnist_load_data():
    """
    Load MNIST through `tf.keras.datasets.mnist.load_data()` and rescale the images.

    Returns:
    - tuple: (X_train, y_train, X_test, y_test). Both image arrays are divided by 255.0;
      the label arrays are returned exactly as Keras provides them.
    """
    train_split, test_split = tf.keras.datasets.mnist.load_data()
    X_train, y_train = train_split
    X_test, y_test = test_split

    # Scale pixel values by 1/255 (true division, so the result is floating point)
    X_train = X_train / 255.0
    X_test = X_test / 255.0

    return X_train, y_train, X_test, y_test

In [5]:
# Build and compile both benchmark models for MNIST.
# The input shape comes from MNIST_CNN_INPUT_RESHAPE instead of a repeated literal so
# the two models and the constants cell cannot drift apart.
# NOTE(review): the metric is named 'test_accuracy' although fit() in this notebook only
# receives training data; the name is kept because it is a runtime key.

# For MNIST
advanced_cnn = AdvancedCNN(
    cnn_input_reshape=MNIST_CNN_INPUT_RESHAPE,
    num_classes=10
)

advanced_cnn.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),  # we have softmax
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')]
)

# For MNIST
lenet5 = LeNet5(
    cnn_input_reshape=MNIST_CNN_INPUT_RESHAPE,
    num_classes=10
)

lenet5.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),  # we have softmax
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')]
)

In [6]:
import time

In [7]:
def norma(bs, time_sec, n_train=60_000):
    """
    Normalise a total duration to milliseconds per batch.

    Args:
    - bs (int): Batch size.
    - time_sec (float): Duration in seconds.
    - n_train (int): Number of samples processed during `time_sec` (default 60_000).

    Returns:
    - float: `time_sec` converted to milliseconds and divided by `n_train / bs`.
      The batch count is not rounded, so it is fractional when `bs` does not
      divide `n_train`.
    """
    batch_count = n_train / bs
    elapsed_ms = time_sec * 1000
    return elapsed_ms / batch_count  # ms

In [8]:
# Load MNIST once for all benchmarks; the image arrays come back already divided by 255.0.
X_train, y_train, X_test, y_test = mnist_load_data()

In [9]:
# Warm-up: one untimed epoch for every (model, batch size) pair that is benchmarked later,
# presumably so one-off costs (graph tracing, kernel selection) stay out of the timings.
# Execution order is unchanged: AdvancedCNN first, then LeNet5, batch sizes ascending.
for warmup_model in (advanced_cnn, lenet5):
    for warmup_batch_size in (32, 64, 128, 256):
        warmup_model.fit(x=X_train, y=y_train, batch_size=warmup_batch_size, epochs=1, verbose=0)

<keras.src.callbacks.History at 0x7f78180e6c50>

In [10]:
%%timeit

# Timed body: 20 epochs of AdvancedCNN at batch size 32 (the model keeps its weights between timeit runs).
advanced_cnn.fit(x=X_train, y=y_train, batch_size=32, epochs=20, verbose=0)

5min 18s ± 3.37 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
# 5 min 18 s (AdvancedCNN, batch size 32) expressed in seconds.
5 * 60 + 18

318

In [19]:
# AdvancedCNN, batch size 32: mean time per batch in ms (318 s for 20 epochs of 60_000 samples).
norma(bs=32, time_sec=318, n_train=20*60_000)

8.48

In [20]:
# AdvancedCNN, batch size 32: timeit standard deviation (3.37 s) converted to ms per batch.
norma(bs=32, time_sec=3.37, n_train=20*60_000)

0.08986666666666666

In [11]:
%%timeit

# Timed body: 20 epochs of AdvancedCNN at batch size 64 (the model keeps its weights between timeit runs).
advanced_cnn.fit(x=X_train, y=y_train, batch_size=64, epochs=20, verbose=0)

3min 2s ± 840 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [21]:
# 3 min 2 s (AdvancedCNN, batch size 64) expressed in seconds.
3 * 60 + 2

182

In [22]:
# AdvancedCNN, batch size 64: mean time per batch in ms (182 s for 20 epochs of 60_000 samples).
norma(bs=64, time_sec=182, n_train=20*60_000)

9.706666666666667

In [23]:
# AdvancedCNN, batch size 64: timeit standard deviation (840 ms) converted to ms per batch.
norma(bs=64, time_sec=0.84, n_train=20*60_000)

0.0448

In [12]:
%%timeit

# Timed body: 20 epochs of AdvancedCNN at batch size 128 (the model keeps its weights between timeit runs).
advanced_cnn.fit(x=X_train, y=y_train, batch_size=128, epochs=20, verbose=0)

1min 40s ± 296 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [24]:
# AdvancedCNN, batch size 128: mean time per batch in ms (1 min 40 s = 100 s for 20 epochs).
norma(bs=128, time_sec=100, n_train=20*60_000)

10.666666666666666

In [25]:
# AdvancedCNN, batch size 128: timeit standard deviation (296 ms) converted to ms per batch.
norma(bs=128, time_sec=0.296, n_train=20*60_000)

0.031573333333333335

In [13]:
%%timeit

# Timed body: 20 epochs of AdvancedCNN at batch size 256 (the model keeps its weights between timeit runs).
advanced_cnn.fit(x=X_train, y=y_train, batch_size=256, epochs=20, verbose=0)

1min 10s ± 249 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [26]:
# AdvancedCNN, batch size 256: mean time per batch in ms (1 min 10 s = 70 s for 20 epochs).
norma(bs=256, time_sec=70, n_train=20*60_000)

14.933333333333334

In [27]:
# AdvancedCNN, batch size 256: timeit standard deviation (249 ms) converted to ms per batch.
norma(bs=256, time_sec=0.249, n_train=20*60_000)

0.05312

In [14]:
%%timeit

# Timed body: 20 epochs of LeNet5 at batch size 32 (the model keeps its weights between timeit runs).
lenet5.fit(x=X_train, y=y_train, batch_size=32, epochs=20, verbose=0)

3min 50s ± 1.51 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [28]:
# 3 min 50 s (LeNet5, batch size 32) expressed in seconds.
3 * 60 + 50

230

In [29]:
# LeNet5, batch size 32: mean time per batch in ms (230 s for 20 epochs of 60_000 samples).
norma(bs=32, time_sec=230, n_train=20*60_000)

6.133333333333334

In [30]:
# LeNet5, batch size 32: timeit standard deviation (1.51 s) converted to ms per batch.
norma(bs=32, time_sec=1.51, n_train=20*60_000)

0.040266666666666666

In [15]:
%%timeit

# Timed body: 20 epochs of LeNet5 at batch size 64 (the model keeps its weights between timeit runs).
lenet5.fit(x=X_train, y=y_train, batch_size=64, epochs=20, verbose=0)

2min 10s ± 765 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [31]:
# LeNet5, batch size 64: mean time per batch in ms (2 min 10 s = 130 s for 20 epochs).
norma(bs=64, time_sec=130, n_train=20*60_000)

6.933333333333334

In [32]:
# LeNet5, batch size 64: timeit standard deviation (765 ms) converted to ms per batch.
norma(bs=64, time_sec=0.765, n_train=20*60_000)

0.0408

In [16]:
%%timeit

# Timed body: 20 epochs of LeNet5 at batch size 128 (the model keeps its weights between timeit runs).
lenet5.fit(x=X_train, y=y_train, batch_size=128, epochs=20, verbose=0)

1min 6s ± 447 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [33]:
# LeNet5, batch size 128: mean time per batch in ms (1 min 6 s = 66 s for 20 epochs).
norma(bs=128, time_sec=66, n_train=20*60_000)

7.04

In [34]:
# LeNet5, batch size 128: timeit standard deviation (447 ms) converted to ms per batch.
norma(bs=128, time_sec=0.447, n_train=20*60_000)

0.04768

In [17]:
%%timeit

# Timed body: 20 epochs of LeNet5 at batch size 256 (the model keeps its weights between timeit runs).
lenet5.fit(x=X_train, y=y_train, batch_size=256, epochs=20, verbose=0)

34.1 s ± 399 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [35]:
# LeNet5, batch size 256: mean time per batch in ms (34.1 s for 20 epochs of 60_000 samples).
norma(bs=256, time_sec=34.1, n_train=20*60_000)

7.274666666666667

In [36]:
# LeNet5, batch size 256: timeit standard deviation (399 ms) converted to ms per batch.
norma(bs=256, time_sec=0.399, n_train=20*60_000)

0.08512