In [1]:
import tensorflow as tf

## A Simple Dense Class

In [14]:
class NaiveDense:
    """A bare-bones fully connected layer computing ``activation(inputs @ W + b)``.

    Args:
        input_size: Number of features in each input row.
        output_size: Number of units the layer produces per input row.
        activation: Callable applied element-wise to the affine output
            (e.g. ``tf.nn.relu``).

    Attributes are documented inline where they are created.
    """

    def __init__(self, input_size, output_size, activation):
        self.activation = activation

        # Kernel matrix of shape (input_size, output_size), filled with small
        # random values drawn uniformly between 0 and 0.1.
        kernel_init = tf.random.uniform(
            (input_size, output_size), minval=0, maxval=1e-1
        )
        self.W = tf.Variable(kernel_init)

        # Bias: rank-1 tensor with one zero-initialised entry per output unit.
        bias_init = tf.zeros((output_size,))
        self.b = tf.Variable(bias_init)

    def __call__(self, inputs):
        """Forward pass: affine transform of ``inputs`` followed by the activation."""
        pre_activation = tf.matmul(inputs, self.W) + self.b
        return self.activation(pre_activation)

    @property
    def weights(self):
        """The layer's trainable variables as ``[W, b]`` (kernel first, then bias)."""
        return [self.W, self.b]

## A Simple Sequential Class

In [17]:
class NaiveSequential:
    """Chains layers so that the output of each one feeds the next.

    Args:
        layers: Ordered collection of callables; each must also expose a
            ``weights`` attribute holding an iterable of its variables.
    """

    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        """Run ``inputs`` through every layer in order and return the final output."""
        activations = inputs
        for transform in self.layers:
            activations = transform(activations)
        return activations

    @property
    def weights(self):
        """Flat list of every layer's weights, in layer order."""
        return [tensor for layer in self.layers for tensor in layer.weights]

## Using NaiveDense and NaiveSequential to create a mock Keras model

In [20]:
# Two stacked dense layers: 28*28 input features -> 512 units (ReLU) -> 10 units (softmax)
model = NaiveSequential(
    layers=[
        NaiveDense(28 * 28, 512, tf.nn.relu),  # Hidden layer
        NaiveDense(512, 10, tf.nn.softmax),  # Output layer
    ]
)

# Each dense layer contributes a weight matrix and a bias vector: 2 layers x 2 = 4 tensors
assert len(model.weights) == 4

## Creating a Batch generator

In [23]:
import math

class BatchGenerator:
    """Serves consecutive, fixed-size slices of paired images and labels.

    Args:
        images: Sliceable sequence of samples.
        labels: Sliceable sequence of targets, the same length as ``images``.
        batch_size: Maximum number of samples per batch (the final batch may
            be shorter).
    """

    def __init__(self, images, labels, batch_size=128):
        assert len(images) == len(labels)
        self.images, self.labels = images, labels
        self.batch_size = batch_size
        self.index = 0  # Offset of the first sample of the next batch
        # Round up so a trailing partial batch is still counted
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` batch and advance the cursor."""
        window = slice(self.index, self.index + self.batch_size)
        batch = (self.images[window], self.labels[window])
        self.index = window.stop
        return batch

## Running One Training Step
- Compute the predictions of the model for the images in the batch
- Compute the loss value for these predictions, given the actual labels
- Compute the gradient of the loss w.r.t. the weights
- Update the weight values by a small amount in the opposite direction of the gradient

In [51]:
# Use TensorFlow's GradientTape object to compute the gradient
def one_training_step(model, images_batch, labels_batch):
    """Run one gradient-descent update on a single batch.

    Args:
        model: Callable model exposing a ``weights`` list of TF variables.
        images_batch: Inputs fed to ``model``.
        labels_batch: Integer class labels for ``images_batch``.

    Returns:
        The mean loss over the batch, computed before the weights are updated.
    """
    # Operations executed inside the tape are recorded for differentiation
    with tf.GradientTape() as tape:
        predicted = model(images_batch)
        average_loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(labels_batch, predicted)
        )

    # Gradient of the loss w.r.t. each weight, then one step against the gradient
    trainable = model.weights
    update_weights(tape.gradient(average_loss, trainable), trainable)
    return average_loss

In [53]:
learning_rate = 1e-3  # Step size applied to every gradient


def update_weights(gradients, weights):
    """Apply one plain gradient-descent step to each variable, in place.

    Args:
        gradients: Gradients, paired positionally with ``weights``.
        weights: Variables to update; each must support ``assign_sub``.
    """
    for grad, variable in zip(gradients, weights):
        step = grad * learning_rate
        variable.assign_sub(step)  # In-place ``variable -= step`` for TF variables

# In practice, we would never write this update_weights func by hand, we would use an Optimizer instance from Keras like:

# from tensorflow.keras import optimizers

# optimizer = optimizers.SGD(learning_rate=1e-3)
        
# def update_weights(gradients, weights):
#     optimizer.apply_gradients(zip(gradients, weights))

## The Full Training Loop

- An epoch: repeating the training step for each batch in the training data
- Full training loop: the repetition of one epoch, several times over

In [56]:
def fit(model, images, labels, epochs, batch_size=128):
    """Train ``model`` with mini-batch gradient descent.

    Args:
        model: Model passed through to ``one_training_step``.
        images: Training inputs; must be the same length as ``labels``.
        labels: Training targets.
        epochs: Number of full passes over the training data.
        batch_size: Number of samples per training step.

    Prints the epoch number at the start of each epoch and the batch loss
    every 100 batches. Returns ``None``; the model is updated in place.
    """
    for epoch_counter in range(epochs):
        print(f"Epoch {epoch_counter}")
        # A fresh generator per epoch restarts iteration from the first sample.
        # batch_size is forwarded explicitly; otherwise the generator would
        # silently fall back to its own default and ignore the caller's value.
        batch_generator = BatchGenerator(images, labels, batch_size)
        for batch_counter in range(batch_generator.num_batches):
            images_batch, labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(f"Loss at batch {batch_counter}: {loss:.2f}")

## Testing the model

In [59]:
from tensorflow.keras.datasets import mnist # Importing the MNIST dataset
# Load the MNIST training and test splits
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every 28x28 image into a 784-long row, cast to float32 and divide by 255
# (maps 8-bit pixel intensities into the [0, 1] range)
train_images = train_images.reshape((60000, 28 * 28)).astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28)).astype("float32") / 255

# Train for 10 epochs on the training split
fit(model, train_images, train_labels, epochs=10, batch_size=128)

Epoch 0
Loss at batch 0: 5.75
Loss at batch 100: 2.26
Loss at batch 200: 2.26
Loss at batch 300: 2.11
Loss at batch 400: 2.26
Epoch 1
Loss at batch 0: 1.95
Loss at batch 100: 1.89
Loss at batch 200: 1.87
Loss at batch 300: 1.73
Loss at batch 400: 1.85
Epoch 2
Loss at batch 0: 1.62
Loss at batch 100: 1.59
Loss at batch 200: 1.54
Loss at batch 300: 1.43
Loss at batch 400: 1.52
Epoch 3
Loss at batch 0: 1.35
Loss at batch 100: 1.35
Loss at batch 200: 1.27
Loss at batch 300: 1.21
Loss at batch 400: 1.28
Epoch 4
Loss at batch 0: 1.15
Loss at batch 100: 1.16
Loss at batch 200: 1.06
Loss at batch 300: 1.05
Loss at batch 400: 1.11
Epoch 5
Loss at batch 0: 1.00
Loss at batch 100: 1.02
Loss at batch 200: 0.92
Loss at batch 300: 0.92
Loss at batch 400: 0.98
Epoch 6
Loss at batch 0: 0.88
Loss at batch 100: 0.91
Loss at batch 200: 0.82
Loss at batch 300: 0.83
Loss at batch 400: 0.89
Epoch 7
Loss at batch 0: 0.80
Loss at batch 100: 0.83
Loss at batch 200: 0.74
Loss at batch 300: 0.76
Loss at batch 40

## Evaluating the model
We can evaluate the model by taking the argmax of its predictions over the test images and comparing the result to the expected labels.

In [64]:
import numpy as np

# Forward pass over the whole test set; .numpy() converts the TF tensor to a NumPy array
predictions = model(test_images).numpy()

# The predicted class of each sample is the index of its highest score
predicted_labels = predictions.argmax(axis=1)

# Boolean array marking correct predictions; its mean is the accuracy
matches = predicted_labels == test_labels
print(f"Accuracy: {matches.mean():.2f}")

Accuracy: 0.82
