In [1]:
import tensorflow as tf

In [3]:
# Differentiate y = 2x + 3 with respect to a scalar variable x.
x = tf.Variable(0.)
# The computation of y is performed inside the tape context so that the
# tape can be asked for its gradient afterwards.
with tf.GradientTape() as tape:
  y = 2 * x + 3
# dy/dx is the constant 2, which is what gets printed.
grad_of_y_wrt_x = tape.gradient(y, x)
print(grad_of_y_wrt_x)

tf.Tensor(2.0, shape=(), dtype=float32)


In [4]:
# Same computation as y = 2x + 3, but with x a 2x2 matrix variable.
x = tf.Variable(tf.random.uniform((2,2)))
with tf.GradientTape() as tape:
  y = 2 * x + 3
# The gradient has the same shape as x. y is not a scalar here, so
# tape.gradient differentiates the sum of y's elements; each entry of x
# contributes to exactly one entry of y, giving 2 everywhere.
grad_of_y_wrt_x = tape.gradient(y, x)
print(grad_of_y_wrt_x)

tf.Tensor(
[[2. 2.]
 [2. 2.]], shape=(2, 2), dtype=float32)


In [6]:
# Gradients of the affine map y = x @ W + b with respect to both W and b.
W = tf.Variable(tf.random.uniform((2,2)))
b = tf.Variable(tf.zeros((2,)))
# x is a plain tensor rather than a Variable; no gradient is requested for it.
x = tf.random.uniform((2,2))

with tf.GradientTape() as tape:
  y = tf.matmul(x, W) + b

# Passing a list of sources returns a list of gradients in the same order:
# index 0 is the gradient for W, index 1 the gradient for b.
# y is not a scalar, so these are gradients of the sum of y's elements:
# row i of the W-gradient is the column sum x[:, i] repeated across columns,
# and each entry of the b-gradient equals the number of rows in x (2).
grad_of_y_wrt_W_and_b = tape.gradient(y, [W, b])
print(grad_of_y_wrt_W_and_b[0])
print()
print(grad_of_y_wrt_W_and_b[1])

tf.Tensor(
[[0.23850584 0.23850584]
 [1.153277   1.153277  ]], shape=(2, 2), dtype=float32)

tf.Tensor([2. 2.], shape=(2,), dtype=float32)


In [40]:
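# Contents of this cell, in order:
#   1. NaiveDense class (a single dense layer)
#   2. NaiveSequential class (a stack of layers)
#   3. BatchGenerator class (mini-batch slicing)
#   4. one_training_step (one training step on a batch)
#   5. update_weights (update the weights from the gradients)
#   6. fit (one full training loop)
#   7. model predictions on the test set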
import matplotlib.pyplot as plt
import math

class NaiveDense:
  """Fully connected layer computing ``activation(inputs @ W + b)``.

  ``W`` has shape ``(input_size, output_size)`` and is initialised from a
  uniform distribution between 0 and 0.1; ``b`` has shape
  ``(output_size,)`` and is initialised to zeros. ``activation`` is any
  callable applied to the affine output.
  """

  def __init__(self, input_size, output_size, activation):
    # Kernel starts as small random values.
    kernel_init = tf.random.uniform(
        shape=(input_size, output_size), minval=0, maxval=1e-1)
    kernel = tf.Variable(kernel_init)

    # Bias starts at zero, one entry per output unit.
    bias = tf.Variable(tf.zeros((output_size,)))

    self.activation = activation
    self.W = kernel
    self.b = bias

  def __call__(self, inputs):
    """Apply the affine transform, then the activation, to ``inputs``."""
    pre_activation = tf.matmul(inputs, self.W) + self.b
    return self.activation(pre_activation)

  @property
  def weights(self):
    """The layer's trainable variables as ``[W, b]``."""
    return [self.W, self.b]

class NaiveSequential:
  """Chains layers so that each layer's output feeds the next layer."""

  def __init__(self, layers):
    self.layers = layers

  def __call__(self, inputs):
    """Run ``inputs`` through every layer in order and return the result."""
    activations = inputs
    for stage in self.layers:
      activations = stage(activations)
    return activations

  @property
  def weights(self):
    """Flat list of every layer's weights, in layer order."""
    return [w for stage in self.layers for w in stage.weights]

# Two-layer classifier: 28*28 = 784 inputs -> 512 ReLU units -> 10 softmax
# outputs. The second layer's input_size must match the first layer's
# output_size (512) for the matmul inside NaiveDense to line up.
model = NaiveSequential( [
    NaiveDense(input_size=28*28, output_size=512, activation = tf.nn.relu),
    NaiveDense(input_size=512, output_size=10, activation = tf.nn.softmax)
])

class BatchGenerator:
  """Serves consecutive ``batch_size``-long slices of images and labels.

  ``images`` and ``labels`` must have the same length. ``num_batches`` is
  the number of ``next()`` calls needed to cover every sample.
  """

  def __init__(self, images, labels, batch_size=128):
    sample_count = len(images)
    assert sample_count == len(labels)
    self.images, self.labels = images, labels
    self.batch_size = batch_size
    self.index = 0  # start offset of the slice the next call will return
    self.num_batches = math.ceil(sample_count / batch_size)

  def next(self):
    """Return the next ``(images, labels)`` slice and advance the cursor."""
    window = slice(self.index, self.index + self.batch_size)
    batch = (self.images[window], self.labels[window])
    self.index = window.stop
    return batch

def one_training_step(model, images_batch, labels_batch):
  """Run one gradient-descent step on a single batch.

  Runs the forward pass and computes the mean sparse categorical
  cross-entropy under a gradient tape, differentiates that loss with
  respect to ``model.weights``, and hands the gradients to
  ``update_weights``.

  Returns:
    The mean loss over the batch, computed before the weights are updated.
  """
  with tf.GradientTape() as tape:
    class_probs = model(images_batch)
    batch_loss = tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(
            labels_batch, class_probs))

  # Differentiate outside the `with` block, once recording has stopped.
  grads = tape.gradient(batch_loss, model.weights)
  update_weights(grads, model.weights)
  return batch_loss


# Step size used by update_weights for every gradient-descent update.
learning_rate = 1e-3
def update_weights(gradients, weights):
  """Apply one gradient-descent step in place: ``w -= g * learning_rate``.

  ``gradients`` and ``weights`` are paired positionally; each weight must
  provide an ``assign_sub`` method.
  """
  for grad, weight in zip(gradients, weights):
    step = grad * learning_rate
    weight.assign_sub(step)


def fit(model, images, labels, epochs, batch_size=128):
  """Train ``model`` for ``epochs`` passes over ``images`` / ``labels``.

  Each epoch builds a fresh BatchGenerator (so batching restarts from the
  first sample) and runs one_training_step on every batch. The epoch
  number is printed at the start of each epoch and the batch loss is
  printed every 100 batches.

  Args:
    model: callable model exposing ``weights``, as one_training_step expects.
    images: training inputs, sliceable along the first axis.
    labels: training labels, same length as ``images``.
    epochs: number of full passes over the data.
    batch_size: number of samples per batch.
  """
  for epoch_counter in range(epochs):
    print(f'epoch: {epoch_counter}')

    # Forward batch_size so the caller's value is honoured rather than
    # BatchGenerator's own default.
    batch_generator = BatchGenerator(images, labels, batch_size=batch_size)
    for batch_counter in range(batch_generator.num_batches):
      images_batch, labels_batch = batch_generator.next()
      loss = one_training_step(model, images_batch, labels_batch)
      if batch_counter % 100 == 0:
        print(f"loss at {batch_counter}: {loss: .2f}")

from tensorflow.keras.datasets import mnist
# Load the MNIST train/test splits as arrays of images and integer labels.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten each 28x28 image into a 784-long row and convert to float32.
# Using -1 lets reshape infer the number of samples from the data instead
# of hard-coding the split sizes. Dividing by 255 rescales the pixel values
# (assumed to be 0-255 integers, per the Keras MNIST docs) into [0, 1].
train_images = train_images.reshape((-1, 28*28))
train_images = train_images.astype('float32') / 255
test_images = test_images.reshape((-1, 28*28))
test_images = test_images.astype('float32') / 255

fit(model, train_images, train_labels, epochs= 10, batch_size=128)

import numpy as np

# Forward pass over the whole test set in a single call, then convert the
# resulting tensor to a NumPy array.
predictions = model(test_images)
predictions = predictions.numpy()
# For each row, take the index of the largest output as the predicted class.
predicted_labels = np.argmax(predictions, axis = 1)
matches = test_labels == predicted_labels
# The mean of the boolean match array is the fraction predicted correctly.
print(f"accuracy: {matches.mean():.2f}")


epoch: 0
loss at 0:  6.47
loss at 100:  2.22
loss at 200:  2.19
loss at 300:  2.07
loss at 400:  2.21
epoch: 1
loss at 0:  1.90
loss at 100:  1.86
loss at 200:  1.80
loss at 300:  1.70
loss at 400:  1.82
epoch: 2
loss at 0:  1.58
loss at 100:  1.56
loss at 200:  1.48
loss at 300:  1.41
loss at 400:  1.50
epoch: 3
loss at 0:  1.32
loss at 100:  1.32
loss at 200:  1.22
loss at 300:  1.20
loss at 400:  1.26
epoch: 4
loss at 0:  1.13
loss at 100:  1.15
loss at 200:  1.02
loss at 300:  1.04
loss at 400:  1.10
epoch: 5
loss at 0:  0.98
loss at 100:  1.01
loss at 200:  0.89
loss at 300:  0.92
loss at 400:  0.98
epoch: 6
loss at 0:  0.88
loss at 100:  0.90
loss at 200:  0.79
loss at 300:  0.84
loss at 400:  0.89
epoch: 7
loss at 0:  0.80
loss at 100:  0.82
loss at 200:  0.71
loss at 300:  0.77
loss at 400:  0.83
epoch: 8
loss at 0:  0.73
loss at 100:  0.75
loss at 200:  0.65
loss at 300:  0.71
loss at 400:  0.77
epoch: 9
loss at 0:  0.68
loss at 100:  0.70
loss at 200:  0.60
loss at 300:  0.67