In [67]:
import tensorflow as tf

class AdaGrad:
    """Minimal AdaGrad optimizer operating on a list of ``tf.Variable`` objects.

    For every parameter ``p`` with gradient ``g`` the optimizer keeps a running
    sum of squared gradients ``s`` and applies::

        s <- s + g**2
        p <- p - lr * g / sqrt(s + epsilon)

    Note that ``epsilon`` is added *inside* the square root.

    Args:
        params: Iterable of variables to optimize. It is materialized into a
            list so that generators/iterators can be passed safely.
        lr: Base learning rate.
        epsilon: Small constant that keeps the denominator away from zero.
    """

    def __init__(self, params, lr=0.01, epsilon=1e-8):
        self.lr = lr
        self.epsilon = epsilon
        # Materialize once: `params` is iterated here and again on every step.
        self.params = list(params)
        # One non-trainable accumulator of squared gradients per parameter.
        self.s = [tf.Variable(tf.zeros_like(p), trainable=False) for p in self.params]

    def apply_gradients(self, grads):
        """Apply one AdaGrad step, updating the parameters in place.

        Args:
            grads: Sequence of gradients, positionally aligned with
                ``self.params``. An entry may be ``None`` (what
                ``tf.GradientTape.gradient`` returns for a variable that is
                not connected to the loss); such parameters are left untouched.
        """
        for i, param in enumerate(self.params):
            grad = grads[i]
            if grad is None:
                # No gradient information for this parameter: skip it instead
                # of failing inside tf.square(None).
                continue
            accum = self.s[i]
            accum.assign_add(tf.square(grad))
            update = -self.lr * grad / tf.sqrt(accum + self.epsilon)
            param.assign_add(update)

    def train(self, loss_fn, n_epochs=100, log_every=10, param_names=None):
        """Minimize ``loss_fn`` for ``n_epochs`` steps, printing progress.

        Args:
            loss_fn: Zero-argument callable returning the loss tensor. It must
                read the optimized variables so the tape can record them.
            n_epochs: Number of optimization steps.
            log_every: Print a progress line every ``log_every`` epochs
                (starting with epoch 0). ``0`` or ``None`` disables logging.
            param_names: Optional labels for the parameters in the progress
                line, one per parameter. Defaults to ``p0``, ``p1``, ...

        Raises:
            ValueError: If ``param_names`` is given but its length differs
                from the number of parameters.
        """
        if param_names is None:
            labels = [f"p{i}" for i in range(len(self.params))]
        else:
            labels = list(param_names)
            if len(labels) != len(self.params):
                raise ValueError(
                    f"param_names has {len(labels)} entries but the optimizer "
                    f"holds {len(self.params)} parameters"
                )

        for epoch in range(n_epochs):
            with tf.GradientTape() as tape:
                loss = loss_fn()
            grads = tape.gradient(loss, self.params)
            self.apply_gradients(grads)

            if log_every and epoch % log_every == 0:
                # tape.gradient differentiates the *sum* of a non-scalar
                # target, so report that same quantity; this also avoids
                # float() on a rank-1 tensor.
                loss_value = float(tf.reduce_sum(loss))
                # The loss was evaluated before this step's update, while the
                # parameter values shown are the ones after the update.
                fields = [f"Loss = {loss_value:.4f}"]
                fields.extend(
                    f"{label} = {param.numpy()}"
                    for label, param in zip(labels, self.params)
                )
                print(f"Epoch {epoch}: " + ", ".join(fields))

In [69]:
def loss_fn():
    """Simple quadratic loss: (w - 3)^2 + (b - 1)^2.

    Reads the notebook-level variables ``w`` and ``b`` at call time, so both
    must be defined before this function is first called. The loss is zero
    when ``w == 3`` and ``b == 1``.
    """
    w_offset = w - 3
    b_offset = b - 1
    return w_offset**2 + b_offset**2

In [71]:
# Start both variables away from the loss minimum (w = 3, b = 1) so the
# descent is visible in the printed progress.
w, b = (tf.Variable([start], dtype=tf.float32) for start in (5.0, 2.0))

# Run 100 AdaGrad steps on the quadratic loss; progress is printed by train().
optimizer = AdaGrad([w, b], lr=0.1)
optimizer.train(loss_fn, n_epochs=100)

Epoch 0: Loss = 5.0000, w = [4.9], b = [1.9]
Epoch 10: Loss = 2.6291, w = [4.497252], b = [1.531102]
Epoch 20: Loss = 1.8383, w = [4.284423], b = [1.3632808]
Epoch 30: Loss = 1.3750, w = [4.128132], b = [1.2568476]
Epoch 40: Loss = 1.0652, w = [4.002919], b = [1.1840388]
Epoch 50: Loss = 0.8432, w = [3.8982747], b = [1.132695]
Epoch 60: Loss = 0.6773, w = [3.808616], b = [1.0959724]
Epoch 70: Loss = 0.5496, w = [3.730539], b = [1.0695225]
Epoch 80: Loss = 0.4493, w = [3.6617744], b = [1.0504037]
Epoch 90: Loss = 0.3693, w = [3.6007125], b = [1.0365584]
