In [1]:
import tensorflow as tf
import numpy as np

class MomentumOptimizer:
    """Gradient descent with classical momentum for ``tf.Variable`` parameters.

    Every parameter ``p`` owns a velocity buffer ``v`` (initialised to zeros)
    and each step applies::

        v <- beta * v - lr * grad
        p <- p + v

    Args:
        params: Iterable of ``tf.Variable`` objects to optimise; they are
            updated in place.
        lr: Learning rate that scales each gradient.
        beta: Momentum coefficient that scales the previous velocity.
    """

    def __init__(self, params, lr=0.01, beta=0.9):
        self.lr = lr
        self.beta = beta
        # Materialise once: the sequence is walked here to build the velocity
        # buffers and again on every step, so a one-shot iterator would be
        # exhausted before training starts.
        self.params = list(params)
        self.momentum = [tf.Variable(tf.zeros_like(p), trainable=False) for p in self.params]

    def apply_gradients(self, grads):
        """Apply one momentum step to every parameter.

        Args:
            grads: Sequence of gradients, positionally aligned with
                ``self.params``. A ``None`` entry leaves the matching
                parameter and its velocity untouched.

        Raises:
            IndexError: If ``grads`` has fewer entries than there are parameters.
        """
        for i, (param, velocity) in enumerate(zip(self.params, self.momentum)):
            grad = grads[i]
            if grad is None:
                # tape.gradient yields None for a variable the loss does not
                # depend on; there is nothing to update for it.
                continue
            velocity.assign(self.beta * velocity - self.lr * grad)
            param.assign_add(velocity)

    def train(self, loss_fn, n_epochs=100, log_every=10):
        """Minimise ``loss_fn`` by running ``n_epochs`` momentum steps.

        Args:
            loss_fn: Zero-argument callable returning the loss tensor computed
                from the optimiser's parameters.
            n_epochs: Number of update steps to perform.
            log_every: Print progress on every ``log_every``-th epoch
                (starting with epoch 0). ``0`` or ``None`` disables logging.
        """
        for epoch in range(n_epochs):
            with tf.GradientTape() as tape:
                loss = loss_fn()
            grads = tape.gradient(loss, self.params)
            self.apply_gradients(grads)

            if log_every and epoch % log_every == 0:
                # Report the optimiser's own parameters rather than notebook
                # globals, so logging works for any parameter list.
                # Note: the loss was evaluated before this epoch's update,
                # while the parameter values shown are post-update.
                values = ", ".join(f"p{i} = {p.numpy()}" for i, p in enumerate(self.params))
                # reduce_sum collapses a non-scalar loss (e.g. shape (1,)) to
                # the scalar that float() needs.
                print(f"Epoch {epoch}: Loss = {float(tf.reduce_sum(loss)):.4f}, {values}")

In [2]:
def loss_fn():
    """Quadratic loss (w - 3)^2 + (b - 1)^2, minimised at w = 3, b = 1.

    Reads the notebook-level variables ``w`` and ``b`` at call time.
    """
    w_offset = w - 3
    b_offset = b - 1
    return w_offset**2 + b_offset**2

In [3]:
# Start both parameters away from the minimum of loss_fn (w = 3, b = 1).
w, b = (tf.Variable([start], dtype=tf.float32) for start in (5.0, 2.0))

# Build the optimiser over the two variables and run 100 update steps.
optimizer = MomentumOptimizer(params=[w, b], lr=0.1, beta=0.9)
optimizer.train(loss_fn, n_epochs=100)

Epoch 0: Loss = 5.0000, w = [4.6], b = [1.8]
Epoch 10: Loss = 0.0001, w = [3.5085132], b = [1.2542567]
Epoch 20: Loss = 0.6099, w = [2.3757546], b = [0.6878772]
Epoch 30: Loss = 0.0097, w = [2.9041555], b = [0.9520777]
Epoch 40: Loss = 0.0673, w = [3.2301683], b = [1.1150843]
Epoch 50: Loss = 0.0047, w = [3.0033822], b = [1.0016911]
Epoch 60: Loss = 0.0067, w = [2.919304], b = [0.9596519]
Epoch 70: Loss = 0.0012, w = [3.0093515], b = [1.0046756]
Epoch 80: Loss = 0.0006, w = [3.0269167], b = [1.0134584]
Epoch 90: Loss = 0.0002, w = [2.9932268], b = [0.99661344]
