In [7]:
import numpy as np

class AdaGrad:
    """AdaGrad optimizer with per-element adaptive step sizes.

    A running sum of squared gradients is kept for every parameter. Each
    update divides the base learning rate by the square root of that sum
    (plus ``eps``), so elements that have accumulated large gradients take
    progressively smaller steps.
    """

    def __init__(self, params, learning_rate=0.01, eps=1e-8):
        """Store the hyperparameters and create zeroed accumulators.

        Args:
            params: Mapping of parameter name to value. The mapping is kept
                by reference and its entries are updated by ``step``.
            learning_rate: Base step size before adaptive scaling.
            eps: Constant added to the square-rooted accumulator in the
                divisor of the update.
        """
        self.params = params
        self.lr = learning_rate
        self.eps = eps

        # One accumulator per parameter, created with np.zeros_like so it
        # mirrors the parameter value it belongs to.
        accumulators = {}
        for name, value in params.items():
            accumulators[name] = np.zeros_like(value)
        self.G = accumulators

    def step(self, grads):
        """Apply one AdaGrad update for every entry in ``grads``.

        Args:
            grads: Mapping of parameter name to gradient. Every key must
                already have an accumulator in ``self.G`` and an entry in
                ``self.params``.
        """
        for name, gradient in grads.items():
            # Fold the new squared gradient into the running total first, so
            # the current gradient already influences its own step size.
            self.G[name] += np.square(gradient)
            denominator = np.sqrt(self.G[name]) + self.eps
            step_size = self.lr / denominator
            self.params[name] -= step_size * gradient

# Example usage: see the following cells.


In [8]:
# Preview the zero-initialised accumulators that AdaGrad builds in __init__:
# np.zeros_like yields one all-zero array per parameter with the same shape
# and dtype as that parameter.
params = {'w1': np.array([0.1, 0.2]), 'w2': np.array([0.3, 0.4])}
# Deliberately not named `dict`: rebinding that name would shadow the builtin
# for every later cell that shares this kernel.
accumulators = {param: np.zeros_like(value) for param, value in params.items()}
print(accumulators)

{'w1': array([0., 0.]), 'w2': array([0., 0.])}


In [9]:
# Start from fresh parameter values; the optimizer keeps a reference to this
# dict and updates its arrays as steps are taken.
params = {'w1': np.array([0.1, 0.2]), 'w2': np.array([0.3, 0.4])}
optimizer = AdaGrad(params)

num_steps = 5
w1_unit_grad = np.array([1.0, 1.0])

# Run five updates, printing the parameters around each step and the
# accumulated squared gradients after it.
for t in range(num_steps):
    # w1's gradient grows linearly with the step number, while w2's gradient
    # stays small and constant.
    grads = {}
    grads['w1'] = w1_unit_grad * (t + 1)
    grads['w2'] = np.array([0.1, 0.1])

    print(f"Step {t+1}")
    print(f"Before update: w1 = {params['w1']}, w2 = {params['w2']}")

    optimizer.step(grads)

    print(f"After update: w1 = {params['w1']}, w2 = {params['w2']}")
    print(f"Accumulated G: G1 = {optimizer.G['w1']}, G2 = {optimizer.G['w2']}")
    print()

Step 1
Before update: w1 = [0.1 0.2], w2 = [0.3 0.4]
After update: w1 = [0.09 0.19], w2 = [0.29 0.39]
Accumulated G: G1 = [1. 1.], G2 = [0.01 0.01]

Step 2
Before update: w1 = [0.09 0.19], w2 = [0.29 0.39]
After update: w1 = [0.08105573 0.18105573], w2 = [0.28292893 0.38292893]
Accumulated G: G1 = [5. 5.], G2 = [0.02 0.02]

Step 3
Before update: w1 = [0.08105573 0.18105573], w2 = [0.28292893 0.38292893]
After update: w1 = [0.07303789 0.17303789], w2 = [0.27715543 0.37715543]
Accumulated G: G1 = [14. 14.], G2 = [0.03 0.03]

Step 4
Before update: w1 = [0.07303789 0.17303789], w2 = [0.27715543 0.37715543]
After update: w1 = [0.06573492 0.16573492], w2 = [0.27215543 0.37215543]
Accumulated G: G1 = [30. 30.], G2 = [0.04 0.04]

Step 5
Before update: w1 = [0.06573492 0.16573492], w2 = [0.27215543 0.37215543]
After update: w1 = [0.05899292 0.15899292], w2 = [0.2676833 0.3676833]
Accumulated G: G1 = [55. 55.], G2 = [0.05 0.05]



In [10]:
# Show the parameter values left after the five optimizer steps above.
params

{'w1': array([0.05899292, 0.15899292]), 'w2': array([0.2676833, 0.3676833])}