## Problem

$$f(w_1, w_2) = 0.1w_1^2 + 2w_2^2 \;\;\;\;\;\;\;(1)$$

In [1]:
import numpy as np

### Adam

In [2]:
def df_w(W):
    w1, w2 = W
    dw1 = 0.2 * w1
    dw2 = 4 * w2
    dW = np.array([dw1, dw2])
    
    return dW

In [3]:
def Adam(W, dW, lr, V, S, beta1, beta2, t):
    epsilon = 1e-6
    V = beta1 * V + (1 - beta1) * dW
    S = beta2 * S + (1 - beta2) * (dW ** 2)

    V_corr = V / (1 - beta1 ** t)
    S_corr = S / (1 - beta2 ** t)

    W = W - lr * V_corr / (np.sqrt(S_corr) + epsilon)
    return W, V, S

In [4]:
def train_p1(optimizer, lr, epochs):
    W = np.array([-5, -2], dtype=np.float32)
    V = np.array([0, 0], dtype=np.float32)
    S = np.array([0, 0], dtype=np.float32)
    results = [W]

    for t in range(1, epochs + 1):
        dW = df_w(W)
        W, V, S = optimizer(W, dW, lr, V, S, beta1=0.9, beta2=0.999, t=t)
        results.append(W)
    return results

In [5]:
train_p1(Adam, lr=0.2, epochs=30)

[array([-5., -2.], dtype=float32),
 array([-4.8000002 , -1.80000002]),
 array([-4.60025478, -1.60082451]),
 array([-4.40094848, -1.40317262]),
 array([-4.20227764, -1.20787822]),
 array([-4.00445033, -1.01592745]),
 array([-3.80768638, -0.82847307]),
 array([-3.61221732, -0.64684159]),
 array([-3.41828623, -0.47252765]),
 array([-3.22614739, -0.30716934]),
 array([-3.03606592, -0.15249855]),
 array([-2.84831706, -0.01026326]),
 array([-2.66318543,  0.11787552]),
 array([-2.480964  ,  0.23046161]),
 array([-2.30195279,  0.3263587 ]),
 array([-2.12645742,  0.40484195]),
 array([-1.95478732,  0.46564961]),
 array([-1.7872537 ,  0.50898799]),
 array([-1.62416726,  0.53549442]),
 array([-1.46583566,  0.54617144]),
 array([-1.31256067,  0.54230812]),
 array([-1.16463526,  0.52540206]),
 array([-1.02234036,  0.4970906 ]),
 array([-0.88594163,  0.4590951 ]),
 array([-0.75568617,  0.41317781]),
 array([-0.63179919,  0.3611089 ]),
 array([-0.51448089,  0.30464048]),
 array([-0.40390346,  0.24548