In [1]:
import numpy as np

In [2]:
def df_w(W):
    w1, w2 = W
    dw1 = 0.2*w1
    dw2 = 4*w2
    return np.array([dw1, dw2])

# Gradient Descent

In [8]:
def sgd(W, dw, lr):
    W = W - lr * dw
    return W


def train_p1(optimizer, lr, epochs):
    W = np.array([-5, -2], dtype=np.float32)
    results = [W.tolist()]
    
    for epoch in range(epochs):
        # Compute gradient
        dw = df_w(W)
        
        # Update W using the provided optimization function
        W = optimizer(W, dw, lr)

        results.append(W.tolist())
        
        print(f"Epoch {epoch + 1}: w1 = {W[0]}, w2 = {W[1]}")
    return results


results = train_p1(sgd, lr=0.4, epochs=30)

Epoch 1: w1 = -4.6, w2 = 1.2000000000000002
Epoch 2: w1 = -4.231999999999999, w2 = -0.7200000000000002
Epoch 3: w1 = -3.893439999999999, w2 = 0.43200000000000016
Epoch 4: w1 = -3.5819647999999993, w2 = -0.2592000000000001
Epoch 5: w1 = -3.2954076159999994, w2 = 0.1555200000000001
Epoch 6: w1 = -3.0317750067199993, w2 = -0.09331200000000006
Epoch 7: w1 = -2.7892330061823993, w2 = 0.05598720000000004
Epoch 8: w1 = -2.5660943656878072, w2 = -0.03359232000000004
Epoch 9: w1 = -2.360806816432783, w2 = 0.020155392000000022
Epoch 10: w1 = -2.1719422711181604, w2 = -0.012093235200000017
Epoch 11: w1 = -1.9981868894287076, w2 = 0.007255941120000012
Epoch 12: w1 = -1.838331938274411, w2 = -0.0043535646720000085
Epoch 13: w1 = -1.691265383212458, w2 = 0.0026121388032000056
Epoch 14: w1 = -1.5559641525554613, w2 = -0.0015672832819200039
Epoch 15: w1 = -1.4314870203510244, w2 = 0.0009403699691520025
Epoch 16: w1 = -1.3169680587229424, w2 = -0.0005642219814912016
Epoch 17: w1 = -1.211610614025107, w

## Gradient Descent + Momentum

In [9]:
def sgd_momentum(W, V, dw, lr, beta):
    V = beta * V + (1 - beta) * dw
    W = W - lr * V
    return W, V


def train_p2(optimizer, lr, beta, epochs):
    W = np.array([-5, -2], dtype=np.float32)
    V = np.array([0, 0], dtype=np.float32)
    results = [W.tolist()]
    
    for epoch in range(epochs):
        # Compute gradient
        dw = df_w(W)
        
        # Update W using the provided optimization function
        W, V = optimizer(W, V, dw, lr, beta)

        results.append(W.tolist())
        
        print(f"Epoch {epoch + 1}: w1 = {W[0]}, w2 = {W[1]}")
    return results


results = train_p2(sgd_momentum, lr=0.6, beta=0.5, epochs=30)

Epoch 1: w1 = -4.7, w2 = 0.3999999999999999
Epoch 2: w1 = -4.268, w2 = 1.12
Epoch 3: w1 = -3.7959199999999997, w2 = 0.13600000000000012
Epoch 4: w1 = -3.3321248, w2 = -0.5192
Epoch 5: w1 = -2.900299712, w2 = -0.22376000000000013
Epoch 6: w1 = -2.5103691852799996, w2 = 0.19247199999999992
Epoch 7: w1 = -2.1647817708031996, w2 = 0.16962160000000004
Epoch 8: w1 = -1.8621011573166075, w2 = -0.04534951999999995
Epoch 9: w1 = -1.599034781134315, w2 = -0.09841565599999999
Epoch 10: w1 = -1.3715595061751098, w2 = -0.0068499368000000255
Epoch 11: w1 = -1.1755282983250006, w2 = 0.04715284695999999
Epoch 12: w1 = -1.006980996500446, w2 = 0.01757082248800001
Epoch 13: w1 = -0.8622884857981419, w2 = -0.018305176733599993
Epoch 14: w1 = -0.7382049212991013, w2 = -0.01427696426408
Epoch 15: w1 = -0.6318708437716349, w2 = 0.004869499087575998
Epoch 16: w1 = -0.5407915543816036, w2 = 0.0085993318583128
Epoch 17: w1 = -0.4628044164236918, w2 = 0.00014505001370584102
Epoch 18: w1 = -0.39604258245931434, 

## RMSProp

In [10]:
def rmsprop(W, S, dw, gamma, ep, lr):
    S = gamma * S + (1 - gamma) * dw**2
    W = W - lr * (dw / np.sqrt(S + ep))
    return W, S

def train_p3(optimizer, gamma, ep, lr, epochs):
    W = np.array([-5, -2], dtype=np.float32)
    S = np.array([0, 0], dtype=np.float32)
    results = [W.tolist()]
    
    for epoch in range(epochs):
        # Compute gradient
        dw = df_w(W)
        
        # Update W using the provided optimization function
        W, S = optimizer(W, S, dw, gamma, ep, lr)

        results.append(W.tolist())
        
        print(f"Epoch {epoch + 1}: w1 = {W[0]}, w2 = {W[1]}")
    return results


results = train_p3(rmsprop, gamma=0.9, ep=1e-6, lr=0.3 ,epochs=30)

Epoch 1: w1 = -4.051321445330401, w2 = -1.05131677606536
Epoch 2: w1 = -3.435197540710313, w2 = -0.59152342591607
Epoch 3: w1 = -2.9589369293489796, w2 = -0.32943940499816177
Epoch 4: w1 = -2.5654628900149308, w2 = -0.17756481857235581
Epoch 5: w1 = -2.22920552377513, w2 = -0.09163256127358085
Epoch 6: w1 = -1.9362675156207105, w2 = -0.04494498658095136
Epoch 7: w1 = -1.6781768574274967, w2 = -0.020814229601575286
Epoch 8: w1 = -1.4493498477990567, w2 = -0.009035585595074872
Epoch 9: w1 = -1.245881993508816, w2 = -0.0036459054729884493
Epoch 10: w1 = -1.0649030085077544, w2 = -0.001353509894550125
Epoch 11: w1 = -0.9042022597717996, w2 = -0.00045644443087383853
Epoch 12: w1 = -0.7619964948529877, w2 = -0.00013756292811056234
Epoch 13: w1 = -0.6367784991349714, w2 = -3.626010194868877e-05
Epoch 14: w1 = -0.5272152373016313, w2 = -8.113374556116915e-06
Epoch 15: w1 = -0.43207850492177147, w2 = -1.4747341183766382e-06
Epoch 16: w1 = -0.3501985066951054, w2 = -2.0278399084030003e-07
Epoch 

## Adam

In [13]:
def adam(W, V, S, t, dw, beta, ep, lr):
    beta1, beta2 = beta
    V = beta1 * V + (1 - beta1) * dw
    S = beta2 * S + (1 - beta2) * dw**2
    
    V_corr = V / (1 - beta1**t)
    S_corr = S / (1 - beta2**t)

    W = W - lr * (V_corr / np.sqrt(S_corr) + ep)
    return W, V, S


def train_p4(optimizer, beta, ep, lr, epochs):
    W = np.array([-5, -2], dtype=np.float32)
    V = np.array([0, 0], dtype=np.float32)
    S = np.array([0, 0], dtype=np.float32)
    results = [W.tolist()]
    
    for epoch in range(epochs):
        # Compute gradient
        dw = df_w(W)
        
        # Update W using the provided optimization function
        t = epoch + 1
        W, V, S = optimizer(W, V, S, t, dw, beta, ep, lr)

        results.append(W.tolist())
        
        print(f"Epoch {epoch + 1}: w1 = {W[0]}, w2 = {W[1]}")
    return results


results = train_p4(adam, beta=[0.9, 0.999], ep=1e-6, lr=0.2, epochs=30)

Epoch 1: w1 = -4.8000002, w2 = -1.8000002
Epoch 2: w1 = -4.600254775652752, w2 = -1.6008248541864185
Epoch 3: w1 = -4.400948465362033, w2 = -1.4031731381331956
Epoch 4: w1 = -4.202277614312448, w2 = -1.2078789048955492
Epoch 5: w1 = -4.004450290938027, w2 = -1.0159282878015319
Epoch 6: w1 = -3.8076863242991874, w2 = -0.8284740581959538
Epoch 7: w1 = -3.61221724703128, w2 = -0.6468427212355607
Epoch 8: w1 = -3.41828612684455, w2 = -0.47252891000982167
Epoch 9: w1 = -3.2261472680014704, w2 = -0.307170711028778
Epoch 10: w1 = -3.0360657628979464, w2 = -0.15250000808216777
Epoch 11: w1 = -2.848316872982822, w2 = -0.010264778912287581
Epoch 12: w1 = -2.663185217944753, w2 = 0.11787395890527219
Epoch 13: w1 = -2.4809637525779293, w2 = 0.23046003291351314
Epoch 14: w1 = -2.301952512221825, w2 = 0.32635712988700927
Epoch 15: w1 = -2.1264571103667507, w2 = 0.4048404051336537
Epoch 16: w1 = -1.9547869761167105, w2 = 0.46564811939562323
Epoch 17: w1 = -1.787253324847357, w2 = 0.5089865667782412
E