<a href="https://colab.research.google.com/github/sidhu2690/ai-from-scratch/blob/main/00_optimizers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
m, c = 2.0, 3.0

X = [i for i in range(1, 1000)]
Y = [m * i + c for i in X]

In [11]:
def fx(w, b, x):
  return w * b + x

def gradient_w(w, b, x, y):
  return (fx(w, b, x) - y) * x

def gradient_b(w, b, x, y):
  return (fx(w, b, x) - y)

In [22]:
def adgrad():
  w, b = 0.0, 0.0
  eta = 5e-4
  epochs = 1000
  vw, vb, eps = 0.0, 0.0, 1e-8
  n = len(X)

  for epoch in range(epochs):
    dw, db = 0.0, 0.0
    for x, y in zip(X, Y):
      dw += gradient_w(w, b, x, y)
      db += gradient_b(w, b, x, y)

    dw /= n
    db /= n

    vw += dw ** 2
    vb += db ** 2

    w -= eta * dw * 1/((vw + eps)**(1/2))
    b -= eta * db * 1/((vb + eps)**(1/2))

  return w, b

In [23]:
w_pred, b_pred = adgrad()

In [24]:
y_pred = [w_pred * i + b_pred for i in X]


def mse(y_pred, Y):
  mse_vals = [(y_pred[i] - Y[i])**2 for i in range(len(y_pred))]
  return sum(mse_vals) / len(mse_vals)

mse(y_pred, Y)

1297659.9934906815

You can tune these parameters to obtain better loss values.




# RMSProp

In [31]:
def rmsprop():
  w, b = 0.0, 0.0
  eta = 1e-5
  epochs = 1000
  vw, vb, eps, beta = 0.0, 0.0, 1e-8, 0.7
  n = len(X)

  for epoch in range(epochs):
    dw, db = 0.0, 0.0
    for x, y in zip(X, Y):
      dw += gradient_w(w, b, x, y)
      db += gradient_b(w, b, x, y)
    vw = beta * vw + (1 - beta) * dw ** 2
    vb = beta * vb + (1 - beta) * db ** 2

    vw /= n
    vb /= n

    w -= eta * dw * 1 / ((vw + eps)**(1/2))
    b -= eta * db * 1 / ((vb + eps)**(1/2))
  return w, b

In [32]:
w_pred, b_pred = rmsprop()

In [33]:
y_pred = [w_pred * i + b_pred for i in X]


def mse(y_pred, Y):
  mse_vals = [(y_pred[i] - Y[i])**2 for i in range(len(y_pred))]
  return sum(mse_vals) / len(mse_vals)

mse(y_pred, Y)

678226.3996048633

You can tune these parameters to obtain better loss values.




# Adam

In [52]:
def adam():
  w, b = 0.0, 0.0
  eta = 1e-4
  epochs = 10
  vw, vb, mw, mb = 0.0, 0.0, 0.0, 0.0
  beta1, beta2, eps = 0.9, 0.999, 1e-8
  n = len(X)

  for t in range(1, epochs + 1):
    dw, db = 0.0, 0.0
    for x, y in zip(X, Y):
      dw += gradient_w(w, b, x, y)
      db += gradient_b(w, b, x, y)

    dw /= n
    db /= n

    mw = beta1 * mw + (1 - beta1) * dw
    mb = beta1 * mb + (1 - beta1) * db

    vw = beta2 * vw + (1 - beta2) * dw ** 2
    vb = beta2 * vb + (1 - beta2) * db ** 2

    mw_hat = mw / (1 - (beta1**t))
    mb_hat = mb / (1 - (beta1**t))

    vw_hat = vw / (1 - (beta2**t))
    vb_hat = vb / (1 - (beta2**t))

    w -= eta * mw_hat / ((vw_hat + eps)**(1/2))
    b -= eta * mb_hat / ((vb_hat + eps)**(1/2))

  return w, b

In [53]:
w_pred, b_pred = adam()

In [54]:
y_pred = [w_pred * i + b_pred for i in X]


def mse(y_pred, Y):
  mse_vals = [(y_pred[i] - Y[i])**2 for i in range(len(y_pred))]
  return sum(mse_vals) / len(mse_vals)

mse(y_pred, Y)

57158.86535762008

You can tune these parameters to obtain better loss values.


