# Gradient Descent
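Gradient descent is an iterative optimization algorithm: starting from an initial guess, it repeatedly moves the parameters a small step in the direction opposite to the gradient of a cost function. This notebook uses it to fit a linear regression model to synthetic data and animates how the fitted line evolves over the epochs.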

In [1]:
import numpy as np
import scipy as sc
import IPython.display as display
import ipywidgets as widgets
import bqplot.pyplot as plt
from bqplot import *
from ipywidgets import *

In [2]:
# Synthetic linear-regression data set: one random feature plus a constant
# bias column, with targets generated from the known weights in `theta`
# and perturbed by Gaussian noise.
n = 100
theta = np.array([[2], [3]])             # true [slope, intercept] as a column
X = 10 * np.random.randn(n, 1)           # raw feature values
ones = np.ones((n, 1))                   # bias column
X = np.hstack((X, ones))                 # design matrix, shape (n, 2)
y = X @ theta + 3 * np.random.randn(n, 1)

In [3]:
def cost(y_hat, y):
    """Return the squared-error cost of predictions against targets.

    Computes ``(y_hat - y).T @ (y_hat - y)`` divided by the number of rows
    of ``y``. For column vectors of shape (n, 1) this is the mean squared
    error, returned as a (1, 1) array.
    """
    residual = y_hat - y
    return residual.T.dot(residual) / y.shape[0]

In [4]:
def gradient_descent(X, y, lr=1e-4, epochs=100):
    """Fit linear-regression weights with batch gradient descent.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Design matrix (include a column of ones if a bias term is wanted).
    y : array of shape (n_samples, 1) or (n_samples,)
        Targets. A 1-D array is reshaped to a column so that ``y_hat - y``
        does not silently broadcast to an (n_samples, n_samples) matrix.
    lr : float
        Step size applied to the gradient on every epoch.
    epochs : int
        Number of parameter updates to perform.

    Returns
    -------
    (loss, lst_theta_hat)
        ``loss`` is a list of ``(epoch, cost)`` tuples with 1-based epochs;
        each cost is evaluated with the parameters in effect *before* that
        epoch's update. ``lst_theta_hat`` holds ``epochs + 1`` arrays of
        shape (n_features, 1): the all-zero starting point followed by the
        parameters after every update.
    """
    y = np.asarray(y)
    if y.ndim == 1:
        y = y[:, np.newaxis]

    theta_hat = np.zeros((X.shape[1], 1))
    loss = []
    lst_theta_hat = [theta_hat.copy()]
    for epoch in range(1, epochs + 1):
        y_hat = X @ theta_hat
        err = y_hat - y
        # NOTE: this is the *summed* gradient X^T (y_hat - y); it is not
        # divided by the number of samples (nor multiplied by 2), so `lr`
        # must be chosen correspondingly small. The scaling is kept as-is
        # because existing callers tune `lr` against it.
        grad = X.T @ err
        theta_hat -= lr * grad
        # Store a copy: theta_hat is updated in place on the next epoch.
        lst_theta_hat.append(theta_hat.copy())
        loss.append((epoch, cost(y_hat, y).item()))

    return (loss, lst_theta_hat)

In [5]:
# Train for 700 epochs with a small step size, then show the final
# parameter estimate (last entry of the per-epoch history).
loss, lst_theta_hat = gradient_descent(X, y, lr=1e-5, epochs=700)
print(lst_theta_hat[-1])

[[1.9949709 ]
 [1.59485801]]


In [6]:
# Evenly spaced inputs used to draw the fitted line. The column shape is
# derived from the linspace length itself (-1) rather than from the global
# `n`, so changing the data-set size no longer breaks this reshape.
X_0 = np.linspace(-30, 30, 100).reshape((-1, 1))
# Append the bias column so X_0 has the same layout as the design matrix X.
X_0 = np.concatenate((X_0, np.ones((X_0.shape[0], 1))), axis=1)
# Predictions of the final model on the evenly spaced inputs.
y_0 = np.dot(X_0, lst_theta_hat[-1])

# `loss` is a list of (epoch, cost) pairs -> array with one row per epoch.
learning_curve = np.array(loss)
plt.figure(figsize=(10,7))
plt.plot(learning_curve[:,0].flatten(), learning_curve[:,1].flatten(), colors=['orange'])
plt.show()

VBox(children=(Figure(axes=[Axis(scale=LinearScale()), Axis(orientation='vertical', scale=LinearScale())], fig…

In [7]:
# Used both as the figure's animation_duration and as the Play widget's
# tick interval, so one animation finishes as the next step starts.
time_interval = 500
# Start the animation from the initial (pre-training) parameters.
y_0 = np.dot(X_0, lst_theta_hat[0])

# NOTE(review): these scales/axes are created but not referenced again in
# the visible notebook; they are kept in case later cells rely on them.
x_sc = LinearScale()
y_sc = LinearScale()

ax_x = Axis(label='X', scale=x_sc, grid_lines='solid')
ax_y = Axis(label='y', scale=y_sc, orientation='vertical', side='left', grid_lines='solid')

fig = plt.figure(animation_duration=time_interval)
scatter = plt.scatter(x=X[:,0].flatten(), y=y.flatten(), visible=True, label="Data points")
reg_line = plt.plot(x=X_0[:,0].flatten(), y=y_0.flatten(), colors=['red'], label="Regression line")

plt.ylim(-70,70)
# Each slider step stands for 10 epochs. The largest valid step is the one
# whose index (step * 10) is still inside lst_theta_hat, hence (len - 1) // 10.
# Integer division also avoids handing a float to the integer-valued widgets.
max_step = (len(lst_theta_hat) - 1) // 10
slider = IntSlider(min=0, max=max_step, step=1, description="Epochs x 10", value=0)
play = Play(min=0, max=max_step, interval=time_interval)

def slider_update(change):
    """Redraw the regression line for the epoch selected on the slider.

    ``change`` is the notification passed by ``slider.observe``; it is not
    used because the current position is read from ``slider.value``.
    """
    # Each slider step stands for 10 epochs; clamp so the index can never
    # run past the end of the recorded parameter history.
    idx = min(slider.value * 10, len(lst_theta_hat) - 1)
    theta = lst_theta_hat[idx]
    # theta is a (2, 1) column vector, so its two entries are theta[0, 0]
    # and theta[1, 0]. str.format needs {}-style placeholders; the former
    # %-style ones were never interpolated.
    plt.title('[{:.2f} , {:.2f}]'.format(theta[0, 0], theta[1, 0]))
    reg_line.y = np.dot(X_0, theta).flatten()

# Re-run the callback whenever the slider's value changes.
slider.observe(slider_update, 'value')

In [8]:
# Tie the Play control and the slider together in the front end so that
# advancing one moves the other, then stack the controls above the figure.
jslink((play, 'value'), (slider, 'value'))
VBox(children=[HBox(children=[play, slider]), fig])

VBox(children=(HBox(children=(Play(value=0, interval=500, max=70), IntSlider(value=0, description='Epochs x 10…