In [35]:
import numpy as np
from sklearn.linear_model import LinearRegression, SGDRegressor
import plotly.express as px
import plotly.graph_objects as go

### Normal Equation

In [2]:
# Seed the global NumPy RNG so the synthetic dataset (and every fit derived
# from it) is the same on each Restart & Run All.
np.random.seed(42)

# Synthetic linear data: y = 4 + 3x + Gaussian noise, with x uniform in [0, 2).
x = 2 * np.random.rand(100,1)
y = 4 + 3 * x + np.random.randn(100,1)
print(type(x), x.shape, y.shape)

<class 'numpy.ndarray'> (100, 1) (100, 1)


In [3]:
# Quick look at the raw samples; the single-column arrays are flattened to 1-D for plotly.
fig = px.scatter(x=x.ravel(), y=y.ravel())
fig.show()

In [4]:
# Closed-form least squares via the normal equation: (X^T X) theta = X^T y.
# The bias column of ones is sized from the data instead of a literal 100.
x_b = np.c_[np.ones((len(x), 1)), x]
# Solve the linear system directly rather than forming an explicit inverse:
# same solution, with less work and less round-off error.
theta_best = np.linalg.solve(x_b.T.dot(x_b), x_b.T.dot(y))
theta_best

array([[4.27515369],
       [2.92584104]])

In [5]:
# Evaluate the fitted line at the two ends of the x range.
x_new = np.array([[0], [2]])
# Bias column sized from x_new so the two cannot drift out of sync.
x_new_b = np.c_[np.ones((len(x_new), 1)), x_new]
y_pred = x_new_b.dot(theta_best)
y_pred

array([[ 4.27515369],
       [10.12683578]])

In [6]:
# Overlay the fitted line (evaluated at the x_new endpoints) on the samples.
fig = go.Figure(
    data=[
        go.Scatter(x=x[:,0], y=y[:,0], mode="markers", name="Data"),
        go.Scatter(x=x_new[:,0], y=y_pred[:,0], mode="lines", name="Prediction"),
    ]
)
fig.show()

In [7]:
# Cross-check the normal-equation result against scikit-learn's LinearRegression.
lin_reg = LinearRegression().fit(x, y)  # fit() returns the estimator itself
print(lin_reg.intercept_, lin_reg.coef_)
print(lin_reg.predict(x_new))

[4.27515369] [[2.92584104]]
[[ 4.27515369]
 [10.12683578]]


### Batch Gradient Descent

In [8]:
# Batch gradient descent on the mean-squared-error cost.
eta = 0.1             # learning rate
n_iterations = 1000
m = len(x_b)          # number of training instances, taken from the data

# Random starting point with one parameter per column of the design matrix.
theta = np.random.randn(x_b.shape[1], 1)

for iteration in range(n_iterations):
    # Gradient over the full training set: (2/m) * X^T (X theta - y)
    gradients = 2/m * x_b.T.dot(x_b.dot(theta) - y)
    theta = theta - eta * gradients

# Display the fitted parameters so they can be compared with theta_best.
theta

### Stochastic GD

In [10]:
# Display the dimensions (rows, columns) of the design matrix x_b.
x_b.shape

(100, 2)

In [12]:
# Set-up for stochastic gradient descent.
theta_path_sgd = list()   # receives theta after every SGD step
m = x_b.shape[0]          # number of training instances
n_epochs = 50
t0 = 5                    # numerator of the learning schedule
t1 = 50                   # offset added to the step count in the learning schedule

In [36]:
def learning_schedule(t):
    """Return the learning rate for step ``t``, computed as ``t0 / (t + t1)``.

    ``t0`` and ``t1`` are looked up in the notebook's global namespace at call time.
    """
    shifted_step = t + t1
    return t0 / shifted_step

theta = np.random.randn(2,1)
# Start from an empty path each time this cell runs; otherwise re-running it
# keeps appending to the list created in the set-up cell and mixes several
# training runs together.
theta_path_sgd = []
fig = go.Figure()

for epoch in range(n_epochs):
    for i in range(m):
        if epoch == 0 and i < 20:
            # Draw the current model for the first 20 steps. A step-local name
            # is used so the normal-equation `y_pred` is not overwritten.
            y_pred_step = x_new_b.dot(theta)
            # The initial (random) model is red/dashed, later steps are solid blue.
            style_line = dict(color='blue', dash='solid') if i > 0 else dict(color='red', dash='dash')
            fig.add_trace(go.Scatter(x=x_new[:,0], y=y_pred_step[:,0], mode='lines', line=style_line))
        # Pick one training instance at random (with replacement); slicing
        # keeps xi and yi two-dimensional.
        random_idx = np.random.randint(m)
        xi = x_b[random_idx:random_idx+1]
        yi = y[random_idx:random_idx+1]
        # Gradient of the squared error for this single instance.
        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
        # The learning rate decays with the global step count.
        eta = learning_schedule(epoch * m + i)
        theta = theta - eta * gradients
        theta_path_sgd.append(theta)

fig.add_trace(go.Scatter(x=x[:,0], y=y[:,0], mode='markers', name='Actual Data'))
fig.update_layout(xaxis_title='x', yaxis_title='y', title='First 20 steps of SGD')
fig.show()


In [31]:
# Display the parameter vector theta as currently held in the kernel.
theta

array([[4.22502323],
       [2.91297223]])

In [38]:
# using SGDRegressor
# random_state pins the estimator's internal shuffling so the fit is repeatable;
# penalty=None disables regularisation. y is flattened to a 1-D target for fit().
sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None, eta0=0.1, random_state=42)
sgd_reg.fit(x, y.ravel())
print(sgd_reg.intercept_, sgd_reg.coef_)

[4.23987239] [2.88968802]


### Mini-batch GD