In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [3]:
np.random.seed(1)

m = 100

x1 = 50 + 30 * np.random.rand(m, 1)
y = 135 + 0.5 * x1 + 3 * np.random.randn(m,1)

In [4]:
from sklearn.preprocessing import StandardScaler

scal = StandardScaler()
x1_scal = scal.fit_transform(x1)

x1_scal[:5]

array([[-0.23388437],
       [ 0.79634961],
       [-1.65000333],
       [-0.62345239],
       [-1.15190299]])

In [5]:
scal.mean_, scal.scale_

(array([64.57633783]), array([8.83204675]))

In [6]:
from sklearn.linear_model import SGDRegressor

sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None, eta0=.1)
sgd_reg.fit(x1_scal,y.ravel())

SGDRegressor(eta0=0.1, penalty=None)

In [7]:
sgd_reg.intercept_, sgd_reg.coef_

(array([167.59478302]), array([4.29447724]))

In [8]:
x1_new = [[50], [80]]
x1_new_scal = scal.transform(x1_new)

In [9]:
sgd_reg.predict(x1_new_scal)

array([160.50721286, 175.09435451])

In [10]:
#批次梯度下降法
X = np.c_[np.ones((m,1)), x1_scal]

In [12]:
n_iterations = 1000
eta = 0.1

theta = np.random.randn(2,1)

for t in range(n_iterations):
    gd = 2/m * X.T.dot(X.dot(theta) - y)
    theta = theta - eta* gd

In [13]:
theta

array([[167.53977684],
       [  4.13774774]])

In [14]:
#隨機梯度下降法
def learning_schedule(t, t0, t1):
    return t0 / (t+t1)

In [18]:
n_epochs = 50

theta = np.random.randint(m)

for epoch in range(n_epochs):
    for i in range(m):
        
        ind = np.random.randint(m)
        xi = X[ind:ind+1].T
        yi = y[ind:ind+1]
        
        eta = learning_schedule(epoch * m + i, 5, 50)
        
        gd = 2* xi.dot(xi.T.dot(theta) - yi)
        theta = theta - eta * gd

In [19]:
theta

array([[167.5660274 , 167.5660274 ],
       [  4.29021666,   4.29021666]])

In [20]:
#小批次梯度下降法

In [23]:
n_epochs = 50
minibatch_size = 20

theta = np.random.randn(2,1)

t=0
for epoch in range(n_epochs):
    
    ind = np.random.permutation(m)
    X_shuffled = X[ind]
    y_shuffled = y[ind]
    
    for i in range(0, m, minibatch_size):
        
        Xi = X_shuffled[i:i+minibatch_size]
        yi = y_shuffled[i:i+minibatch_size]
        
        t+=1
        eta = learning_schedule(t, 5, 50)
        
        gd = 2 / minibatch_size * Xi.T.dot(Xi.dot(theta) - yi)
        theta = theta - eta * gd

In [24]:
theta

array([[167.54010656],
       [  4.13680857]])