# 矩阵梯度下降

In [1]:
import numpy as np

In [2]:
def dj(theta,x_b,y):
    return x_b.T.dot(x_b.dot(theta)-y)*2/len(y)

In [3]:
def score(x_test,y_test,theta):
    x_b=np.hstack([np.ones((len(x_test),1)),x_test])
    pre=x_b.dot(theta)
    R1=np.sum((y_test-y_test.mean())**2)
    R2=np.sum((y_test-pre)**2)
    return 1-R2/R1

In [4]:
def gradient_descent(x_train,y,theta,eta,iteration=1000):
    x_b=np.hstack([np.ones((len(x_train),1)),x_train])
    for i in range(iteration):
        theta=theta-eta*dj(theta,x_b,y)
    return theta

In [5]:
from sklearn import datasets

In [6]:
boston =datasets.load_boston()
x=boston.data
y=boston.target
x=x[y<50.0]
y=y[y<50.0]

In [7]:
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=666)

In [8]:
theta=np.zeros(x_train.shape[1]+1)
gradient_descent(x_train,y_train,theta,eta=0.000001,iteration=1000)

array([ 0.00144176, -0.01797504,  0.07080046, -0.01306467,  0.00012196,
        0.00045943,  0.01443104, -0.00410262,  0.00779672, -0.0035714 ,
        0.00274019,  0.01480819,  0.05471852, -0.03105318])

In [9]:
theta=gradient_descent(x_train,y_train,theta,eta=0.000001,iteration=10000)
score(x_test,y_test,theta)

0.30334322896870713

# 归一化

In [10]:
from sklearn.preprocessing import StandardScaler

In [11]:
standardScaler=StandardScaler()
standardScaler.fit(x_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [12]:
x_train_standard=standardScaler.transform(x_train)

In [13]:
theta=gradient_descent(x_train_standard,y_train,theta,eta=0.01,iteration=1000)
score(x_train_standard,y_train,theta)

0.7658815825354975

In [14]:
x_test_standard=standardScaler.transform(x_test)
theta=gradient_descent(x_train_standard,y_train,theta,eta=0.01,iteration=1000)
score(x_test_standard,y_test,theta)

0.8005300562334523

# 梯度下降的优势

In [15]:
m=1000
n=5000

big_x=np.random.normal(size=(m,n))
theta=np.random.uniform(0.0,100.0,size=n+1)
big_y=big_x.dot(theta[1:])+theta[0]+np.random.normal(0,10,size=m)

In [16]:
%%time 
gradient_descent(big_x,big_y,theta,eta=0.01,iteration=1000)

Wall time: 7.92 s


array([ 7.43067799, 12.57910423, 52.40626715, ..., 83.76394577,
       85.2399958 , 56.00662159])