### 关于梯度的调试


![image.png](http://upload-images.jianshu.io/upload_images/5760375-9b4bd2c6238cf6e1.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)


求梯度时,除了先推导出导数的解析表达式再计算之外,还可以直接根据导数的定义,用数值方法近似求得.

根据导数定义得到的近似式,每求梯度的一个分量都要额外计算两次损失函数,运算速度较慢,因此一般只用于调试(debug),即验证解析梯度的推导是否正确.

In [8]:
import numpy as np
import matplotlib.pyplot as plt

In [9]:
np.random.seed(666)  # fixed seed so the generated data is reproducible
X = np.random.random((1000, 10))  # 1000 samples x 10 features, uniform in [0, 1)

In [21]:
true_theta = np.arange(1, 12, dtype=float)  # [1., 2., ..., 11.]: 11 ground-truth parameters

In [22]:
X_b = np.hstack([np.ones((len(X), 1)), X])  # prepend a column of ones for the intercept term
y = X_b.dot(true_theta) + np.random.normal(size=len(X_b))  # one standard-normal noise draw per sample

In [24]:
X_b.shape

(1000, 11)

In [25]:
y.shape

(1000,)

In [26]:
def J(theta, X_b, y):
    """Return the mean squared error of the linear model ``X_b.dot(theta)``.

    Args:
        theta: Parameter vector passed to ``X_b.dot``.
        X_b: Design matrix; ``len(X_b)`` is used as the sample count.
        y: Target values the predictions are compared against.

    Returns:
        ``sum((y - X_b.dot(theta)) ** 2) / len(X_b)``, or ``float('inf')``
        when the computation raises an ordinary exception (for example a
        shape mismatch).
    """
    try:
        return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
    except Exception:
        # Best effort: a cost that cannot be computed is reported as
        # infinitely bad. Catching ``Exception`` instead of using a bare
        # ``except`` lets KeyboardInterrupt and SystemExit propagate, so a
        # long-running descent can still be interrupted.
        return float('inf')

In [29]:
def dJ_math(theta, X_b, y):
    """Closed-form gradient of the mean-squared-error cost at ``theta``.

    Computes ``X_b.T.dot(X_b.dot(theta) - y) * 2 / len(y)``.

    Args:
        theta: Parameter vector.
        X_b: Design matrix.
        y: Target values; ``len(y)`` is the divisor.

    Returns:
        The gradient, one entry per component of ``theta``.
    """
    residual = X_b.dot(theta) - y
    projected = X_b.T.dot(residual)
    return projected * 2. / len(y)

In [54]:
def dJ_debug(theta, X_b, y, epsilon=0.01):
    """Approximate the gradient of ``J`` at ``theta`` by central differences.

    For every component ``i`` the cost is evaluated at ``theta`` shifted by
    ``+epsilon`` and ``-epsilon`` along that component, and the slope
    ``(J(theta + eps) - J(theta - eps)) / (2 * eps)`` is stored. This needs
    two evaluations of ``J`` per component, so it is meant for checking an
    analytic gradient rather than for speed.

    Args:
        theta: Point at which the gradient is approximated.
        X_b: Design matrix forwarded to ``J``.
        y: Target values forwarded to ``J``.
        epsilon: Half-width of the finite-difference step.

    Returns:
        A float array with one partial-derivative estimate per component
        of ``theta``.
    """
    # Work in floating point: with an integer-dtype ``theta`` the in-place
    # ``+= epsilon`` below cannot store its float result and NumPy raises a
    # casting error.
    theta = np.asarray(theta, dtype=float)

    # The result is the gradient of J, so allocate one slot per component.
    res = np.empty(len(theta))
    for i in range(len(theta)):
        theta_plus = theta.copy()
        theta_plus[i] += epsilon
        theta_minus = theta.copy()
        theta_minus[i] -= epsilon
        res[i] = (J(theta_plus, X_b, y) - J(theta_minus, X_b, y)) / (2 * epsilon)
    return res

In [55]:
def gradient_descent(dJ, X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
    """Minimise ``J`` by gradient descent using the supplied gradient function.

    Args:
        dJ: Gradient function, called as ``dJ(theta, X_b, y)``.
        X_b: Design matrix forwarded to ``dJ`` and ``J``.
        y: Target values forwarded to ``dJ`` and ``J``.
        initial_theta: Starting parameter vector. It is rebound, never
            updated in place.
        eta: Learning rate multiplying the gradient in each step.
        n_iters: Upper bound on the number of update steps.
        epsilon: Stop as soon as the absolute change of ``J`` between two
            consecutive steps is smaller than this value.

    Returns:
        The parameter vector after the last update step.
    """
    theta = initial_theta
    # Carry the cost forward between iterations so every step evaluates J
    # once, instead of recomputing the previous step's cost each time.
    cost = J(theta, X_b, y)
    # Number of update steps completed so far.
    i_iter = 0

    while i_iter < n_iters:
        gradient = dJ(theta, X_b, y)
        theta = theta - eta * gradient

        last_cost, cost = cost, J(theta, X_b, y)
        if abs(cost - last_cost) < epsilon:
            break

        i_iter += 1

    return theta

In [56]:
X_b = np.hstack((np.ones((X.shape[0], 1)), X))  # design matrix with a leading column of ones
initial_theta = np.zeros(X_b.shape[-1])  # start from the zero vector, one entry per column of X_b
eta = 0.01  # learning rate

%time gradient_descent(dJ_math,X_b,y,initial_theta,eta)

CPU times: user 1.47 s, sys: 17.7 ms, total: 1.49 s
Wall time: 768 ms


array([  1.10626998,   1.84238151,   3.01548087,   4.03193423,
         4.97307523,   6.09961984,   7.0143187 ,   7.92700623,
         9.03024753,  10.02123504,  10.96469276])

In [57]:
%time gradient_descent(dJ_debug,X_b,y,initial_theta,eta)

CPU times: user 10 s, sys: 152 ms, total: 10.2 s
Wall time: 5.28 s


array([  1.10626998,   1.84238151,   3.01548087,   4.03193423,
         4.97307523,   6.09961984,   7.0143187 ,   7.92700623,
         9.03024753,  10.02123504,  10.96469276])