In [28]:
import numpy as np

In [39]:
# Fix the global NumPy seed so the synthetic data below is reproducible.
np.random.seed(666)
# Feature matrix: 1000 samples x 10 features, drawn uniformly from [0, 1).
x = np.random.random(size=(1000,10))
# Ground-truth coefficients 1..11 (11 values: one more than the number of
# feature columns, leaving room for an intercept term).
theta = np.arange(1,12,dtype = float)
x.shape

(1000, 10)

In [40]:
# Prepend a column of ones to x so the first coefficient acts as the intercept.
x_b =  np.column_stack([np.ones((len(x),1)),x])
# Linear targets from the true theta plus standard-normal noise (one draw per sample).
y = x_b.dot(theta) + np.random.normal(size=1000)
# Only the last bare expression of a cell is displayed, so the two shape
# lookups below produce no visible output; `theta` is what gets shown.
x.shape
y.shape
theta

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])

In [41]:
def J(theta,x_b,y):
    """Mean squared error of the linear model ``x_b.dot(theta)`` against ``y``.

    Parameters
    ----------
    theta : coefficient vector, one entry per column of ``x_b``.
    x_b   : design matrix (samples in rows).
    y     : target vector, one entry per row of ``x_b``.

    Returns
    -------
    The sum of squared residuals divided by ``len(y)``, or ``float('inf')``
    when the computation raises an arithmetic error (overflow etc.).

    Raises
    ------
    Anything that is not an arithmetic failure (for example ``ValueError`` on
    mismatched shapes) is propagated instead of being disguised as an
    infinite cost.
    """
    try:
        residual = y - x_b.dot(theta)
        return np.sum(residual**2)/len(y)
    except ArithmeticError:
        # Covers OverflowError / FloatingPointError / ZeroDivisionError.
        # NumPy only raises these when floating-point errors are configured
        # to raise (np.seterr / np.errstate); otherwise it already yields
        # inf/nan with a warning. A bare ``except:`` here would also hide
        # KeyboardInterrupt and genuine programming errors.
        return float('inf')

In [42]:
def DJ(theta,x_b,y):
    """Analytic gradient of the mean-squared-error cost with respect to ``theta``.

    Computes ``x_b.T.dot(x_b.dot(theta) - y) * 2 / len(y)``.

    Parameters
    ----------
    theta : coefficient vector at which the gradient is evaluated.
    x_b   : design matrix (samples in rows).
    y     : target vector, one entry per row of ``x_b``.

    Returns
    -------
    The gradient, one entry per coefficient.
    """
    sample_count = len(y)
    # Prediction error for every sample.
    residual = x_b.dot(theta) - y
    # Project the errors back onto each coefficient's column.
    projected = x_b.T.dot(residual)
    return projected * 2. / sample_count

In [43]:
def gradient_descent(x_b,y,initial_theta,eta,n_iters = 1e4,epsilon = 1e-6):
    """Minimise ``J`` by batch gradient descent using the analytic gradient ``DJ``.

    Parameters
    ----------
    x_b           : design matrix passed through to ``DJ`` and ``J``.
    y             : target vector passed through to ``DJ`` and ``J``.
    initial_theta : starting coefficient vector (not modified).
    eta           : step size multiplied with the gradient on every update.
    n_iters       : upper bound on the number of non-converging updates.
    epsilon       : stop once the cost changes by less than this between
                    two consecutive updates.

    Returns
    -------
    The coefficient vector after the final update.
    """
    current = initial_theta
    steps_done = 0
    while steps_done < n_iters:
        previous = current
        # One descent step against the gradient evaluated at the old point.
        current = previous - eta * DJ(previous, x_b, y)
        cost_change = J(current, x_b, y) - J(previous, x_b, y)
        if abs(cost_change) < epsilon:
            # The cost has flattened out: treat as converged.
            break
        steps_done += 1
    return current

In [46]:
initial_theta = np.zeros(x_b.shape[1])
eta = 0.01
%%time theta1 = gradient_descent(x_b = x_b,y = y,initial_theta = initial_theta,eta = eta)

UsageError: Line magic function `%%time` not found.


In [47]:
# Show the coefficients fitted by gradient descent; compare against the true values 1..11.
theta1

array([ 1.2352741 ,  2.03861875,  2.89098666,  4.09302554,  5.02861985,
        5.88616601,  6.94975899,  7.9832856 ,  8.836721  ,  9.96669651,
       10.88187589])

In [79]:
def DJ_debug(theta,x_b,y,epsilon = 0.01):
    """Approximate the gradient of ``J`` at ``theta`` by central differences.

    For every coefficient ``i`` the cost is evaluated at ``theta`` with entry
    ``i`` shifted by ``+epsilon`` and by ``-epsilon``; the slope between the
    two is the estimated partial derivative. Useful as a slow reference for
    checking an analytic gradient.

    Parameters
    ----------
    theta   : point at which to estimate the gradient (not modified).
    x_b     : design matrix passed through to ``J``.
    y       : target vector passed through to ``J``.
    epsilon : half-width of the finite-difference interval.

    Returns
    -------
    ndarray with one estimated partial derivative per entry of ``theta``.
    """
    # Work on a float copy: perturbing an integer-typed array in place would
    # truncate the +/- epsilon shift on assignment and silently produce an
    # all-zero gradient.
    base = np.array(theta, dtype=float)
    res = np.empty(len(base))
    for i in range(len(base)):
        theta_plus = base.copy()
        theta_plus[i] += epsilon
        theta_minus = base.copy()
        theta_minus[i] -= epsilon
        res[i] = (J(theta_plus,x_b,y)-J(theta_minus,x_b,y))/(2*epsilon)
    return res


In [83]:
def gradient_descent(x_b,y,initial_theta,eta,n_iters = 1e4,epsilon = 1e-6,gradient_fn = None):
    """Minimise ``J`` by batch gradient descent.

    Parameters
    ----------
    x_b           : design matrix passed through to the gradient routine and ``J``.
    y             : target vector passed through to the gradient routine and ``J``.
    initial_theta : starting coefficient vector (not modified).
    eta           : step size multiplied with the gradient on every update.
    n_iters       : upper bound on the number of non-converging updates.
    epsilon       : stop once the cost changes by less than this between
                    two consecutive updates.
    gradient_fn   : callable ``(theta, x_b, y) -> gradient``. Defaults to the
                    finite-difference ``DJ_debug``; pass another routine to
                    switch gradients without redefining this function.

    Returns
    -------
    The coefficient vector after the final update.
    """
    if gradient_fn is None:
        # Resolved at call time so the default stays the numerical gradient.
        gradient_fn = DJ_debug
    theta = initial_theta
    # Carry the previous cost forward instead of re-evaluating J on the old
    # theta every iteration; J is deterministic, so the values are the same.
    last_cost = J(theta,x_b,y)
    cur_iter = 0
    while cur_iter<n_iters:
        theta = theta-eta*gradient_fn(theta,x_b,y)
        cost = J(theta,x_b,y)
        if abs(cost-last_cost)<epsilon:
            # The cost has flattened out: treat as converged.
            break
        last_cost = cost
        cur_iter += 1
    return theta

In [84]:
%%time
# The cell magic above must remain the first line of the cell; it times the whole cell.
# Start the search from the all-zero coefficient vector (one entry per column of x_b).
initial_theta = np.zeros(x_b.shape[1])
# Step size applied to the gradient on every update.
eta = 0.01
theta1 = gradient_descent(x_b = x_b,y = y,initial_theta = initial_theta,eta = eta)

[-67.00674934 -34.88852187 -34.12158402 -34.12599755 -34.25458534
 -34.37153116 -33.57002731 -34.93556353 -35.45595111 -35.18706586
 -35.82788229]
[-62.19581606 -32.35262041 -31.64623335 -31.65886145 -31.80139151
 -31.91930133 -31.18467103 -32.4661753  -32.95977165 -32.72569832
 -33.33268773]
[-57.72826486 -29.99780014 -29.34763368 -29.36788518 -29.52330551
 -29.64209906 -28.96955598 -30.17300216 -30.64169418 -30.43992319
 -31.01547349]
[-53.57958761 -27.81113666 -27.21316967 -27.24049559 -27.40782938
 -27.52743217 -26.91253158 -28.04346719 -28.48900718 -28.31720811
 -28.86353682]
[-49.72702542 -25.780628   -25.23112638 -25.26501692 -25.4433572
 -25.56370005 -25.00231448 -26.06589116 -26.48990641 -26.3459152
 -26.86508163]
[-46.14944375 -23.89512881 -23.39062498 -23.4306067  -23.61911139
 -23.74013005 -23.22842666 -24.22942837 -24.63343009 -24.51523718
 -25.00915372]
[-42.82721651 -22.14428921 -21.68156311 -21.72719636 -21.92508358
 -22.04671842 -21.58113793 -22.52400725 -22.90939885 -

In [85]:
# Show the coefficients fitted in the previous cell; compare against the true values 1..11.
theta1

array([ 1.2352741 ,  2.03861875,  2.89098666,  4.09302554,  5.02861985,
        5.88616601,  6.94975899,  7.9832856 ,  8.836721  ,  9.96669651,
       10.88187589])