In [2]:
import numpy as np

# Seed NumPy's global RNG so the synthetic data set below is reproducible.
np.random.seed(666)
# Feature matrix: 1000 samples x 10 features, drawn uniformly from [0, 1).
X = np.random.random(size=(1000,10))
# Ground-truth parameters 1.0 .. 11.0 (11 values: one per column of X_b below).
true_theta = np.arange(1, 12, dtype = float)
# Prepend a column of ones so the first parameter acts as the intercept.
X_b = np.hstack([np.ones((len(X), 1)), X])
# Linear response plus standard-normal noise (one noise draw per sample).
y = X_b.dot(true_theta) + np.random.normal(size=1000)

In [3]:
# Display the first row of the design matrix; its leading entry comes from the all-ones column.
X_b[0, :]


array([1.        , 0.70043712, 0.84418664, 0.67651434, 0.72785806,
       0.95145796, 0.0127032 , 0.4135877 , 0.04881279, 0.09992856,
       0.50806631])

In [4]:
# Sanity check: shapes of the design matrix and targets, then the ground-truth parameters.
print(X_b.shape)
print(y.shape)
print(true_theta)


(1000, 11)
(1000,)
[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11.]


In [24]:
def J(theta, X_b, y):
    """Mean squared error of the linear model ``X_b.dot(theta)`` against ``y``.

    Parameters
    ----------
    theta : array-like
        Parameter vector; must be compatible with ``X_b.dot(theta)``.
    X_b : ndarray
        Design matrix. ``len(X_b)`` (its number of rows) is the divisor.
    y : array-like
        Target values, one per row of ``X_b``.

    Returns
    -------
    float
        ``sum((y - X_b.dot(theta))**2) / len(X_b)``, or ``inf`` when the
        computation raises an arithmetic error.

    Raises
    ------
    Any non-arithmetic exception (e.g. ``ValueError`` for mismatched shapes)
    propagates to the caller instead of being reported as an infinite cost.
    """
    try:
        return np.sum((y - X_b.dot(theta))**2) / len(X_b)
    except ArithmeticError:
        # Only numeric failures (OverflowError, FloatingPointError,
        # ZeroDivisionError) are treated as "infinitely bad". A bare
        # ``except`` would also hide shape bugs and KeyboardInterrupt.
        return float('inf')

def dJ_math(theta, X_b, y):
    """Closed-form gradient of the mean-squared-error cost with respect to ``theta``.

    Evaluates ``X_b.T.dot(X_b.dot(theta) - y) * 2 / len(X_b)`` and returns an
    array with one entry per component of ``theta``.
    """
    n_samples = len(X_b)
    residual = X_b.dot(theta) - y
    projected = X_b.T.dot(residual)
    return projected * 2/n_samples

def dJ_debug(theta, X_b, y, epsilon=1e-6):
    """Numerical gradient of ``J`` via central finite differences.

    For every component ``i`` the cost is evaluated at ``theta[i] + epsilon``
    and ``theta[i] - epsilon`` (all other components unchanged) and the
    difference is divided by ``2 * epsilon``. This costs two ``J``
    evaluations per parameter, so it is meant for checking an analytic
    gradient rather than for production use.

    Parameters
    ----------
    theta : array-like
        Point at which to evaluate the gradient.
    X_b, y
        Forwarded unchanged to ``J``.
    epsilon : float, optional
        Half-width of the finite-difference step.

    Returns
    -------
    ndarray
        Array of ``len(theta)`` partial-derivative estimates.
    """
    # Work in floating point: writing ``value + epsilon`` back into an
    # integer-dtype array would truncate the perturbation and make every
    # estimated derivative zero.
    theta = np.asarray(theta, dtype=float)
    res = np.empty(len(theta))
    for i in range(len(theta)):
        theta_plus = theta.copy()
        theta_minus = theta.copy()
        theta_plus[i] = theta_plus[i] + epsilon
        theta_minus[i] = theta_minus[i] - epsilon
        res[i] = (J(theta_plus, X_b, y) - J(theta_minus, X_b, y)) / (2 * epsilon)
    return res

def gradient_descent(dJ, X_b, y, initial_theta, eta, n_iters = 1e4, epsilon=1e-8):
    """Minimise the cost ``J`` by batch gradient descent.

    Parameters
    ----------
    dJ : callable
        Gradient function called as ``dJ(theta, X_b, y)``.
    X_b, y
        Data forwarded unchanged to ``dJ`` and ``J``.
    initial_theta : ndarray
        Starting point. It is not modified; each step builds a new array.
    eta : float
        Learning rate (step size).
    n_iters : number, optional
        Upper bound on the number of update steps.
    epsilon : float, optional
        Stop as soon as the absolute change in cost between two consecutive
        steps drops below this value.

    Returns
    -------
    ndarray
        The parameter vector after the last update step performed
        (``initial_theta`` itself when no step is taken).
    """
    theta = initial_theta
    # The cost at the current point is carried from one iteration to the
    # next, so J is evaluated once per step rather than twice.
    cost = J(theta, X_b, y)
    iter_cnt = 0

    while iter_cnt < n_iters:
        gradient = dJ(theta, X_b, y)
        theta = theta - eta * gradient
        new_cost = J(theta, X_b, y)
        converged = abs(new_cost - cost) < epsilon
        cost = new_cost
        if converged:
            break
        iter_cnt += 1

    return theta

In [25]:
# Rebuild the design matrix (ones column + X) and start the search from all-zero parameters.
X_b = np.hstack([np.ones((len(X), 1)), X])
initial_theta = np.zeros(X_b.shape[1])
# Learning rate passed to gradient_descent.
eta = 0.01

# Time gradient descent driven by the finite-difference gradient (dJ_debug), then display the result.
%time theta = gradient_descent(dJ_debug, X_b, y, initial_theta, eta)
theta

CPU times: user 3.12 s, sys: 5.51 ms, total: 3.13 s
Wall time: 3.13 s


array([ 1.1251597 ,  2.05312521,  2.91522497,  4.11895968,  5.05002117,
        5.90494046,  6.97383745,  8.00088367,  8.86213468,  9.98608331,
       10.90529198])

In [26]:
# Same run with the closed-form gradient (dJ_math); uses X_b, y, initial_theta and eta from earlier cells.
%time theta = gradient_descent(dJ_math, X_b, y, initial_theta, eta)
theta



CPU times: user 3.83 s, sys: 18.2 ms, total: 3.85 s
Wall time: 483 ms


array([ 1.1251597 ,  2.05312521,  2.91522497,  4.11895968,  5.05002117,
        5.90494046,  6.97383745,  8.00088367,  8.86213468,  9.98608331,
       10.90529198])