In [15]:
import numpy as np

$$loss=\sum_{i}{(w*x_i+b-y_i)^2}$$

In [16]:
def compute_error_for_line_given_points(b, w, points):
    totalError = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # ((w * x + b) - y) ** 2 就是上面公式单个损失值
        totalError += ((w * x + b) - y) ** 2
    return totalError / float(len(points))

$$
w^{'}=w-lr*\frac{\partial{loss}}{\partial{w}} \\
\frac{\partial{loss}}{\partial{w}}=2*\sum_{i}{(w*x_i+b-y_i)*x_i} \\
b^{'}=b-lr*\frac{\partial{loss}}{\partial{b}} \\
\frac{\partial{loss}}{\partial{b}}=2*\sum_{i}{(w*x_i+b-y_i)}
$$

In [17]:
def step_gradient(b_current, w_current, points, learning_rate):
    b_gradient = 0
    w_gradient = 0
    N = float(len(points))
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        b_gradient += (2/N) * ((w_current * x + b_current) - y)
        w_gradient += (2/N) * x * ((w_current * x + b_current) - y)
    new_w = w_current - (w_gradient * learning_rate)
    new_b = b_current - (b_gradient * learning_rate)
    return [new_b, new_w]

In [18]:
def gradient_descent_runner(points, starting_b, starting_w, learning_rate, num_iterations):
    b = starting_b
    w = starting_w
    for i in range(num_iterations):
        b, w = step_gradient(b, w, points, learning_rate)
    return [b, w]

In [19]:
def run():
    points = np.genfromtxt("data.csv", delimiter=",")
    learning_rate = 0.001
    initial_b = 0
    initial_w = 0
    num_iterations = 1000
    print("b = {0}, w = {1}, error = {2}"
          .format(initial_b, initial_w, compute_error_for_line_given_points(initial_b, initial_w, points)))
    print("Running....")
    [b, w] = gradient_descent_runner(points, initial_b, initial_w, learning_rate, num_iterations)
    print("b = {0}, w = {1}, error = {2}"
          .format(b, w, compute_error_for_line_given_points(b, w, points)))

In [20]:
if __name__ == '__main__':
    run()

b = 0, w = 0, error = 5565.107834483211
Running....
b = nan, w = nan, error = nan


  w_gradient += (2/N) * x * ((w_current * x + b_current) - y)
  new_w = w_current - w_gradient * learning_rate
