## 线性回归     

线性回归是机器学习入门知识，应用十分广泛。线性回归利用数理统计中回归分析，来确定两种或两种以上变量间相互依赖的定量关系的，其表达形式为$y = wx+b+e$，$e$为误差，服从均值为0的正态分布。        

首先让我们来确认线性回归的损失函数：
$$
loss = \sum_i^N ({(wx_i+b) - y_i})^2
$$
然后利用 梯度下降法更新参数$\textbf{w}$和$\textbf{b}$来最小化损失函数，最终学得$\textbf{w}$和$\textbf{b}$的数值

In [1]:
import torch as t
import numpy as np
import matplotlib.pyplot as plt
from IPython import display

In [2]:
# 计算总loss
def compute_totalError(w, b, points):
    totalError = 0
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        totalError += (y - (w * x + b)) ** 2
    return totalError / float(len(points))


# 计算每一步更新的梯度
def step_gradient(cur_w, cur_b, points, lr):
    cur_w = 0
    cur_b = 0
    N = float(len(points))
    
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        w_grad = (2/N) * ((cur_w*x + cur_b) - y) * x
        b_grad = (2/N) * ((cur_w*x + cur_b) - y)
    new_w = w - lr * w_grad
    new_b = b - lr * b_grad    
    return [new_w, new_b]
  

def gd_runner(points, start_w, start_b, lr, num_iters):
    w = start_w
    b = start_b
    
    for i in range(int(num_iters)):
        w, b = step_gradient(w, b, np.array(points), lr)
    # 返回最后一组 w,b    
    return [w, b]

In [3]:
def run():
    points = np.genfromtxt('data.csv', delimiter=',')
    lr = 0.001
    initial_w = 0
    initial_b = 0
    num_iters = 1000
    
    print('Starting gradient descent at w = {0}, b = {1}, error = {2}'
          .format(initial_w, initial_b, compute_totalError(initial_w, initial_b, points)))
    
    print('Running...')
    
    [w, b] = gd_runner(points, initial_w, initial_b, lr, num_iters)
    print('After {0} iterations, w = {1}, b = {2}, error={3}'
         .format(num_iters, w, b, compute_totalError(w, b, points)))

    
if __name__ == '__main__':
    run()

Starting gradient descent at w = 0, b = 0, error = 5565.107834483211
Running...


NameError: name 'w' is not defined