### 1. 线性回归模型的构建与训练

In [None]:
import pandas as pd
import numpy as np
# Read the data set from the CSV file in the working directory.
dataset = pd.read_csv('dataset.csv')
# Column 'X' holds each sample's feature and column 'y' its label;
# extract both columns as NumPy arrays.
X, y = (np.array(dataset[column]) for column in ('X', 'y'))

In [None]:
# Positional hold-out split: the first 30 samples are used for training and
# every remaining sample is used for testing.
X_train, X_test = X[:30], X[30:]
y_train, y_test = y[:30], y[30:]
# Number of samples in each split.
n_train, n_test = len(X_train), len(X_test)

In [None]:
# Fixed starting values for the slope w and the intercept b.
w, b = -0.3, 0.6
# Step size applied to each gradient-descent update.
lr = 0.001
# Number of gradient-descent iterations to run.
epochs = 5000
def model(x):
    """Predict with the line ``w * x + b`` using the current global w and b."""
    return w * x + b

In [None]:
# Batch gradient descent on the mean squared error of the linear model.
for epoch in range(epochs):
    # Running totals of the per-sample gradient terms for this pass.
    sum_w, sum_b = 0.0, 0.0
    for xi, yi in zip(X_train, y_train):
        # Signed prediction error of the current model on this sample.
        error = model(xi) - yi
        sum_w += error * xi
        sum_b += error
    # Gradients of the mean squared error with respect to w and b.
    grad_w = (2.0 / n_train) * sum_w
    grad_b = (2.0 / n_train) * sum_b
    # Take one descent step; model() reads the updated globals on the next pass.
    w -= lr * grad_w
    b -= lr * grad_b

In [None]:
import matplotlib.pyplot as plt
# Use the SimHei font so the Chinese title and legend text can be rendered.
plt.rcParams['font.sans-serif']=['SimHei']
# Draw the minus sign as ASCII '-': CJK fonts such as SimHei typically lack the
# Unicode minus glyph (U+2212), which would otherwise show up as an empty box
# on negative tick labels.
plt.rcParams['axes.unicode_minus'] = False
%matplotlib inline
def plots(w, b, X, y):
    """Scatter the samples and overlay the curve of the current ``model``.

    Args:
        w: Unused; kept so existing calls keep working. The curve is produced
            by the module-level ``model`` function, which reads the global
            parameters itself.
        b: Unused; see ``w``.
        X: Feature values of the samples (x coordinates of the points).
        y: Labels of the samples (y coordinates of the points).
    """
    fig, ax = plt.subplots()
    # Draw the samples of the data set.
    points = ax.scatter(X, y)
    # Draw the model's prediction at the integer positions 0..19.
    xs = list(range(0, 20))
    (curve,) = ax.plot(xs, [model(x) for x in xs])
    # Pass the handles explicitly: with a labels-only legend() call the labels
    # are paired with the artists by implicit order, which can attach
    # '模型' (model) to the scatter points and '数据' (data) to the line.
    ax.legend((curve, points), ('模型', '数据'),
              loc='upper left',
              prop={'size': 15})
    ax.set_title("线性回归模型", fontsize=15)
    plt.show()
plots(w, b, X, y)

In [None]:
def loss_funtion(X, y):
    """Return the mean squared error of ``model`` over the given samples.

    Args:
        X: Indexable collection of sample feature values.
        y: Indexable collection of labels, aligned with ``X`` by position.

    Returns:
        The average of ``(model(X[i]) - y[i]) ** 2`` over all ``len(X)`` samples.
    """
    sample_count = len(X)
    squared_error_sum = 0
    for idx in range(sample_count):
        feature, label = X[idx], y[idx]
        # Gap between the model's prediction and the true label.
        residual = model(feature) - label
        squared_error_sum += residual ** 2
    # Average the accumulated squared error over the data set.
    return (1 / sample_count) * squared_error_sum

In [None]:
# Mean squared error of the linear model on the training and test splits.
train_loss = loss_funtion(X_train, y_train)
test_loss = loss_funtion(X_test, y_test)
# Show the values: they are reassigned by later cells, so without this the
# linear model's losses would never be visible.
print(f"Linear model - train MSE: {train_loss:.4f}, test MSE: {test_loss:.4f}")

### 2. 复杂线性回归模型的构建

In [None]:
import numpy as np
# Seed NumPy's global RNG so the random starting weights, and therefore the
# trained model and its losses, are the same on every Restart & Run All.
np.random.seed(42)
# Random starting weights (w[0] multiplies x, w[1] multiplies x**2) and a
# fixed starting bias.
w = np.random.rand(2)
b = 1.1
# Step size applied to each gradient-descent update.
lr = 1e-6
# Number of gradient-descent iterations to run.
epochs = 50000
def model(x):
    """Predict with the quadratic ``w[0]*x + w[1]*x**2 + b``.

    Reads the current global ``w`` (two weights) and ``b``.
    """
    y_hat = w[0]*x + w[1]*(x**2) + b
    return y_hat
# Batch gradient descent on the mean squared error of the quadratic model.
for epoch in range(epochs):
    # Running totals of the per-sample gradient terms for this pass:
    # sum_w[0] belongs to the x weight, sum_w[1] to the x**2 weight.
    sum_w = np.zeros(2)
    sum_b = 0.0
    for xi, yi in zip(X_train, y_train):
        # Signed prediction error of the current model on this sample.
        error = model(xi) - yi
        sum_w[0] += error * xi
        sum_w[1] += error * (xi**2)
        sum_b += error
    # Gradients of the mean squared error with respect to w and b.
    grad_w = (2.0 / n_train) * sum_w
    grad_b = (2.0 / n_train) * sum_b
    # Take one descent step; model() reads the updated globals on the next pass.
    w = w - lr * grad_w
    b = b - lr * grad_b

In [None]:
# Plot all samples together with the curve of the currently defined model().
plots(w, b, X, y)

In [None]:
# Mean squared error of the quadratic model on the training and test splits.
train_loss = loss_funtion(X_train, y_train)
test_loss = loss_funtion(X_test, y_test)
# Show the values: they are reassigned by later cells, so without this the
# quadratic model's losses would never be visible.
print(f"Quadratic model - train MSE: {train_loss:.4f}, test MSE: {test_loss:.4f}")

### 3. 使用正则项防止过拟合

In [None]:
import numpy as np
# Seed NumPy's global RNG so the random starting weights, and therefore the
# trained model and its losses, are the same on every Restart & Run All.
np.random.seed(42)
# Random starting weights (w[0] multiplies x, w[1] multiplies x**2) and a
# fixed starting bias.
w = np.random.rand(2)
b = 1.1
# Step size applied to each gradient-descent update.
lr = 1e-6
# Number of gradient-descent iterations to run.
epochs = 10000000
def model(x):
    """Predict with the quadratic ``w[0]*x + w[1]*x**2 + b``.

    Reads the current global ``w`` (two weights) and ``b``.
    """
    y_hat = w[0]*x + w[1]*(x**2) + b
    return y_hat

In [None]:
# Regularization strength (lambda) of the L2 penalty on the weights w.
reg = 10000
# The squared feature does not change between epochs, so compute it once.
X_train_sq = X_train ** 2
for epoch in range(epochs):
    # Prediction errors for the whole training set in one vectorized call.
    # With this many epochs, a per-sample Python loop would dominate the run
    # time; the sums below are the same quantities, equal up to floating-point
    # rounding.
    residual = model(X_train) - y_train
    # sum_w[0] belongs to the x weight, sum_w[1] to the x**2 weight.
    sum_w = np.array([np.dot(residual, X_train),
                      np.dot(residual, X_train_sq)])
    sum_b = np.sum(residual)
    # Gradient of the mean squared error plus the gradient of the penalty
    # reg * ||w||^2; the bias b is not penalized.
    grad_w = (2.0 / n_train) * sum_w + (2.0 * reg * w)
    grad_b = (2.0 / n_train) * sum_b
    # Take one descent step; model() reads the updated globals on the next pass.
    w = w - lr * grad_w
    b = b - lr * grad_b

In [None]:
# Mean squared error of the regularized quadratic model on both splits.
train_loss = loss_funtion(X_train, y_train)
test_loss = loss_funtion(X_test, y_test)
# Show the values so they can be compared with the earlier models' losses.
print(f"Regularized model - train MSE: {train_loss:.4f}, test MSE: {test_loss:.4f}")