### 1. 线性关系

$ f(x)=2x+1 $

$ f(x, y) = 3x + 6y -8 $

$ y = w_1 x_1 + w_2 x_2 + \cdots + w_n x_n + b $

### 2. 读取数据

In [1]:
import pandas as pd

In [2]:
"""
    原始数据读取
"""
data = pd.read_csv(filepath_or_buffer="boston_house_prices.csv", skiprows=1)
data = data.to_numpy()
X = data[:, :-1]
y = data[:, -1]

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Hold out 20% of the samples as the test set; random_state=0 fixes the
# shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [5]:
"""
    预处理
        - 标准化
        - 规范化
        - norm
        - standard
"""
mu = X_train.mean(axis=0)
sigma = X_train.std(axis=0)
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma

In [6]:
# Inspect the training targets
y_train

array([26.7, 21.7, 22. , 22.9, 10.4, 21.9, 20.6, 26.4, 41.3, 17.2, 27.1,
       20.4, 16.5, 24.4,  8.4, 23. ,  9.7, 50. , 30.5, 12.3, 19.4, 21.2,
       20.3, 18.8, 33.4, 18.5, 19.6, 33.2, 13.1,  7.5, 13.6, 17.4,  8.4,
       35.4, 24. , 13.4, 26.2,  7.2, 13.1, 24.5, 37.2, 25. , 24.1, 16.6,
       32.9, 36.2, 11. ,  7.2, 22.8, 28.7, 14.4, 24.4, 18.1, 22.5, 20.5,
       15.2, 17.4, 13.6,  8.7, 18.2, 35.4, 31.7, 33. , 22.2, 20.4, 23.9,
       25. , 12.7, 29.1, 12. , 17.7, 27. , 20.6, 10.2, 17.5, 19.7, 29.8,
       20.5, 14.9, 10.9, 19.5, 22.7, 19.5, 24.6, 25. , 24.5, 50. , 14.3,
       11.8, 31. , 28.7, 16.2, 43.5, 25. , 22. , 19.9, 22.1, 46. , 22.9,
       20.2, 43.1, 34.6, 13.8, 24.3, 21.5, 24.4, 21.2, 23.8, 26.6, 25.1,
        9.6, 19.4, 19.4,  9.5, 14. , 26.5, 13.8, 34.7, 16.3, 21.7, 17.5,
       15.6, 20.9, 21.7, 12.7, 18.5, 23.7, 19.3, 12.7, 21.6, 23.2, 29.6,
       21.2, 23.8, 17.1, 22. , 36.5, 18.8, 21.9, 23.1, 20.2, 17.4, 37. ,
       24.1, 36.2, 15.7, 32.2, 13.5, 17.9, 13.3, 11

$ \text{price} = w_1 \cdot \text{feature}_1 + w_2 \cdot \text{feature}_2 + \cdots + w_{13} \cdot \text{feature}_{13} + b $

### 3. 使用 sklearn 解决问题

In [7]:
from sklearn.linear_model import LinearRegression

In [8]:
# Hypothesis function: a linear regression model (not fitted yet)
lr = LinearRegression()

In [9]:
# Fitting is the process of finding and fixing the weights w and the bias b
lr.fit(X=X_train, y=y_train)

In [10]:
"""
    13 weight 
    1 bias
"""
# weight
lr.coef_

array([-0.97082019,  1.05714873,  0.03831099,  0.59450642, -1.8551476 ,
        2.57321942, -0.08761547, -2.88094259,  2.11224542, -1.87533131,
       -2.29276735,  0.71817947, -3.59245482])

In [11]:
# Learned bias (the intercept term)
lr.intercept_

np.float64(22.611881188118836)

In [12]:
# lr.predict(X=X_test)

### 4. 深度学习

In [13]:
import torch

In [14]:
# Shapes of the train/test features and targets
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((404, 13), (102, 13), (404,), (102,))

In [15]:
"""
    定义参数
    13: weight
    1: bias
"""
# 权重
w = torch.randn(13, 1, requires_grad=True)
# 偏置
b = torch.randn(1, 1, requires_grad=True)
w, b

(tensor([[ 2.5554],
         [ 0.3881],
         [ 0.4914],
         [-0.7114],
         [-0.2376],
         [-1.0044],
         [-0.8060],
         [-0.5342],
         [-1.0483],
         [ 0.9585],
         [ 1.2376],
         [ 0.8014],
         [-2.8253]], requires_grad=True),
 tensor([[-1.1696]], requires_grad=True))

In [16]:
# Show the shape and the number of dimensions of each parameter tensor
for param in (w, b):
    print(param.shape, param.ndim)

torch.Size([13, 1]) 2
torch.Size([1, 1]) 2


In [17]:
"""
    定义模型:定义线性回归的处理逻辑
"""
def model(x):
    return x @ w + b

In [18]:
"""
    转换数据 转成张量
    reshape(-1, 1) : 行自动计算，然后列变成1 -> 也就是转成一个列向量
"""
X_train = torch.tensor(data=X_train, dtype=torch.float32)
y_train = torch.tensor(data=y_train.reshape(-1, 1), dtype=torch.float32)

In [19]:
# Check the shapes after the conversion
X_train.shape, y_train.shape

(torch.Size([404, 13]), torch.Size([404, 1]))

In [20]:
# Optimisation settings: number of gradient-descent steps and the step size.
# 10001 rather than 10000 so that step index 10000 is also reached by the
# every-1000-steps progress log.
steps = 10_001
learning_rate = 0.01

In [21]:
# Plain (full-batch) gradient descent on the mean-squared-error loss.
for step in range(steps):
    # 1-1. Forward pass: compute the predictions
    y_pred = model(X_train)
    # 1-2. Forward pass: compute the MSE loss
    #      (the variable keeps its original name `mes_loss`)
    mes_loss = ((y_train - y_pred) ** 2).mean()
    # 2. Backward pass: fills w.grad and b.grad
    mes_loss.backward()
    # 3. One optimisation step. The parameters are updated in place inside
    #    no_grad() so autograd does not record the update; this replaces
    #    writing through `.data`, which bypasses autograd's safety checks.
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad
    # 4. Clear the gradients, because backward() accumulates into .grad
    w.grad.zero_()
    b.grad.zero_()
    # Log progress every 1000 steps. .item() pulls the single number out of
    # the one-element loss tensor.
    if step % 1000 == 0:
        print(f'第{step+1}步的 mes:{mes_loss.item()}')

IndentationError: unexpected indent (1885044906.py, line 17)

In [None]:
# Training looks about done, so take a look at the parameter w
w

In [None]:
# Also take a look at the parameter b
b

In [None]:
# The trained model (`model` is the function defined earlier; it reads the
# global parameters w and b)
model

In [None]:
# Shapes of the held-out test features and targets
X_test.shape, y_test.shape

### 推理过程

In [None]:
# Hand-written inference function
def predict(X_test):
    """Run the model on a batch of feature rows and return flat predictions.

    Args:
        X_test: features with one sample per row, as a NumPy array (or other
            array-like) or as a tensor. The values are converted to float32.

    Returns:
        1-D NumPy array holding one prediction per input row.
    """
    # as_tensor takes arrays and tensors alike, so an input that is already a
    # tensor does not trigger torch.tensor()'s copy-construct warning.
    features = torch.as_tensor(X_test, dtype=torch.float32)

    # Inference only: no_grad() temporarily disables gradient tracking,
    # which saves some resources.
    with torch.no_grad():
        y_pred = model(features)

    # Flatten the (n, 1) output to a vector, then convert to NumPy so it can
    # be used directly for evaluation. reshape also handles non-contiguous
    # tensors, which view(-1) would reject.
    return y_pred.reshape(-1).numpy()

In [None]:
# Inference on the test features
y_pred = predict(X_test)

In [None]:
# Check the container types of the predictions, test features and test targets
type(y_pred), type(X_test), type(y_test)

In [None]:
# Evaluate: mean squared error (MSE) between the predictions and the test targets
((y_pred - y_test) ** 2).mean()