# 多变量线性回归
使用线性回归预测房屋价格。样本数据位于 `ex1data2.txt`：第一列为房屋面积（单位：平方英尺），第二列为卧室数量，第三列为房屋价格。


In [None]:
import single_variable_regression as svr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # Location of the training data file.
    path = "C:/Users/l30072207/Documents/ml/1/ex1data2.txt"
    # The CSV file has no header row (its first line is already data), so
    # header=None is passed and the column labels are supplied explicitly.
    column_names = ['Aera', 'NumOfBedrooms', 'Price']
    data = pd.read_csv(path, names=column_names, header=None)
    # Show the first rows to check that the file was parsed as expected.
    print(data.head())

## 特征归一化
房屋面积的数值大约是卧室数量的 1000 倍，需要将各特征缩放到同一尺度，以加快梯度下降算法的收敛。

In [None]:
def featureNormalization(X):
    """Standardize each feature to zero mean and unit standard deviation.

    Args:
        X: A pandas DataFrame (one column per feature) or a pandas Series
            of numeric values.

    Returns:
        An object of the same type and shape as ``X`` containing
        ``(X - mean) / std``, where ``mean`` and ``std`` are computed per
        column with pandas' ``mean()`` and ``std()``. A feature whose
        standard deviation is zero (constant column) or undefined
        (single-row input) is only centered, so it comes out as all zeros
        instead of NaN/inf.
    """
    mean = X.mean()
    std = X.std()
    # Guard the division: a std of 0 or NaN would turn the whole feature
    # into NaN/inf. Dividing by 1 instead leaves such a feature merely
    # centered. ``NaN > 0`` is False, so NaN is replaced as well.
    if isinstance(std, pd.Series):
        std = std.where(std > 0, 1.0)
    elif not std > 0:
        std = 1.0
    X = (X - mean) / std
    # Preview of the scaled values, kept from the original implementation.
    print(X.head())
    return X

if __name__ == "__main__":
    # The last column is the target; every column before it is a feature.
    cols = data.shape[1]
    # Slicing with a range (rather than a single index) keeps y two-dimensional.
    y = data.iloc[:, cols - 1:cols]
    # Scale the feature columns before running gradient descent.
    X = featureNormalization(data.iloc[:, :-1])

## 梯度下降

In [None]:
if __name__ == "__main__":
    # Convert the pandas objects to plain NumPy arrays for the matrix math.
    X = X.to_numpy()
    y = y.to_numpy()
    print(f"X:维度{X.shape}\n{X[:5, :]}")
    print(f"y:维度{y.shape}\n{y[:5]}")

    # Prepend a column of ones to X; it is the column paired with theta_0.
    ones = np.ones((len(X), 1))
    X = np.hstack((ones, X))

    # Use NumPy's default float64 rather than float32 so theta has the same
    # precision as X and y. svr.gradientDescent is defined elsewhere; if it
    # updates theta in place, a float32 array would silently truncate every
    # update.
    initial_theta = np.zeros((X.shape[1], 1))
    print(f"theta:维度{initial_theta.shape}\n{initial_theta}")

    # Learning rates to compare, and the number of iterations for each run.
    alphas = np.array([0.01, 0.03, 0.1, 0.3])
    iters = 50

    plt.figure(figsize=(12, 8))

    for alpha in alphas:
        # Start every run from a fresh copy of the initial theta so the
        # result for one alpha cannot leak into the next run.
        current_theta = initial_theta.copy()
        theta, cost = svr.gradientDescent(X, y, current_theta, alpha, iters)
        plt.plot(range(len(cost)), cost, linewidth=2, label=f"alpha={alpha}")

    plt.title("Convergence of Cost Function for different Alphas")
    plt.xlabel("Iterations")
    plt.ylabel("Cost J")
    plt.legend()  # Show one legend entry per alpha.
    plt.grid(True)
    plt.show()  # Render all curves together in a single figure.