<a href="https://colab.research.google.com/github/roberthsu2003/machine_learning/blob/main/%E5%A4%9A%E5%85%83%E7%B7%9A%E6%80%A7%E8%BF%B4%E6%AD%B8/%E5%AD%B8%E6%A0%A1%E7%A6%8F%E5%88%A9%E7%A4%BE%E7%87%9F%E6%A5%AD%E9%A1%8D%E9%A0%90%E6%B8%AC/multiple_linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 多元線性迴歸演算法程式

In [1]:
%%capture
# Install the `wget` package, which the next cell imports to download the font
# and dataset files. `%%capture` suppresses pip's installation log.
%pip install wget

In [2]:
# Fetch the two external resources this notebook relies on: a font file and
# the welfare-club sales dataset (CSV). Files are saved in the current
# working directory under the last path component of their URL.
import os

import wget

RESOURCE_URLS = [
    # Font file
    "https://github.com/roberthsu2003/machine_learning/raw/refs/heads/main/source_data/ChineseFont.ttf",
    # Dataset
    "https://raw.githubusercontent.com/roberthsu2003/machine_learning/refs/heads/main/source_data/Welfare_Club_Sales.csv",
]

for resource_url in RESOURCE_URLS:
    local_name = os.path.basename(resource_url)
    # wget.download() never overwrites an existing file: it stores a renamed
    # copy (e.g. "Welfare_Club_Sales (1).csv") instead. Skipping files that
    # are already present keeps re-runs idempotent, avoids piling up
    # duplicates, and saves a network round trip.
    if not os.path.exists(local_name):
        # `out=` pins the local file name so it always matches the name
        # checked above (and the name later passed to pd.read_csv).
        wget.download(resource_url, out=local_name)

'Welfare_Club_Sales.csv'

In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load the sales dataset from the CSV file in the working directory.
df = pd.read_csv("Welfare_Club_Sales.csv")

# Predictor columns and the target column.
feature_columns = ["星期", "氣溫", "降雨量", "請假人數", "活動日"]
X = df[feature_columns]
y = df["營業額"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


def _score(model, features, targets):
    """Predict with a fitted model and score the predictions.

    Returns a tuple ``(predictions, mse, rmse, r2)``.
    """
    predictions = model.predict(features)
    mse = mean_squared_error(targets, predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(targets, predictions)
    return predictions, mse, rmse, r2


def _print_report(heading, model, mse, rmse, r2):
    """Print the heading, the three metrics, and the model's coefficients/intercept."""
    print(heading)
    print(f"均方誤差 (MSE): {mse:.2f}")
    print(f"均方根誤差 (RMSE): {rmse:.2f}")
    print(f"R² 分數: {r2:.2f}")
    print("模型係數：", model.coef_)
    print("截距：", model.intercept_)


# 1. Baseline: linear regression fitted on the raw (unscaled) features.
model_no_norm = LinearRegression()
model_no_norm.fit(X_train, y_train)
y_pred_no_norm, mse_no_norm, rmse_no_norm, r2_no_norm = _score(
    model_no_norm, X_test, y_test
)
_print_report(
    "無正規化模型結果：", model_no_norm, mse_no_norm, rmse_no_norm, r2_no_norm
)
print()  # blank line between the two reports

# 2. Standardized features: the scaler is fitted on the training split only
#    and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model_norm = LinearRegression()
model_norm.fit(X_train_scaled, y_train)
y_pred_norm, mse_norm, rmse_norm, r2_norm = _score(
    model_norm, X_test_scaled, y_test
)
_print_report("有正規化模型結果：", model_norm, mse_norm, rmse_norm, r2_norm)

無正規化模型結果：
均方誤差 (MSE): 1121035.44
均方根誤差 (RMSE): 1058.79
R² 分數: 0.86
模型係數： [  43.69982993  210.13515885  -56.82618312  -30.97785296 2795.46813349]
截距： 9892.446146228609

有正規化模型結果：
均方誤差 (MSE): 1121035.44
均方根誤差 (RMSE): 1058.79
R² 分數: 0.86
模型係數： [   61.51823113  1198.58349039  -848.46590164 -1803.61955774
  1113.28177824]
截距： 11399.518133856238
