# 模型例子

In [7]:
# 導入所需的庫
import os
import subprocess
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

"""資料收集和整理"""
# Build a synthetic data set: 100 samples with 5 standard-normal features.
np.random.seed(0)  # fixed seed so the draws below are reproducible
X = np.random.normal(0, 1, (100, 5))

# The target is a linear combination of the first two feature columns plus
# Gaussian noise (standard deviation 2); the other columns do not enter y.
noise = np.random.normal(0, 2, 100)
y = 3 + 2 * X[:, 0] - 4 * X[:, 1] + noise

# Collect features and target in a single DataFrame.
df = pd.DataFrame(X, columns=[f"X{k}" for k in range(1, 6)]).assign(y=y)

df

Unnamed: 0,X1,X2,X3,X4,X5,y
0,1.764052,0.400157,0.978738,2.240893,1.867558,5.692941
1,-0.977278,0.950088,-0.151357,-0.103219,0.410599,-2.823394
2,0.144044,1.454274,0.761038,0.121675,0.443863,-0.336313
3,0.333674,1.494079,-0.205158,0.313068,-0.854096,-2.777399
4,-2.552990,0.653619,0.864436,-0.742165,2.269755,-5.415355
...,...,...,...,...,...,...
95,0.994394,1.319137,-0.882419,1.128594,0.496001,0.470712
96,0.771406,1.029439,-0.908763,-0.424318,0.862596,-0.515009
97,-2.655619,1.513328,0.553132,-0.045704,0.220508,-8.798013
98,-1.029935,-0.349943,1.100284,1.298022,2.696224,0.479590


In [None]:
"""資料切分"""

# --- Train/test split ---
# Hold out 20% of the rows as a test set; random_state fixes the shuffle.
feature_columns = ["X1", "X2", "X3", "X4", "X5"]
features, target = df[feature_columns], df["y"]
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# --- Build and train the model ---
# Expand the inputs into degree-2 polynomial features. The transformer is
# fitted on the training split only and then re-applied to the test split.
poly_features = PolynomialFeatures(degree=2)
X_train_poly = poly_features.fit_transform(X_train)
X_test_poly = poly_features.transform(X_test)

# Ordinary linear regression on the expanded features.
model = LinearRegression().fit(X_train_poly, y_train)

# --- Model evaluation ---
# Predict on the held-out split and report MSE and R^2.
y_pred = model.predict(X_test_poly)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f"Mean squared error: {mse:.2f}")
print(f"Coefficient of determination (R^2): {r2:.2f}")

In [None]:
# Presenting the results (this whole cell is intentionally left commented out).
# # Optionally plot predicted vs. actual values against a single input feature, e.g. X1:
# plt.scatter(X_test['X1'], y_test, color='black', label='Actual data')
# plt.scatter(X_test['X1'], y_pred, color='blue', marker='^', label='Predicted data')
# plt.xlabel('X1')
# plt.ylabel('y')
# plt.title('Polynomial Regression Results')
# plt.legend()
# plt.show()

## 保存模型

In [None]:
from joblib import dump, load

"""保存模型"""
# --- Save the model ---
# Persist both the fitted regression model and the polynomial feature
# transformer: new data must pass through the same transformer before predict.
MODEL_PATH = "polynomial_regression_model.joblib"
POLY_FEATURES_PATH = "poly_features.joblib"

dump(model, MODEL_PATH)
dump(poly_features, POLY_FEATURES_PATH)

# --- Load the model ---
# Both objects can be restored from disk whenever they are needed.
# NOTE(review): joblib persistence is pickle-based, so only load files that
# come from a trusted source.
model_loaded = load(MODEL_PATH)
poly_features_loaded = load(POLY_FEATURES_PATH)

# model_loaded and poly_features_loaded can now be used to predict on new data.

## 輸出模型係數

In [6]:
# Report the coefficients of the fitted polynomial regression.
import pandas as pd
import os

# `model` is the fitted model from the training cell. The coef_ entry that
# belongs to the constant term is zero, so the non-zero filter below drops it
# and the fitted constant is reported through intercept_ instead.
coef = model.coef_
intercept = model.intercept_

# Names of the expanded polynomial features; indexed in parallel with coef_.
feature_names = poly_features.get_feature_names_out(
    input_features=["X1", "X2", "X3", "X4", "X5"]
)

# Keep only the features whose coefficient is non-zero.
nonzero_positions = [i for i in range(len(coef)) if coef[i] != 0]
coef_data = {
    "係數": [coef[i] for i in nonzero_positions],
    "特徵": [feature_names[i] for i in nonzero_positions],
}
coef_df = pd.DataFrame(coef_data)

# Put the intercept in the first row, ahead of the feature coefficients.
coef_df = pd.concat(
    [pd.DataFrame({"係數": [intercept], "特徵": ["截距"]}), coef_df], ignore_index=True
)

# Separate table listing every generated feature name (constant term included).
feature_names_df = pd.DataFrame({"特徵名稱": feature_names})

# Print both tables.
print("係數和截距表：")
print(coef_df)
print("\n特徵名稱表：")
print(feature_names_df)

# Save the coefficient table to Excel.
coef_df.to_excel("coefficients.xlsx", index=False)

# Open the workbook with the default application. The previous
# os.system("start ...") call only works in the Windows shell; this is a
# best-effort, platform-aware replacement that never aborts the cell.
try:
    if sys.platform.startswith("win"):
        os.startfile("coefficients.xlsx")
    else:
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        subprocess.Popen([opener, "coefficients.xlsx"])
except OSError as exc:
    print(f"Could not open coefficients.xlsx automatically: {exc}")

係數和截距表：
          係數     特徵
0   2.809905     截距
1   2.195060     X1
2  -3.792877     X2
3   0.493124     X3
4   0.353789     X4
5  -0.483488     X5
6   0.074973   X1^2
7  -0.160956  X1 X2
8   0.211305  X1 X3
9   0.236894  X1 X4
10  0.299475  X1 X5
11 -0.149328   X2^2
12  0.139880  X2 X3
13  0.311204  X2 X4
14  0.186997  X2 X5
15 -0.060091   X3^2
16  0.429524  X3 X4
17  0.358142  X3 X5
18 -0.013570   X4^2
19 -0.832465  X4 X5
20  0.256892   X5^2

特徵名稱表：
     特徵名稱
0       1
1      X1
2      X2
3      X3
4      X4
5      X5
6    X1^2
7   X1 X2
8   X1 X3
9   X1 X4
10  X1 X5
11   X2^2
12  X2 X3
13  X2 X4
14  X2 X5
15   X3^2
16  X3 X4
17  X3 X5
18   X4^2
19  X4 X5
20   X5^2


0

In [2]:
"""輸出模型係數"""

import pandas as pd
import os


def _open_with_default_app(path):
    """Best-effort: open ``path`` with the operating system's default application."""
    try:
        if sys.platform.startswith("win"):
            # Windows-only API; replaces the shell built-in `start`.
            os.startfile(path)
        else:
            opener = "open" if sys.platform == "darwin" else "xdg-open"
            subprocess.Popen([opener, path])
    except OSError as exc:
        # Opening the file is a convenience only; never fail the report over it.
        print(f"Could not open {path} automatically: {exc}")


def output_coefficients(
    model, poly_features, feature_names, excel_path="coefficients.xlsx", open_file=True
):
    """Print a fitted model's intercept and non-zero coefficients and export them.

    Two tables are printed: the intercept followed by every non-zero
    coefficient with its feature name, and the full list of feature names.
    The first table is then written to an Excel workbook.

    Parameters
    ----------
    model
        Fitted model exposing ``coef_`` (indexable, one entry per feature)
        and ``intercept_``.
    poly_features
        Polynomial feature transformer. Not used by this function; kept so
        existing calls keep working.
    feature_names
        Sequence of feature names, indexed in parallel with ``model.coef_``.
    excel_path
        Destination of the Excel export. Pass ``None`` to skip writing
        (and opening) the file. Defaults to ``"coefficients.xlsx"``.
    open_file
        When true, try to open the written workbook with the system's
        default application. Failures to open are reported, not raised.

    Returns
    -------
    None
    """
    coef = model.coef_
    intercept = model.intercept_

    # Coefficients that are exactly zero (e.g. the constant column when the
    # model fits its own intercept) are left out of the report.
    nonzero_positions = [i for i in range(len(coef)) if coef[i] != 0]

    # Intercept first, then the remaining coefficients in feature order.
    coef_df = pd.DataFrame(
        {
            "係數": [intercept] + [coef[i] for i in nonzero_positions],
            "特徵": ["截距"] + [feature_names[i] for i in nonzero_positions],
        }
    )

    # Full list of feature names, including any dropped from the table above.
    feature_names_df = pd.DataFrame({"特徵名稱": feature_names})

    print("係數和截距表：")
    print(coef_df)
    print("\n特徵名稱表：")
    print(feature_names_df)

    if excel_path is not None:
        coef_df.to_excel(excel_path, index=False)
        if open_file:
            _open_with_default_app(excel_path)

In [3]:
# Report the intercept and coefficients of the fitted model.
poly_feature_names = poly_features.get_feature_names_out(
    input_features=["X1", "X2", "X3", "X4", "X5"]
)
output_coefficients(model, poly_features, poly_feature_names)

係數和截距表：
          係數     特徵
0   2.809905     截距
1   2.195060     X1
2  -3.792877     X2
3   0.493124     X3
4   0.353789     X4
5  -0.483488     X5
6   0.074973   X1^2
7  -0.160956  X1 X2
8   0.211305  X1 X3
9   0.236894  X1 X4
10  0.299475  X1 X5
11 -0.149328   X2^2
12  0.139880  X2 X3
13  0.311204  X2 X4
14  0.186997  X2 X5
15 -0.060091   X3^2
16  0.429524  X3 X4
17  0.358142  X3 X5
18 -0.013570   X4^2
19 -0.832465  X4 X5
20  0.256892   X5^2

特徵名稱表：
     特徵名稱
0       1
1      X1
2      X2
3      X3
4      X4
5      X5
6    X1^2
7   X1 X2
8   X1 X3
9   X1 X4
10  X1 X5
11   X2^2
12  X2 X3
13  X2 X4
14  X2 X5
15   X3^2
16  X3 X4
17  X3 X5
18   X4^2
19  X4 X5
20   X5^2


In [4]:
"""用來驗證"""

import pandas as pd
import os

# Export predictions, coefficients and both data splits to one workbook so
# the model can be checked by hand.
# X_test / y_test are the held-out features and actual values.
X_test_poly = poly_features.transform(X_test)  # test data must be transformed first
y_pred = model.predict(X_test_poly)

# Actual vs. predicted values for the test split.
results_df = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})

# Derive the feature names here instead of relying on `feature_names` left
# over from the coefficient-report cell, so this cell also works when that
# cell has not been run.
feature_names = poly_features.get_feature_names_out(
    input_features=["X1", "X2", "X3", "X4", "X5"]
)

# One row per polynomial feature with its coefficient.
# NOTE(review): model.intercept_ is not part of this sheet — confirm whether
# it should be exported as well for manual validation.
coef_df = pd.DataFrame({"Feature": feature_names, "Coefficient": model.coef_})

# Training and test splits, each with the target column appended.
train_df = pd.concat([X_train, y_train], axis=1)
test_df = pd.concat([X_test, y_test], axis=1)

# Write every table to its own sheet of the same workbook.
with pd.ExcelWriter("model_results.xlsx") as writer:
    results_df.to_excel(writer, sheet_name="Predictions", index=False)
    coef_df.to_excel(writer, sheet_name="Coefficients", index=False)
    train_df.to_excel(writer, sheet_name="訓練集", index=False)
    test_df.to_excel(writer, sheet_name="測試集", index=False)

# Open the workbook with the default application. The previous
# os.system("start ...") call only works in the Windows shell; this is a
# best-effort, platform-aware replacement that never aborts the cell.
try:
    if sys.platform.startswith("win"):
        os.startfile("model_results.xlsx")
    else:
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        subprocess.Popen([opener, "model_results.xlsx"])
except OSError as exc:
    print(f"Could not open model_results.xlsx automatically: {exc}")

0