In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

In [2]:
import pandas as pd

# 讀取數據
data = pd.read_csv('SPX0607.csv')

# 轉換日期列為日期格式
data['Date'] = pd.to_datetime(data['Date'])

# 設置日期列為索引
data.set_index('Date', inplace=True)

# 顯示數據的前幾行
data.head()


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1990-01-02,353.399994,359.690002,351.980011,359.690002,359.690002,162070000
1990-01-03,359.690002,360.589996,357.890015,358.76001,358.76001,192330000
1990-01-04,358.76001,358.76001,352.890015,355.670013,355.670013,177000000
1990-01-05,355.670013,355.670013,351.350006,352.200012,352.200012,158530000
1990-01-08,352.200012,354.23999,350.540009,353.790009,353.790009,140110000


In [3]:
from sklearn.model_selection import train_test_split

# 選擇特徵和目標變量
features = ['Open', 'High', 'Low', 'Volume']
target = 'Close'

X = data[features]
y = data[target]

# 分割數據集為訓練集和測試集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [11]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score

# 訓練KNN回歸模型
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)

# 預測
y_pred_knn = knn.predict(X_test)

knn_mse = mean_squared_error(y_test, y_pred_knn)
knn_r2 = r2_score(y_test, y_pred_knn)

# 評估
print("KNN MSE:", mean_squared_error(y_test, y_pred_knn))
print("KNN R²:", r2_score(y_test, y_pred_knn))


KNN MSE: 795600.4076515258
KNN R²: 0.3919230938938941


In [12]:
from sklearn.linear_model import LinearRegression

# 訓練簡單線性回歸模型
lr = LinearRegression()
lr.fit(X_train, y_train)

# 預測
y_pred_lr = lr.predict(X_test)

lr_mse = mean_squared_error(y_test, y_pred_lr)
lr_r2 = r2_score(y_test, y_pred_lr)

# 評估
print("Linear Regression MSE:", mean_squared_error(y_test, y_pred_lr))
print("Linear Regression R²:", r2_score(y_test, y_pred_lr))


Linear Regression MSE: 61.048699766832975
Linear Regression R²: 0.9999533405160191


In [13]:
from sklearn.preprocessing import PolynomialFeatures

# 創建多項式特徵
poly = PolynomialFeatures(degree=3)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# 訓練多項式回歸模型
lr_poly = LinearRegression()
lr_poly.fit(X_train_poly, y_train)

# 預測
y_pred_poly = lr_poly.predict(X_test_poly)

poly_mse = mean_squared_error(y_test, y_pred_poly)
poly_r2 = r2_score(y_test, y_pred_poly)

# 評估
print("Polynomial Regression MSE:", mean_squared_error(y_test, y_pred_poly))
print("Polynomial Regression R²:", r2_score(y_test, y_pred_poly))


Polynomial Regression MSE: 117921.04681922669
Polynomial Regression R²: 0.9098730158694998


In [18]:
from sklearn.ensemble import RandomForestRegressor

# 訓練隨機森林回歸模型
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# 預測
y_pred_rf = rf.predict(X_test)

rf_mse = mean_squared_error(y_test, y_pred_rf)
rf_r2 = r2_score(y_test, y_pred_rf)

# 評估
print("Random Forest MSE:", mean_squared_error(y_test, y_pred_rf))
print("Random Forest R²:", r2_score(y_test, y_pred_rf))


Random Forest MSE: 111.71762434405791
Random Forest R²: 0.9999146142878821


In [19]:
from sklearn.tree import DecisionTreeRegressor
# 決策樹回歸
dt = DecisionTreeRegressor(random_state=42)
dt.fit(X_train, y_train)

y_pred_dt = dt.predict(X_test)

dt_mse = mean_squared_error(y_test, y_pred_dt)
dt_r2 = r2_score(y_test, y_pred_dt)

# 評估
print("Decision Tree MSE:", mean_squared_error(y_test, y_pred_dt))
print("Decision Tree R²:", r2_score(y_test, y_pred_dt))


Decision Tree MSE: 188.00575922118878
Decision Tree R²: 0.9998563073129452


In [20]:
from sklearn.ensemble import GradientBoostingRegressor
# 梯度提升回歸
gbr = GradientBoostingRegressor(random_state=42)
gbr.fit(X_train, y_train)

y_pred_gbr = gbr.predict(X_test)

gbr_mse = mean_squared_error(y_test, y_pred_gbr)
gbr_r2 = r2_score(y_test, y_pred_gbr)

# 評估
print("GBR MSE:", mean_squared_error(y_test, y_pred_gbr))
print("GBR R²:", r2_score(y_test, y_pred_gbr))

GBR MSE: 204.35016093796014
GBR R²: 0.9998438152966861


In [21]:
import xgboost as xgb

# XGBoost回歸
xgbr = xgb.XGBRegressor(objective='reg:squarederror', random_state=42, n_estimators=10)  # 減少迭代次數到10
xgbr.fit(X_train, y_train)

y_pred_xgbr = xgbr.predict(X_test)

xgbr_mse = mean_squared_error(y_test, y_pred_xgbr)
xgbr_r2 = r2_score(y_test, y_pred_xgbr)

# 評估
print("XGB MSE:", mean_squared_error(y_test, y_pred_xgbr))
print("XGB R²:", r2_score(y_test, y_pred_xgbr))

XGB MSE: 1389.3949674440703
XGB R²: 0.9989380862741679


In [22]:
results = {
    "Model": ["KNN", "Linear Regression", "Polynomial Regression", "Random Forest", "Decision Tree", "Gradient Boosting", "XGBoost"],
    "MSE": [knn_mse, lr_mse, poly_mse, rf_mse, dt_mse, gbr_mse, xgbr_mse],
    "R2": [knn_r2, lr_r2, poly_r2, rf_r2, dt_r2, gbr_r2, xgbr_r2]
}

In [24]:
results_df = pd.DataFrame(results)

results_df

Unnamed: 0,Model,MSE,R2
0,KNN,795600.407652,0.391923
1,Linear Regression,61.0487,0.999953
2,Polynomial Regression,117921.046819,0.909873
3,Random Forest,111.717624,0.999915
4,Decision Tree,188.005759,0.999856
5,Gradient Boosting,204.350161,0.999844
6,XGBoost,1389.394967,0.998938
