In [1]:

import os

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, cross_val_predict, train_test_split

# Load the raw CSV and keep only the rows that have a target value.
df = pd.read_csv("../online_0616d24.csv")
df = df.dropna(subset=['Quantity'])
if df.empty:
    raise ValueError("No rows with a non-missing 'Quantity' value; nothing to model.")

# Columns with no observed values cannot be imputed: their median is NaN and
# their mode is empty (so `mode()[0]` would raise). They carry no information,
# so drop them before imputation instead of letting NaNs reach the models.
empty_cols = df.columns[df.isna().all()]
if len(empty_cols) > 0:
    print(f"Dropping columns with no observed values: {list(empty_cols)}")
    df = df.drop(columns=empty_cols)

# Numeric feature columns (any numeric width, target excluded) and categorical columns.
num_cols = df.select_dtypes(include='number').columns.drop('Quantity')
cat_cols = df.select_dtypes(include=['object', 'category']).columns

# Impute missing values: median for numeric columns, most frequent value for
# categorical ones. Columns without gaps are skipped to avoid needless work.
for col in num_cols:
    if df[col].isna().any():
        df[col] = df[col].fillna(df[col].median())
for col in cat_cols:
    if df[col].isna().any():
        # mode() ignores NaN and is non-empty here because all-NaN columns were dropped.
        df[col] = df[col].fillna(df[col].mode().iloc[0])

# One-hot encode every categorical column, dropping the first level of each.
# NOTE(review): this creates one column per distinct value; confirm that no
# high-cardinality identifier/text column is being expanded here.
df_encoded = pd.get_dummies(df, columns=cat_cols, drop_first=True)

# Feature matrix and target vector.
X = df_encoded.drop(columns=['Quantity'])
y = df_encoded['Quantity']


In [None]:

# Base models; their predictions become the inputs of the meta-learner.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
gb_model = GradientBoostingRegressor(n_estimators=100, random_state=42)
base_models = [('RandomForest', rf_model), ('GradientBoosting', gb_model)]

# Split FIRST so the hold-out rows never influence tuning or training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The meta-learner is trained on cross-validated predictions of the base
# models, not on the raw features. Build those out-of-fold predictions on the
# training rows so the grid search tunes the meta-learner on the kind of input
# it will actually see. cross_val_predict clones each estimator, so the base
# models themselves stay unfitted here.
cv_folds = 5
meta_features_train = np.column_stack([
    cross_val_predict(model, X_train, y_train, cv=cv_folds, n_jobs=-1)
    for _, model in base_models
])

# Grid search over the meta-learner's hyper-parameters (training rows only).
param_grid = {'fit_intercept': [True, False]}
grid_search = GridSearchCV(LinearRegression(), param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(meta_features_train, y_train)
best_params = grid_search.best_params_
best_mse = -grid_search.best_score_  # scoring is negated MSE, so flip the sign
print(f"Meta-learner params: {best_params} (CV MSE: {best_mse:.4f})")

# Train the stacking ensemble with the tuned meta-learner.
refined_meta_model = LinearRegression(**best_params)
stacked_model = StackingRegressor(estimators=base_models, final_estimator=refined_meta_model, cv=cv_folds)
stacked_model.fit(X_train, y_train)

# Score the untouched hold-out split to get an honest generalization estimate.
test_predictions = stacked_model.predict(X_test)
test_mse = mean_squared_error(y_test, test_predictions)
test_r2 = r2_score(y_test, test_predictions)
print(f"Hold-out MSE: {test_mse:.4f}, R2: {test_r2:.4f}")

# Persist the fitted ensemble.
model_path = "stacked_model.joblib"
joblib.dump(stacked_model, model_path)
print(f"✅ 模型已儲存至 {model_path}")


In [None]:

# Reload the persisted ensemble from disk.
# joblib files are pickle-based: only load artifacts produced by a trusted source.
model_path = "stacked_model.joblib"
loaded_model = joblib.load(model_path)

# Predict for every row of the feature matrix.
# NOTE(review): X also contains the rows the model was trained on, so these
# predictions are not an out-of-sample evaluation - confirm this is intended.
stacked_predictions = loaded_model.predict(X)

# Features plus the actual and predicted target, side by side.
df_result = X.assign(
    Actual_Quantity=y,
    Predicted_Quantity=stacked_predictions,
)

# Write the result table to CSV without the index column.
output_path = "0620Stacking.csv"
df_result.to_csv(output_path, index=False)

print(f"✅ 已使用儲存模型預測完成，結果儲存於：{output_path}")
