In [9]:
import os
import math
import pickle
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn

from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import VotingRegressor
import xgboost
from itertools import combinations
from torch.utils.data import DataLoader, TensorDataset

from utils.weather_api import WeatherApi
from utils.common_function import splitData
from enums.enums import Model, Date, Data, Rmse
from sklearn.metrics import accuracy_score
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import Ridge
import itertools

weatherApi = WeatherApi();
area = 'Billings_MT'
# Swanton_OH
X, y = weatherApi.get_weather_data_from_excel(area)
X_train, X_test, y_train, y_test = splitData(X, y, 365)

# MinMaxScaler 적용
scaler = MinMaxScaler()
if 'date' in X_train.columns:
    X_train = X_train.drop(columns=['date'])
    X_test = X_test.drop(columns=['date'])
if 'date' in y_train.columns[0]:
    y_train = y_train.drop(columns=[y_train.columns[0]])
    y_test = y_test.drop(columns=[y_test.columns[0]])

# MinMaxScaler 적용
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

date_range = pd.date_range(start='2023-08-01', end='2024-07-30')
date_df = pd.DataFrame(date_range, columns=['date'])


In [10]:
with open(f'result_model_fold/{area}/MLP_model_with_{area}.pkl', 'rb') as f:
    mlp = pickle.load(f)
mlp_model = mlp[Model.MODEL]

with open(f'result_model_fold/{area}/RF_model_with_{area}.pkl', 'rb') as f:
    rf = pickle.load(f)
rf_model = rf[Model.MODEL]

with open(f'result_model_fold/{area}/ExtraTree_model_with_{area}.pkl', 'rb') as f:
    extra = pickle.load(f)
extra_model = extra[Model.MODEL]

with open(f'result_model_fold/{area}/DecisionTree_model_with_{area}.pkl', 'rb') as f:
    decision = pickle.load(f)
decision_model = decision[Model.MODEL]

with open(f'result_model_fold/{area}/GradientBoosting_model_with_{area}.pkl', 'rb') as f:
    gradient = pickle.load(f)
gradient_model = gradient[Model.MODEL]

with open(f'result_model_fold/{area}/Xgboost_model_with_{area}.pkl', 'rb') as f:
    xgboost_ = pickle.load(f)
xgboost_model = xgboost_[Model.MODEL]

In [165]:
# from sklearn.ensemble import StackingRegressor
# from sklearn.linear_model import Ridge

# # 기본 모델 정의
# estimators = [
#     ('random_forest', rf_model),
#     ('extra', extra_model),
#     ('mlp', mlp_model),
#     ('decision', decision_model),
#     ('xgboost', xgboost_model)
# ]

# # 메타 모델로 Stacking 사용
# stacking_regressor = StackingRegressor(
#     estimators=estimators,
#     final_estimator=Ridge()
# )

# multi_output_staking_reg = MultiOutputRegressor(stacking_regressor)

# # 모델 학습
# multi_output_staking_reg.fit(X_train_scaled, y_train)

# # 예측 및 평가
# y_pred_stack = multi_output_staking_reg.predict(X_test_scaled)
# rmse_stack = np.sqrt(mean_squared_error(y_test, y_pred_stack))

# data_to_save = {
#     Model.MODEL: multi_output_staking_reg,
#     Data.TRAIN_INPUT_DATA: X_train_scaled,
#     Data.TRAIN_OUTPUT_DATA: X_test_scaled,
#     Data.TEST_INPUT_DATA: X_test_scaled,
#     Data.TEST_OUTPUT_DATA: y_test,
#     Data.PREDICTED_OUTPUT_DATA: y_pred_stack,
#     Rmse.BEST_RMSE: math.sqrt(mean_squared_error(y_pred_stack, y_test)),
#     Date.DATE: date_df,
# }

# path = f'result_model_fold/{area}'
# file_path = f'{path}/Staking_DT_XG_model_with_{area}.pkl'

# os.makedirs(path, exist_ok=True)
# with open(file_path, 'wb') as f:
#     pickle.dump(data_to_save, f)

In [15]:

estimators = [
    ('RF', rf_model),
    ('EXTRA', extra_model),
    ('MLP', mlp_model),
    ('DT', decision_model),
    ('XG', xgboost_model)
]
result = []
for r in range(2, len(estimators) + 1):  # 2개부터 모든 모델 조합까지 생성
    combi = list(itertools.combinations(estimators, r))
    result.extend(combi)
print(len(result))

26


In [11]:
estimators = [
    ('RF', rf_model),
    ('EXTRA', extra_model),
    ('MLP', mlp_model),
    ('DT', decision_model),
    ('XG', xgboost_model)
]
combination_result = []
for r in range(2, len(estimators) + 1):  # 2개부터 모든 모델 조합까지 생성
    combi = list(itertools.combinations(estimators, r))
    combination_result.extend(combi)
# 가능한 모든 조합 생성
for i in range(1, len(estimators) + 1):
    for subset in combination_result:
        # Stacking 모델 생성
        stacking_regressor = StackingRegressor(
            estimators=subset,
            final_estimator=Ridge()
        )
        multi_output_staking_reg = MultiOutputRegressor(stacking_regressor)

        # 모델 학습
        multi_output_staking_reg.fit(X_train_scaled, y_train)

        # 예측 및 평가
        y_pred_stack = multi_output_staking_reg.predict(X_test_scaled)
        rmse_stack = np.sqrt(mean_squared_error(y_test, y_pred_stack))

        data_to_save = {
            Model.MODEL: multi_output_staking_reg,
            Data.TRAIN_INPUT_DATA: X_train_scaled,
            Data.TRAIN_OUTPUT_DATA: X_test_scaled,
            Data.TEST_INPUT_DATA: X_test_scaled,
            Data.TEST_OUTPUT_DATA: y_test,
            Data.PREDICTED_OUTPUT_DATA: y_pred_stack,
            Rmse.BEST_RMSE: math.sqrt(mean_squared_error(y_pred_stack, y_test)),
            Date.DATE: date_df,
        }

        # 조합 이름 생성 (모델 이름을 이어붙임)
        model_names = '_'.join([name for name, _ in subset])

        # 경로 및 파일 이름 설정
        path = f'result_model_fold/{area}'
        file_path = f'{path}/Stacking_{model_names}_model_with_{area}.pkl'

        # 디렉토리 생성 및 파일 저장
        os.makedirs(path, exist_ok=True)
        with open(file_path, 'wb') as f:
            pickle.dump(data_to_save, f)

        print(f'Model saved to {file_path} with RMSE: {rmse_stack}')

InvalidParameterError: The 'estimators' parameter of StackingRegressor must be an instance of 'list'. Got (('RF', RandomForestRegressor(max_depth=15, min_samples_leaf=3, n_estimators=200,
                      random_state=42)),) instead.