In [45]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GroupKFold, GridSearchCV
from sklearn.metrics import mean_squared_error
import holidays
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
import optuna
from sklearn.model_selection import cross_val_score, TimeSeriesSplit
import xgboost as xgb
from statsmodels.tsa.filters.hp_filter import hpfilter

In [46]:
data = pd.read_csv("train_temporary_dtw.csv")

In [47]:
data

Unnamed: 0,TM,branch_ID,TA,WD,WS,RN_DAY,RN_HR1,HM,SI,ta_chi,heat_demand
0,2021010101,A,-10.1,78.3,0.5,0.0,0.0,68.2,0.0,-8.2,281.0
1,2021010102,A,-10.2,71.9,0.6,0.0,0.0,69.9,0.0,-8.6,262.0
2,2021010103,A,-10.0,360.0,0.0,0.0,0.0,69.2,0.0,-8.8,266.0
3,2021010104,A,-9.3,155.9,0.5,0.0,0.0,65.0,0.0,-8.9,285.0
4,2021010105,A,-9.0,74.3,1.9,0.0,0.0,63.5,0.0,-9.2,283.0
...,...,...,...,...,...,...,...,...,...,...,...
499296,2023123119,S,3.2,233.5,0.4,2.5,0.0,91.5,0.0,2.8,34.0
499297,2023123120,S,2.9,227.4,0.1,2.5,0.0,92.1,0.0,2.7,35.0
499298,2023123121,S,2.1,360.0,0.0,2.5,0.0,93.3,0.0,1.4,35.0
499299,2023123122,S,2.2,30.0,1.4,2.5,0.0,95.5,0.0,1.3,40.0


### 파생변수 생성
1. 평일,주말
2. 공휴일

In [48]:
# 요일 추출 (0=월요일, 6=일요일)
data['TM'] = pd.to_datetime(data['TM'], format='%Y%m%d%H')
data['year'] = data['TM'].dt.year
data['month'] = data['TM'].dt.month
data['day'] = data['TM'].dt.day
data['hour'] = data['TM'].dt.hour

data['weekday'] = data['TM'].dt.weekday

# 평일/주말 구분 (0=평일, 1=주말)
data['is_weekend'] = (data['weekday'] >= 5).astype(int)

# 한국 공휴일 설정
korea_holidays = holidays.Korea(years=range(2021, 2025))

# 공휴일 여부 (1=공휴일, 0=평일)
data['is_holiday'] = data['TM'].dt.date.apply(lambda x: x in korea_holidays).astype(int)

data.drop("weekday",axis=1, inplace=True)
data.head()

Unnamed: 0,TM,branch_ID,TA,WD,WS,RN_DAY,RN_HR1,HM,SI,ta_chi,heat_demand,year,month,day,hour,is_weekend,is_holiday
0,2021-01-01 01:00:00,A,-10.1,78.3,0.5,0.0,0.0,68.2,0.0,-8.2,281.0,2021,1,1,1,0,1
1,2021-01-01 02:00:00,A,-10.2,71.9,0.6,0.0,0.0,69.9,0.0,-8.6,262.0,2021,1,1,2,0,1
2,2021-01-01 03:00:00,A,-10.0,360.0,0.0,0.0,0.0,69.2,0.0,-8.8,266.0,2021,1,1,3,0,1
3,2021-01-01 04:00:00,A,-9.3,155.9,0.5,0.0,0.0,65.0,0.0,-8.9,285.0,2021,1,1,4,0,1
4,2021-01-01 05:00:00,A,-9.0,74.3,1.9,0.0,0.0,63.5,0.0,-9.2,283.0,2021,1,1,5,0,1


In [49]:
def add_fourier_features(df, col, period, order=3, prefix=None):
    """
    Append 2*order Fourier terms for a cyclical column.

    Parameters
    ----------
    df      : pd.DataFrame              (original data)
    col     : str                       (column holding integers 0-(P-1) or 1-P)
    period  : int                       (cycle length, e.g. 24 for hour, 12 for month)
    order   : int, default 3            (# harmonics K)
    prefix  : str or None               (prefix for new columns; defaults to `col`)
    """
    prefix = prefix or col
    out = df.copy()
    x = out[col].astype(float)

    for k in range(1, order + 1):
        angle = 2.0 * np.pi * k * x / period
        out[f"{prefix}_sin{k}"] = np.sin(angle)
        out[f"{prefix}_cos{k}"] = np.cos(angle)
    return out

# Hour
data = add_fourier_features(data, col='hour', period=24, order=2, prefix='hour')

# Month
data = add_fourier_features(data, col='month', period=12, order=2, prefix='month')



data['days_in_month'] = data['TM'].dt.days_in_month

def fourier_dom(df, order=3, day_col='day', period_col='days_in_month', prefix='dom'):
    out = df.copy()
    d   = out[day_col].astype(float)
    P   = out[period_col].astype(float)
    for k in range(1, order + 1):
        angle = 2 * np.pi * k * d / P
        out[f'{prefix}_sin{k}'] = np.sin(angle)
        out[f'{prefix}_cos{k}'] = np.cos(angle)
    return out

data = fourier_dom(data, order=2,prefix='day')

3. 불쾌지수(DI)  
DI = 0.4×(Ta + Tw) + 15  → 일반적인 식  
Ta : 건구온도  
Tw : 습구온도 (없을 시 밑의 식으로)  

DI = 9/5×Ta - 0.55×(1 - RH)×(9/5×Ta - 26) + 32  
RH : 상대습도 (소수 단위)

In [50]:
def calculate_DI(ta, hm):
    hm = hm / 100
    DI = 1.8*ta - 0.55*(1 - hm)*(1.8*ta - 26) +32
    return DI

In [51]:
def DI_level(di):
    if di < 68:
        return 0  # 전원 쾌적
    elif di < 70:
        return 1  # 불쾌감 나타남  
    elif di < 75:
        return 2  # 10% 정도 불쾌
    elif di < 80:
        return 3  # 50% 정도 불쾌
    elif di < 83:
        return 4  # 전원 불쾌
    else:
        return 5  # 매우 불쾌

In [52]:
data['DI'] = calculate_DI(data['TA'], data['HM'])
data['Discomfort_level'] = data['DI'].apply(DI_level)

4. 난방도일 (HDD) 

원래는 일별로 구하지만 그렇게 하면 같은 값이 24개씩 반복되기 때문에 시간별 난방도일로 계산

In [53]:
def create_HDD_features(
    data: pd.DataFrame,
    *,
    branch_col: str = "branch_ID",   # 그룹 구분 열
    temp_col: str   = "TA",          # 시간별 실외 온도
    time_col: str   = "TM",          # 타임스탬프
    base_temp: int  = 18,            # 기준 온도(°C)
    windows: tuple  = (7, 30)        # 누적 일수(예: 7일·30일)
) -> pd.DataFrame:
    df = data.copy()

    df = df.sort_values([branch_col, time_col])

    # 3) 시간별 HDH = (base – TA)^+    (음수면 0으로 잘림)
    df["hourly_HDH"] = (base_temp - df[temp_col]).clip(lower=0)

    # 4) 최근 N일 누적 HDD — 그룹별 rolling 합계
    for days in windows:
        hours = 24 * days                        # 창 크기(시간)
        df[f"rolling_HDD_{days}d"] = (
            df
            .groupby(branch_col, observed=True)["hourly_HDH"]
            .transform(lambda s: s.rolling(window=hours, min_periods=1).sum())
        )

    return df

# 사용
data = create_HDD_features(
    data,            # 원본 DataFrame
    branch_col="branch_ID",
    temp_col="TA",
    time_col="TM",
    base_temp=18,
    windows=(7, 30)  # 7일 및 30일 누적 HDD
)

### HP 필터

In [54]:
# 2) 그룹별 HP 필터 함수
def hp_filter_group(group, column="heat_demand", lam=1600):
    """
    한 그룹(branch_ID)에 대해 HP 필터 적용 후 trend·noise 열을 추가해 돌려줌
    """
    noise, trend     = hpfilter(group[column], lamb=lam)
    group            = group.copy()          # 원본 손상 방지
    group["heat_demand_trend"]   = trend
    group["heat_demand_cycle"]   = noise
    return group

# 3) 정렬(시간 순서) → 그룹별 apply → 결과를 하나로
data = (
    data.sort_values(["branch_ID", "TM"])
      .groupby("branch_ID", group_keys=False)
      .apply(hp_filter_group)   # <- 추가
)

  .apply(hp_filter_group)   # <- 추가


In [55]:
# lag features 생성
def create_lag_features(df, column, lags):
    """
    Create lag features for a specific column in the DataFrame.
    
    Parameters:
    df (DataFrame): Input DataFrame.
    column (str): Column name to create lag features for.
    lags (list): List of lag periods to create features for.
    
    Returns:
    DataFrame: DataFrame with lag features added.
    """
    for lag in lags:
        df[f'{column}_lag_{lag}'] = df[column].shift(lag)
    return df

lag_dict = {'TA':[1,3,6,12,24], 'HM':[1,2,3], 'WS':[1,3],'SI':[1]}

# grouped by 'branch_ID'

for col, lags in lag_dict.items():
    for lag in lags:
        data[f"{col}_lag_{lag}"] = (
            data
            .groupby("branch_ID")[col]          # 그룹별 시계열
            .transform(lambda s: s.shift(lag))  # 그룹 내부에서만 shift
        )

In [56]:
# 타겟 변수와 피처 분리
target_col = 'heat_demand_trend'
exclude_cols = ['TM', target_col, 'year','month', 'day','hour','Unnamed: 0','branch_ID',"WD","RN_DAY",'days_in_month', 'heat_demand_cycle','heat_demand']
feature_cols = [col for col in data.columns if col not in exclude_cols]

In [57]:
feature_cols

['TA',
 'WS',
 'RN_HR1',
 'HM',
 'SI',
 'ta_chi',
 'is_weekend',
 'is_holiday',
 'hour_sin1',
 'hour_cos1',
 'hour_sin2',
 'hour_cos2',
 'month_sin1',
 'month_cos1',
 'month_sin2',
 'month_cos2',
 'day_sin1',
 'day_cos1',
 'day_sin2',
 'day_cos2',
 'DI',
 'Discomfort_level',
 'hourly_HDH',
 'rolling_HDD_7d',
 'rolling_HDD_30d',
 'TA_lag_1',
 'TA_lag_3',
 'TA_lag_6',
 'TA_lag_12',
 'TA_lag_24',
 'HM_lag_1',
 'HM_lag_2',
 'HM_lag_3',
 'WS_lag_1',
 'WS_lag_3',
 'SI_lag_1']

In [58]:
data.dropna(subset=feature_cols,inplace=True)
len(data)

498845

In [59]:
data['branch_ID'] = data['branch_ID'].astype('category')

print(f"\nTarget 변수: {target_col}")
print(f"사용될 Feature 개수: {len(feature_cols)}")


Target 변수: heat_demand_trend
사용될 Feature 개수: 36


In [60]:
# 연도 컬럼 추가
data['year'] = pd.to_datetime(data['TM']).dt.year

# 바로 분할
train_data = data[data['year'].isin([2021, 2022])]
val_data = data[data['year'] == 2023]

Branch ID별 모델 학습

In [61]:
import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error
import xgboost as xgb
import optuna
import joblib

def find_best_params_by_branch(data: pd.DataFrame, feature_cols: list, target_col: str) -> dict:
    best_params_dict = {}
    unique_branches = data['branch_ID'].unique()
    
    # 학습용과 검증용 데이터 분리
    train_data = data[data['year'].isin([2021, 2022])].copy()
    val_data = data[data['year'] == 2023].copy()
    
    for branch_id in unique_branches:
        print(f"\n===== Finding Best Parameters for branch_ID: {branch_id} =====")
        
        # 지사별 데이터 필터링
        branch_train = train_data[train_data['branch_ID'] == branch_id].copy()
        branch_val = val_data[val_data['branch_ID'] == branch_id].copy()
        
        # 시계열 순서대로 정렬
        branch_train = branch_train.sort_values('TM').reset_index(drop=True)
        branch_val = branch_val.sort_values('TM').reset_index(drop=True)
        
        X_train = branch_train[feature_cols]
        y_train = branch_train[target_col]
        X_val = branch_val[feature_cols]
        y_val = branch_val[target_col]
        
        # 시계열 교차검증 설정 (학습 데이터 내에서)
        tscv = TimeSeriesSplit(n_splits=5)
        
        # Optuna objective 함수 (CV 적용)
        def objective(trial):
            params = {
                'n_estimators': trial.suggest_int('n_estimators', 500, 5000, step=100),
                'max_depth': trial.suggest_int('max_depth', 3, 12),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
                'subsample': trial.suggest_float('subsample', 0.7, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
                'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.7, 1.0),
                'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
                'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
                'min_child_weight': trial.suggest_int('min_child_weight', 1, 7),
                'gamma': trial.suggest_float('gamma', 1e-8, 0.5, log=True),
                'random_state': 42,
                'n_jobs': -1,
                'verbosity': 0
            }
            
            # CV 점수들을 저장할 리스트
            cv_scores = []
            
            # TimeSeriesSplit을 실제로 사용하여 CV 수행
            for train_idx, val_idx in tscv.split(X_train):
                X_fold_train, X_fold_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
                y_fold_train, y_fold_val = y_train.iloc[train_idx], y_train.iloc[val_idx]
                
                # 모델 생성 및 학습
                xgb_model = xgb.XGBRegressor(**params)
                xgb_model.fit(X_fold_train, y_fold_train)
                
                # fold별 예측 및 RMSE 계산
                y_pred = xgb_model.predict(X_fold_val)
                rmse = np.sqrt(mean_squared_error(y_fold_val, y_pred))
                cv_scores.append(rmse)
            
            # CV 점수들의 평균을 반환 (최적화 목표)
            return np.mean(cv_scores)
        
        print(f"--- Starting Optuna Hyperparameter Tuning for {branch_id} ---")
        
        # Optuna study 생성 및 최적화
        study = optuna.create_study(direction='minimize', 
                                  sampler=optuna.samplers.TPESampler(seed=42))
        study.optimize(objective, n_trials=50)
        
        # 최적 파라미터로 2023 검증 데이터 평가
        best_params = study.best_params.copy()
        best_params.update({
            'random_state': 42,
            'n_jobs': -1,
            'verbosity': 0
        })
        
        final_model = xgb.XGBRegressor(**best_params)
        final_model.fit(X_train, y_train)  # 2021-2022 전체로 학습
        y_val_pred = final_model.predict(X_val)  # 2023으로 예측
        val_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
        
        best_params_dict[branch_id] = study.best_params
        print(f"--- Best Parameters for {branch_id}: {study.best_params} ---")
        print(f"--- Best CV RMSE for {branch_id}: {study.best_value:.4f} ---")
        print(f"--- 2023 Validation RMSE for {branch_id}: {val_rmse:.4f} ---")
    
    return best_params_dict

def train_final_models_with_best_params(data: pd.DataFrame, feature_cols: list, target_col: str, best_params_dict: dict) -> dict:
    final_models = {}
    unique_branches = data['branch_ID'].unique()
    
    # 2021-2023 전체 데이터 (최종 학습용)
    full_train_data = data[data['year'].isin([2021, 2022, 2023])].copy()
    
    for branch_id in unique_branches:
        print(f"\n===== Training Final Model for branch_ID: {branch_id} =====")
        
        # 지사별 데이터 필터링 및 정렬
        branch_data = full_train_data[full_train_data['branch_ID'] == branch_id].copy()
        branch_data = branch_data.sort_values('TM').reset_index(drop=True)
        
        X = branch_data[feature_cols]
        y = branch_data[target_col]
        
        # 해당 지사의 최적 파라미터 가져오기
        best_params = best_params_dict[branch_id].copy()
        best_params.update({
            'random_state': 42,
            'n_jobs': -1,
            'verbosity': 0
        })
        
        # 최종 모델 학습
        final_model = xgb.XGBRegressor(**best_params)
        final_model.fit(X, y)
        
        final_models[branch_id] = final_model
        print(f"--- Final model training completed for {branch_id} ---")
    
    return final_models

def predict_with_final_models(data_pred: pd.DataFrame, models: dict, feature_cols: list) -> pd.DataFrame:
    predictions = []
    
    data_pred_sorted = data_pred.sort_values('branch_ID').copy()
    
    for branch_id, group_data in data_pred_sorted.groupby('branch_ID'):
        print(f"--- Predicting for branch_ID: {branch_id} ---")
        
        if branch_id in models:
            model = models[branch_id]
            X_pred = group_data[feature_cols]
            group_predictions = model.predict(X_pred)
            result_data = group_data.copy()
            result_data['prediction'] = group_predictions
            predictions.append(result_data)
    
    final_data = pd.concat(predictions).sort_index()
    return final_data

In [62]:
# 1단계: 2021-2022 학습, 2023 검증으로 최적 파라미터 찾기
print("=" * 60)
print("STEP 1: Finding Best Parameters using 2021-2022 train, 2023 validation")
print("=" * 60)

best_params_dict = find_best_params_by_branch(data, feature_cols, target_col)

# 최적 파라미터 저장
joblib.dump(best_params_dict, 'best_params_by_branch_modify1_cv.joblib')
print(f"\nBest parameters saved for {len(best_params_dict)} branches")

final_models = train_final_models_with_best_params(
    data         = train_data,  # <─ 2023 제외
    feature_cols = feature_cols,
    target_col   = target_col,
    best_params_dict = best_params_dict
)

# 3) 2023 데이터 예측
pred_2023 = predict_with_final_models(
    data_pred    = val_data,  # <─ 2023만 전달
    models       = final_models,
    feature_cols = feature_cols
)
pred_2023 = pred_2023.to_csv('2023_trend_lag.csv', index=False)

# 2단계: 최적 파라미터로 2021-2023 전체 데이터 재학습
print("\n" + "=" * 60)
print("STEP 2: Training Final Models with 2021-2023 data using best parameters")
print("=" * 60)

final_models = train_final_models_with_best_params(data, feature_cols, target_col, best_params_dict)



# 최종 모델 저장
joblib.dump(final_models, 'final_models_2021_2023_modify1_cv.joblib')
print(f"\nFinal models saved for {len(final_models)} branches")

STEP 1: Finding Best Parameters using 2021-2022 train, 2023 validation


[I 2025-06-27 00:46:31,005] A new study created in memory with name: no-name-1d23100d-f793-4d7e-96e0-f578bccdd054



===== Finding Best Parameters for branch_ID: A =====
--- Starting Optuna Hyperparameter Tuning for A ---


[I 2025-06-27 00:46:49,002] Trial 0 finished with value: 31.225352280040816 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 31.225352280040816.
[I 2025-06-27 00:47:06,639] Trial 1 finished with value: 32.81045098279331 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 31.225352280040816.
[I 2025-06-27 00:47:32,000] Trial 2 finished with value: 25.631287575463944 and 

--- Best Parameters for A: {'n_estimators': 4100, 'max_depth': 3, 'learning_rate': 0.10591208236555662, 'subsample': 0.9978397176218622, 'colsample_bytree': 0.881733032491407, 'colsample_bylevel': 0.9801088707200357, 'reg_alpha': 0.021403720973640575, 'reg_lambda': 0.004560908945358549, 'min_child_weight': 4, 'gamma': 7.569837047377218e-07} ---
--- Best CV RMSE for A: 21.5353 ---
--- 2023 Validation RMSE for A: 13.4579 ---

===== Finding Best Parameters for branch_ID: B =====
--- Starting Optuna Hyperparameter Tuning for B ---


[I 2025-06-27 01:11:24,439] Trial 0 finished with value: 58.67860596237814 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 58.67860596237814.
[I 2025-06-27 01:11:43,980] Trial 1 finished with value: 71.1821964619215 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 58.67860596237814.
[I 2025-06-27 01:12:09,390] Trial 2 finished with value: 53.39601898408527 and param

--- Best Parameters for B: {'n_estimators': 3800, 'max_depth': 3, 'learning_rate': 0.02069366360085304, 'subsample': 0.9230670536703844, 'colsample_bytree': 0.9970867725133342, 'colsample_bylevel': 0.9981590300259489, 'reg_alpha': 0.004632544202786832, 'reg_lambda': 6.56314641974862e-07, 'min_child_weight': 3, 'gamma': 0.0012243579519115896} ---
--- Best CV RMSE for B: 42.7715 ---
--- 2023 Validation RMSE for B: 28.5053 ---

===== Finding Best Parameters for branch_ID: C =====
--- Starting Optuna Hyperparameter Tuning for C ---


[I 2025-06-27 01:39:08,500] Trial 0 finished with value: 55.01291052430944 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 55.01291052430944.
[I 2025-06-27 01:39:29,844] Trial 1 finished with value: 61.1130883040495 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 55.01291052430944.
[I 2025-06-27 01:39:56,843] Trial 2 finished with value: 41.14655960313296 and param

--- Best Parameters for C: {'n_estimators': 1600, 'max_depth': 3, 'learning_rate': 0.033465245544042045, 'subsample': 0.9288476771453233, 'colsample_bytree': 0.9276683661215837, 'colsample_bylevel': 0.9648457702905309, 'reg_alpha': 6.065547864616037e-05, 'reg_lambda': 1.031621903272465e-05, 'min_child_weight': 5, 'gamma': 8.847341833089386e-07} ---
--- Best CV RMSE for C: 33.1256 ---
--- 2023 Validation RMSE for C: 20.5648 ---

===== Finding Best Parameters for branch_ID: D =====
--- Starting Optuna Hyperparameter Tuning for D ---


[I 2025-06-27 02:06:13,854] Trial 0 finished with value: 36.90712232833919 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 36.90712232833919.
[I 2025-06-27 02:06:36,526] Trial 1 finished with value: 44.92464854864805 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 36.90712232833919.
[I 2025-06-27 02:07:05,533] Trial 2 finished with value: 29.503113364430497 and par

--- Best Parameters for D: {'n_estimators': 2100, 'max_depth': 3, 'learning_rate': 0.022532919177937046, 'subsample': 0.8753372257373101, 'colsample_bytree': 0.926157880517346, 'colsample_bylevel': 0.9581822705778523, 'reg_alpha': 1.0238884122213543e-05, 'reg_lambda': 8.05376499156832e-06, 'min_child_weight': 2, 'gamma': 0.13201547308256123} ---
--- Best CV RMSE for D: 25.2410 ---
--- 2023 Validation RMSE for D: 18.4927 ---

===== Finding Best Parameters for branch_ID: E =====
--- Starting Optuna Hyperparameter Tuning for E ---


[I 2025-06-27 02:32:42,009] Trial 0 finished with value: 13.242056678127705 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 13.242056678127705.
[I 2025-06-27 02:33:02,433] Trial 1 finished with value: 13.68042741147789 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 13.242056678127705.
[I 2025-06-27 02:33:29,212] Trial 2 finished with value: 13.211850182527934 and 

--- Best Parameters for E: {'n_estimators': 2500, 'max_depth': 5, 'learning_rate': 0.10741218390105153, 'subsample': 0.8601527664680185, 'colsample_bytree': 0.756859945542268, 'colsample_bylevel': 0.7896498333420651, 'reg_alpha': 0.29027051437679036, 'reg_lambda': 6.288032537662e-07, 'min_child_weight': 5, 'gamma': 2.9754593671336966e-07} ---
--- Best CV RMSE for E: 12.4506 ---
--- 2023 Validation RMSE for E: 8.3861 ---

===== Finding Best Parameters for branch_ID: F =====
--- Starting Optuna Hyperparameter Tuning for F ---


[I 2025-06-27 03:02:27,323] Trial 0 finished with value: 16.14078071962444 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 16.14078071962444.
[I 2025-06-27 03:02:44,873] Trial 1 finished with value: 17.751952750505986 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 16.14078071962444.
[I 2025-06-27 03:03:09,702] Trial 2 finished with value: 13.6410831545472 and para

--- Best Parameters for F: {'n_estimators': 1600, 'max_depth': 3, 'learning_rate': 0.019558409727344354, 'subsample': 0.7536749739252037, 'colsample_bytree': 0.8728611141795591, 'colsample_bylevel': 0.7635467416312933, 'reg_alpha': 1.1938892491787118e-08, 'reg_lambda': 0.5587548705828778, 'min_child_weight': 6, 'gamma': 0.0558336496718691} ---
--- Best CV RMSE for F: 11.4552 ---
--- 2023 Validation RMSE for F: 6.4819 ---

===== Finding Best Parameters for branch_ID: G =====
--- Starting Optuna Hyperparameter Tuning for G ---


[I 2025-06-27 03:19:17,251] Trial 0 finished with value: 47.06328056116128 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 47.06328056116128.
[I 2025-06-27 03:19:38,096] Trial 1 finished with value: 51.39781834222627 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 47.06328056116128.
[I 2025-06-27 03:20:05,375] Trial 2 finished with value: 38.68470388232864 and para

--- Best Parameters for G: {'n_estimators': 2000, 'max_depth': 4, 'learning_rate': 0.1946690636161366, 'subsample': 0.8649792686862511, 'colsample_bytree': 0.8550929277731799, 'colsample_bylevel': 0.9723478276579524, 'reg_alpha': 0.0008834425907962622, 'reg_lambda': 3.5148260710573724e-08, 'min_child_weight': 7, 'gamma': 6.618966349942729e-05} ---
--- Best CV RMSE for G: 34.0077 ---
--- 2023 Validation RMSE for G: 20.6942 ---

===== Finding Best Parameters for branch_ID: H =====
--- Starting Optuna Hyperparameter Tuning for H ---


[I 2025-06-27 03:42:03,796] Trial 0 finished with value: 32.74910992335599 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 32.74910992335599.
[I 2025-06-27 03:42:22,903] Trial 1 finished with value: 37.57846431405707 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 32.74910992335599.
[I 2025-06-27 03:42:48,744] Trial 2 finished with value: 29.01583404650801 and para

--- Best Parameters for H: {'n_estimators': 500, 'max_depth': 4, 'learning_rate': 0.07054952747278552, 'subsample': 0.8219576136159128, 'colsample_bytree': 0.7909278289625393, 'colsample_bylevel': 0.8102039919563991, 'reg_alpha': 0.028718546263656552, 'reg_lambda': 2.1031526138888165e-07, 'min_child_weight': 6, 'gamma': 6.799516618444042e-07} ---
--- Best CV RMSE for H: 26.0218 ---
--- 2023 Validation RMSE for H: 12.4215 ---

===== Finding Best Parameters for branch_ID: I =====
--- Starting Optuna Hyperparameter Tuning for I ---


[I 2025-06-27 03:58:19,381] Trial 0 finished with value: 17.309091399838593 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 17.309091399838593.
[I 2025-06-27 03:58:38,094] Trial 1 finished with value: 18.382806467244244 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 17.309091399838593.
[I 2025-06-27 03:59:05,037] Trial 2 finished with value: 12.68260218303612 and 

--- Best Parameters for I: {'n_estimators': 3400, 'max_depth': 3, 'learning_rate': 0.019519660027055886, 'subsample': 0.9063663521876025, 'colsample_bytree': 0.9873969669335585, 'colsample_bylevel': 0.9341953878249465, 'reg_alpha': 5.45699557942283e-06, 'reg_lambda': 1.157003013524199e-08, 'min_child_weight': 6, 'gamma': 0.0009384893834526256} ---
--- Best CV RMSE for I: 11.2361 ---
--- 2023 Validation RMSE for I: 7.8579 ---

===== Finding Best Parameters for branch_ID: J =====
--- Starting Optuna Hyperparameter Tuning for J ---


[I 2025-06-27 04:26:15,212] Trial 0 finished with value: 20.130898308555633 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 20.130898308555633.
[I 2025-06-27 04:26:35,259] Trial 1 finished with value: 22.284308953452115 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 20.130898308555633.
[I 2025-06-27 04:27:03,236] Trial 2 finished with value: 15.751391756750035 and

--- Best Parameters for J: {'n_estimators': 1700, 'max_depth': 3, 'learning_rate': 0.06689802001564377, 'subsample': 0.7496668844156134, 'colsample_bytree': 0.8709769020074605, 'colsample_bylevel': 0.7980806294097847, 'reg_alpha': 2.6144510196831477e-05, 'reg_lambda': 0.002890090075116087, 'min_child_weight': 6, 'gamma': 0.007678684161241119} ---
--- Best CV RMSE for J: 13.8106 ---
--- 2023 Validation RMSE for J: 7.8400 ---

===== Finding Best Parameters for branch_ID: K =====
--- Starting Optuna Hyperparameter Tuning for K ---


[I 2025-06-27 04:43:53,463] Trial 0 finished with value: 11.616014334219155 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 11.616014334219155.
[I 2025-06-27 04:44:11,222] Trial 1 finished with value: 10.366829295568612 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 1 with value: 10.366829295568612.
[I 2025-06-27 04:44:36,800] Trial 2 finished with value: 11.074795827448709 and

--- Best Parameters for K: {'n_estimators': 3500, 'max_depth': 3, 'learning_rate': 0.06295943555485368, 'subsample': 0.8276435657002376, 'colsample_bytree': 0.8939366709980718, 'colsample_bylevel': 0.7644996707202668, 'reg_alpha': 0.16047611027970154, 'reg_lambda': 0.06710694359398449, 'min_child_weight': 4, 'gamma': 0.07811002077739672} ---
--- Best CV RMSE for K: 8.1237 ---
--- 2023 Validation RMSE for K: 6.2950 ---

===== Finding Best Parameters for branch_ID: L =====
--- Starting Optuna Hyperparameter Tuning for L ---


[I 2025-06-27 05:05:54,966] Trial 0 finished with value: 5.619698042793315 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 5.619698042793315.
[I 2025-06-27 05:06:10,956] Trial 1 finished with value: 7.050111828695547 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 5.619698042793315.
[I 2025-06-27 05:06:37,424] Trial 2 finished with value: 4.78971293809441 and param

--- Best Parameters for L: {'n_estimators': 5000, 'max_depth': 3, 'learning_rate': 0.020410740045759098, 'subsample': 0.9163785941716567, 'colsample_bytree': 0.9356701212224608, 'colsample_bylevel': 0.9992409485442921, 'reg_alpha': 0.006838771272185121, 'reg_lambda': 2.5060764795546727e-06, 'min_child_weight': 2, 'gamma': 0.022134532353658067} ---
--- Best CV RMSE for L: 4.1287 ---
--- 2023 Validation RMSE for L: 2.7779 ---

===== Finding Best Parameters for branch_ID: M =====
--- Starting Optuna Hyperparameter Tuning for M ---


[I 2025-06-27 05:31:37,977] Trial 0 finished with value: 9.463150839694872 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 9.463150839694872.
[I 2025-06-27 05:31:54,830] Trial 1 finished with value: 12.901844454204115 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 9.463150839694872.
[I 2025-06-27 05:32:19,960] Trial 2 finished with value: 8.622240203630927 and par

--- Best Parameters for M: {'n_estimators': 3500, 'max_depth': 3, 'learning_rate': 0.02406123249392824, 'subsample': 0.8591985371669211, 'colsample_bytree': 0.8826874841198813, 'colsample_bylevel': 0.7736602889306025, 'reg_alpha': 0.0006950228700398124, 'reg_lambda': 1.305058323440499e-07, 'min_child_weight': 7, 'gamma': 0.0002370271402528232} ---
--- Best CV RMSE for M: 7.6415 ---
--- 2023 Validation RMSE for M: 3.1108 ---

===== Finding Best Parameters for branch_ID: N =====
--- Starting Optuna Hyperparameter Tuning for N ---


[I 2025-06-27 05:51:54,585] Trial 0 finished with value: 20.762235289526807 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 20.762235289526807.
[I 2025-06-27 05:52:13,615] Trial 1 finished with value: 25.915605614214325 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 20.762235289526807.
[I 2025-06-27 05:52:41,108] Trial 2 finished with value: 17.60476164301032 and 

--- Best Parameters for N: {'n_estimators': 3900, 'max_depth': 3, 'learning_rate': 0.028114041928115972, 'subsample': 0.7395193290934324, 'colsample_bytree': 0.9693725373080471, 'colsample_bylevel': 0.795919326649101, 'reg_alpha': 8.960474194522618e-07, 'reg_lambda': 8.332665012298388e-05, 'min_child_weight': 7, 'gamma': 0.029029635533696724} ---
--- Best CV RMSE for N: 15.1977 ---
--- 2023 Validation RMSE for N: 7.9840 ---

===== Finding Best Parameters for branch_ID: O =====
--- Starting Optuna Hyperparameter Tuning for O ---


[I 2025-06-27 06:16:54,501] Trial 0 finished with value: 16.38361062861271 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 16.38361062861271.
[I 2025-06-27 06:17:12,080] Trial 1 finished with value: 18.377990023204 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 16.38361062861271.
[I 2025-06-27 06:17:37,743] Trial 2 finished with value: 12.27528985121093 and parame

--- Best Parameters for O: {'n_estimators': 3800, 'max_depth': 3, 'learning_rate': 0.052102913140168655, 'subsample': 0.9074593344736567, 'colsample_bytree': 0.8660778775828336, 'colsample_bylevel': 0.9693034373984352, 'reg_alpha': 0.27416835349449337, 'reg_lambda': 2.209211574994819e-06, 'min_child_weight': 2, 'gamma': 1.069818645585572e-06} ---
--- Best CV RMSE for O: 11.2690 ---
--- 2023 Validation RMSE for O: 8.8434 ---

===== Finding Best Parameters for branch_ID: P =====
--- Starting Optuna Hyperparameter Tuning for P ---


[I 2025-06-27 06:44:26,770] Trial 0 finished with value: 24.421232534559103 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 24.421232534559103.
[I 2025-06-27 06:44:45,939] Trial 1 finished with value: 26.262951399203786 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 24.421232534559103.
[I 2025-06-27 06:45:13,096] Trial 2 finished with value: 17.347704063894092 and

--- Best Parameters for P: {'n_estimators': 3400, 'max_depth': 3, 'learning_rate': 0.019519660027055886, 'subsample': 0.9063663521876025, 'colsample_bytree': 0.9873969669335585, 'colsample_bylevel': 0.9341953878249465, 'reg_alpha': 5.45699557942283e-06, 'reg_lambda': 1.157003013524199e-08, 'min_child_weight': 6, 'gamma': 0.0009384893834526256} ---
--- Best CV RMSE for P: 16.1674 ---
--- 2023 Validation RMSE for P: 9.1248 ---

===== Finding Best Parameters for branch_ID: Q =====
--- Starting Optuna Hyperparameter Tuning for Q ---


[I 2025-06-27 07:13:52,899] Trial 0 finished with value: 19.168878514425998 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 19.168878514425998.
[I 2025-06-27 07:14:11,915] Trial 1 finished with value: 21.048085529184135 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 19.168878514425998.
[I 2025-06-27 07:14:39,708] Trial 2 finished with value: 18.146006577425595 and

--- Best Parameters for Q: {'n_estimators': 1200, 'max_depth': 3, 'learning_rate': 0.07747417910353468, 'subsample': 0.8541907736207547, 'colsample_bytree': 0.907049346266474, 'colsample_bylevel': 0.9804676444721017, 'reg_alpha': 6.790728886861827e-05, 'reg_lambda': 0.4100993444802534, 'min_child_weight': 7, 'gamma': 0.003965678919111874} ---
--- Best CV RMSE for Q: 16.2956 ---
--- 2023 Validation RMSE for Q: 8.1113 ---

===== Finding Best Parameters for branch_ID: R =====
--- Starting Optuna Hyperparameter Tuning for R ---


[I 2025-06-27 07:31:46,586] Trial 0 finished with value: 3.703599949623771 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 3.703599949623771.
[I 2025-06-27 07:32:02,147] Trial 1 finished with value: 3.986812761851065 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 3.703599949623771.
[I 2025-06-27 07:32:28,559] Trial 2 finished with value: 3.439624973631466 and para

--- Best Parameters for R: {'n_estimators': 3300, 'max_depth': 3, 'learning_rate': 0.011922199456943734, 'subsample': 0.7860370521127933, 'colsample_bytree': 0.8611611074443941, 'colsample_bylevel': 0.8830669585904953, 'reg_alpha': 6.8949853953911e-06, 'reg_lambda': 0.005354198289758076, 'min_child_weight': 1, 'gamma': 1.2336611145533483e-06} ---
--- Best CV RMSE for R: 3.1709 ---
--- 2023 Validation RMSE for R: 1.7249 ---

===== Finding Best Parameters for branch_ID: S =====
--- Starting Optuna Hyperparameter Tuning for S ---


[I 2025-06-27 07:58:52,597] Trial 0 finished with value: 5.212966968441124 and parameters: {'n_estimators': 2200, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.7468055921327309, 'colsample_bylevel': 0.7467983561008608, 'reg_alpha': 2.9152036385288193e-08, 'reg_lambda': 0.08499808989182997, 'min_child_weight': 5, 'gamma': 0.0028276752940872697}. Best is trial 0 with value: 5.212966968441124.
[I 2025-06-27 07:59:07,677] Trial 1 finished with value: 5.493803325225851 and parameters: {'n_estimators': 500, 'max_depth': 12, 'learning_rate': 0.16967533607196555, 'subsample': 0.7637017332034828, 'colsample_bytree': 0.7545474901621302, 'colsample_bylevel': 0.7550213529560301, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995, 'min_child_weight': 4, 'gamma': 1.7464832096591078e-06}. Best is trial 0 with value: 5.212966968441124.
[I 2025-06-27 07:59:34,959] Trial 2 finished with value: 5.148823072105334 and para

--- Best Parameters for S: {'n_estimators': 1600, 'max_depth': 3, 'learning_rate': 0.02005428294764937, 'subsample': 0.8344442903941833, 'colsample_bytree': 0.8728611141795591, 'colsample_bylevel': 0.8288423633724422, 'reg_alpha': 0.000681190207353296, 'reg_lambda': 6.206285337375777e-06, 'min_child_weight': 6, 'gamma': 0.00035123732525810215} ---
--- Best CV RMSE for S: 5.0586 ---
--- 2023 Validation RMSE for S: 6.9625 ---

Best parameters saved for 19 branches

===== Training Final Model for branch_ID: A =====
--- Final model training completed for A ---

===== Training Final Model for branch_ID: B =====
--- Final model training completed for B ---

===== Training Final Model for branch_ID: C =====
--- Final model training completed for C ---

===== Training Final Model for branch_ID: D =====
--- Final model training completed for D ---

===== Training Final Model for branch_ID: E =====
--- Final model training completed for E ---

===== Training Final Model for branch_ID: F =====
--

  for branch_id, group_data in data_pred_sorted.groupby('branch_ID'):


--- Predicting for branch_ID: C ---
--- Predicting for branch_ID: D ---
--- Predicting for branch_ID: E ---
--- Predicting for branch_ID: F ---
--- Predicting for branch_ID: G ---
--- Predicting for branch_ID: H ---
--- Predicting for branch_ID: I ---
--- Predicting for branch_ID: J ---
--- Predicting for branch_ID: K ---
--- Predicting for branch_ID: L ---
--- Predicting for branch_ID: M ---
--- Predicting for branch_ID: N ---
--- Predicting for branch_ID: O ---
--- Predicting for branch_ID: P ---
--- Predicting for branch_ID: Q ---
--- Predicting for branch_ID: R ---
--- Predicting for branch_ID: S ---

STEP 2: Training Final Models with 2021-2023 data using best parameters

===== Training Final Model for branch_ID: A =====
--- Final model training completed for A ---

===== Training Final Model for branch_ID: B =====
--- Final model training completed for B ---

===== Training Final Model for branch_ID: C =====
--- Final model training completed for C ---

===== Training Final Model

### test data 파생변수 처리

In [63]:
data1 = pd.read_csv("train_temporary_dtw.csv")
data2 = pd.read_csv('test_temporary_dtw.csv')
datafull = pd.concat([data1,data2])

In [64]:
data_test = datafull
# 요일 추출 (0=월요일, 6=일요일)
data_test['TM'] = pd.to_datetime(data_test['TM'], format='%Y%m%d%H')
data_test['year'] = data_test['TM'].dt.year
data_test['month'] = data_test['TM'].dt.month
data_test['day'] = data_test['TM'].dt.day
data_test['hour'] = data_test['TM'].dt.hour

data_test['weekday'] = data_test['TM'].dt.weekday

# 평일/주말 구분 (0=평일, 1=주말)
data_test['is_weekend'] = (data_test['weekday'] >= 5).astype(int)

# 한국 공휴일 설정
korea_holidays = holidays.Korea(years=range(2021, 2025))

# 공휴일 여부 (1=공휴일, 0=평일)
data_test['is_holiday'] = data_test['TM'].dt.date.apply(lambda x: x in korea_holidays).astype(int)

data_test.drop("weekday",axis=1, inplace=True)
data_test.head()

Unnamed: 0,TM,branch_ID,TA,WD,WS,RN_DAY,RN_HR1,HM,SI,ta_chi,heat_demand,year,month,day,hour,is_weekend,is_holiday
0,2021-01-01 01:00:00,A,-10.1,78.3,0.5,0.0,0.0,68.2,0.0,-8.2,281.0,2021,1,1,1,0,1
1,2021-01-01 02:00:00,A,-10.2,71.9,0.6,0.0,0.0,69.9,0.0,-8.6,262.0,2021,1,1,2,0,1
2,2021-01-01 03:00:00,A,-10.0,360.0,0.0,0.0,0.0,69.2,0.0,-8.8,266.0,2021,1,1,3,0,1
3,2021-01-01 04:00:00,A,-9.3,155.9,0.5,0.0,0.0,65.0,0.0,-8.9,285.0,2021,1,1,4,0,1
4,2021-01-01 05:00:00,A,-9.0,74.3,1.9,0.0,0.0,63.5,0.0,-9.2,283.0,2021,1,1,5,0,1


In [65]:
#Hour
data_test = add_fourier_features(data_test, col='hour', period=24, order=2, prefix='hour')

# Month
data_test = add_fourier_features(data_test, col='month', period=12, order=2, prefix='month')

data_test['days_in_month'] = data_test['TM'].dt.days_in_month

data_test = fourier_dom(data_test, order=2, prefix='day')

In [66]:
data_test['DI'] = calculate_DI(data_test['TA'], data_test['HM'])
data_test['Discomfort_level'] = data_test['DI'].apply(DI_level)

In [67]:
data_test = create_HDD_features(
    data_test,            # 원본 DataFrame
    branch_col="branch_ID",
    temp_col="TA",
    time_col="TM",
    base_temp=18,
    windows=(7, 30)  # 7일 및 30일 누적 HDD
)

In [68]:
# grouped by 'branch_ID'
for col, lags in lag_dict.items():
    for lag in lags:
        data_test[f"{col}_lag_{lag}"] = (
            data_test
            .groupby("branch_ID")[col]          # 그룹별 시계열
            .transform(lambda s: s.shift(lag))  # 그룹 내부에서만 shift
        )

In [69]:
data_test

Unnamed: 0,TM,branch_ID,TA,WD,WS,RN_DAY,RN_HR1,HM,SI,ta_chi,...,TA_lag_3,TA_lag_6,TA_lag_12,TA_lag_24,HM_lag_1,HM_lag_2,HM_lag_3,WS_lag_1,WS_lag_3,SI_lag_1
0,2021-01-01 01:00:00,A,-10.1,78.3,0.5,0.0,0.0,68.2,0.0,-8.2,...,,,,,,,,,,
1,2021-01-01 02:00:00,A,-10.2,71.9,0.6,0.0,0.0,69.9,0.0,-8.6,...,,,,,68.2,,,0.5,,0.0
2,2021-01-01 03:00:00,A,-10.0,360.0,0.0,0.0,0.0,69.2,0.0,-8.8,...,,,,,69.9,68.2,,0.6,,0.0
3,2021-01-01 04:00:00,A,-9.3,155.9,0.5,0.0,0.0,65.0,0.0,-8.9,...,-10.1,,,,69.2,69.9,68.2,0.0,0.5,0.0
4,2021-01-01 05:00:00,A,-9.0,74.3,1.9,0.0,0.0,63.5,0.0,-9.2,...,-10.2,,,,65.0,69.2,69.9,0.5,0.6,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166910,2024-12-31 20:00:00,S,-1.1,360.0,0.0,0.0,0.0,45.8,0.0,-1.7,...,1.1,3.3,-2.0,5.6,48.4,46.3,40.8,0.0,0.0,0.0
166911,2024-12-31 21:00:00,S,-1.3,360.0,0.0,0.0,0.0,48.3,0.0,-2.3,...,-0.3,2.5,-0.8,4.8,45.8,48.4,46.3,0.0,0.0,0.0
166912,2024-12-31 22:00:00,S,-2.4,360.0,0.0,0.0,0.0,60.0,0.0,-3.1,...,-1.0,2.1,0.4,4.7,48.3,45.8,48.4,0.0,0.0,0.0
166913,2024-12-31 23:00:00,S,-3.6,360.0,0.0,0.0,0.0,65.7,0.0,-3.9,...,-1.1,1.1,1.7,4.2,60.0,48.3,45.8,0.0,0.0,0.0


In [None]:
# 타겟 변수와 피처 분리
target_col = 'heat_demand_cycle'
exclude_cols = ['TM', target_col, 'year','month', 'day','hour','Unnamed: 0','branch_ID',"WD","RN_DAY",'days_in_month', 'heat_demand_trend','heat_demand']
feature_cols = [col for col in data.columns if col not in exclude_cols]

print(f"\nTarget 변수: {target_col}")
print(f"사용될 Feature 개수: {len(feature_cols)}")

final_models = train_final_models_with_best_params(
    data         = train_data,  # <─ 2023 제외
    feature_cols = feature_cols,
    target_col   = target_col,
    best_params_dict = best_params_dict
)

# 3) 2023 데이터 예측
pred_2023 = predict_with_final_models(
    data_pred    = val_data,  # <─ 2023만 전달
    models       = final_models,
    feature_cols = feature_cols
)

# 4) 예측 결과 확인
print(pred_2023.head())

final = pd.read_csv("2023_trend.csv")
pred_trend = final['prediction']

pred = pred_2023['prediction'].values + pred_trend
rmse = np.sqrt(mean_squared_error(val_data['heat_demand'], pred))

print(rmse)

In [70]:
data_test = data_test[data_test['year'].isin([2024,2025])]
data_test

Unnamed: 0,TM,branch_ID,TA,WD,WS,RN_DAY,RN_HR1,HM,SI,ta_chi,...,TA_lag_3,TA_lag_6,TA_lag_12,TA_lag_24,HM_lag_1,HM_lag_2,HM_lag_3,WS_lag_1,WS_lag_3,SI_lag_1
0,2024-01-01 00:00:00,A,0.5,171.3,0.8,2.5,0.0,97.1,0.0,0.3,...,0.0,1.4,1.6,1.9,97.0,96.7,96.3,0.8,0.4,0.0
1,2024-01-01 01:00:00,A,0.4,93.7,1.0,0.0,0.0,96.8,0.0,0.1,...,0.0,1.1,1.9,2.1,97.1,97.0,96.7,0.8,0.4,0.0
2,2024-01-01 02:00:00,A,-0.1,133.0,0.8,0.0,0.0,97.0,0.0,0.0,...,-0.1,0.8,2.4,2.4,96.8,97.1,97.0,1.0,0.8,0.0
3,2024-01-01 03:00:00,A,-0.8,218.6,0.6,0.0,0.0,96.9,0.0,-0.2,...,0.5,0.0,3.7,2.3,97.0,96.8,97.1,0.8,0.8,0.0
4,2024-01-01 04:00:00,A,0.1,58.7,1.5,0.0,0.0,97.0,0.0,-0.1,...,0.4,0.0,4.1,2.2,96.9,97.0,96.8,0.6,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166910,2024-12-31 20:00:00,S,-1.1,360.0,0.0,0.0,0.0,45.8,0.0,-1.7,...,1.1,3.3,-2.0,5.6,48.4,46.3,40.8,0.0,0.0,0.0
166911,2024-12-31 21:00:00,S,-1.3,360.0,0.0,0.0,0.0,48.3,0.0,-2.3,...,-0.3,2.5,-0.8,4.8,45.8,48.4,46.3,0.0,0.0,0.0
166912,2024-12-31 22:00:00,S,-2.4,360.0,0.0,0.0,0.0,60.0,0.0,-3.1,...,-1.0,2.1,0.4,4.7,48.3,45.8,48.4,0.0,0.0,0.0
166913,2024-12-31 23:00:00,S,-3.6,360.0,0.0,0.0,0.0,65.7,0.0,-3.9,...,-1.1,1.1,1.7,4.2,60.0,48.3,45.8,0.0,0.0,0.0


In [71]:
# 타겟 변수와 피처 분리
target_col = 'heat_demand_trend'
exclude_cols = ['TM', target_col, 'year','month', 'day','hour','Unnamed: 0','branch_ID',"WD","RN_DAY",'days_in_month', 'heat_demand_cycle','heat_demand']
feature_cols = [col for col in data.columns if col not in exclude_cols]

In [72]:
data_test['branch_ID'] = data_test['branch_ID'].astype('category')

print(f"\nTarget 변수: {target_col}")
print(f"사용될 Feature 개수: {len(feature_cols)}")


Target 변수: heat_demand_trend
사용될 Feature 개수: 36


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_test['branch_ID'] = data_test['branch_ID'].astype('category')


In [75]:
# 3단계: 2024년 데이터로 최종 예측
print("\n" + "=" * 60)
print("STEP 3: Making final predictions on 2024 test data")
print("=" * 60)

# 2024년 테스트 데이터 준비 (이미 전처리된 data_test 사용)
test_data_2024 = data_test.copy()
test_data_for_pred = test_data_2024.drop(columns=[target_col], errors='ignore')

# 최종 예측 수행
final_predictions = predict_with_final_models(test_data_for_pred, final_models, feature_cols)

# 예측 결과 저장
prediction_results = np.round(final_predictions['prediction'], 1)
pred_df = pd.DataFrame({'pred': final_predictions['prediction']})
pred_df.to_csv('2024_pred_trend.csv', index=False)



STEP 3: Making final predictions on 2024 test data
--- Predicting for branch_ID: A ---
--- Predicting for branch_ID: B ---


  for branch_id, group_data in data_pred_sorted.groupby('branch_ID'):


--- Predicting for branch_ID: C ---
--- Predicting for branch_ID: D ---
--- Predicting for branch_ID: E ---
--- Predicting for branch_ID: F ---
--- Predicting for branch_ID: G ---
--- Predicting for branch_ID: H ---
--- Predicting for branch_ID: I ---
--- Predicting for branch_ID: J ---
--- Predicting for branch_ID: K ---
--- Predicting for branch_ID: L ---
--- Predicting for branch_ID: M ---
--- Predicting for branch_ID: N ---
--- Predicting for branch_ID: O ---
--- Predicting for branch_ID: P ---
--- Predicting for branch_ID: Q ---
--- Predicting for branch_ID: R ---
--- Predicting for branch_ID: S ---
