In [1]:
# Data Wrangling
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

# Visualization
import matplotlib.pylab as plt
from matplotlib import font_manager, rc
import seaborn as sns

pd.set_option('max_columns', 20, 'max_rows', 20)
%matplotlib inline

# OS
import os
import time
import random
import warnings; warnings.filterwarnings("ignore")
import pickle
from tqdm import tqdm
import platform
from itertools import combinations
from scipy.stats.mstats import gmean

# EDA
import klib

# Preprocessing & Feature Engineering
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer 
from sklearn.preprocessing import PowerTransformer
from sklearn.feature_selection import SelectPercentile
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.feature_selection import SelectKBest

# Hyperparameter Optimization
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from bayes_opt import BayesianOptimization

# Modeling
from catboost import CatBoostRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.linear_model import ARDRegression
from sklearn.linear_model import BayesianRidge
from sklearn.dummy import DummyRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import VotingRegressor
from vecstack import StackingTransformer
from vecstack import stacking
from itertools import combinations


# Evaluation
from sklearn.metrics import mean_squared_error 
from sklearn.model_selection import cross_val_score
seed = 42

kfold = KFold(n_splits=5, shuffle=True, random_state=seed)

# Utility
import os
import time
import random
import warnings; warnings.filterwarnings("ignore")
from IPython.display import Image
import pickle
from tqdm import tqdm
import platform
from itertools import combinations
from scipy.stats.mstats import gmean
import datetime
import joblib

# 1.Data Load

In [2]:
feature = pd.read_csv('../Data/Feature.csv')
train_label = pd.read_csv('../Data/train_label.csv')
submission = pd.read_csv("../Data/sample_submission.csv")
test = pd.read_csv("../Data/stk_hld_test.csv")

In [3]:
display(feature.head())
print(feature.shape)

Unnamed: 0,hist_d,stk_dit_cd,tot_aet_amt,stk_par_pr,stk_p,byn_dt_last,hist/tot_h,hist/tot_l,byn_last_h,byn_last_l,...,계좌별_매수_평균_주당금액,계좌별_최대보유기간,계좌별_평균보유기간,계좌별_최고_일매수매도_수량,종목별_매수_최소_주당금액,종목별_매수_최대_주당금액,종목별_평균보유기간,pca_1,pca_2,pca_5
0,5.0,1.0,11782000.0,5000.0,43000.0,5.0,0.00137,0.002741,0.00137,0.002741,...,77150.068027,434.0,52.291139,6000.0,15050.0,54500.0,86.673759,0.493103,1.169701,-0.613599
1,40.0,1.0,4990000.0,5000.0,2495000.0,40.0,0.010962,0.02193,0.010962,0.02193,...,77150.068027,434.0,52.291139,6000.0,37450.0,2861000.0,160.183769,0.493103,1.169701,-0.613599
2,2.0,99.0,14619600.0,1000.0,18600.0,2.0,0.000548,0.001096,0.000548,0.001096,...,77150.068027,434.0,52.291139,6000.0,2745.0,28350.0,64.793496,0.56732,1.005716,0.356687
3,11.0,1.0,462000.0,500.0,7700.0,11.0,0.003015,0.006031,0.003015,0.006031,...,77150.068027,434.0,52.291139,6000.0,3695.0,12800.0,63.845113,0.56732,1.005716,0.356687
4,162.0,99.0,8125200.0,5000.0,44400.0,76.0,0.044396,0.088816,0.020828,0.041667,...,77150.068027,434.0,52.291139,6000.0,18700.0,50100.0,70.68595,0.853838,0.194931,-0.558155


(752068, 27)


In [4]:
display(train_label.head())
print(train_label.shape)

Unnamed: 0,hold_d
0,11
1,80
2,5
3,22
4,324


(681472, 1)


In [5]:
display(submission.head())
print(submission.shape)

Unnamed: 0,submit_id,hold_d
0,IDX00001,0
1,IDX00002,0
2,IDX00003,0
3,IDX00004,0
4,IDX00005,0


(70596, 2)


# 2.Data Merge & Split

In [6]:
train_data = feature.iloc[:681472,:]
test_data = feature.iloc[681472:,:]

In [7]:
train_data.reset_index(drop = True, inplace=True)
train_label.reset_index(drop = True, inplace=True)

In [8]:
display(train_data.head())
print(train_data.shape)

Unnamed: 0,hist_d,stk_dit_cd,tot_aet_amt,stk_par_pr,stk_p,byn_dt_last,hist/tot_h,hist/tot_l,byn_last_h,byn_last_l,...,계좌별_매수_평균_주당금액,계좌별_최대보유기간,계좌별_평균보유기간,계좌별_최고_일매수매도_수량,종목별_매수_최소_주당금액,종목별_매수_최대_주당금액,종목별_평균보유기간,pca_1,pca_2,pca_5
0,5.0,1.0,11782000.0,5000.0,43000.0,5.0,0.00137,0.002741,0.00137,0.002741,...,77150.068027,434.0,52.291139,6000.0,15050.0,54500.0,86.673759,0.493103,1.169701,-0.613599
1,40.0,1.0,4990000.0,5000.0,2495000.0,40.0,0.010962,0.02193,0.010962,0.02193,...,77150.068027,434.0,52.291139,6000.0,37450.0,2861000.0,160.183769,0.493103,1.169701,-0.613599
2,2.0,99.0,14619600.0,1000.0,18600.0,2.0,0.000548,0.001096,0.000548,0.001096,...,77150.068027,434.0,52.291139,6000.0,2745.0,28350.0,64.793496,0.56732,1.005716,0.356687
3,11.0,1.0,462000.0,500.0,7700.0,11.0,0.003015,0.006031,0.003015,0.006031,...,77150.068027,434.0,52.291139,6000.0,3695.0,12800.0,63.845113,0.56732,1.005716,0.356687
4,162.0,99.0,8125200.0,5000.0,44400.0,76.0,0.044396,0.088816,0.020828,0.041667,...,77150.068027,434.0,52.291139,6000.0,18700.0,50100.0,70.68595,0.853838,0.194931,-0.558155


(681472, 27)


In [9]:
display(test_data.head())
print(test_data.shape)

Unnamed: 0,hist_d,stk_dit_cd,tot_aet_amt,stk_par_pr,stk_p,byn_dt_last,hist/tot_h,hist/tot_l,byn_last_h,byn_last_l,...,계좌별_매수_평균_주당금액,계좌별_최대보유기간,계좌별_평균보유기간,계좌별_최고_일매수매도_수량,종목별_매수_최소_주당금액,종목별_매수_최대_주당금액,종목별_평균보유기간,pca_1,pca_2,pca_5
681472,153.0,1.0,3945000.0,5000.0,13150.0,153.0,0.041929,0.083882,0.041929,0.083882,...,77150.068027,434.0,52.291139,6000.0,9140.0,18300.0,93.801282,0.493103,1.169701,-0.613599
681473,335.0,99.0,2524500.0,500.0,12750.0,86.0,0.091806,0.183662,0.023568,0.047149,...,77150.068027,434.0,52.291139,6000.0,310.0,13200.0,46.6875,0.853838,0.194931,-0.558155
681474,139.0,99.0,4291800.0,500.0,31100.0,139.0,0.038093,0.076206,0.038093,0.076206,...,77150.068027,434.0,52.291139,6000.0,13850.0,51600.0,34.410714,0.56732,1.005716,0.356687
681475,236.0,99.0,6716720.0,500.0,2260.0,236.0,0.064675,0.129386,0.064675,0.129386,...,77150.068027,434.0,52.291139,6000.0,1010.0,8750.0,52.367816,0.853838,0.194931,-0.558155
681476,9.0,2.0,464500.0,100.0,46450.0,9.0,0.002466,0.004934,0.002466,0.004934,...,77150.068027,434.0,52.291139,6000.0,100.0,81100.0,19.496084,0.493103,1.169701,-0.613599


(70596, 27)


In [10]:
train_data.columns

Index(['hist_d', 'stk_dit_cd', 'tot_aet_amt', 'stk_par_pr', 'stk_p',
       'byn_dt_last', 'hist/tot_h', 'hist/tot_l', 'byn_last_h', 'byn_last_l',
       'bnc_qty_s', 'tot_aet_amt_s', 'profit/tot_bnc_qty',
       'bnc_qty/tot_bnc_qty', 'aet_amt/tot_aet', '계좌별_평균_일매수_수량',
       '계좌별_매수_최소_주당금액', '계좌별_매수_평균_주당금액', '계좌별_최대보유기간', '계좌별_평균보유기간',
       '계좌별_최고_일매수매도_수량', '종목별_매수_최소_주당금액', '종목별_매수_최대_주당금액', '종목별_평균보유기간',
       'pca_1', 'pca_2', 'pca_5'],
      dtype='object')

In [11]:
test_data.columns

Index(['hist_d', 'stk_dit_cd', 'tot_aet_amt', 'stk_par_pr', 'stk_p',
       'byn_dt_last', 'hist/tot_h', 'hist/tot_l', 'byn_last_h', 'byn_last_l',
       'bnc_qty_s', 'tot_aet_amt_s', 'profit/tot_bnc_qty',
       'bnc_qty/tot_bnc_qty', 'aet_amt/tot_aet', '계좌별_평균_일매수_수량',
       '계좌별_매수_최소_주당금액', '계좌별_매수_평균_주당금액', '계좌별_최대보유기간', '계좌별_평균보유기간',
       '계좌별_최고_일매수매도_수량', '종목별_매수_최소_주당금액', '종목별_매수_최대_주당금액', '종목별_평균보유기간',
       'pca_1', 'pca_2', 'pca_5'],
      dtype='object')

## 3. Build Model 

In [12]:
# 단일모델 정의
knn_reg = KNeighborsRegressor(n_jobs=-1)
extra_reg = ExtraTreesRegressor(random_state=seed, n_jobs=-1)
gbm_reg = GradientBoostingRegressor(random_state=seed)
xgb_reg = XGBRegressor(random_state=seed, n_jobs=-1)
lgb_reg = LGBMRegressor(random_state=seed, n_jobs=-1)
cat_reg = CatBoostRegressor(random_state=seed)

In [15]:
regs = [knn_reg, extra_reg, gbm_reg, xgb_reg, lgb_reg, cat_reg]

In [16]:
# Cross_val_score 함수정의
def get_model_cv_prediction(model, feature_data, y_target):
    neg_mse_scores = cross_val_score(model, feature_data, y_target, scoring='neg_mean_squared_error', cv=kfold, n_jobs=-1)
    rmse_scores = np.sqrt(-1 * neg_mse_scores)
    avg_rmse = np.mean(rmse_scores)
    print(f'{model.__class__.__name__} 모델의 평균 RMSE: {avg_rmse:.3f}')
    model = model.fit(feature_data, y_target)
    return model.__class__.__name__, model

In [17]:
# 단일 모델별 평균 성능 출력
models = []
for reg in regs:
    models.append(get_model_cv_prediction(reg, train_data, train_label))
    
    

KNeighborsRegressor 모델의 평균 RMSE: 56.198
ExtraTreesRegressor 모델의 평균 RMSE: 0.475
GradientBoostingRegressor 모델의 평균 RMSE: 0.564
XGBRegressor 모델의 평균 RMSE: 0.441
LGBMRegressor 모델의 평균 RMSE: 0.892
CatBoostRegressor 모델의 평균 RMSE: 0.962
Learning rate set to 0.124668
0:	learn: 46.3699857	total: 179ms	remaining: 2m 58s
1:	learn: 40.8753782	total: 209ms	remaining: 1m 44s
2:	learn: 35.9819294	total: 240ms	remaining: 1m 19s
3:	learn: 31.7195398	total: 269ms	remaining: 1m 6s
4:	learn: 27.9816276	total: 297ms	remaining: 59.2s
5:	learn: 24.6735617	total: 325ms	remaining: 53.9s
6:	learn: 21.7509119	total: 358ms	remaining: 50.8s
7:	learn: 19.2069396	total: 390ms	remaining: 48.4s
8:	learn: 16.9577129	total: 422ms	remaining: 46.4s
9:	learn: 14.9705902	total: 450ms	remaining: 44.6s
10:	learn: 13.2340630	total: 481ms	remaining: 43.2s
11:	learn: 11.7062976	total: 510ms	remaining: 42s
12:	learn: 10.3698433	total: 539ms	remaining: 40.9s
13:	learn: 9.1860709	total: 568ms	remaining: 40s
14:	learn: 8.1468072	total: 

161:	learn: 0.6982826	total: 4.67s	remaining: 24.1s
162:	learn: 0.6973453	total: 4.7s	remaining: 24.1s
163:	learn: 0.6939225	total: 4.72s	remaining: 24.1s
164:	learn: 0.6921691	total: 4.75s	remaining: 24.1s
165:	learn: 0.6901398	total: 4.78s	remaining: 24s
166:	learn: 0.6887249	total: 4.8s	remaining: 24s
167:	learn: 0.6876351	total: 4.83s	remaining: 23.9s
168:	learn: 0.6864350	total: 4.86s	remaining: 23.9s
169:	learn: 0.6853253	total: 4.88s	remaining: 23.8s
170:	learn: 0.6840386	total: 4.9s	remaining: 23.8s
171:	learn: 0.6815125	total: 4.93s	remaining: 23.7s
172:	learn: 0.6798415	total: 4.96s	remaining: 23.7s
173:	learn: 0.6779245	total: 4.99s	remaining: 23.7s
174:	learn: 0.6765925	total: 5.02s	remaining: 23.7s
175:	learn: 0.6755602	total: 5.05s	remaining: 23.7s
176:	learn: 0.6740752	total: 5.09s	remaining: 23.7s
177:	learn: 0.6723015	total: 5.11s	remaining: 23.6s
178:	learn: 0.6708235	total: 5.13s	remaining: 23.6s
179:	learn: 0.6686861	total: 5.16s	remaining: 23.5s
180:	learn: 0.66773

324:	learn: 0.5353081	total: 8.87s	remaining: 18.4s
325:	learn: 0.5350282	total: 8.9s	remaining: 18.4s
326:	learn: 0.5344584	total: 8.92s	remaining: 18.4s
327:	learn: 0.5337502	total: 8.94s	remaining: 18.3s
328:	learn: 0.5333503	total: 8.96s	remaining: 18.3s
329:	learn: 0.5327711	total: 8.99s	remaining: 18.3s
330:	learn: 0.5324624	total: 9.02s	remaining: 18.2s
331:	learn: 0.5317899	total: 9.04s	remaining: 18.2s
332:	learn: 0.5313529	total: 9.06s	remaining: 18.2s
333:	learn: 0.5311159	total: 9.09s	remaining: 18.1s
334:	learn: 0.5298304	total: 9.11s	remaining: 18.1s
335:	learn: 0.5291759	total: 9.14s	remaining: 18.1s
336:	learn: 0.5282302	total: 9.16s	remaining: 18s
337:	learn: 0.5277479	total: 9.19s	remaining: 18s
338:	learn: 0.5269950	total: 9.21s	remaining: 18s
339:	learn: 0.5258009	total: 9.24s	remaining: 17.9s
340:	learn: 0.5251330	total: 9.27s	remaining: 17.9s
341:	learn: 0.5247505	total: 9.29s	remaining: 17.9s
342:	learn: 0.5242007	total: 9.31s	remaining: 17.8s
343:	learn: 0.52365

485:	learn: 0.4802082	total: 12.9s	remaining: 13.7s
486:	learn: 0.4801499	total: 13s	remaining: 13.7s
487:	learn: 0.4800213	total: 13s	remaining: 13.6s
488:	learn: 0.4798279	total: 13s	remaining: 13.6s
489:	learn: 0.4793446	total: 13s	remaining: 13.6s
490:	learn: 0.4789506	total: 13.1s	remaining: 13.5s
491:	learn: 0.4786316	total: 13.1s	remaining: 13.5s
492:	learn: 0.4780451	total: 13.1s	remaining: 13.5s
493:	learn: 0.4778415	total: 13.1s	remaining: 13.5s
494:	learn: 0.4776376	total: 13.2s	remaining: 13.4s
495:	learn: 0.4775157	total: 13.2s	remaining: 13.4s
496:	learn: 0.4771588	total: 13.2s	remaining: 13.4s
497:	learn: 0.4770068	total: 13.2s	remaining: 13.3s
498:	learn: 0.4768402	total: 13.2s	remaining: 13.3s
499:	learn: 0.4765754	total: 13.3s	remaining: 13.3s
500:	learn: 0.4764525	total: 13.3s	remaining: 13.2s
501:	learn: 0.4763504	total: 13.3s	remaining: 13.2s
502:	learn: 0.4762382	total: 13.3s	remaining: 13.2s
503:	learn: 0.4761593	total: 13.4s	remaining: 13.2s
504:	learn: 0.475976

644:	learn: 0.4556697	total: 17s	remaining: 9.34s
645:	learn: 0.4555901	total: 17s	remaining: 9.31s
646:	learn: 0.4554228	total: 17s	remaining: 9.28s
647:	learn: 0.4552232	total: 17s	remaining: 9.26s
648:	learn: 0.4551588	total: 17.1s	remaining: 9.23s
649:	learn: 0.4550960	total: 17.1s	remaining: 9.2s
650:	learn: 0.4550341	total: 17.1s	remaining: 9.17s
651:	learn: 0.4547918	total: 17.1s	remaining: 9.14s
652:	learn: 0.4547538	total: 17.2s	remaining: 9.12s
653:	learn: 0.4546340	total: 17.2s	remaining: 9.09s
654:	learn: 0.4544695	total: 17.2s	remaining: 9.06s
655:	learn: 0.4543982	total: 17.2s	remaining: 9.04s
656:	learn: 0.4543529	total: 17.3s	remaining: 9.01s
657:	learn: 0.4542848	total: 17.3s	remaining: 8.98s
658:	learn: 0.4542486	total: 17.3s	remaining: 8.95s
659:	learn: 0.4540680	total: 17.3s	remaining: 8.92s
660:	learn: 0.4540213	total: 17.4s	remaining: 8.9s
661:	learn: 0.4538442	total: 17.4s	remaining: 8.88s
662:	learn: 0.4537878	total: 17.4s	remaining: 8.85s
663:	learn: 0.4537046	

806:	learn: 0.4425845	total: 21.1s	remaining: 5.04s
807:	learn: 0.4425329	total: 21.1s	remaining: 5.01s
808:	learn: 0.4424535	total: 21.1s	remaining: 4.99s
809:	learn: 0.4424039	total: 21.2s	remaining: 4.96s
810:	learn: 0.4423209	total: 21.2s	remaining: 4.94s
811:	learn: 0.4422694	total: 21.2s	remaining: 4.91s
812:	learn: 0.4421732	total: 21.2s	remaining: 4.88s
813:	learn: 0.4420916	total: 21.3s	remaining: 4.86s
814:	learn: 0.4420671	total: 21.3s	remaining: 4.83s
815:	learn: 0.4420180	total: 21.3s	remaining: 4.8s
816:	learn: 0.4419662	total: 21.3s	remaining: 4.78s
817:	learn: 0.4419102	total: 21.4s	remaining: 4.75s
818:	learn: 0.4418837	total: 21.4s	remaining: 4.72s
819:	learn: 0.4418300	total: 21.4s	remaining: 4.7s
820:	learn: 0.4417817	total: 21.4s	remaining: 4.67s
821:	learn: 0.4417344	total: 21.5s	remaining: 4.65s
822:	learn: 0.4416777	total: 21.5s	remaining: 4.62s
823:	learn: 0.4416092	total: 21.5s	remaining: 4.59s
824:	learn: 0.4415810	total: 21.5s	remaining: 4.57s
825:	learn: 0.

968:	learn: 0.4356581	total: 25.2s	remaining: 807ms
969:	learn: 0.4356274	total: 25.3s	remaining: 781ms
970:	learn: 0.4355681	total: 25.3s	remaining: 755ms
971:	learn: 0.4355389	total: 25.3s	remaining: 729ms
972:	learn: 0.4355150	total: 25.3s	remaining: 703ms
973:	learn: 0.4354897	total: 25.4s	remaining: 677ms
974:	learn: 0.4354687	total: 25.4s	remaining: 651ms
975:	learn: 0.4354359	total: 25.4s	remaining: 625ms
976:	learn: 0.4354148	total: 25.4s	remaining: 599ms
977:	learn: 0.4353964	total: 25.5s	remaining: 573ms
978:	learn: 0.4353753	total: 25.5s	remaining: 547ms
979:	learn: 0.4353464	total: 25.5s	remaining: 521ms
980:	learn: 0.4353068	total: 25.5s	remaining: 495ms
981:	learn: 0.4352871	total: 25.6s	remaining: 469ms
982:	learn: 0.4352591	total: 25.6s	remaining: 443ms
983:	learn: 0.4352356	total: 25.6s	remaining: 417ms
984:	learn: 0.4352017	total: 25.7s	remaining: 391ms
985:	learn: 0.4351630	total: 25.7s	remaining: 365ms
986:	learn: 0.4351330	total: 25.7s	remaining: 339ms
987:	learn: 

In [18]:
models

[('KNeighborsRegressor', KNeighborsRegressor(n_jobs=-1)),
 ('ExtraTreesRegressor', ExtraTreesRegressor(n_jobs=-1, random_state=42)),
 ('GradientBoostingRegressor', GradientBoostingRegressor(random_state=42)),
 ('XGBRegressor',
  XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
               importance_type='gain', interaction_constraints='',
               learning_rate=0.300000012, max_delta_step=0, max_depth=6,
               min_child_weight=1, missing=nan, monotone_constraints='()',
               n_estimators=100, n_jobs=-1, num_parallel_tree=1, random_state=42,
               reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
               tree_method='exact', validate_parameters=1, verbosity=None)),
 ('LGBMRegressor', LGBMRegressor(random_state=42)),
 ('CatBoostRegressor', <catboost.core.CatBoostRegressor at 0x1e5b8587a00>)]

In [19]:
joblib.dump(models,f'../Models/Base_new_feature_Final.pkl')

['../Models/Base_new_feature_Final.pkl']

In [20]:
# 모델 로드
#models = joblib.load('../Models/Base_new_feature_Final.pkl')

In [21]:
# 모델 훈련 함수 정의
def return_fitted_model(model, train, target):
    rmse_scores = []
    for iter_count, (train_idx, valid_idx) in enumerate(kfold.split(train, target)):

        X_train, X_valid = train[train_idx], train[valid_idx]
        y_train, y_valid = target[train_idx], target[valid_idx]

        model.fit(X_train, y_train)

        pred = model.predict(X_valid)
        rmse_score = np.sqrt(mean_squared_error(y_valid, pred))
        rmse_scores.append(rmse_score)
    return model, np.mean(rmse_scores)


In [22]:
selected_reg = [
    #'KNeighborsRegressor', 
    'ExtraTreesRegressor',
    'CatBoostRegressor', 
    'GradientBoostingRegressor', 
    'XGBRegressor',
    'LGBMRegressor'
]
models_for_ensemble = [(reg[0], reg[1]) for reg in models if reg[0] in selected_reg]

 - 모든 조합에 대해서 앙상블을 수행

In [36]:
start = time.time()
best_avg_score = np.inf
avg_models = []
for model_nums in range(2, len(models_for_ensemble ) + 1):
    for avg_estimator in (combinations(models_for_ensemble , model_nums)):
        avg_reg = VotingRegressor(estimators = avg_estimator, n_jobs=-1)
        avg_model, avg_score = return_fitted_model(avg_reg, train_data.values, train_label.values)
        avg_models.append(avg_model)
        print(f'{"●".join([reg_name for reg_name, _, in avg_estimator])}: {avg_score:.3f}')
        if avg_score < best_avg_score:
            best_avg_score = avg_score
            best_avg_reg = avg_reg
print(time.time() - start)

ExtraTreesRegressor●GradientBoostingRegressor: 0.476
ExtraTreesRegressor●XGBRegressor: 0.441
ExtraTreesRegressor●LGBMRegressor: 0.624
ExtraTreesRegressor●CatBoostRegressor: 0.655
GradientBoostingRegressor●XGBRegressor: 0.470
GradientBoostingRegressor●LGBMRegressor: 0.617
GradientBoostingRegressor●CatBoostRegressor: 0.644
XGBRegressor●LGBMRegressor: 0.588
XGBRegressor●CatBoostRegressor: 0.615
LGBMRegressor●CatBoostRegressor: 0.883
ExtraTreesRegressor●GradientBoostingRegressor●XGBRegressor: 0.451
ExtraTreesRegressor●GradientBoostingRegressor●LGBMRegressor: 0.542
ExtraTreesRegressor●GradientBoostingRegressor●CatBoostRegressor: 0.559
ExtraTreesRegressor●XGBRegressor●LGBMRegressor: 0.528
ExtraTreesRegressor●XGBRegressor●CatBoostRegressor: 0.544
ExtraTreesRegressor●LGBMRegressor●CatBoostRegressor: 0.702
GradientBoostingRegressor●XGBRegressor●LGBMRegressor: 0.525
GradientBoostingRegressor●XGBRegressor●CatBoostRegressor: 0.539
GradientBoostingRegressor●LGBMRegressor●CatBoostRegressor: 0.687
XG

 - 가장 좋은 성능의 모델 출력

In [24]:
best_avg_reg

VotingRegressor(estimators=(('ExtraTreesRegressor',
                             ExtraTreesRegressor(n_jobs=-1, random_state=42)),
                            ('XGBRegressor',
                             XGBRegressor(base_score=0.5, booster='gbtree',
                                          colsample_bylevel=1,
                                          colsample_bynode=1,
                                          colsample_bytree=1, gamma=0,
                                          gpu_id=-1, importance_type='gain',
                                          interaction_constraints='',
                                          learning_rate=0.300000012,
                                          max_delta_step=0, max_depth=6,
                                          min_child_weight=1, missing=nan,
                                          monotone_constraints='()',
                                          n_estimators=100, n_jobs=-1,
                                          num_pa

 - 앙상블 모델 저장

In [26]:
joblib.dump(best_avg_reg,f'../Models/Average_Ensemble_base_best_Final.pkl')

['../Models/Average_Ensemble_base_best_Final.pkl']

In [57]:
# 앙상블 모델 로드
#best_avg_reg = joblib.load('../Models/Average_Ensemble_base_best_Final.pkl')

#### 결과 추론

In [27]:
predict = best_avg_reg.predict(test_data.values)

In [28]:
submission["hold_d"] = np.round(predict)

In [29]:
submission

Unnamed: 0,submit_id,hold_d
0,IDX00001,306.0
1,IDX00002,662.0
2,IDX00003,278.0
3,IDX00004,472.0
4,IDX00005,19.0
...,...,...
70591,IDX70592,46.0
70592,IDX70593,206.0
70593,IDX70594,1095.0
70594,IDX70595,22.0


In [30]:
s = []
for i in range(len(submission["hold_d"])):
    if submission["hold_d"][i] - test['hist_d'][i] <= 146:
        if submission["hold_d"][i] - test['hist_d'][i] < 0:
            s.append(test['hist_d'][i] + 1)
        else:
            s.append(submission["hold_d"][i])
    else:
        s.append(146+test['hist_d'][i])

In [31]:
submission["hold_d"] = s
submission

Unnamed: 0,submit_id,hold_d
0,IDX00001,299.0
1,IDX00002,481.0
2,IDX00003,278.0
3,IDX00004,382.0
4,IDX00005,19.0
...,...,...
70591,IDX70592,46.0
70592,IDX70593,206.0
70593,IDX70594,1004.0
70594,IDX70595,22.0


In [32]:
submission["hold_d"].min()

2.0

### 결과물 저장

In [33]:
submission.to_csv("../Submission/Submission.csv", index = False)