# Stacking Ensemble을 활용한 자율주행 센서 안테나 성능예측
> 팀명 : 될때까지간다리
>
> 작성일 : '22.08.31
>
> 개발환경 : Jupyter Notebook

## Development Environment Setting

In [3]:
# hyper parameter tuning을 위한 패키지 설치
!pip install optuna
!pip install catboost
!pip install skranger
!pip install ngboost
!pip install lightgbm
!pip install hyperopt





In [4]:
# 기본 modules
import pandas as pd
import random
import os
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import tqdm

# 머신러닝 modules
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import KFold

from lightgbm import LGBMRegressor
from ngboost import NGBRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import ElasticNet, LinearRegression, Lasso, Ridge
from catboost import CatBoostRegressor, Pool
from skranger.ensemble import RangerForestRegressor
from sklearn.neighbors import RadiusNeighborsRegressor
from ngboost.scores import LogScore

from hyperopt import fmin, hp, tpe
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.inspection import permutation_importance

# 모듈화된 함수 사용
import utils.preprocessing as preprocessing
import utils.utils as utils
import utils.stacking as stk

ModuleNotFoundError: No module named 'utils'

## Utils

In [None]:
# seed 고정
def seed_everything(seed):
    """
    @Description: fix the random seeds used by this notebook
    @Param: seed, value applied to PYTHONHASHSEED, NumPy's global RNG and Python's `random`
    @Return: None
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)

In [None]:
class Config:
    """Experiment-wide constants, read as plain class attributes."""

    # Seed handed to seed_everything in the data-loading cell.
    seed = 42
    # Number of folds passed as `cv` to cross_val_score in the tuning objectives.
    cv = 10
    # Not referenced in the visible cells -- presumably a hold-out fraction; confirm.
    test_size = 0.2
    # Not referenced in the visible cells -- presumably a training-iteration count; confirm.
    epochs = 200

In [None]:
# Y_Feature별 NRMSE의 총합
def lg_nrmse(gt, preds):
    """
    @Description: Metric used in this project. For each of the first 14 target
        columns, the RMSE is divided by the mean absolute ground-truth value
        (NRMSE). The first 8 NRMSEs (Y_01 ~ Y_08) are weighted 1.2, the
        remaining 6 are weighted 1.0, and the weighted values are summed.
    @Params1: gt, pandas dataframe (or 2-D array-like) with at least 14 columns
    @Param2: preds, pandas dataframe (or 2-D array-like) with at least 14 columns
    @Return: nrmse score
    @Raises: IndexError if an input has fewer than 14 columns,
        ValueError if the inputs have a different number of rows
    """
    n_targets = 14
    gt_frame = pd.DataFrame(gt)
    pred_frame = pd.DataFrame(preds)

    if gt_frame.shape[1] < n_targets or pred_frame.shape[1] < n_targets:
        raise IndexError(
            "lg_nrmse expects at least {} target columns".format(n_targets)
        )
    if len(gt_frame) != len(pred_frame):
        raise ValueError("gt and preds must have the same number of rows")

    # Work on raw arrays so rows are compared by position, never by index
    # label, and so the metric does not depend on the `squared=False` keyword
    # of sklearn's mean_squared_error (not accepted by recent releases).
    truth = gt_frame.iloc[:, :n_targets].to_numpy(dtype=float)
    guess = pred_frame.iloc[:, :n_targets].to_numpy(dtype=float)

    rmse = np.sqrt(np.mean((truth - guess) ** 2, axis=0))
    nrmse = rmse / np.mean(np.abs(truth), axis=0)

    # Y_01 ~ Y_08 carry a 20% higher weight than the remaining targets.
    return 1.2 * np.sum(nrmse[:8]) + 1.0 * np.sum(nrmse[8:])


In [None]:
# Y_Feature 개별 NRMSE 계산
def lg_individual_nrmse(gt, preds):
    """
    @Description: Metric used in this project (individual): RMSE of a single
        target divided by the mean absolute ground-truth value
    @Params1: gt, pandas series / 1-D array-like of ground-truth values for one target
    @Param2: preds, pandas series / 1-D array-like of predictions for the same target
    @Return: nrmse score
    @Raises: ValueError if gt and preds do not hold the same number of values
    """
    # Flatten to plain arrays: values are compared by position, and the metric
    # no longer relies on the `squared=False` keyword of sklearn's
    # mean_squared_error (not accepted by recent releases).
    truth = np.asarray(gt, dtype=float).reshape(-1)
    guess = np.asarray(preds, dtype=float).reshape(-1)
    if truth.shape != guess.shape:
        raise ValueError("gt and preds must have the same number of values")

    rmse = np.sqrt(np.mean((truth - guess) ** 2))
    return rmse / np.mean(np.abs(truth))

In [None]:
# 데이터에서 X_feature, Y_feature 구분
def dataset_split_X_y(df):
    """
    @Description: split data into features and labels
    @Param: df, pandas dataframe with columns starting with X for features and Y for labels
    @Return: features and labels in pandas dataframes
    """
    # Anchor the patterns: an unanchored 'X' / 'Y' would also select any
    # column that merely contains the letter (e.g. 'IDX' or 'MAX_Y').
    xs = df.filter(regex='^X')  # Input : X Feature
    ys = df.filter(regex='^Y')  # Output : Y Feature
    return xs, ys

In [None]:
# DF 안의 결측치(NA)를 확인
def check_for_NAs(df, show=False):
    """
    @Description: report missing values in the dataframe
    @Param1: df, pandas dataframe
    @Param2: show, boolean; when True the rows containing at least one NaN are returned instead
    @Return: list of column names that contain NaN (or the NaN rows when show is True)
    """
    if show:
        rows_with_nan = df.isna().any(axis=1)
        return df[rows_with_nan]

    cols_with_nan = df.isnull().any()
    return list(df.loc[:, cols_with_nan].columns)

In [None]:
# DF 안의 결측치(NA)를 확인
def check_for_NAs(df, show=False):
    """
    @Description: checks the dataframe for NaN values
        (this cell redefines the identical check_for_NAs from the previous cell)
    @Param1: df, pandas dataframe
    @Param2: show, boolean; True returns the rows that contain NaN rather than column names
    @Return: names of the columns with NaN, or the offending rows when show is True
    """
    if not show:
        nan_columns = df.loc[:, df.isnull().any()].columns
        return list(nan_columns)
    return df[df.isna().any(axis=1)]

In [None]:
# 분산이 0인 feature 탐색
def zero_variance(train_x):
    """
    @Description: find the columns whose variance is exactly zero
    @Param1: train_x, pandas dataframe
    @Return: list with the names of the zero-variance columns, in column order
    """
    return [name for name in train_x.columns if train_x[name].var() == 0]

In [None]:
# 가장 높은 상관계수값과 feature 탐색
def get_top_correlation(df, n=10):
    """
    @Description: rank feature pairs by absolute correlation
    @Param1: df, pandas dataframe
    @Param2: n, number of top pairs to keep
    @Return: pandas series indexed by (column, column) pairs, sorted in descending order
    """
    names = df.columns
    # Self-pairs and the mirrored half of the symmetric matrix are redundant.
    redundant = {
        (names[row], names[col])
        for row in range(df.shape[1])
        for col in range(row + 1)
    }
    abs_corr = df.corr().abs().unstack()
    ranked = abs_corr.drop(labels=redundant).sort_values(ascending=False)
    return ranked[0:n]

In [None]:
# 정규화 진행 및 이상치 탐색
def find_outlier_zscore(data, threshold = 3):
    """
    @Description: standardise the values and flag those far from the mean
    @Param1: data, iterable of numeric values
    @Param2: threshold, absolute z-score above which a value counts as an outlier
    @Return: numpy array with the positions of the outliers
    """
    center = np.mean(data)
    spread = np.std(data)
    z_scores = np.array([(value - center) / spread for value in data])
    return np.flatnonzero(np.abs(z_scores) > threshold)

In [None]:
# histogram 시각화
def adjacent_histogram_boxplot(feature_var, figsize = (7, 5)):
    """
    @Description: draw a histogram (with KDE) stacked on top of a boxplot, sharing the x axis;
                  the mean is marked with a solid red line and the median with a dashed green line
    @Param1: feature_var, pandas series (its `name` is used as the figure title)
    @Param2: figsize, size of the figure
    NOTE(review): relies on `plt` and `sns` existing in the calling namespace; neither is
    imported in the notebook's visible import cell -- confirm.
    """
    layout = {'height_ratios':(.85,.15)}
    fig, (top_ax, bottom_ax) = plt.subplots(nrows=2, sharex=True, gridspec_kw=layout, figsize=figsize)

    sns.distplot(feature_var, kde=True, ax=top_ax, kde_kws={"linewidth":1.5})
    sns.boxplot(feature_var, ax=bottom_ax, linewidth=1, width=0.5)

    # Strip axis labels and shrink the tick labels on both panels.
    top_ax.set_ylabel('')
    for axis in (top_ax, bottom_ax):
        axis.set_xlabel('')
        axis.tick_params(labelsize=8)

    fig.suptitle(feature_var.name, fontsize=10)

    markers = (
        (np.mean(feature_var), 'red', '-'),
        (np.median(feature_var), 'green', '--'),
    )
    for position, colour, style in markers:
        top_ax.axvline(position, color=colour, linestyle=style, lw=1.5)

In [None]:
# data 불러오기
def load_data(train, test):
    """
    @Description: read the train/test CSV files and apply the shared column clean-up
    @Param1: train, path of the training CSV
    @Param2: test, path of the test CSV
    @Return: train features, train labels and the cleaned test dataframe
    """
    sparse_cols = ['X_10', 'X_11']  # many missing values (missing = 0, per the competition notice)

    train_x, train_y = dataset_split_X_y(pd.read_csv(train))
    test_df = pd.read_csv(test)

    # Columns with zero variance in the training features (pass/fail flags).
    constant_cols = zero_variance(train_x)

    train_x = train_x.drop(constant_cols, axis=1).drop(sparse_cols, axis=1)
    test_df = (
        test_df.drop(constant_cols, axis=1)
        .drop(sparse_cols, axis=1)
        .drop('ID', axis=1)
    )

    return train_x, train_y, test_df

## Load Data

In [5]:
# Fix the random seeds before any data handling.
utils.seed_everything(utils.Config.seed)

# Read the raw competition files and split the training set into X / Y columns.
train_df = pd.read_csv('Data/train.csv')
test_x = pd.read_csv('Data/test.csv')
train_x, train_y = preprocessing.dataset_split_X_y(train_df)

# Drop the columns flagged by zero_variance on the training features from both sets.
cols_with_zero_variance = preprocessing.zero_variance(train_x) # zero-variance columns (pass/fail flags)
train_x = train_x.drop(cols_with_zero_variance, axis = 1)
test_x = test_x.drop(cols_with_zero_variance, axis = 1)

train_x = train_x.drop(['X_10', 'X_11'], axis = 1) # many missing values (missing = 0, per the competition notice)
test_x = test_x.drop(['X_10', 'X_11'], axis = 1)

# The test file carries an ID column that is removed before prediction.
test_x = test_x.drop('ID', axis=1)

NameError: name 'utils' is not defined

## Model
- 모델별 개별학습(타겟 Y_01~Y_14) 반복

### LGBM

In [None]:
# Parameter Setting
def lgbm_objective(params):
    """
    @Description: hyperopt objective for LGBMRegressor on target Y_01; reads the
                  notebook globals train_x, train_y and Config
    @Param: params, dict of sampled hyperparameters
    @Return: mean cross-validated NRMSE (lower is better)
    """
    int_keys = ('n_estimators', 'max_depth', 'num_leaves', 'min_child_samples')
    float_keys = (
        'colsample_bytree', 'subsample', 'min_split_gain', 'scale_pos_weight',
        'reg_alpha', 'reg_lambda', 'learning_rate',
    )

    # Integer parameters are truncated; the rest are rendered with 5 decimals.
    tuned = {key: int(params[key]) for key in int_keys}
    tuned.update({key: '{:.5f}'.format(params[key]) for key in float_keys})

    regressor = LGBMRegressor(n_jobs=-1, random_state=1, verbose=100, **tuned)

    target = train_y['Y_01']
    # cross_val_score returns negated MSE per fold.
    neg_mse = cross_val_score(regressor, train_x, target, cv=Config.cv, scoring='neg_mean_squared_error')
    fold_nrmse = np.sqrt(-neg_mse) / np.mean(np.abs(target))
    mean_loss = fold_nrmse.mean()
    print("NRMSE Loss {:.5f} params {}".format(mean_loss, tuned))
    return mean_loss

In [None]:
# Hyperparameter tuning for LGBM: search space, TPE search, then integer casts.
# The keys of the space are the names read inside lgbm_objective.
space_lgbm = {
    'n_estimators' : hp.quniform('n_estimators', 100, 1500, 1),
    'max_depth': hp.quniform('max_depth', 5, 250, 1),
    'num_leaves': hp.quniform('num_leaves', 20, 200, 5),
    'min_child_samples': hp.quniform('min_child_samples', 10, 150, 5),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.3, 1.0),
    'subsample': hp.uniform('subsample', 0.3, 1.0),
    'min_split_gain': hp.uniform('min_split_gain', 0, 0.7),
    'scale_pos_weight': hp.uniform('scale_pos_weight', 1, 10),
    'reg_alpha': hp.uniform('reg_alpha', 0, 500),
    'reg_lambda': hp.uniform('reg_lambda', 0, 500),
    # Sampled on a log scale between 0.01 and 0.5.
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.5)),
}

# Minimise lgbm_objective with the TPE algorithm for up to 200 evaluations.
best = fmin(fn = lgbm_objective,
            space = space_lgbm,
            algo = tpe.suggest,
            verbose = 10,
            max_evals = 200)

print(best)
# Cast the integer-valued hyperparameters to int before they are reused.
best['n_estimators'] = int(best['n_estimators'])
best['num_leaves'] = int(best['num_leaves'])
best['max_depth'] = int(best['max_depth'])
best['min_child_samples'] = int(best['min_child_samples'])

## Catboost Regressor

In [None]:
# Parameter Setting
def cat_objective(params):
    """
    @Description: hyperopt objective for CatBoostRegressor on target Y_01; reads the
                  notebook globals train_x, train_y and Config
    @Param: params, dict of sampled hyperparameters
    @Return: mean cross-validated NRMSE (lower is better)
    """
    ordered_keys = (
        'n_estimators', 'depth', 'learning_rate', 'l2_leaf_reg',
        'max_bin', 'min_data_in_leaf', 'random_strength', 'fold_len_multiplier',
    )
    tuned = {key: params[key] for key in ordered_keys}
    # These parameters are truncated to integers; the others pass through unchanged.
    for key in ('n_estimators', 'depth', 'max_bin', 'min_data_in_leaf'):
        tuned[key] = int(tuned[key])

    regressor = CatBoostRegressor(logging_level='Silent', **tuned)

    target = train_y['Y_01']
    # cross_val_score returns negated MSE per fold.
    neg_mse = cross_val_score(regressor, train_x, target, cv=Config.cv, scoring='neg_mean_squared_error')
    fold_nrmse = np.sqrt(-neg_mse) / np.mean(np.abs(target))
    mean_loss = fold_nrmse.mean()
    print("NRMSE Loss {:.5f} params {}".format(mean_loss, tuned))
    return mean_loss

In [None]:
# Hyperparameter tuning for CatBoost: search space and TPE search.
# The keys of the space are the names read inside cat_objective.
space_catboost = {
    'n_estimators' : hp.quniform('n_estimators', 100, 300, 50),
    'depth': hp.quniform("depth", 2, 16, 1),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
    'l2_leaf_reg': hp.uniform('l2_leaf_reg', 3, 8),
    'max_bin' : hp.quniform('max_bin', 1, 254, 1),
    'min_data_in_leaf' : hp.quniform('min_data_in_leaf', 2, 700, 1),
    # Sampled on a log scale.
    'random_strength' : hp.loguniform('random_strength', np.log(0.005), np.log(5)),
    'fold_len_multiplier' : hp.loguniform('fold_len_multiplier', np.log(1.01), np.log(2.5)),
}

# Minimise cat_objective with the TPE algorithm for up to 200 evaluations.
best = fmin(fn = cat_objective,
            space = space_catboost,
            algo = tpe.suggest,
            verbose = 1,
            max_evals = 200)

# NOTE(review): unlike the LGBM cell, the integer-valued entries of `best`
# are not cast to int here before printing -- confirm before reusing them.
print(best)

## Extra Tree Regressor

In [None]:
# Parameter Setting
def extra_objective(params):
    """
    @Description: hyperopt objective for ExtraTreesRegressor on target Y_01; reads the
                  notebook globals train_x, train_y and Config
    @Param: params, dict of sampled hyperparameters
    @Return: mean cross-validated NRMSE (lower is better)
    """
    ordered_keys = (
        'n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf',
        'min_weight_fraction_leaf', 'max_features', 'max_leaf_nodes',
        'min_impurity_decrease', 'bootstrap', 'ccp_alpha',
    )
    tuned = {key: params[key] for key in ordered_keys}
    # These parameters are truncated to integers; the others pass through unchanged.
    for key in ('n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf', 'max_leaf_nodes'):
        tuned[key] = int(tuned[key])

    regressor = ExtraTreesRegressor(n_jobs=-1, verbose=0, random_state=1, **tuned)

    target = train_y['Y_01']
    # cross_val_score returns negated MSE per fold.
    neg_mse = cross_val_score(regressor, train_x, target, cv=Config.cv, scoring='neg_mean_squared_error')
    fold_nrmse = np.sqrt(-neg_mse) / np.mean(np.abs(target))
    mean_loss = fold_nrmse.mean()
    print("NRMSE Loss {:.5f} params {}".format(mean_loss, tuned))
    return mean_loss

In [None]:
# Hyperparameter tuning for ExtraTrees: search space, TPE search, then decoding.
from hyperopt import space_eval  # turns the raw fmin result back into real parameter values

# The keys of the space are the names read inside extra_objective.
space_extra = {
    'n_estimators' : hp.quniform('n_estimators', 100, 1500, 50),
    'max_depth': hp.quniform('max_depth', 3, 50, 1),
    'min_samples_split': hp.quniform('min_samples_split', 5, 50, 5),
    'min_samples_leaf': hp.quniform('min_samples_leaf', 5, 50, 1),
    'min_weight_fraction_leaf': hp.uniform('min_weight_fraction_leaf', 0.01, 0.5),
    # 1.0 (use every feature) replaces the former 'auto' option, which meant the
    # same thing for regressors and is no longer accepted by recent scikit-learn.
    'max_features': hp.choice('max_features', ['sqrt', 'log2', None, 1.0]),
    'max_leaf_nodes': hp.quniform('max_leaf_nodes', 3, 30, 1),
    'min_impurity_decrease': hp.uniform('min_impurity_decrease', 0, 200),
    'bootstrap':  hp.choice('bootstrap', [True, False]),
    'ccp_alpha': hp.uniform('ccp_alpha', 0.01, 1.0),
}

# NOTE(review): max_evals = 2 is far lower than in the other tuning cells
# (100 / 200) -- looks like a leftover from a smoke test; confirm.
best = fmin(fn = extra_objective,
            space = space_extra,
            algo = tpe.suggest,
            verbose = 1,
            max_evals = 2)

# fmin reports hp.choice parameters as the *index* of the chosen option
# (e.g. bootstrap == 0 means True). Decode them so `best` can be passed
# straight to ExtraTreesRegressor.
best = space_eval(space_extra, best)

# quniform samples are floats; scikit-learn would read a float
# min_samples_split / min_samples_leaf as a fraction of the samples,
# so every count-like parameter is cast to int.
for key in ('n_estimators', 'max_depth', 'min_samples_split',
            'min_samples_leaf', 'max_leaf_nodes'):
    best[key] = int(best[key])

## NGBR

In [None]:
# Parameter Setting
def ngbr_objective(params):
    """
    @Description: hyperopt objective for NGBRegressor on target Y_01; reads the
                  notebook globals train_x, train_y and Config
    @Param: params, dict of sampled hyperparameters
    @Return: mean cross-validated NRMSE (lower is better)
    """
    ordered_keys = (
        'n_estimators', 'learning_rate', 'natural_gradient',
        'col_sample', 'minibatch_frac', 'tol',
    )
    tuned = {key: params[key] for key in ordered_keys}
    tuned['n_estimators'] = int(tuned['n_estimators'])
    for key in ('col_sample', 'minibatch_frac', 'tol'):
        tuned[key] = float(tuned[key])

    regressor = NGBRegressor(verbose=100, random_state=1, **tuned)

    target = train_y['Y_01']
    # cross_val_score returns negated MSE per fold.
    neg_mse = cross_val_score(regressor, train_x, target, cv=Config.cv, scoring='neg_mean_squared_error')
    fold_nrmse = np.sqrt(-neg_mse) / np.mean(np.abs(target))
    mean_loss = fold_nrmse.mean()
    print("NRMSE Loss {:.5f} params {}".format(mean_loss, tuned))
    return mean_loss

In [None]:
# Hyperparameter tuning for NGBoost: search space, TPE search, then decoding.
from hyperopt import space_eval  # turns the raw fmin result back into real parameter values

# The keys of the space are the names read inside ngbr_objective.
space_ngboost = {
    'n_estimators': hp.quniform('n_estimators', 100, 500, 10),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
    'natural_gradient': hp.choice('natural_gradient', [True, False]),
    # Lower bound is 0.01 rather than 0: a fraction of exactly 0 would ask the
    # model to train on no columns / no rows and abort the whole search.
    'col_sample': hp.quniform('col_sample', 0.01, 1, 0.01),
    'minibatch_frac': hp.quniform('minibatch_frac', 0.01, 1, 0.01),
    'tol': hp.uniform('tol', 1e-6, 3e-4),
}

# Minimise ngbr_objective with the TPE algorithm for up to 100 evaluations.
best = fmin(fn = ngbr_objective,
            space = space_ngboost,
            algo = tpe.suggest,
            verbose = 10,
            max_evals = 100)

# fmin reports hp.choice parameters as the *index* of the chosen option, so
# natural_gradient == 0 actually means True. Decode before printing/reusing,
# otherwise the flag is silently inverted.
best = space_eval(space_ngboost, best)

print(best)
best['n_estimators'] = int(best['n_estimators'])

## Stacking Ensemble
- 모델별 개별학습(타겟 Y_01~Y_14) 반복

In [None]:
def get_stacking_base_datasets(model, train_x, train_y, col, test, params, n_splits=10):
    """
    @Description: build stacking features for one base model and one target column.
                  The training rows are split into consecutive folds (no shuffling); for each
                  fold a fresh model is fit on the remaining folds and predicts both the
                  held-out fold (out-of-fold prediction) and the full test set.
    @Param1: model, one of 'cat', 'extra', 'ngbr', 'lgbm'; an already constructed estimator
             with fit/predict is also accepted and is refit on every fold
    @Param2: train_x, pandas dataframe of training features
    @Param3: train_y, pandas dataframe of training targets
    @Param4: col, name of the target column in train_y
    @Param5: test, pandas dataframe of test features
    @Param6: params, dict of keyword arguments forwarded to the model constructor
    @Param7: n_splits, number of folds (default 10)
    @Return: (out-of-fold train predictions with shape (n_train, 1),
              fold-averaged test predictions with shape (n_test, 1))
    @Raises: ValueError if model is neither a known name nor an estimator
    """
    # Factories instead of instances: every fold gets its own untrained model,
    # and the `model` argument is never overwritten inside the loop.
    builders = {
        'cat': lambda: CatBoostRegressor(random_state=1, **params),
        'extra': lambda: ExtraTreesRegressor(random_state=1, **params),
        # The tuned NGBoost parameters are forwarded like for the other models
        # (they used to be silently dropped).
        'ngbr': lambda: NGBRegressor(random_state=1, **params),
        'lgbm': lambda: LGBMRegressor(random_state=1, n_jobs=-1, **params),
    }
    if isinstance(model, str):
        if model not in builders:
            raise ValueError(
                "Unknown model '{}'; expected one of {}".format(model, sorted(builders))
            )
        build = builders[model]
    elif hasattr(model, 'fit') and hasattr(model, 'predict'):
        def build():
            return model
    else:
        raise ValueError("model must be a known model name or an estimator with fit/predict")

    kf = KFold(n_splits=n_splits, shuffle=False)
    train_fold_pred = np.zeros((train_x.shape[0], 1))
    test_pred = np.zeros((test.shape[0], n_splits))

    for folder_counter, (train_index, valid_index) in enumerate(kf.split(train_x)):
        print('Fold : ', folder_counter, ' Start')
        # KFold yields positions, not index labels, so select rows with iloc.
        X_tr = train_x.iloc[train_index]
        y_tr = train_y[col].iloc[train_index]
        X_te = train_x.iloc[valid_index]

        estimator = build()
        estimator.fit(X_tr, y_tr)

        train_fold_pred[valid_index, :] = np.asarray(estimator.predict(X_te)).reshape(-1, 1)
        test_pred[:, folder_counter] = np.asarray(estimator.predict(test)).reshape(-1)

    # Average the per-fold test predictions into a single column.
    test_pred_mean = np.mean(test_pred, axis=1).reshape(-1, 1)

    return train_fold_pred, test_pred_mean

In [None]:
# Y_01
# Recorded hyperparameters for the four base models, stacking features from
# each of them, and an LGBM meta-model fit on those features.

cat_01 = {'depth': 9, 'fold_len_multiplier': 1.6722688563924544, 'l2_leaf_reg': 9.992348977307927, 'learning_rate': 0.03686783566671033, 'max_bin': 16, 'min_data_in_leaf': 7, 'n_estimators': 500, 'random_strength': 0.5478002316160607}

# NOTE(review): 'bootstrap': 0 and 'max_features': 1 look like raw hp.choice
# indices from fmin rather than decoded option values -- confirm.
extra_01 = {'bootstrap': 0, 'ccp_alpha': 0.5956203348598316, 'max_depth': 43, 'max_features': 1, 'max_leaf_nodes': 22, 'min_impurity_decrease': 195.59697998782488, 'min_samples_leaf': 0.4, 'min_samples_split': 0.427, 'min_weight_fraction_leaf': 0.3872874854064327, 'n_estimators': 200}

lgbm_01 = {'colsample_bytree': 0.572280100273023, 'learning_rate': 0.010283635038627429, 'max_depth': 180, 'min_child_samples': 135, 'min_split_gain': 0.04511227284338413, 'n_estimators': 900, 'num_leaves': 70, 'reg_alpha': 4.406681827912319, 'reg_lambda': 20.4785600448913, 'scale_pos_weight': 8.302374117433086, 'subsample': 0.1688669888026464}

ngbr_01 = {'n_estimators': 250, 'learning_rate': 0.027115337704182965, 'natural_gradient': True, 'col_sample': 0.2, 'minibatch_frac': 0.8, 'tol': 5.5136412071576055e-05}

# One (train, test) pair of stacking features per base model.
# Presumably stk.get_stacking_base_datasets matches the function of the same
# name defined in this notebook -- confirm.
xx_train, xx_test = stk.get_stacking_base_datasets('cat', train_x, train_y, col='Y_01', test=test_x, params = cat_01)
yy_train, yy_test = stk.get_stacking_base_datasets('extra', train_x, train_y, col='Y_01', test=test_x, params = extra_01)
zz_train, zz_test = stk.get_stacking_base_datasets('lgbm', train_x, train_y, col='Y_01', test=test_x, params = lgbm_01)
qq_train, qq_test = stk.get_stacking_base_datasets('ngbr', train_x, train_y, col='Y_01', test=test_x, params = ngbr_01)


# Column-wise concatenation: one column per base model.
Stack_final_X_train = np.concatenate((xx_train, yy_train, zz_train, qq_train), axis=1)
Stack_final_X_test = np.concatenate((xx_test, yy_test, zz_test, qq_test), axis=1)

# Final (meta) model: an LGBM regressor reusing the base LGBM hyperparameters.
lr_final = LGBMRegressor(**lgbm_01)
lr_final.fit(Stack_final_X_train, train_y['Y_01'])
stack_final1 = lr_final.predict(Stack_final_X_test)




# Y_02
# Recorded hyperparameters for the four base models, stacking features from
# each of them, and an LGBM meta-model fit on those features.

cat_02 = {'depth': 9, 'fold_len_multiplier': 1.8379420467251593, 'l2_leaf_reg': 27.102344731579784, 'learning_rate': 0.03597820559455176, 'max_bin': 14, 'min_data_in_leaf': 2, 'n_estimators': 500, 'random_strength': 0.9800368067026318}

# NOTE(review): 'bootstrap': 0 and 'max_features': 2 look like raw hp.choice
# indices from fmin rather than decoded option values -- confirm.
extra_02 = {'bootstrap': 0, 'ccp_alpha': 0.9020983921597531, 'max_depth': 42, 'max_features': 2, 'max_leaf_nodes': 7, 'min_impurity_decrease': 150.7925115966371, 'min_samples_leaf': 18, 'min_samples_split': 15, 'min_weight_fraction_leaf': 0.3321219768527379, 'n_estimators': 200}

lgbm_02 =  {'colsample_bytree': 0.7641322280477741, 'learning_rate': 0.010977205425053654, 'max_depth': 90, 'min_child_samples': 75, 'min_split_gain': 0.13379952895779884, 'n_estimators': 900, 'num_leaves': 80, 'reg_alpha': 1.9214119194170154, 'reg_lambda': 14.454450236504218, 'scale_pos_weight': 2.171961031806387, 'subsample': 0.9552593593877317}

ngbr_02 = {'n_estimators': 250, 'learning_rate': 0.0656349966273891, 'natural_gradient': True, 'col_sample': 0.8, 'minibatch_frac': 0.55, 'tol': 0.00028029350235701545}

# One (train, test) pair of stacking features per base model.
# Presumably stk.get_stacking_base_datasets matches the function of the same
# name defined in this notebook -- confirm.
xx_train, xx_test = stk.get_stacking_base_datasets('cat', train_x, train_y, col='Y_02', test=test_x, params = cat_02)
yy_train, yy_test = stk.get_stacking_base_datasets('extra', train_x, train_y, col='Y_02', test=test_x, params = extra_02)
zz_train, zz_test = stk.get_stacking_base_datasets('lgbm', train_x, train_y, col='Y_02', test=test_x, params = lgbm_02)
qq_train, qq_test = stk.get_stacking_base_datasets('ngbr', train_x, train_y, col='Y_02', test=test_x, params = ngbr_02)


# Column-wise concatenation: one column per base model.
Stack_final_X_train = np.concatenate((xx_train, yy_train, zz_train, qq_train), axis=1)
Stack_final_X_test = np.concatenate((xx_test, yy_test, zz_test, qq_test), axis=1)

# Final (meta) model: an LGBM regressor reusing the base LGBM hyperparameters.
lr_final = LGBMRegressor(**lgbm_02)
lr_final.fit(Stack_final_X_train, train_y['Y_02'])
stack_final2 = lr_final.predict(Stack_final_X_test)




# Y_03
# Recorded hyperparameters for the four base models, stacking features from
# each of them, and an LGBM meta-model fit on those features.

cat_03 = {'depth': 7, 'fold_len_multiplier': 2.498263900973405, 'l2_leaf_reg': 36.312606304859514, 'learning_rate': 0.043147429358500425, 'max_bin': 14, 'min_data_in_leaf': 2, 'n_estimators': 450, 'random_strength': 0.6650471273750369}

# NOTE(review): 'bootstrap': 1 and 'max_features': 2 look like raw hp.choice
# indices from fmin rather than decoded option values -- confirm.
extra_03 = {'bootstrap': 1, 'ccp_alpha': 0.9068334703113171, 'max_depth': 22, 'max_features': 2, 'max_leaf_nodes': 11, 'min_impurity_decrease': 6.407983868655598, 'min_samples_leaf': 17, 'min_samples_split': 10, 'min_weight_fraction_leaf': 0.4864839919729328, 'n_estimators': 1450}

lgbm_03 = {'colsample_bytree': 0.5504769098255781,  'learning_rate': 0.019653385015120244, 'max_depth': 220, 'min_child_samples': 25, 'min_split_gain': 0.1273611040963466, 'n_estimators': 470, 'num_leaves': 160, 'reg_alpha': 3.5549669150756706, 'reg_lambda': 39.88636182674132, 'scale_pos_weight': 12.46696320152359, 'subsample': 0.7590007450921917}

ngbr_03 = {'n_estimators': 410, 'learning_rate': 0.11407060690033853, 'natural_gradient': True, 'col_sample': 0.25, 'minibatch_frac': 1, 'tol': 0.000166350313681024}

# One (train, test) pair of stacking features per base model.
# Presumably stk.get_stacking_base_datasets matches the function of the same
# name defined in this notebook -- confirm.
xx_train, xx_test = stk.get_stacking_base_datasets('cat', train_x, train_y, col='Y_03', test=test_x, params = cat_03)
yy_train, yy_test = stk.get_stacking_base_datasets('extra', train_x, train_y, col='Y_03', test=test_x, params = extra_03)
zz_train, zz_test = stk.get_stacking_base_datasets('lgbm', train_x, train_y, col='Y_03', test=test_x, params = lgbm_03)
qq_train, qq_test = stk.get_stacking_base_datasets('ngbr', train_x, train_y, col='Y_03', test=test_x, params = ngbr_03)


# Column-wise concatenation: one column per base model.
Stack_final_X_train = np.concatenate((xx_train, yy_train, zz_train, qq_train), axis=1)
Stack_final_X_test = np.concatenate((xx_test, yy_test, zz_test, qq_test), axis=1)

# Final (meta) model: an LGBM regressor reusing the base LGBM hyperparameters.
lr_final = LGBMRegressor(**lgbm_03)
lr_final.fit(Stack_final_X_train, train_y['Y_03'])
stack_final3 = lr_final.predict(Stack_final_X_test)



# Y_04
# Recorded hyperparameters for the four base models, stacking features from
# each of them, and an LGBM meta-model fit on those features.

cat_04 = {'depth': 10, 'fold_len_multiplier': 1.3980250309157025, 'l2_leaf_reg': 5.838850864558845, 'learning_rate': 0.2737350830778467, 'max_bin': 78, 'min_data_in_leaf': 561, 'n_estimators': 20, 'random_strength': 0.009672204431612739}

# NOTE(review): 'bootstrap': 0 and 'max_features': 3 look like raw hp.choice
# indices from fmin rather than decoded option values -- confirm.
extra_04 = {'bootstrap': 0, 'ccp_alpha': 0.7076108565007323, 'max_depth': 48, 'max_features': 3, 'max_leaf_nodes': 16, 'min_impurity_decrease': 45.084212905659186, 'min_samples_leaf': 11, 'min_samples_split': 20, 'min_weight_fraction_leaf': 0.1164511175367393, 'n_estimators': 100}

lgbm_04 = {'colsample_bytree': 0.5597537952569402, 'learning_rate': 0.02374663979814546, 'max_depth': 32, 'min_child_samples': 100, 'min_split_gain': 0.12211426885216736, 'n_estimators': 1263, 'num_leaves': 200, 'reg_alpha': 14.606693962963451, 'reg_lambda': 299.52278825209424, 'scale_pos_weight': 7.7785016838070735, 'subsample': 0.6254745287838821}

ngbr_04 = {'n_estimators': 431, 'learning_rate': 0.08883519586581468, 'natural_gradient': True, 'col_sample': 0.97, 'minibatch_frac': 0.75, 'tol': 8.896682623929568e-05}

# One (train, test) pair of stacking features per base model.
# Presumably stk.get_stacking_base_datasets matches the function of the same
# name defined in this notebook -- confirm.
xx_train, xx_test = stk.get_stacking_base_datasets('cat', train_x, train_y, col='Y_04', test=test_x, params = cat_04)
yy_train, yy_test = stk.get_stacking_base_datasets('extra', train_x, train_y, col='Y_04', test=test_x, params = extra_04)
zz_train, zz_test = stk.get_stacking_base_datasets('lgbm', train_x, train_y, col='Y_04', test=test_x, params = lgbm_04)
qq_train, qq_test = stk.get_stacking_base_datasets('ngbr', train_x, train_y, col='Y_04', test=test_x, params = ngbr_04)


# Column-wise concatenation: one column per base model.
Stack_final_X_train = np.concatenate((xx_train, yy_train, zz_train, qq_train), axis=1)
Stack_final_X_test = np.concatenate((xx_test, yy_test, zz_test, qq_test), axis=1)

# Final (meta) model: an LGBM regressor reusing the base LGBM hyperparameters.
lr_final = LGBMRegressor(**lgbm_04)
lr_final.fit(Stack_final_X_train, train_y['Y_04'])
stack_final4 = lr_final.predict(Stack_final_X_test)



# Y_05
# Recorded hyperparameters for the four base models, stacking features from
# each of them, and an LGBM meta-model fit on those features.

cat_05 = {'depth': 9, 'fold_len_multiplier': 1.7527609013156893, 'l2_leaf_reg': 5.150371128645829, 'learning_rate': 0.23166991521375363, 'max_bin': 181, 'min_data_in_leaf': 591, 'n_estimators': 17, 'random_strength': 0.08626442162325075}

# NOTE(review): 'bootstrap': 0 and 'max_features': 2 look like raw hp.choice
# indices from fmin rather than decoded option values -- confirm.
extra_05 = {'bootstrap': 0, 'ccp_alpha': 0.17223432236304015, 'max_depth': 39, 'max_features': 2, 'max_leaf_nodes': 16, 'min_impurity_decrease': 166.70077338146032, 'min_samples_leaf': 34, 'min_samples_split': 15, 'min_weight_fraction_leaf': 0.22146453407955657, 'n_estimators': 200}

lgbm_05 = {'colsample_bytree': 0.4311015575880258, 'learning_rate': 0.01749725932551278, 'max_depth': 53, 'min_child_samples': 15, 'min_split_gain': 0.2820951740673634, 'n_estimators': 974, 'num_leaves': 165, 'reg_alpha': 9.604623064885754, 'reg_lambda': 12.314490508636432, 'scale_pos_weight': 6.6422956907936825, 'subsample': 0.7390190399971659}

ngbr_05 = {'n_estimators': 449, 'learning_rate': 0.08850472848590257, 'natural_gradient': True, 'col_sample': 0.8, 'minibatch_frac': 0.97, 'tol': 7.048508838263751e-05}

# One (train, test) pair of stacking features per base model.
# Presumably stk.get_stacking_base_datasets matches the function of the same
# name defined in this notebook -- confirm.
xx_train, xx_test = stk.get_stacking_base_datasets('cat', train_x, train_y, col='Y_05', test=test_x, params = cat_05)
yy_train, yy_test = stk.get_stacking_base_datasets('extra', train_x, train_y, col='Y_05', test=test_x, params = extra_05)
zz_train, zz_test = stk.get_stacking_base_datasets('lgbm', train_x, train_y, col='Y_05', test=test_x, params = lgbm_05)
qq_train, qq_test = stk.get_stacking_base_datasets('ngbr', train_x, train_y, col='Y_05', test=test_x, params = ngbr_05)


# Column-wise concatenation: one column per base model.
Stack_final_X_train = np.concatenate((xx_train, yy_train, zz_train, qq_train), axis=1)
Stack_final_X_test = np.concatenate((xx_test, yy_test, zz_test, qq_test), axis=1)

# Final (meta) model: an LGBM regressor reusing the base LGBM hyperparameters.
lr_final = LGBMRegressor(**lgbm_05)
lr_final.fit(Stack_final_X_train, train_y['Y_05'])
stack_final5 = lr_final.predict(Stack_final_X_test)



# Y_06
# Recorded hyperparameters for the four base models, stacking features from
# each of them, and an LGBM meta-model fit on those features.

cat_06 = {'depth': 8, 'fold_len_multiplier': 1.4734463589684192, 'l2_leaf_reg': 7.343561976614034, 'learning_rate': 0.2546701358021111, 'max_bin': 8, 'min_data_in_leaf': 280, 'n_estimators': 13, 'random_strength': 0.06423202371274109}

# NOTE(review): 'bootstrap': 0 and 'max_features': 1 look like raw hp.choice
# indices from fmin rather than decoded option values -- confirm.
extra_06 = {'bootstrap': 0, 'ccp_alpha': 0.263122350467869, 'max_depth': 12, 'max_features': 1, 'max_leaf_nodes': 6, 'min_impurity_decrease': 140.20905071348278, 'min_samples_leaf': 50, 'min_samples_split': 50, 'min_weight_fraction_leaf': 0.17986464144348094, 'n_estimators': 450}

lgbm_06 = {'colsample_bytree': 0.6889745043181079, 'learning_rate': 0.06146161938790444, 'max_depth': 89, 'min_child_samples': 10, 'min_split_gain': 0.669592868575692, 'n_estimators': 1169, 'num_leaves': 175, 'reg_alpha': 11.405277636150856, 'reg_lambda': 112.37954230084294, 'scale_pos_weight': 5.932435783263877, 'subsample': 0.8265223228903998}

ngbr_06 = {'n_estimators': 153, 'learning_rate': 0.05121743231435252, 'natural_gradient': True, 'col_sample': 0.7000000000000001, 'minibatch_frac': 0.97, 'tol': 0.00013297533787653073}

# One (train, test) pair of stacking features per base model.
# Presumably stk.get_stacking_base_datasets matches the function of the same
# name defined in this notebook -- confirm.
xx_train, xx_test = stk.get_stacking_base_datasets('cat', train_x, train_y, col='Y_06', test=test_x, params = cat_06)
yy_train, yy_test = stk.get_stacking_base_datasets('extra', train_x, train_y, col='Y_06', test=test_x, params = extra_06)
zz_train, zz_test = stk.get_stacking_base_datasets('lgbm', train_x, train_y, col='Y_06', test=test_x, params = lgbm_06)
qq_train, qq_test = stk.get_stacking_base_datasets('ngbr', train_x, train_y, col='Y_06', test=test_x, params = ngbr_06)


# Column-wise concatenation: one column per base model.
Stack_final_X_train = np.concatenate((xx_train, yy_train, zz_train, qq_train), axis=1)
Stack_final_X_test = np.concatenate((xx_test, yy_test, zz_test, qq_test), axis=1)

# Final (meta) model: an LGBM regressor reusing the base LGBM hyperparameters.
lr_final = LGBMRegressor(**lgbm_06)
lr_final.fit(Stack_final_X_train, train_y['Y_06'])
stack_final6 = lr_final.predict(Stack_final_X_test)



# Y_07
# Recorded hyperparameters for the four base models, stacking features from
# each of them, and an LGBM meta-model fit on those features.

cat_07 = {'n_estimators': 350, 'depth': 9, 'learning_rate': 0.07700980062937977, 'l2_leaf_reg': 7.366594895982364, 'max_bin': 80, 'min_data_in_leaf': 654, 'random_strength': 0.12106590929604114, 'fold_len_multiplier': 2.3574644740356874}

# NOTE(review): 'max_features': 'auto' is, as far as I know, rejected by
# recent scikit-learn releases -- confirm against the pinned version.
extra_07 = {'n_estimators': 100, 'max_depth': 9, 'min_samples_split': 15, 'min_samples_leaf': 7, 'min_weight_fraction_leaf': 0.15163758513069608, 'max_features': 'auto', 'max_leaf_nodes': 16, 'min_impurity_decrease': 28.474990166012663, 'bootstrap': True, 'ccp_alpha': 0.8351211481986806}

lgbm_07 = {'colsample_bytree': 0.8663251864650988, 'learning_rate': 0.018110306887688978, 'max_depth': 166, 'min_child_samples': 50, 'min_split_gain': 0.025403061552667243, 'n_estimators': 1080, 'num_leaves': 100, 'reg_alpha': 2.0131018839563666, 'reg_lambda': 63.56640846106552, 'scale_pos_weight': 1.8584564419776715, 'subsample': 0.7643028435523616}

ngbr_07 = {'n_estimators': 280, 'learning_rate': 0.0543058099317307, 'natural_gradient': False, 'col_sample': 0.61, 'minibatch_frac': 0.53, 'tol': 4.259736507913848e-06}

# One (train, test) pair of stacking features per base model.
# Presumably stk.get_stacking_base_datasets matches the function of the same
# name defined in this notebook -- confirm.
xx_train, xx_test = stk.get_stacking_base_datasets('cat', train_x, train_y, col='Y_07', test=test_x, params = cat_07)
yy_train, yy_test = stk.get_stacking_base_datasets('extra', train_x, train_y, col='Y_07', test=test_x, params = extra_07)
zz_train, zz_test = stk.get_stacking_base_datasets('lgbm', train_x, train_y, col='Y_07', test=test_x, params = lgbm_07)
qq_train, qq_test = stk.get_stacking_base_datasets('ngbr', train_x, train_y, col='Y_07', test=test_x, params = ngbr_07)


# Column-wise concatenation: one column per base model.
Stack_final_X_train = np.concatenate((xx_train, yy_train, zz_train, qq_train), axis=1)
Stack_final_X_test = np.concatenate((xx_test, yy_test, zz_test, qq_test), axis=1)

# Final (meta) model: an LGBM regressor reusing the base LGBM hyperparameters.
lr_final = LGBMRegressor(**lgbm_07)
lr_final.fit(Stack_final_X_train, train_y['Y_07'])
stack_final7 = lr_final.predict(Stack_final_X_test)



# Y_08
# Recorded hyperparameters for the four base models, stacking features from
# each of them, and an LGBM meta-model fit on those features.

cat_08 = {'n_estimators': 450, 'depth': 11, 'learning_rate': 0.06098987016322603, 'l2_leaf_reg': 7.78479685048665, 'max_bin': 244, 'min_data_in_leaf': 423, 'random_strength': 0.4646435148235979, 'fold_len_multiplier': 1.7857138740606202}

extra_08 = {'n_estimators': 150, 'max_depth': 31, 'min_samples_split': 45, 'min_samples_leaf': 38, 'min_weight_fraction_leaf': 0.3764820550107259, 'max_features': 'log2', 'max_leaf_nodes': 21, 'min_impurity_decrease': 41.54716726233874, 'bootstrap': True, 'ccp_alpha': 0.9985147070627858}

lgbm_08 = {'colsample_bytree': 0.8970390757241629, 'learning_rate': 0.03571726260659087, 'max_depth': 164, 'min_child_samples': 30, 'min_split_gain': 0.2863362850926679, 'n_estimators': 740, 'num_leaves': 100, 'reg_alpha': 1.1167159754886287, 'reg_lambda': 280.9798636389436, 'scale_pos_weight': 4.75867892931176, 'subsample': 0.681716202670263}

ngbr_08 = {'n_estimators': 490, 'learning_rate': 0.04770929499260395, 'natural_gradient': True, 'col_sample': 1, 'minibatch_frac': 0.6, 'tol': 0.00019175990157775972}

# One (train, test) pair of stacking features per base model.
# Presumably stk.get_stacking_base_datasets matches the function of the same
# name defined in this notebook -- confirm.
xx_train, xx_test = stk.get_stacking_base_datasets('cat', train_x, train_y, col='Y_08', test=test_x, params = cat_08)
yy_train, yy_test = stk.get_stacking_base_datasets('extra', train_x, train_y, col='Y_08', test=test_x, params = extra_08)
zz_train, zz_test = stk.get_stacking_base_datasets('lgbm', train_x, train_y, col='Y_08', test=test_x, params = lgbm_08)
qq_train, qq_test = stk.get_stacking_base_datasets('ngbr', train_x, train_y, col='Y_08', test=test_x, params = ngbr_08)


# Column-wise concatenation: one column per base model.
Stack_final_X_train = np.concatenate((xx_train, yy_train, zz_train, qq_train), axis=1)
Stack_final_X_test = np.concatenate((xx_test, yy_test, zz_test, qq_test), axis=1)

# Final (meta) model: an LGBM regressor reusing the base LGBM hyperparameters.
lr_final = LGBMRegressor(**lgbm_08)
lr_final.fit(Stack_final_X_train, train_y['Y_08'])
stack_final8 = lr_final.predict(Stack_final_X_test)



# Y_09
# Recorded hyperparameters for the four base models, stacking features from
# each of them, and an LGBM meta-model fit on those features.

cat_09 = {'n_estimators': 450, 'depth': 7, 'learning_rate': 0.08417441728687995, 'l2_leaf_reg': 7.003716950710109, 'max_bin': 249, 'min_data_in_leaf': 85, 'random_strength': 0.01346018468768982, 'fold_len_multiplier': 1.8510933222341048}

extra_09 = {'n_estimators': 50, 'max_depth': 35, 'min_samples_split': 45, 'min_samples_leaf': 11, 'min_weight_fraction_leaf': 0.1446169507139342, 'max_features': None, 'max_leaf_nodes': 10, 'min_impurity_decrease': 190.68425357881898, 'bootstrap': True, 'ccp_alpha': 0.8503455224547213}

# NOTE(review): the float hyperparameters are stored as strings here (the
# 5-decimal format that lgbm_objective prints) -- confirm this is intended.
lgbm_09 = {'n_estimators': 900, 'max_depth': 86, 'num_leaves': 150, 'min_child_samples': 85, 'colsample_bytree': '0.90507', 'subsample': '0.62362', 'min_split_gain': '0.21034', 'scale_pos_weight': '8.77311', 'reg_alpha': '0.07069', 'reg_lambda': '499.10672', 'learning_rate': '0.04679'}

ngbr_09 = {'n_estimators': 250, 'learning_rate': 0.0531679007741611, 'natural_gradient': False, 'col_sample': 0.8200000000000001, 'minibatch_frac': 0.65, 'tol': 0.00013547208288125422}

# One (train, test) pair of stacking features per base model.
# Presumably stk.get_stacking_base_datasets matches the function of the same
# name defined in this notebook -- confirm.
xx_train, xx_test = stk.get_stacking_base_datasets('cat', train_x, train_y, col='Y_09', test=test_x, params = cat_09)
yy_train, yy_test = stk.get_stacking_base_datasets('extra', train_x, train_y, col='Y_09', test=test_x, params = extra_09)
zz_train, zz_test = stk.get_stacking_base_datasets('lgbm', train_x, train_y, col='Y_09', test=test_x, params = lgbm_09)
qq_train, qq_test = stk.get_stacking_base_datasets('ngbr', train_x, train_y, col='Y_09', test=test_x, params = ngbr_09)


# Column-wise concatenation: one column per base model.
Stack_final_X_train = np.concatenate((xx_train, yy_train, zz_train, qq_train), axis=1)
Stack_final_X_test = np.concatenate((xx_test, yy_test, zz_test, qq_test), axis=1)

# Final (meta) model: an LGBM regressor reusing the base LGBM hyperparameters.
lr_final = LGBMRegressor(**lgbm_09)
lr_final.fit(Stack_final_X_train, train_y['Y_09'])
stack_final9 = lr_final.predict(Stack_final_X_test)



# Y_10 -- two-level stacking for target column 'Y_10'.

# Hyperparameters for the 'cat' base model.
cat_10 = {
    'depth': 10,
    'fold_len_multiplier': 1.8964686014236263,
    'l2_leaf_reg': 6.890506058346803,
    'learning_rate': 0.04431783808011194,
    'max_bin': 135,
    'min_data_in_leaf': 449,
    'n_estimators': 400,
    'random_strength': 0.0715293516760773,
}

# Hyperparameters for the 'extra' base model.
# 'bootstrap' was the integer 0; it is spelled as the equivalent boolean False
# because the option is a boolean flag and strict parameter validation rejects ints.
# NOTE(review): the integer 'max_features' (and the former 0 for 'bootstrap') look
# like raw hyperopt `hp.choice` indices rather than the chosen values -- confirm
# against the search space (e.g. with hyperopt.space_eval) before trusting them.
extra_10 = {
    'bootstrap': False,
    'ccp_alpha': 0.06816154953537701,
    'max_depth': 34,
    'max_features': 2,
    'max_leaf_nodes': 17,
    'min_impurity_decrease': 10.897433724670414,
    'min_samples_leaf': 45,
    'min_samples_split': 25,
    'min_weight_fraction_leaf': 0.22786627091058692,
    'n_estimators': 100,
}

# Hyperparameters for the 'lgbm' base model; also reused by the meta-learner below.
lgbm_10 = {
    'colsample_bytree': 0.8350973419202665,
    'learning_rate': 0.03134966396365972,
    'max_depth': 114,
    'min_child_samples': 20,
    'min_split_gain': 0.24406788869557822,
    'n_estimators': 454,
    'num_leaves': 115,
    'reg_alpha': 1.0870546166564243,
    'reg_lambda': 346.21163772786895,
    'scale_pos_weight': 5.81617865285278,
    'subsample': 0.45612075761336973,
}

# Hyperparameters for the 'ngbr' base model.
ngbr_10 = {
    'n_estimators': 400,
    'learning_rate': 0.10343505554950799,
    'natural_gradient': True,
    'col_sample': 0.71,
    'minibatch_frac': 0.96,
    'tol': 6.075001314642106e-05,
}

# Level 0: one helper call per base model, in the order cat -> extra -> lgbm -> ngbr.
# Each call yields a (train-side, test-side) pair of arrays.
(xx_train, xx_test), (yy_train, yy_test), (zz_train, zz_test), (qq_train, qq_test) = [
    stk.get_stacking_base_datasets(algo, train_x, train_y, col='Y_10', test=test_x, params=cfg)
    for algo, cfg in (
        ('cat', cat_10),
        ('extra', extra_10),
        ('lgbm', lgbm_10),
        ('ngbr', ngbr_10),
    )
]

# Level 1 inputs: the four base-model outputs joined column-wise.
Stack_final_X_train = np.concatenate([xx_train, yy_train, zz_train, qq_train], axis=1)
Stack_final_X_test = np.concatenate([xx_test, yy_test, zz_test, qq_test], axis=1)

# Choose the final (meta) model: an LGBMRegressor built from lgbm_10,
# fitted on the stacked train features and used to predict the stacked test features.
lr_final = LGBMRegressor(**lgbm_10)
stack_final10 = lr_final.fit(Stack_final_X_train, train_y['Y_10']).predict(Stack_final_X_test)



# Y_11 -- two-level stacking for target column 'Y_11'.

# Hyperparameters for the 'cat' base model.
cat_11 = {
    'depth': 9,
    'fold_len_multiplier': 1.469922031736939,
    'l2_leaf_reg': 5.365254126430433,
    'learning_rate': 0.05182013518976086,
    'max_bin': 147,
    'min_data_in_leaf': 238,
    'n_estimators': 450,
    'random_strength': 0.07839813420603847,
}

# Hyperparameters for the 'extra' base model.
# 'bootstrap' was the integer 0; it is spelled as the equivalent boolean False
# because the option is a boolean flag and strict parameter validation rejects ints.
# NOTE(review): the integer 'max_features' (and the former 0 for 'bootstrap') look
# like raw hyperopt `hp.choice` indices rather than the chosen values -- confirm
# against the search space (e.g. with hyperopt.space_eval) before trusting them.
extra_11 = {
    'bootstrap': False,
    'ccp_alpha': 0.22748332620407474,
    'max_depth': 18,
    'max_features': 2,
    'max_leaf_nodes': 30,
    'min_impurity_decrease': 63.03156125087142,
    'min_samples_leaf': 6,
    'min_samples_split': 20,
    'min_weight_fraction_leaf': 0.04105676583350759,
    'n_estimators': 150,
}

# Hyperparameters for the 'lgbm' base model; also reused by the meta-learner below.
lgbm_11 = {
    'colsample_bytree': 0.7285829045071064,
    'learning_rate': 0.019839273085108612,
    'max_depth': 71,
    'min_child_samples': 50,
    'min_split_gain': 0.35567737788276876,
    'n_estimators': 970,
    'num_leaves': 140,
    'reg_alpha': 0.27353134227182774,
    'reg_lambda': 157.85749037224548,
    'scale_pos_weight': 5.956126991298146,
    'subsample': 0.7509931500532172,
}

# Hyperparameters for the 'ngbr' base model.
ngbr_11 = {
    'n_estimators': 330,
    'learning_rate': 0.10650808882845716,
    'natural_gradient': False,
    'col_sample': 0.67,
    'minibatch_frac': 0.8200000000000001,
    'tol': 5.808799526610105e-05,
}

# Level 0: one helper call per base model, in the order cat -> extra -> lgbm -> ngbr.
# Each call yields a (train-side, test-side) pair of arrays.
(xx_train, xx_test), (yy_train, yy_test), (zz_train, zz_test), (qq_train, qq_test) = [
    stk.get_stacking_base_datasets(algo, train_x, train_y, col='Y_11', test=test_x, params=cfg)
    for algo, cfg in (
        ('cat', cat_11),
        ('extra', extra_11),
        ('lgbm', lgbm_11),
        ('ngbr', ngbr_11),
    )
]

# Level 1 inputs: the four base-model outputs joined column-wise.
Stack_final_X_train = np.concatenate([xx_train, yy_train, zz_train, qq_train], axis=1)
Stack_final_X_test = np.concatenate([xx_test, yy_test, zz_test, qq_test], axis=1)

# Choose the final (meta) model: an LGBMRegressor built from lgbm_11,
# fitted on the stacked train features and used to predict the stacked test features.
lr_final = LGBMRegressor(**lgbm_11)
stack_final11 = lr_final.fit(Stack_final_X_train, train_y['Y_11']).predict(Stack_final_X_test)



# Y_12 -- two-level stacking for target column 'Y_12'.

# Hyperparameters for the 'cat' base model.
cat_12 = {
    'n_estimators': 250,
    'depth': 6,
    'learning_rate': 0.15612168413836122,
    'l2_leaf_reg': 4.521702132180398,
    'max_bin': 249,
    'min_data_in_leaf': 218,
    'random_strength': 4.051962608010968,
    'fold_len_multiplier': 1.1690440537893567,
}

# Hyperparameters for the 'extra' base model.
# 'bootstrap' was the integer 0; it is spelled as the equivalent boolean False
# because the option is a boolean flag and strict parameter validation rejects ints.
# NOTE(review): the integer 'max_features' (and the former 0 for 'bootstrap') look
# like raw hyperopt `hp.choice` indices rather than the chosen values -- confirm
# against the search space (e.g. with hyperopt.space_eval) before trusting them.
extra_12 = {
    'bootstrap': False,
    'ccp_alpha': 0.7514591922993081,
    'max_depth': 50,
    'max_features': 3,
    'max_leaf_nodes': 3,
    'min_impurity_decrease': 112.61937027705285,
    'min_samples_leaf': 25,
    'min_samples_split': 30,
    'min_weight_fraction_leaf': 0.16488737437051704,
    'n_estimators': 100,
}

# Hyperparameters for the 'lgbm' base model; also reused by the meta-learner below.
lgbm_12 = {
    'colsample_bytree': 0.6115826698158419,
    'learning_rate': 0.010052927231718068,
    'max_depth': 71,
    'min_child_samples': 85,
    'min_split_gain': 0.12003011548878659,
    'n_estimators': 1300,
    'num_leaves': 120,
    'reg_alpha': 1.3013867029804251,
    'reg_lambda': 269.3915696845848,
    'scale_pos_weight': 5.290961082236748,
    'subsample': 0.7542724715058367,
}

# Hyperparameters for the 'ngbr' base model.
# 'natural_gradient' was the integer 1; it is spelled as the equivalent boolean True
# to match the bool values used for this key in the other targets' dicts.
# NOTE(review): the former 1 may likewise be a raw hyperopt choice index -- confirm.
ngbr_12 = {
    'col_sample': 0.79,
    'learning_rate': 0.11842716149209648,
    'minibatch_frac': 0.78,
    'n_estimators': 100,
    'natural_gradient': True,
    'tol': 0.00017488848399995865,
}

# Level 0: one helper call per base model, in the order cat -> extra -> lgbm -> ngbr.
# Each call yields a (train-side, test-side) pair of arrays.
(xx_train, xx_test), (yy_train, yy_test), (zz_train, zz_test), (qq_train, qq_test) = [
    stk.get_stacking_base_datasets(algo, train_x, train_y, col='Y_12', test=test_x, params=cfg)
    for algo, cfg in (
        ('cat', cat_12),
        ('extra', extra_12),
        ('lgbm', lgbm_12),
        ('ngbr', ngbr_12),
    )
]

# Level 1 inputs: the four base-model outputs joined column-wise.
Stack_final_X_train = np.concatenate([xx_train, yy_train, zz_train, qq_train], axis=1)
Stack_final_X_test = np.concatenate([xx_test, yy_test, zz_test, qq_test], axis=1)

# Choose the final (meta) model: an LGBMRegressor built from lgbm_12,
# fitted on the stacked train features and used to predict the stacked test features.
lr_final = LGBMRegressor(**lgbm_12)
stack_final12 = lr_final.fit(Stack_final_X_train, train_y['Y_12']).predict(Stack_final_X_test)



# Y_13 -- two-level stacking for target column 'Y_13'.

# Hyperparameters for the 'cat' base model.
cat_13 = {
    'n_estimators': 250,
    'depth': 10,
    'learning_rate': 0.06019223910388208,
    'l2_leaf_reg': 3.692745142783531,
    'max_bin': 30,
    'min_data_in_leaf': 71,
    'random_strength': 0.061877335687993515,
    'fold_len_multiplier': 2.1969765304562054,
}

# Hyperparameters for the 'extra' base model.
# 'bootstrap' was the integer 0; it is spelled as the equivalent boolean False
# because the option is a boolean flag and strict parameter validation rejects ints.
# NOTE(review): the integer 'max_features' (and the former 0 for 'bootstrap') look
# like raw hyperopt `hp.choice` indices rather than the chosen values -- confirm
# against the search space (e.g. with hyperopt.space_eval) before trusting them.
extra_13 = {
    'bootstrap': False,
    'ccp_alpha': 0.9305876780539839,
    'max_depth': 32,
    'max_features': 3,
    'max_leaf_nodes': 6,
    'min_impurity_decrease': 103.59182427685685,
    'min_samples_leaf': 41,
    'min_samples_split': 10,
    'min_weight_fraction_leaf': 0.2746586014429824,
    'n_estimators': 100,
}

# Hyperparameters for the 'lgbm' base model; also reused by the meta-learner below.
lgbm_13 = {
    'colsample_bytree': 0.9511047907962863,
    'learning_rate': 0.023257873709858216,
    'max_depth': 58,
    'min_child_samples': 80,
    'min_split_gain': 0.21488153574891886,
    'n_estimators': 1300,
    'num_leaves': 150,
    'reg_alpha': 0.33761852089148814,
    'reg_lambda': 57.05291849099506,
    'scale_pos_weight': 2.0801436555772854,
    'subsample': 0.5580106548214563,
}

# Hyperparameters for the 'ngbr' base model.
ngbr_13 = {
    'n_estimators': 100,
    'learning_rate': 0.09415734373605988,
    'natural_gradient': False,
    'col_sample': 0.67,
    'minibatch_frac': 0.86,
    'tol': 0.00010625760957734133,
}

# Level 0: one helper call per base model, in the order cat -> extra -> lgbm -> ngbr.
# Each call yields a (train-side, test-side) pair of arrays.
(xx_train, xx_test), (yy_train, yy_test), (zz_train, zz_test), (qq_train, qq_test) = [
    stk.get_stacking_base_datasets(algo, train_x, train_y, col='Y_13', test=test_x, params=cfg)
    for algo, cfg in (
        ('cat', cat_13),
        ('extra', extra_13),
        ('lgbm', lgbm_13),
        ('ngbr', ngbr_13),
    )
]

# Level 1 inputs: the four base-model outputs joined column-wise.
Stack_final_X_train = np.concatenate([xx_train, yy_train, zz_train, qq_train], axis=1)
Stack_final_X_test = np.concatenate([xx_test, yy_test, zz_test, qq_test], axis=1)

# Choose the final (meta) model: an LGBMRegressor built from lgbm_13,
# fitted on the stacked train features and used to predict the stacked test features.
lr_final = LGBMRegressor(**lgbm_13)
stack_final13 = lr_final.fit(Stack_final_X_train, train_y['Y_13']).predict(Stack_final_X_test)



# Y_14 -- two-level stacking for target column 'Y_14'.

# Hyperparameters for the 'cat' base model.
cat_14 = {
    'n_estimators': 250,
    'depth': 9,
    'learning_rate': 0.0994089124916012,
    'l2_leaf_reg': 7.430077445061214,
    'max_bin': 163,
    'min_data_in_leaf': 420,
    'random_strength': 0.5517727972303491,
    'fold_len_multiplier': 1.1173814975860605,
}

# Hyperparameters for the 'extra' base model.
# 'bootstrap' was the integer 0; it is spelled as the equivalent boolean False
# because the option is a boolean flag and strict parameter validation rejects ints.
# NOTE(review): 'max_features': 0 is left untouched because the intended value
# cannot be determined from this cell. It looks like a raw hyperopt `hp.choice`
# index rather than the chosen value, and an integer max_features of 0 is outside
# the range scikit-learn tree ensembles accept -- resolve it against the search
# space (e.g. with hyperopt.space_eval) before running this cell.
extra_14 = {
    'bootstrap': False,
    'ccp_alpha': 0.5803551586810121,
    'max_depth': 9,
    'max_features': 0,
    'max_leaf_nodes': 24,
    'min_impurity_decrease': 114.7107697231918,
    'min_samples_leaf': 6,
    'min_samples_split': 35,
    'min_weight_fraction_leaf': 0.1492044083631023,
    'n_estimators': 150,
}

# Hyperparameters for the 'lgbm' base model; also reused by the meta-learner below.
lgbm_14 = {
    'colsample_bytree': 0.8851122740930837,
    'learning_rate': 0.013136814152245062,
    'max_depth': 249,
    'min_child_samples': 65,
    'min_split_gain': 0.2072264172906347,
    'n_estimators': 450,
    'num_leaves': 135,
    'reg_alpha': 0.642890771203696,
    'reg_lambda': 45.624663648443345,
    'scale_pos_weight': 6.400746088779947,
    'subsample': 0.30084274480143686,
}

# Hyperparameters for the 'ngbr' base model.
ngbr_14 = {
    'n_estimators': 300,
    'learning_rate': 0.08509952436476127,
    'natural_gradient': False,
    'col_sample': 1,
    'minibatch_frac': 0.86,
    'tol': 0.00011282289882632527,
}

# Level 0: one helper call per base model, in the order cat -> extra -> lgbm -> ngbr.
# Each call yields a (train-side, test-side) pair of arrays.
(xx_train, xx_test), (yy_train, yy_test), (zz_train, zz_test), (qq_train, qq_test) = [
    stk.get_stacking_base_datasets(algo, train_x, train_y, col='Y_14', test=test_x, params=cfg)
    for algo, cfg in (
        ('cat', cat_14),
        ('extra', extra_14),
        ('lgbm', lgbm_14),
        ('ngbr', ngbr_14),
    )
]

# Level 1 inputs: the four base-model outputs joined column-wise.
Stack_final_X_train = np.concatenate([xx_train, yy_train, zz_train, qq_train], axis=1)
Stack_final_X_test = np.concatenate([xx_test, yy_test, zz_test, qq_test], axis=1)

# Choose the final (meta) model: an LGBMRegressor built from lgbm_14,
# fitted on the stacked train features and used to predict the stacked test features.
lr_final = LGBMRegressor(**lgbm_14)
stack_final14 = lr_final.fit(Stack_final_X_train, train_y['Y_14']).predict(Stack_final_X_test)

## Data Save (to CSV)

In [None]:
# Load the submission template, fill each target column with the matching
# stacked prediction, and write the result out without the index column.
sub = pd.read_csv('./sample_submission.csv')
sub = sub.assign(
    Y_01=stack_final1,
    Y_02=stack_final2,
    Y_03=stack_final3,
    Y_04=stack_final4,
    Y_05=stack_final5,
    Y_06=stack_final6,
    Y_07=stack_final7,
    Y_08=stack_final8,
    Y_09=stack_final9,
    Y_10=stack_final10,
    Y_11=stack_final11,
    Y_12=stack_final12,
    Y_13=stack_final13,
    Y_14=stack_final14,
)
sub.to_csv('./stack_.csv', index=False)