# 패키지 불러오기

In [1]:
import math
import pandas as pd
import numpy as np

import lightgbm as lgb
from catboost import Pool, CatBoostRegressor

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression

# Number of K-fold splits; read as a global by every KFold-based routine below.
n_splits = 5

# 데이터 불러오기

In [2]:
def _read_train_frame(weight_label, player_label, fa_label):
    """Read one training CSV (cp949) and drop the 'ID' and '선수명' columns.

    The path is assembled from the weighting-scheme folder name, the player
    type prefix and the fa/nonfa suffix.
    """
    csv_path = (
        f'../선수데이터(전처리완료)/모델링용ver8/{weight_label}/'
        f'{player_label}_{weight_label}_{fa_label}_train.csv'
    )
    frame = pd.read_csv(csv_path, encoding='cp949')
    return frame.drop(['ID', '선수명'], axis=1)


# Files under the '단순가중' folder
hitter_simple_fa = _read_train_frame('단순가중', '타자', 'fa')
hitter_simple_nonfa = _read_train_frame('단순가중', '타자', 'nonfa')
pitcher_simple_fa = _read_train_frame('단순가중', '투수', 'fa')
pitcher_simple_nonfa = _read_train_frame('단순가중', '투수', 'nonfa')

# Files under the '시간가중' folder
hitter_time_weighted_fa = _read_train_frame('시간가중', '타자', 'fa')
hitter_time_weighted_nonfa = _read_train_frame('시간가중', '타자', 'nonfa')
pitcher_time_weighted_fa = _read_train_frame('시간가중', '투수', 'fa')
pitcher_time_weighted_nonfa = _read_train_frame('시간가중', '투수', 'nonfa')

# Files under the '경기가중' folder
hitter_play_weighted_fa = _read_train_frame('경기가중', '타자', 'fa')
hitter_play_weighted_nonfa = _read_train_frame('경기가중', '타자', 'nonfa')
pitcher_play_weighted_fa = _read_train_frame('경기가중', '투수', 'fa')
pitcher_play_weighted_nonfa = _read_train_frame('경기가중', '투수', 'nonfa')

# Files under the '시간경기가중' folder
hitter_time_and_play_weighted_fa = _read_train_frame('시간경기가중', '타자', 'fa')
hitter_time_and_play_weighted_nonfa = _read_train_frame('시간경기가중', '타자', 'nonfa')
pitcher_time_and_play_weighted_fa = _read_train_frame('시간경기가중', '투수', 'fa')
pitcher_time_and_play_weighted_nonfa = _read_train_frame('시간경기가중', '투수', 'nonfa')

# 모델 정의

In [3]:
def Xy_split(dataset):
    """Separate a frame into features and the '연봉' target column.

    Args:
        dataset: DataFrame that contains a '연봉' column.

    Returns:
        Tuple ``(X, y)`` where ``X`` is ``dataset`` without '연봉' and ``y``
        is the '연봉' column.
    """
    target_column = '연봉'
    return dataset.drop(columns=target_column), dataset[target_column]

In [4]:
def lgbm_model(data_x, data_y):
    """Cross-validate a LightGBM regressor and return the mean validation RMSE.

    One model is trained per fold of a shuffled ``KFold`` (the number of folds
    comes from the global ``n_splits``); each fold uses early stopping against
    its own validation split.

    Args:
        data_x: Feature DataFrame; rows are selected positionally.
        data_y: Target values aligned row-by-row with ``data_x``.

    Returns:
        Mean of the per-fold best validation RMSE, rounded to one decimal.
    """
    lgb_params = {"objective": "rmse",
                  "verbosity": -1}

    # Index the target by position so it always matches the ``iloc`` slices of
    # data_x, whatever index labels data_y carries.
    y_values = np.asarray(data_y)

    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_scores = []
    for step, (tr_idx, val_idx) in enumerate(cv.split(data_x), start=1):

        print(f'\n\n ============================ {step} ============================')

        X_train = data_x.iloc[tr_idx, :].values
        y_train = y_values[tr_idx]

        X_valid = data_x.iloc[val_idx, :].values
        y_valid = y_values[val_idx]

        lgb_dtrain = lgb.Dataset(data=X_train, label=y_train)
        lgb_dvalid = lgb.Dataset(data=X_valid, label=y_valid)

        # NOTE(review): newer LightGBM releases reportedly replace the
        # early_stopping_rounds / verbose_eval keywords with callbacks;
        # confirm against the installed version before upgrading.
        lgb_model = lgb.train(lgb_params, lgb_dtrain, 20000, valid_sets=[lgb_dvalid],
                              early_stopping_rounds=100, verbose_eval=1000)

        # Collect the score per fold instead of assuming exactly five folds.
        fold_scores.append(lgb_model.best_score['valid_0']['rmse'])

    return round(np.mean(fold_scores), 1)

In [5]:
def cb_model(data_x, data_y):
    """Cross-validate a CatBoost regressor and return the mean validation RMSE.

    One model is trained per fold of a shuffled ``KFold`` (the number of folds
    comes from the global ``n_splits``); each fold uses early stopping against
    its own validation split and keeps the best iteration.

    Args:
        data_x: Feature DataFrame; rows are selected positionally.
        data_y: Target values aligned row-by-row with ``data_x``.

    Returns:
        Mean of the per-fold best validation RMSE, rounded to one decimal.
    """
    cat_cols = []  # no categorical features are declared

    # Index the target by position so it always matches the ``iloc`` slices of
    # data_x, whatever index labels data_y carries.
    y_values = np.asarray(data_y)

    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_scores = []
    for tr_idx, val_idx in cv.split(data_x):

        cb_dtrain = Pool(data=data_x.iloc[tr_idx, :], label=y_values[tr_idx], cat_features=cat_cols)
        cb_dvalid = Pool(data=data_x.iloc[val_idx, :], label=y_values[val_idx], cat_features=cat_cols)

        regressor = CatBoostRegressor(iterations=20000, eval_metric='RMSE', loss_function='RMSE', verbose=0)
        regressor.fit(cb_dtrain, eval_set=cb_dvalid, early_stopping_rounds=100,
                      verbose_eval=1000, use_best_model=True)

        # Collect the score per fold instead of assuming exactly five folds.
        fold_scores.append(regressor.best_score_['validation']['RMSE'])

    return round(np.mean(fold_scores), 1)

In [6]:
def lgbm_log_model(data_x, data_y):
    """Cross-validate LightGBM on a log10 target and score on the raw scale.

    The model is fitted on ``data_y`` as given; predictions and validation
    labels are both mapped back with ``10 ** value`` before the RMSE is
    computed, so ``data_y`` is expected to be log10-transformed already.

    Args:
        data_x: Feature DataFrame; rows are selected positionally.
        data_y: Target values aligned row-by-row with ``data_x``.

    Returns:
        Mean back-transformed validation RMSE over the folds (fold count from
        the global ``n_splits``), rounded to one decimal.
    """
    lgb_params = {"objective": "rmse",
                  "verbosity": -1}

    # Index the target by position so it always matches the ``iloc`` slices of
    # data_x, whatever index labels data_y carries.
    y_values = np.asarray(data_y)

    performance = []

    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    for tr_idx, val_idx in cv.split(data_x):

        X_train = data_x.iloc[tr_idx, :].values
        y_train = y_values[tr_idx]

        X_valid = data_x.iloc[val_idx, :].values
        y_valid = y_values[val_idx]

        lgb_dtrain = lgb.Dataset(data=X_train, label=y_train)
        lgb_dvalid = lgb.Dataset(data=X_valid, label=y_valid)

        # NOTE(review): newer LightGBM releases reportedly replace the
        # early_stopping_rounds / verbose_eval keywords with callbacks;
        # confirm against the installed version before upgrading.
        lgb_model = lgb.train(lgb_params, lgb_dtrain, 20000, valid_sets=[lgb_dvalid],
                              early_stopping_rounds=100, verbose_eval=1000)

        # Undo the log10 transform on both sides before scoring.
        pred_raw = np.power(10.0, lgb_model.predict(X_valid))
        true_raw = np.power(10.0, y_valid)

        # sqrt(MSE) avoids the `squared=False` keyword, which newer
        # scikit-learn versions no longer accept.
        performance.append(np.sqrt(mean_squared_error(true_raw, pred_raw)))

    return round(np.mean(performance), 1)

In [7]:
def cb_log_model(data_x, data_y):
    """Cross-validate CatBoost on a log10 target and score on the raw scale.

    The model is fitted on ``data_y`` as given; predictions and validation
    labels are both mapped back with ``10 ** value`` before the RMSE is
    computed, so ``data_y`` is expected to be log10-transformed already.

    Args:
        data_x: Feature DataFrame; rows are selected positionally.
        data_y: Target values aligned row-by-row with ``data_x``.

    Returns:
        Mean back-transformed validation RMSE over the folds (fold count from
        the global ``n_splits``), rounded to one decimal.
    """
    cat_cols = []  # no categorical features are declared

    # Index the target by position so it always matches the ``iloc`` slices of
    # data_x, whatever index labels data_y carries.
    y_values = np.asarray(data_y)

    performance = []

    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    for tr_idx, val_idx in cv.split(data_x):

        X_valid = data_x.iloc[val_idx, :]
        y_valid = y_values[val_idx]

        cb_dtrain = Pool(data=data_x.iloc[tr_idx, :], label=y_values[tr_idx], cat_features=cat_cols)
        cb_dvalid = Pool(data=X_valid, label=y_valid, cat_features=cat_cols)

        regressor = CatBoostRegressor(iterations=20000, eval_metric='RMSE', loss_function='RMSE', verbose=0)
        regressor.fit(cb_dtrain, eval_set=cb_dvalid, early_stopping_rounds=100,
                      verbose_eval=1000, use_best_model=True)

        # Undo the log10 transform on both sides before scoring.
        pred_raw = np.power(10.0, regressor.predict(X_valid))
        true_raw = np.power(10.0, y_valid)

        # sqrt(MSE) avoids the `squared=False` keyword, which newer
        # scikit-learn versions no longer accept.
        performance.append(np.sqrt(mean_squared_error(true_raw, pred_raw)))

    return round(np.mean(performance), 1)

# 변수선택법 방법론 정의

In [8]:
def get_lgbm_RFE_rank(X, y):
    """Rank features by recursive feature elimination with LightGBM.

    The data is split 75/25; a model is refitted on the remaining columns each
    round and the column with the lowest feature importance is removed (on
    ties, the last such column in column order). The last surviving feature
    receives rank 1 and the first removed feature the highest rank.

    Args:
        X: Feature DataFrame.
        y: Target Series aligned with ``X``.

    Returns:
        DataFrame with columns '변수명' (feature name, in removal order) and
        'RFE순위' (rank, counting down from the number of features to 1).
    """
    X, X_test, y, y_test = train_test_split(X, y, test_size=0.25, shuffle=True, random_state=42)

    X = X.reset_index(drop=True)
    X_test = X_test.reset_index(drop=True)
    y = y.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    original_feature_num = X.shape[1]
    if original_feature_num == 0:
        # Nothing to rank.
        return pd.DataFrame({'변수명': [], 'RFE순위': []})

    lgb_params = {"objective": "rmse",
                  "verbosity": -1}

    remove_cols = []

    # Eliminate one feature per round until a single feature is left. Looping
    # on "> 1" also covers an input that starts with exactly one feature.
    while X.shape[1] > 1:

        column_names = list(X.columns)
        print(f'남은변수개수: {len(column_names)}')

        lgb_dtrain = lgb.Dataset(data=X, label=y)
        lgb_dvalid = lgb.Dataset(data=X_test, label=y_test)

        # NOTE(review): newer LightGBM releases reportedly replace the
        # early_stopping_rounds / verbose_eval keywords with callbacks;
        # confirm against the installed version before upgrading.
        lgb_model = lgb.train(lgb_params, lgb_dtrain, 20000, valid_sets=[lgb_dvalid],
                              early_stopping_rounds=100, verbose_eval=1000)

        # Fetch the importances once, then drop the last column that ties for
        # the minimum.
        importances = np.asarray(lgb_model.feature_importance())
        weakest_positions = np.flatnonzero(importances == importances.min())
        remove_col = column_names[weakest_positions[-1]]

        remove_cols.append(remove_col)
        X = X.drop(remove_col, axis=1)
        X_test = X_test.drop(remove_col, axis=1)

    # The survivor is the most important feature and gets rank 1.
    remove_cols.append(X.columns[0])

    result = pd.DataFrame({'변수명': remove_cols,
                           'RFE순위': list(range(original_feature_num, 0, -1))})

    return result

In [9]:
def get_cb_RFE_rank(X, y):
    """Rank features by recursive feature elimination with CatBoost.

    The data is split 75/25; a model is refitted on the remaining columns each
    round and the column with the lowest feature importance is removed (on
    ties, the last such column in column order). The last surviving feature
    receives rank 1 and the first removed feature the highest rank.

    Args:
        X: Feature DataFrame.
        y: Target Series aligned with ``X``.

    Returns:
        DataFrame with columns '변수명' (feature name, in removal order) and
        'RFE순위' (rank, counting down from the number of features to 1).
    """
    X, X_test, y, y_test = train_test_split(X, y, test_size=0.25, shuffle=True, random_state=42)

    X = X.reset_index(drop=True)
    X_test = X_test.reset_index(drop=True)
    y = y.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    original_feature_num = X.shape[1]
    if original_feature_num == 0:
        # Nothing to rank.
        return pd.DataFrame({'변수명': [], 'RFE순위': []})

    cat_cols = []  # no categorical features are declared
    remove_cols = []

    # Eliminate one feature per round until a single feature is left. Looping
    # on "> 1" also covers an input that starts with exactly one feature.
    while X.shape[1] > 1:

        column_names = list(X.columns)
        print(f'남은변수개수: {len(column_names)}')

        cb_dtrain = Pool(data=X, label=y, cat_features=cat_cols)
        cb_dvalid = Pool(data=X_test, label=y_test, cat_features=cat_cols)

        regressor = CatBoostRegressor(iterations=20000, eval_metric='RMSE', loss_function='RMSE', verbose=0)
        regressor.fit(cb_dtrain, eval_set=cb_dvalid, early_stopping_rounds=100,
                      verbose_eval=1000, use_best_model=True)

        # Fetch the importances once, then drop the last column that ties for
        # the minimum.
        importances = np.asarray(regressor.feature_importances_)
        weakest_positions = np.flatnonzero(importances == importances.min())
        remove_col = column_names[weakest_positions[-1]]

        remove_cols.append(remove_col)
        X = X.drop(remove_col, axis=1)
        X_test = X_test.drop(remove_col, axis=1)

    # The survivor is the most important feature and gets rank 1.
    remove_cols.append(X.columns[0])

    result = pd.DataFrame({'변수명': remove_cols,
                           'RFE순위': list(range(original_feature_num, 0, -1))})

    return result

In [10]:
def lgbm_RFE_modeling(data_x, data_y, score_dataframe, log_transformation):
    """Evaluate LightGBM on the top-k RFE-ranked features for k = 5, 10, 15, ...

    Args:
        data_x: Feature DataFrame.
        data_y: Target passed through to the LightGBM CV routine.
        score_dataframe: Frame with '변수명' and 'RFE순위' columns.
        log_transformation: When equal to True, ``lgbm_log_model`` is used,
            otherwise ``lgbm_model``.

    Returns:
        DataFrame with a single 'RMSE' column indexed by the feature count k
        (multiples of 5 up to the number of columns in ``data_x``).
    """
    total_columns = data_x.shape[1]
    feature_counts = list(range(5, total_columns + 1, 5))

    rmse_values = []
    for top_k in feature_counts:
        # Names of the features whose rank lies in 1..top_k.
        accepted_ranks = list(range(1, top_k + 1))
        rank_mask = score_dataframe['RFE순위'].isin(accepted_ranks)
        chosen = score_dataframe.loc[rank_mask, '변수명'].tolist()

        subset_x = data_x[chosen]

        if log_transformation == True:
            rmse = lgbm_log_model(subset_x, data_y)
        else:
            rmse = lgbm_model(subset_x, data_y)
        rmse_values.append(rmse)

    return pd.DataFrame({'RMSE': rmse_values}, index=feature_counts)

In [11]:
def cb_RFE_modeling(data_x, data_y, score_dataframe, log_transformation):
    """Evaluate CatBoost on the top-k RFE-ranked features for k = 5, 10, 15, ...

    Args:
        data_x: Feature DataFrame.
        data_y: Target passed through to the CatBoost CV routine.
        score_dataframe: Frame with '변수명' and 'RFE순위' columns.
        log_transformation: When equal to True, ``cb_log_model`` is used,
            otherwise ``cb_model``.

    Returns:
        DataFrame with a single 'RMSE' column indexed by the feature count k
        (multiples of 5 up to the number of columns in ``data_x``).
    """
    total_columns = data_x.shape[1]
    feature_counts = list(range(5, total_columns + 1, 5))

    rmse_values = []
    for top_k in feature_counts:
        # Names of the features whose rank lies in 1..top_k.
        accepted_ranks = list(range(1, top_k + 1))
        rank_mask = score_dataframe['RFE순위'].isin(accepted_ranks)
        chosen = score_dataframe.loc[rank_mask, '변수명'].tolist()

        subset_x = data_x[chosen]

        if log_transformation == True:
            rmse = cb_log_model(subset_x, data_y)
        else:
            rmse = cb_model(subset_x, data_y)
        rmse_values.append(rmse)

    return pd.DataFrame({'RMSE': rmse_values}, index=feature_counts)

In [12]:
def get_mi_rank(X, y):
    """Rank features by mutual information with the target.

    Args:
        X: Feature DataFrame.
        y: Target values aligned with ``X``.

    Returns:
        DataFrame with columns '변수명' (feature name, in ``X`` column order)
        and 'mi순위' (rank, 1 = highest mutual information). Ranks are unique:
        tied scores are ordered by column position.
    """
    # A fixed seed makes the MI estimate, and therefore the ranking,
    # reproducible between runs.
    mi_scores = mutual_info_regression(X, y, random_state=42)

    # method='first' yields distinct whole-number ranks. The default
    # (average) turns ties into fractional ranks such as 3.5, which the
    # integer rank filter used downstream would silently drop.
    mi_ranks = pd.Series(mi_scores).rank(ascending=False, method='first')

    mi_score = pd.DataFrame({'변수명': list(X.columns),
                             'mi순위': list(mi_ranks)})

    return mi_score

In [13]:
def lgbm_mi_modeling(data_x, data_y, score_dataframe, log_transformation):
    """Evaluate LightGBM on the top-k MI-ranked features for k = 5, 10, 15, ...

    Args:
        data_x: Feature DataFrame.
        data_y: Target passed through to the LightGBM CV routine.
        score_dataframe: Frame with '변수명' and 'mi순위' columns.
        log_transformation: When truthy, ``lgbm_log_model`` is used,
            otherwise ``lgbm_model``.

    Returns:
        DataFrame with a single 'RMSE' column indexed by the feature count k
        (multiples of 5 up to the number of columns in ``data_x``).
    """
    feature_counts = list(range(5, data_x.shape[1] + 1, 5))

    # Order the names by rank once. A stable sort keeps the existing row order
    # among tied ranks, so taking the first k names always yields exactly k
    # features even when ranks are fractional or repeated.
    names_by_rank = score_dataframe.sort_values('mi순위', kind='mergesort')['변수명']

    performances = []
    for n_features in feature_counts:
        top_names = set(names_by_rank.iloc[:n_features])

        # Keep the features in score_dataframe row order.
        features = [name for name in score_dataframe['변수명'] if name in top_names]
        data_x_tmp = data_x[features]

        if log_transformation:
            performances.append(lgbm_log_model(data_x_tmp, data_y))
        else:
            performances.append(lgbm_model(data_x_tmp, data_y))

    result = pd.DataFrame({'RMSE': performances}, index=feature_counts)

    return result

In [14]:
def cb_mi_modeling(data_x, data_y, score_dataframe, log_transformation):
    """Evaluate CatBoost on the top-k MI-ranked features for k = 5, 10, 15, ...

    Args:
        data_x: Feature DataFrame.
        data_y: Target passed through to the CatBoost CV routine.
        score_dataframe: Frame with '변수명' and 'mi순위' columns.
        log_transformation: When truthy, ``cb_log_model`` is used,
            otherwise ``cb_model``.

    Returns:
        DataFrame with a single 'RMSE' column indexed by the feature count k
        (multiples of 5 up to the number of columns in ``data_x``).
    """
    feature_counts = list(range(5, data_x.shape[1] + 1, 5))

    # Order the names by rank once. A stable sort keeps the existing row order
    # among tied ranks, so taking the first k names always yields exactly k
    # features even when ranks are fractional or repeated.
    names_by_rank = score_dataframe.sort_values('mi순위', kind='mergesort')['변수명']

    performances = []
    for n_features in feature_counts:
        top_names = set(names_by_rank.iloc[:n_features])

        # Keep the features in score_dataframe row order.
        features = [name for name in score_dataframe['변수명'] if name in top_names]
        data_x_tmp = data_x[features]

        if log_transformation:
            performances.append(cb_log_model(data_x_tmp, data_y))
        else:
            performances.append(cb_model(data_x_tmp, data_y))

    result = pd.DataFrame({'RMSE': performances}, index=feature_counts)

    return result

In [15]:
def feature_seleciton_modeling(model_type, selection_method, log_transformation,
                               data_x=None, data_y=None):
    """Rank features with the chosen method and score the top-k subsets.

    Args:
        model_type: 'lightgbm' (alias 'lgbm') or 'catboost'.
        selection_method: 'RFE' or 'mi'.
        log_transformation: Forwarded to the ``*_modeling`` helper to choose
            the log-target or plain CV routine.
        data_x: Feature DataFrame (required).
        data_y: Target values aligned with ``data_x`` (required).

    Returns:
        The performance DataFrame produced by the matching ``*_modeling``
        helper.

    Raises:
        ValueError: If ``data_x``/``data_y`` are missing, or if
            ``model_type``/``selection_method`` is not supported.
    """
    if data_x is None or data_y is None:
        raise ValueError('data_x and data_y must be provided')

    # Both spellings are accepted for the LightGBM model.
    if model_type == 'lgbm':
        model_type = 'lightgbm'

    if model_type not in ('lightgbm', 'catboost'):
        raise ValueError(f'unsupported model_type: {model_type!r}')
    if selection_method not in ('RFE', 'mi'):
        raise ValueError(f'unsupported selection_method: {selection_method!r}')

    if selection_method == 'RFE':
        if model_type == 'lightgbm':
            score = get_lgbm_RFE_rank(data_x, data_y)
            performance = lgbm_RFE_modeling(data_x, data_y, score, log_transformation)
        else:
            score = get_cb_RFE_rank(data_x, data_y)
            performance = cb_RFE_modeling(data_x, data_y, score, log_transformation)
    else:
        score = get_mi_rank(data_x, data_y)
        if model_type == 'lightgbm':
            performance = lgbm_mi_modeling(data_x, data_y, score, log_transformation)
        else:
            performance = cb_mi_modeling(data_x, data_y, score, log_transformation)

    return performance

# util 정의

In [16]:
def load_dataset(player_type, weight_type, fa_type):
    """Return the preloaded training frame for one configuration.

    Args:
        player_type: '타자' or '투수'.
        weight_type: 'simple', 'time', 'play' or 'time_and_play'.
        fa_type: 'fa' or 'nonfa'.

    Returns:
        The module-level DataFrame registered for the given combination.

    Raises:
        ValueError: If the combination is not one of the 16 known datasets.
    """
    datasets = {
        # simple weighting
        ('타자', 'simple', 'fa'): hitter_simple_fa,
        ('타자', 'simple', 'nonfa'): hitter_simple_nonfa,
        ('투수', 'simple', 'fa'): pitcher_simple_fa,
        ('투수', 'simple', 'nonfa'): pitcher_simple_nonfa,
        # time weighting
        ('타자', 'time', 'fa'): hitter_time_weighted_fa,
        ('타자', 'time', 'nonfa'): hitter_time_weighted_nonfa,
        ('투수', 'time', 'fa'): pitcher_time_weighted_fa,
        ('투수', 'time', 'nonfa'): pitcher_time_weighted_nonfa,
        # play weighting
        ('타자', 'play', 'fa'): hitter_play_weighted_fa,
        ('타자', 'play', 'nonfa'): hitter_play_weighted_nonfa,
        ('투수', 'play', 'fa'): pitcher_play_weighted_fa,
        ('투수', 'play', 'nonfa'): pitcher_play_weighted_nonfa,
        # time-and-play weighting
        ('타자', 'time_and_play', 'fa'): hitter_time_and_play_weighted_fa,
        ('타자', 'time_and_play', 'nonfa'): hitter_time_and_play_weighted_nonfa,
        ('투수', 'time_and_play', 'fa'): pitcher_time_and_play_weighted_fa,
        ('투수', 'time_and_play', 'nonfa'): pitcher_time_and_play_weighted_nonfa,
    }

    key = (player_type, weight_type, fa_type)
    if key not in datasets:
        # Fail here with a clear message instead of returning None and
        # crashing later when the caller tries to use the frame.
        raise ValueError(
            f'unknown dataset combination: player_type={player_type!r}, '
            f'weight_type={weight_type!r}, fa_type={fa_type!r}'
        )
    return datasets[key]

In [17]:
def scaler_transformation(dataset, scaler_type, preserve_labels=False):
    """Fit a scaler on ``dataset`` and return the scaled values as a DataFrame.

    Args:
        dataset: Data accepted by the scaler's ``fit_transform``.
        scaler_type: 'standardized_scaler' (StandardScaler) or
            'minmax_scaler' (MinMaxScaler).
        preserve_labels: When True and ``dataset`` is a DataFrame, the result
            keeps its index and column labels. The default (False) returns a
            frame with fresh integer index and column labels.

    Returns:
        DataFrame holding the scaled values.

    Raises:
        ValueError: If ``scaler_type`` is not one of the supported names.
    """
    if scaler_type not in ('standardized_scaler', 'minmax_scaler'):
        raise ValueError(f'unsupported scaler_type: {scaler_type!r}')

    if scaler_type == 'standardized_scaler':
        scaler = StandardScaler()   # Z-score scaler
    else:
        scaler = MinMaxScaler()     # Min-max scaler

    scaled_values = scaler.fit_transform(dataset)

    if preserve_labels and isinstance(dataset, pd.DataFrame):
        return pd.DataFrame(scaled_values, index=dataset.index, columns=dataset.columns)

    return pd.DataFrame(scaled_values)

In [18]:
def log_transformation(dataset):
    """Return the base-10 logarithm of every value as a new Series.

    The result is built from a plain list, so it carries a fresh default index
    rather than the index of ``dataset``.
    """
    log_values = [math.log10(value) for value in dataset]
    return pd.Series(log_values)

# 종합함수

In [19]:
def modeling(data_x, data_y, model_type, selection_method, log_transformation):
    """Dispatch to the ranking / CV routine for the requested configuration.

    Args:
        data_x: Feature DataFrame.
        data_y: Target values.
        model_type: 'lightgbm' or 'catboost'.
        selection_method: 'RFE', 'mi' or 'no_use'.
        log_transformation: Flag compared against True / False to choose
            between the log-target and plain CV routines.

    Returns:
        ``(score, performance)`` for 'RFE' and 'mi', a single performance
        value for 'no_use', and None when no configuration matches.
    """
    use_lgbm = model_type == 'lightgbm'
    use_cb = model_type == 'catboost'
    if not (use_lgbm or use_cb):
        return None

    if selection_method == 'RFE':
        if use_lgbm:
            score = get_lgbm_RFE_rank(data_x, data_y)
            return score, lgbm_RFE_modeling(data_x, data_y, score, log_transformation)
        score = get_cb_RFE_rank(data_x, data_y)
        return score, cb_RFE_modeling(data_x, data_y, score, log_transformation)

    if selection_method == 'mi':
        score = get_mi_rank(data_x, data_y)
        if use_lgbm:
            return score, lgbm_mi_modeling(data_x, data_y, score, log_transformation)
        return score, cb_mi_modeling(data_x, data_y, score, log_transformation)

    if selection_method == 'no_use':
        if log_transformation == True:
            if use_lgbm:
                return lgbm_log_model(data_x, data_y)
            return cb_log_model(data_x, data_y)
        if log_transformation == False:
            if use_lgbm:
                return lgbm_model(data_x, data_y)
            return cb_model(data_x, data_y)

    # No supported combination matched.
    return None

In [20]:
def get_performance(player_type, weight_type, fa_type, scaler_type, log_transform, model_type, selection_method):
    """Run one experiment configuration end-to-end and report its result.

    Steps: fetch the preloaded dataset, split it into X / y, optionally scale
    X, optionally log-transform y, run ``modeling`` and print a summary.

    Returns:
        The performance value when ``selection_method`` is 'no_use';
        ``(score_df, performance)`` when it is 'RFE' or 'mi'; None for any
        other ``selection_method``.
    """
    # Dataset lookup followed by the X / y split.
    X_data, y_data = Xy_split(load_dataset(player_type, weight_type, fa_type))

    # Optional feature scaling.
    if scaler_type in ('standardized_scaler', 'minmax_scaler'):
        X_data = scaler_transformation(X_data, scaler_type)

    # Optional log transform of the target.
    if log_transform == True:
        y_data = log_transformation(y_data)

    without_selection = selection_method == 'no_use'
    with_selection = selection_method in ('RFE', 'mi')

    if without_selection:
        performance = modeling(X_data, y_data, model_type, selection_method, log_transform)
    elif with_selection:
        score_df, performance = modeling(X_data, y_data, model_type, selection_method, log_transform)

    print(f'\n\n===== {model_type} === {player_type} {fa_type} =====')
    print(f'===== {weight_type} == 스케일링: {scaler_type} == log변환: {log_transform} == 변수선택법: {selection_method} =====')

    if without_selection:
        print(f'===== RMSE: {performance} =====')
        return performance

    if with_selection:
        # `display` is not defined in this file; presumably the notebook
        # (IPython) display helper.
        display(performance)
        return score_df, performance

# 최종실험

In [21]:
def cb_final_model(data_x, data_y):
    """Train one CatBoost regressor per CV fold and collect validation R^2.

    Args:
        data_x: Feature DataFrame; rows are selected positionally.
        data_y: Target values aligned row-by-row with ``data_x``.

    Returns:
        Tuple ``(cb_models, r2_scores)`` of dicts keyed by the 1-based fold
        number: the fitted model and its R^2 on that fold's validation split.
        The number of folds comes from the global ``n_splits``.
    """
    cat_cols = []  # no categorical features are declared

    # Index the target by position so it always matches the ``iloc`` slices of
    # data_x, whatever index labels data_y carries.
    y_values = np.asarray(data_y)

    cb_models = {}
    r2_scores = {}

    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    for step, (tr_idx, val_idx) in enumerate(cv.split(data_x), start=1):

        X_valid = data_x.iloc[val_idx, :]
        y_valid = y_values[val_idx]

        cb_dtrain = Pool(data=data_x.iloc[tr_idx, :], label=y_values[tr_idx], cat_features=cat_cols)
        cb_dvalid = Pool(data=X_valid, label=y_valid, cat_features=cat_cols)

        regressor = CatBoostRegressor(iterations=20000, eval_metric='RMSE', loss_function='RMSE', verbose=0)
        regressor.fit(cb_dtrain, eval_set=cb_dvalid, early_stopping_rounds=100,
                      verbose_eval=1000, use_best_model=True)

        cb_models[step] = regressor
        r2_scores[step] = r2_score(y_valid, regressor.predict(X_valid))

    return cb_models, r2_scores

#### (1) 투수FA
 - 시간경기가중 // Z-scoring + Feature selection(RFE)

In [22]:
# Load the training data (identifier columns 'ID' and '선수명' are dropped)
train = pd.read_csv('../선수데이터(전처리완료)/모델링용ver8/시간경기가중/투수_시간경기가중_fa_train.csv', encoding='cp949').drop(['ID','선수명'], axis=1)

# Split into features (X) and target (y)
train_x, train_y = Xy_split(train)

# Scale the features with the standardized (Z-score) scaler
train_x = scaler_transformation(dataset = train_x, 
                                scaler_type = 'standardized_scaler')

# Derive the RFE feature ranking with CatBoost
ranking = get_cb_RFE_rank(train_x, train_y)
# Bare expression: shows the ranking frame as the notebook cell output
ranking

남은변수개수: 143
Learning rate set to 0.006278
0:	learn: 48273.8152265	test: 39428.9240563	best: 39428.9240563 (0)	total: 171ms	remaining: 57m 2s
1000:	learn: 7008.0480110	test: 14039.0031596	best: 14039.0031596 (1000)	total: 5.53s	remaining: 1m 45s
2000:	learn: 3742.5687661	test: 13153.8802287	best: 13153.8802287 (2000)	total: 11.1s	remaining: 1m 39s
3000:	learn: 2126.3724595	test: 12958.0678529	best: 12957.7203966 (2994)	total: 16.9s	remaining: 1m 35s
4000:	learn: 1284.0760268	test: 12893.1872847	best: 12892.8698430 (3974)	total: 22.5s	remaining: 1m 29s
5000:	learn: 817.4530858	test: 12868.8443529	best: 12868.7059655 (4998)	total: 27.9s	remaining: 1m 23s
6000:	learn: 528.4267291	test: 12855.0395703	best: 12854.4532713 (5923)	total: 33.6s	remaining: 1m 18s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12852.42794
bestIteration = 6082

Shrink model to first 6083 iterations.
남은변수개수: 142
Learning rate set to 0.006278
0:	learn: 48273.8152265	test: 39428.9240563	best: 39428

6000:	learn: 520.0095702	test: 12799.2529575	best: 12799.2529575 (6000)	total: 30.8s	remaining: 1m 11s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12791.07687
bestIteration = 6637

Shrink model to first 6638 iterations.
남은변수개수: 133
Learning rate set to 0.006278
0:	learn: 48290.8026126	test: 39425.7431938	best: 39425.7431938 (0)	total: 6.5ms	remaining: 2m 10s
1000:	learn: 7081.1124873	test: 13869.7901000	best: 13869.7901000 (1000)	total: 5.15s	remaining: 1m 37s
2000:	learn: 3723.2952632	test: 12980.0434315	best: 12980.0434315 (2000)	total: 10.3s	remaining: 1m 33s
3000:	learn: 2126.8264309	test: 12794.8043814	best: 12794.8043814 (3000)	total: 15.5s	remaining: 1m 27s
4000:	learn: 1304.4524196	test: 12705.7408650	best: 12705.6624937 (3998)	total: 20.6s	remaining: 1m 22s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12681.05629
bestIteration = 4765

Shrink model to first 4766 iterations.
남은변수개수: 132
Learning rate set to 0.006278
0:	learn: 48249.52

3000:	learn: 2072.6860619	test: 12975.0328951	best: 12975.0328951 (3000)	total: 15.2s	remaining: 1m 26s
4000:	learn: 1300.1028005	test: 12902.1421575	best: 12902.1421575 (4000)	total: 20.3s	remaining: 1m 21s
5000:	learn: 829.4900827	test: 12862.9028963	best: 12862.7804505 (4999)	total: 25.4s	remaining: 1m 16s
6000:	learn: 549.1918092	test: 12846.4525350	best: 12846.2454858 (5995)	total: 30.5s	remaining: 1m 11s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12841.18926
bestIteration = 6283

Shrink model to first 6284 iterations.
남은변수개수: 123
Learning rate set to 0.006278
0:	learn: 48280.3530464	test: 39453.3212127	best: 39453.3212127 (0)	total: 6.1ms	remaining: 2m 2s
1000:	learn: 7027.6583530	test: 13923.6628535	best: 13923.6628535 (1000)	total: 5.08s	remaining: 1m 36s
2000:	learn: 3714.1354327	test: 13008.6698966	best: 13008.6698966 (2000)	total: 10.2s	remaining: 1m 31s
3000:	learn: 2090.9230898	test: 12824.2954699	best: 12824.2954699 (3000)	total: 15.2s	remaining: 1

1000:	learn: 7028.6994362	test: 13513.4165369	best: 13513.4165369 (1000)	total: 4.88s	remaining: 1m 32s
2000:	learn: 3741.8665969	test: 12640.7677389	best: 12639.5060793 (1999)	total: 9.75s	remaining: 1m 27s
3000:	learn: 2148.1651490	test: 12449.9421811	best: 12449.2268972 (2991)	total: 14.6s	remaining: 1m 22s
4000:	learn: 1303.0280736	test: 12348.6762149	best: 12348.2650916 (3997)	total: 19.6s	remaining: 1m 18s
5000:	learn: 834.3308905	test: 12310.6052954	best: 12310.5798732 (4998)	total: 24.5s	remaining: 1m 13s
6000:	learn: 549.6223558	test: 12289.4115665	best: 12289.4115665 (6000)	total: 29.4s	remaining: 1m 8s
7000:	learn: 350.2695188	test: 12274.5631608	best: 12274.5631608 (7000)	total: 34.2s	remaining: 1m 3s
8000:	learn: 224.1414006	test: 12267.2142356	best: 12267.1949643 (7966)	total: 39.1s	remaining: 58.7s
9000:	learn: 144.0598073	test: 12262.9651889	best: 12262.8374527 (8978)	total: 44s	remaining: 53.8s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12262.05

3000:	learn: 2160.6692167	test: 12849.9773256	best: 12848.6905862 (2986)	total: 14.2s	remaining: 1m 20s
4000:	learn: 1332.7771491	test: 12741.7929805	best: 12741.6726569 (3999)	total: 19s	remaining: 1m 15s
5000:	learn: 851.4733698	test: 12710.2152298	best: 12709.7156587 (4990)	total: 23.8s	remaining: 1m 11s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12698.0095
bestIteration = 5438

Shrink model to first 5439 iterations.
남은변수개수: 106
Learning rate set to 0.006278
0:	learn: 48254.8414879	test: 39412.5739995	best: 39412.5739995 (0)	total: 19.5ms	remaining: 6m 30s
1000:	learn: 6897.8664103	test: 13708.6597491	best: 13708.6597491 (1000)	total: 4.73s	remaining: 1m 29s
2000:	learn: 3619.5156580	test: 12862.8491584	best: 12862.8491584 (2000)	total: 9.44s	remaining: 1m 24s
3000:	learn: 2054.3916498	test: 12671.1869435	best: 12670.8997964 (2996)	total: 14.2s	remaining: 1m 20s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12639.32747
bestIteration = 327

9000:	learn: 160.2229435	test: 12424.7561601	best: 12424.6504429 (8988)	total: 40.8s	remaining: 49.9s
10000:	learn: 107.2160065	test: 12421.7264577	best: 12421.7141905 (9993)	total: 45.3s	remaining: 45.3s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12420.1002
bestIteration = 10851

Shrink model to first 10852 iterations.
남은변수개수: 98
Learning rate set to 0.006278
0:	learn: 48267.8582549	test: 39411.4431940	best: 39411.4431940 (0)	total: 5.95ms	remaining: 1m 59s
1000:	learn: 6860.0392146	test: 13620.4788631	best: 13620.4788631 (1000)	total: 4.5s	remaining: 1m 25s
2000:	learn: 3656.5850564	test: 12779.8446528	best: 12779.8262630 (1999)	total: 8.96s	remaining: 1m 20s
3000:	learn: 2054.9084472	test: 12572.6803180	best: 12572.6803180 (3000)	total: 13.5s	remaining: 1m 16s
4000:	learn: 1295.5145305	test: 12482.4482006	best: 12482.4482006 (4000)	total: 18s	remaining: 1m 11s
5000:	learn: 850.6305170	test: 12445.9169168	best: 12445.8016221 (4997)	total: 22.4s	remaining: 1m 7

1000:	learn: 6841.6546346	test: 13700.9217715	best: 13700.9217715 (1000)	total: 4.27s	remaining: 1m 21s
2000:	learn: 3711.0720904	test: 12879.0152769	best: 12879.0152769 (2000)	total: 8.49s	remaining: 1m 16s
3000:	learn: 2106.8127264	test: 12652.1416471	best: 12652.1416471 (3000)	total: 12.7s	remaining: 1m 12s
4000:	learn: 1312.4279792	test: 12547.2817949	best: 12547.2817949 (4000)	total: 17s	remaining: 1m 7s
5000:	learn: 849.0890378	test: 12499.7768911	best: 12499.7568543 (4995)	total: 21.2s	remaining: 1m 3s
6000:	learn: 553.3413830	test: 12479.3327743	best: 12479.3302338 (5996)	total: 25.5s	remaining: 59.4s
7000:	learn: 358.9008937	test: 12461.8020920	best: 12461.7933289 (6999)	total: 29.7s	remaining: 55.2s
8000:	learn: 237.8983841	test: 12453.4808098	best: 12453.2541372 (7985)	total: 34s	remaining: 51s
9000:	learn: 155.9158497	test: 12448.5181229	best: 12448.4894757 (8925)	total: 38.3s	remaining: 46.7s
10000:	learn: 103.8368865	test: 12445.1412277	best: 12445.1412277 (10000)	total: 

1000:	learn: 6943.9446485	test: 13652.1867103	best: 13652.1867103 (1000)	total: 3.96s	remaining: 1m 15s
2000:	learn: 3735.7520392	test: 12741.5524628	best: 12740.5381044 (1999)	total: 7.94s	remaining: 1m 11s
3000:	learn: 2120.7638403	test: 12522.4392657	best: 12522.3569358 (2998)	total: 11.9s	remaining: 1m 7s
4000:	learn: 1313.7169341	test: 12432.3363199	best: 12431.9914525 (3988)	total: 15.9s	remaining: 1m 3s
5000:	learn: 862.8628221	test: 12393.2627630	best: 12393.2228999 (4998)	total: 19.9s	remaining: 59.6s
6000:	learn: 568.4921216	test: 12369.5458099	best: 12369.5446547 (5995)	total: 23.8s	remaining: 55.6s
7000:	learn: 372.3883002	test: 12353.9892612	best: 12353.9508604 (6996)	total: 27.8s	remaining: 51.6s
8000:	learn: 248.4431013	test: 12343.5866540	best: 12343.4839054 (7994)	total: 31.8s	remaining: 47.7s
9000:	learn: 170.2601869	test: 12338.7401438	best: 12338.7262650 (8999)	total: 35.7s	remaining: 43.7s
10000:	learn: 117.5949687	test: 12335.4043944	best: 12335.4043944 (10000)	to

0:	learn: 48241.6477898	test: 39404.5697506	best: 39404.5697506 (0)	total: 5.29ms	remaining: 1m 45s
1000:	learn: 6798.1995243	test: 13512.4537891	best: 13512.4537891 (1000)	total: 3.72s	remaining: 1m 10s
2000:	learn: 3729.9118224	test: 12698.6229713	best: 12698.6229713 (2000)	total: 7.43s	remaining: 1m 6s
3000:	learn: 2157.3294193	test: 12476.7427404	best: 12476.7195965 (2999)	total: 11.2s	remaining: 1m 3s
4000:	learn: 1350.4346111	test: 12395.0894537	best: 12394.8965542 (3996)	total: 14.9s	remaining: 59.5s
5000:	learn: 882.4493460	test: 12355.2089747	best: 12355.2089747 (5000)	total: 18.6s	remaining: 55.9s
6000:	learn: 573.2348898	test: 12327.9929884	best: 12327.8407940 (5997)	total: 22.4s	remaining: 52.2s
7000:	learn: 373.2491195	test: 12313.6992037	best: 12313.6026854 (6996)	total: 26.1s	remaining: 48.4s
8000:	learn: 247.8585092	test: 12302.6084809	best: 12302.4959559 (7988)	total: 29.8s	remaining: 44.7s
9000:	learn: 166.8260267	test: 12296.0180881	best: 12296.0111012 (8998)	total: 

Stopped by overfitting detector  (100 iterations wait)

bestTest = 12329.57978
bestIteration = 7169

Shrink model to first 7170 iterations.
남은변수개수: 70
Learning rate set to 0.006278
0:	learn: 48249.0990767	test: 39392.4791500	best: 39392.4791500 (0)	total: 10.8ms	remaining: 3m 36s
1000:	learn: 6862.4503737	test: 13454.7613426	best: 13454.1190333 (999)	total: 3.44s	remaining: 1m 5s
2000:	learn: 3776.6520588	test: 12586.9686390	best: 12586.9686390 (2000)	total: 6.87s	remaining: 1m 1s
3000:	learn: 2202.3331436	test: 12375.8611192	best: 12375.8611192 (3000)	total: 10.3s	remaining: 58.3s
4000:	learn: 1360.0407682	test: 12299.2846417	best: 12299.2846417 (4000)	total: 13.7s	remaining: 54.8s
5000:	learn: 906.3711182	test: 12262.3426818	best: 12262.3426818 (5000)	total: 17.1s	remaining: 51.3s
6000:	learn: 611.2564686	test: 12242.6973319	best: 12242.6973319 (6000)	total: 20.5s	remaining: 47.8s
7000:	learn: 409.2233900	test: 12226.9879531	best: 12226.9879531 (7000)	total: 23.9s	remaining: 44.5s
80

9000:	learn: 219.4118806	test: 12306.9445877	best: 12306.7676049 (8955)	total: 28s	remaining: 34.3s
10000:	learn: 150.7491992	test: 12302.2381676	best: 12302.2119230 (9994)	total: 31.2s	remaining: 31.2s
11000:	learn: 105.4637273	test: 12299.1376448	best: 12299.1376448 (11000)	total: 34.4s	remaining: 28.1s
12000:	learn: 74.2591382	test: 12297.3596658	best: 12297.3555026 (11999)	total: 37.5s	remaining: 25s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12296.34135
bestIteration = 12818

Shrink model to first 12819 iterations.
남은변수개수: 61
Learning rate set to 0.006278
0:	learn: 48262.4011920	test: 39430.5647208	best: 39430.5647208 (0)	total: 9.91ms	remaining: 3m 18s
1000:	learn: 6799.4615371	test: 13429.6670478	best: 13429.6670478 (1000)	total: 3.08s	remaining: 58.4s
2000:	learn: 3768.1344346	test: 12685.7390663	best: 12685.7390663 (2000)	total: 6.15s	remaining: 55.3s
3000:	learn: 2273.8073298	test: 12539.6524771	best: 12539.6524771 (3000)	total: 9.21s	remaining: 52.2s


5000:	learn: 1025.6408015	test: 12535.4723400	best: 12535.0501628 (4994)	total: 14.1s	remaining: 42.3s
6000:	learn: 660.5511002	test: 12499.7621268	best: 12499.7621268 (6000)	total: 17s	remaining: 39.5s
7000:	learn: 432.3472373	test: 12478.5050888	best: 12478.4577049 (6999)	total: 19.8s	remaining: 36.8s
8000:	learn: 297.0980371	test: 12466.2062239	best: 12466.1514014 (7973)	total: 22.6s	remaining: 33.9s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12464.61761
bestIteration = 8183

Shrink model to first 8184 iterations.
남은변수개수: 54
Learning rate set to 0.006278
0:	learn: 48266.5909849	test: 39401.9047212	best: 39401.9047212 (0)	total: 7.58ms	remaining: 2m 31s
1000:	learn: 6808.5584002	test: 13537.2281673	best: 13537.2281673 (1000)	total: 2.79s	remaining: 52.9s
2000:	learn: 3831.2151424	test: 12804.9307158	best: 12804.9307158 (2000)	total: 5.57s	remaining: 50.1s
3000:	learn: 2287.3097945	test: 12620.5737296	best: 12619.6848837 (2995)	total: 8.38s	remaining: 47.4s
400

5000:	learn: 903.4417442	test: 12559.6357444	best: 12559.6357444 (5000)	total: 12.5s	remaining: 37.4s
6000:	learn: 602.1446877	test: 12538.2184489	best: 12538.1204646 (5995)	total: 15s	remaining: 35s
7000:	learn: 415.1205930	test: 12529.2805263	best: 12529.2805263 (7000)	total: 17.5s	remaining: 32.5s
8000:	learn: 290.7619679	test: 12521.2842220	best: 12521.1201183 (7965)	total: 20s	remaining: 30s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12521.12012
bestIteration = 7965

Shrink model to first 7966 iterations.
남은변수개수: 47
Learning rate set to 0.006278
0:	learn: 48276.7140666	test: 39416.4164049	best: 39416.4164049 (0)	total: 15.3ms	remaining: 5m 5s
1000:	learn: 6780.8952531	test: 13494.9192917	best: 13494.9192917 (1000)	total: 2.38s	remaining: 45.2s
2000:	learn: 3997.9030450	test: 12886.1370660	best: 12886.1370660 (2000)	total: 4.75s	remaining: 42.8s
3000:	learn: 2425.7821777	test: 12718.5535936	best: 12718.3361124 (2997)	total: 7.13s	remaining: 40.4s
4000:	learn

1000:	learn: 6764.0768346	test: 13218.5683532	best: 13218.5683532 (1000)	total: 2.05s	remaining: 38.9s
2000:	learn: 4085.2259433	test: 12542.3589456	best: 12542.0089320 (1992)	total: 4.06s	remaining: 36.5s
3000:	learn: 2643.0378585	test: 12379.9920236	best: 12379.9920236 (3000)	total: 6.09s	remaining: 34.5s
4000:	learn: 1709.1438150	test: 12302.9652228	best: 12302.6672350 (3994)	total: 8.11s	remaining: 32.4s
5000:	learn: 1155.4640080	test: 12267.8264692	best: 12267.6054762 (4999)	total: 10.1s	remaining: 30.4s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12257.45015
bestIteration = 5534

Shrink model to first 5535 iterations.
남은변수개수: 36
Learning rate set to 0.006278
0:	learn: 48257.7733468	test: 39422.3420088	best: 39422.3420088 (0)	total: 19.9ms	remaining: 6m 38s
1000:	learn: 6860.9858808	test: 13271.9387567	best: 13271.9387567 (1000)	total: 2.01s	remaining: 38.1s
2000:	learn: 4242.2651811	test: 12651.4608338	best: 12651.4608338 (2000)	total: 3.99s	remaining: 35.9

Stopped by overfitting detector  (100 iterations wait)

bestTest = 11900.46055
bestIteration = 6634

Shrink model to first 6635 iterations.
남은변수개수: 27
Learning rate set to 0.006278
0:	learn: 48265.9840293	test: 39409.2202869	best: 39409.2202869 (0)	total: 2.38ms	remaining: 47.6s
1000:	learn: 6729.9813743	test: 12633.4246178	best: 12633.4246178 (1000)	total: 1.46s	remaining: 27.7s
2000:	learn: 3965.6176294	test: 11908.3485470	best: 11907.6946965 (1996)	total: 2.92s	remaining: 26.3s
3000:	learn: 2623.2996075	test: 11732.9780335	best: 11732.6611311 (2998)	total: 4.42s	remaining: 25s
4000:	learn: 1749.2295726	test: 11638.9742416	best: 11638.7166816 (3996)	total: 5.92s	remaining: 23.7s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 11613.6
bestIteration = 4542

Shrink model to first 4543 iterations.
남은변수개수: 26
Learning rate set to 0.006278
0:	learn: 48246.9601822	test: 39397.7107356	best: 39397.7107356 (0)	total: 15.6ms	remaining: 5m 12s
1000:	learn: 6794.9869684	test: 1

6000:	learn: 1061.7071557	test: 11516.5065023	best: 11516.4146704 (5986)	total: 6.97s	remaining: 16.3s
7000:	learn: 792.6578516	test: 11496.8440485	best: 11496.6901957 (6998)	total: 8.13s	remaining: 15.1s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 11492.45155
bestIteration = 7299

Shrink model to first 7300 iterations.
남은변수개수: 16
Learning rate set to 0.006278
0:	learn: 48248.1318987	test: 39414.5263279	best: 39414.5263279 (0)	total: 1.61ms	remaining: 32.1s
1000:	learn: 7080.1144441	test: 12918.3318377	best: 12918.3318377 (1000)	total: 966ms	remaining: 18.3s
2000:	learn: 4555.9477299	test: 12358.4097156	best: 12357.2575998 (1999)	total: 1.92s	remaining: 17.3s
3000:	learn: 2994.4578304	test: 12113.6564955	best: 12113.6564955 (3000)	total: 2.91s	remaining: 16.5s
4000:	learn: 2055.0365199	test: 12022.5849800	best: 12019.6631389 (3959)	total: 3.87s	remaining: 15.5s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12010.53887
bestIteration = 4231

Sh

1000:	learn: 14375.4262476	test: 21626.4144140	best: 21625.9327373 (997)	total: 641ms	remaining: 12.2s
2000:	learn: 11948.5735795	test: 20564.0005255	best: 20562.5584769 (1997)	total: 1.35s	remaining: 12.2s
3000:	learn: 10674.1579574	test: 19926.6424529	best: 19926.6424529 (3000)	total: 2.05s	remaining: 11.6s
4000:	learn: 9999.2704839	test: 19674.7688969	best: 19674.7688969 (4000)	total: 2.76s	remaining: 11s
5000:	learn: 9535.1253842	test: 19570.4053014	best: 19570.4053014 (5000)	total: 3.46s	remaining: 10.4s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 19535.99041
bestIteration = 5783

Shrink model to first 5784 iterations.
남은변수개수: 2
Learning rate set to 0.006278
0:	learn: 48247.6479708	test: 39407.8429905	best: 39407.8429905 (0)	total: 646us	remaining: 12.9s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 25403.93754
bestIteration = 347

Shrink model to first 348 iterations.


Unnamed: 0,변수명,RFE순위
0,141,143
1,64,142
2,77,141
3,70,140
4,72,139
...,...,...
138,60,5
139,142,4
140,0,3
141,139,2


In [35]:
# Keep only the variables whose RFE rank ('RFE순위') lies in 1..optimal_rank
optimal_rank = 15
rank_window = list(range(1, optimal_rank + 1))
rank_mask = ranking['RFE순위'].isin(rank_window)
modeling_columns = list(ranking.loc[rank_mask, '변수명'])
print(modeling_columns)
train_x2 = train_x[modeling_columns]

# Train on the reduced feature set, then show the mean of the returned
# r2scores values rounded to 4 decimals (last expression = cell output)
final_models, r2scores = cb_final_model(train_x2, train_y)
r2_values = list(r2scores.values())
round(np.mean(r2_values), 4)

[116, 88, 14, 58, 63, 94, 1, 138, 65, 61, 60, 142, 0, 139, 80]
Learning rate set to 0.006355
0:	learn: 48024.3307522	test: 38018.9613400	best: 38018.9613400 (0)	total: 3.63ms	remaining: 1m 12s
1000:	learn: 7095.7890450	test: 13209.8992010	best: 13209.8992010 (1000)	total: 970ms	remaining: 18.4s
2000:	learn: 4699.1354015	test: 12747.2029438	best: 12747.2029438 (2000)	total: 1.94s	remaining: 17.5s
3000:	learn: 3091.5424109	test: 12442.6336194	best: 12442.6336194 (3000)	total: 2.94s	remaining: 16.7s
4000:	learn: 2236.0975951	test: 12366.4810820	best: 12365.7487751 (3971)	total: 3.92s	remaining: 15.7s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12359.86252
bestIteration = 4204

Shrink model to first 4205 iterations.
Learning rate set to 0.006355
0:	learn: 47307.9196771	test: 41392.3083571	best: 41392.3083571 (0)	total: 4.74ms	remaining: 1m 34s
1000:	learn: 7214.6499651	test: 13937.4379881	best: 13937.4379881 (1000)	total: 1.06s	remaining: 20.2s
2000:	learn: 4810.5641

0.9245

#### (2) 타자 nonFA
 - 시간경기가중 // Z-scoring + Feature selection(RFE)

In [36]:
# Load the hitter non-FA training CSV (시간경기가중 variant) and drop the
# identifier columns
train_path = '../선수데이터(전처리완료)/모델링용ver8/시간경기가중/타자_시간경기가중_nonfa_train.csv'
train = pd.read_csv(train_path, encoding='cp949')
train = train.drop(columns=['ID', '선수명'])

# Split into X / y (Xy_split is defined elsewhere in this notebook)
train_x, train_y = Xy_split(train)

# Scale the features using the 'standardized_scaler' option
train_x = scaler_transformation(dataset=train_x,
                                scaler_type='standardized_scaler')

# Derive the RFE variable ranking; the bare name displays it as cell output
ranking = get_cb_RFE_rank(train_x, train_y)
ranking

남은변수개수: 145
Learning rate set to 0.007498
0:	learn: 8619.6493114	test: 6447.2252572	best: 6447.2252572 (0)	total: 22.1ms	remaining: 7m 22s
1000:	learn: 1724.8132729	test: 2769.3123306	best: 2769.3123306 (1000)	total: 5.56s	remaining: 1m 45s
2000:	learn: 1035.9936853	test: 2667.4877340	best: 2667.4877340 (2000)	total: 11.7s	remaining: 1m 45s
3000:	learn: 729.7803309	test: 2636.2553001	best: 2636.2553001 (3000)	total: 17.5s	remaining: 1m 39s
4000:	learn: 516.5489678	test: 2621.7854811	best: 2621.7854811 (4000)	total: 23.2s	remaining: 1m 32s
5000:	learn: 373.9175500	test: 2613.9181757	best: 2613.8311792 (4990)	total: 28.7s	remaining: 1m 26s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2613.626442
bestIteration = 5165

Shrink model to first 5166 iterations.
남은변수개수: 144
Learning rate set to 0.007498
0:	learn: 8619.6493114	test: 6447.2252572	best: 6447.2252572 (0)	total: 15.5ms	remaining: 5m 10s
1000:	learn: 1724.8132729	test: 2769.3123306	best: 2769.3123306 (1000)	tota

1000:	learn: 1720.1705017	test: 2757.7055884	best: 2757.7055884 (1000)	total: 5.41s	remaining: 1m 42s
2000:	learn: 1029.5046513	test: 2651.1675450	best: 2651.1675450 (2000)	total: 10.8s	remaining: 1m 37s
3000:	learn: 726.1374303	test: 2616.8237267	best: 2616.7363333 (2999)	total: 16.3s	remaining: 1m 32s
4000:	learn: 522.0749992	test: 2603.7331816	best: 2603.4216156 (3956)	total: 21.7s	remaining: 1m 26s
5000:	learn: 372.4945740	test: 2595.3399291	best: 2595.3366786 (4999)	total: 27.1s	remaining: 1m 21s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2594.651349
bestIteration = 5057

Shrink model to first 5058 iterations.
남은변수개수: 133
Learning rate set to 0.007498
0:	learn: 8618.7914267	test: 6448.0677029	best: 6448.0677029 (0)	total: 16.7ms	remaining: 5m 33s
1000:	learn: 1723.0835623	test: 2801.9875731	best: 2801.9875731 (1000)	total: 5.42s	remaining: 1m 42s
2000:	learn: 1043.8045001	test: 2689.7339539	best: 2689.7339539 (2000)	total: 10.9s	remaining: 1m 38s
3000:	lear

2000:	learn: 1032.0090834	test: 2669.7124627	best: 2669.7124627 (2000)	total: 10.5s	remaining: 1m 34s
3000:	learn: 731.2793936	test: 2631.2839977	best: 2631.2483859 (2999)	total: 15.8s	remaining: 1m 29s
4000:	learn: 514.2346098	test: 2612.5254477	best: 2612.3225079 (3997)	total: 21.1s	remaining: 1m 24s
5000:	learn: 368.7620140	test: 2604.5599217	best: 2604.5599217 (5000)	total: 26.4s	remaining: 1m 19s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2603.844404
bestIteration = 5142

Shrink model to first 5143 iterations.
남은변수개수: 122
Learning rate set to 0.007498
0:	learn: 8620.0901034	test: 6444.7307309	best: 6444.7307309 (0)	total: 22.2ms	remaining: 7m 24s
1000:	learn: 1715.0502052	test: 2792.8515572	best: 2792.8515572 (1000)	total: 5.28s	remaining: 1m 40s
2000:	learn: 1035.9459107	test: 2678.5950026	best: 2678.5950026 (2000)	total: 10.5s	remaining: 1m 34s
3000:	learn: 710.0595240	test: 2652.2102475	best: 2652.2102475 (3000)	total: 15.8s	remaining: 1m 29s
4000:	learn

3000:	learn: 719.9942369	test: 2597.7830662	best: 2597.7497171 (2999)	total: 15.4s	remaining: 1m 27s
4000:	learn: 509.6537282	test: 2576.1976341	best: 2576.1562231 (3990)	total: 20.5s	remaining: 1m 21s
5000:	learn: 361.9620282	test: 2563.9259156	best: 2563.9259156 (5000)	total: 25.6s	remaining: 1m 16s
6000:	learn: 266.5104958	test: 2557.9091359	best: 2557.8715299 (5991)	total: 30.7s	remaining: 1m 11s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2556.504644
bestIteration = 6722

Shrink model to first 6723 iterations.
남은변수개수: 112
Learning rate set to 0.007498
0:	learn: 8619.9331670	test: 6452.9273160	best: 6452.9273160 (0)	total: 11.4ms	remaining: 3m 47s
1000:	learn: 1715.5722060	test: 2772.9706995	best: 2772.5766282 (996)	total: 5.05s	remaining: 1m 35s
2000:	learn: 1026.8766796	test: 2654.6779600	best: 2654.6126913 (1994)	total: 10.1s	remaining: 1m 31s
3000:	learn: 724.8381688	test: 2619.2539563	best: 2619.2539563 (3000)	total: 15.3s	remaining: 1m 26s
4000:	learn: 

1000:	learn: 1723.7477161	test: 2749.1049683	best: 2749.0410680 (999)	total: 4.86s	remaining: 1m 32s
2000:	learn: 1037.6694076	test: 2632.7329958	best: 2632.7329958 (2000)	total: 9.72s	remaining: 1m 27s
3000:	learn: 720.8315197	test: 2598.3448330	best: 2598.3332981 (2998)	total: 14.6s	remaining: 1m 22s
4000:	learn: 495.1018233	test: 2582.7930919	best: 2582.7930919 (4000)	total: 19.5s	remaining: 1m 18s
5000:	learn: 349.8345893	test: 2571.2716916	best: 2571.2716916 (5000)	total: 24.5s	remaining: 1m 13s
6000:	learn: 256.3302914	test: 2566.6736171	best: 2566.6422475 (5904)	total: 29.4s	remaining: 1m 8s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2566.642247
bestIteration = 5904

Shrink model to first 5905 iterations.
남은변수개수: 102
Learning rate set to 0.007498
0:	learn: 8619.7808463	test: 6448.1754051	best: 6448.1754051 (0)	total: 5.6ms	remaining: 1m 52s
1000:	learn: 1716.3091245	test: 2761.0770641	best: 2761.0770641 (1000)	total: 4.85s	remaining: 1m 32s
2000:	learn: 1

1000:	learn: 1720.3916630	test: 2776.8127135	best: 2776.8127135 (1000)	total: 4.54s	remaining: 1m 26s
2000:	learn: 1059.5596243	test: 2658.5730480	best: 2658.5730480 (2000)	total: 8.97s	remaining: 1m 20s
3000:	learn: 750.0445665	test: 2622.7066904	best: 2622.6575664 (2978)	total: 13.4s	remaining: 1m 15s
4000:	learn: 513.1451496	test: 2606.5924998	best: 2606.5305913 (3999)	total: 17.8s	remaining: 1m 11s
5000:	learn: 368.5993036	test: 2596.2410716	best: 2596.1331334 (4990)	total: 22.3s	remaining: 1m 6s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2592.442066
bestIteration = 5542

Shrink model to first 5543 iterations.
남은변수개수: 92
Learning rate set to 0.007498
0:	learn: 8619.6172667	test: 6446.1856270	best: 6446.1856270 (0)	total: 13ms	remaining: 4m 19s
1000:	learn: 1722.9025955	test: 2765.9619285	best: 2765.9619285 (1000)	total: 4.41s	remaining: 1m 23s
2000:	learn: 1042.3364551	test: 2628.9304511	best: 2628.9282689 (1999)	total: 8.87s	remaining: 1m 19s
3000:	learn: 7

1000:	learn: 1730.4809988	test: 2737.7219018	best: 2737.7219018 (1000)	total: 4.04s	remaining: 1m 16s
2000:	learn: 1086.8839177	test: 2623.1835409	best: 2623.1835409 (2000)	total: 8.07s	remaining: 1m 12s
3000:	learn: 775.7292903	test: 2590.3790154	best: 2590.3790154 (3000)	total: 12.1s	remaining: 1m 8s
4000:	learn: 543.3460240	test: 2574.1206391	best: 2574.1206391 (4000)	total: 16.1s	remaining: 1m 4s
5000:	learn: 390.9301463	test: 2564.0752810	best: 2563.9636625 (4944)	total: 20.2s	remaining: 1m
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2562.348441
bestIteration = 5271

Shrink model to first 5272 iterations.
남은변수개수: 81
Learning rate set to 0.007498
0:	learn: 8620.3730738	test: 6445.2500346	best: 6445.2500346 (0)	total: 4.79ms	remaining: 1m 35s
1000:	learn: 1725.2280215	test: 2762.5561970	best: 2762.5561970 (1000)	total: 4.03s	remaining: 1m 16s
2000:	learn: 1052.7715388	test: 2652.4914599	best: 2652.4914599 (2000)	total: 8.06s	remaining: 1m 12s
3000:	learn: 738.

3000:	learn: 726.9810204	test: 2626.1229481	best: 2625.6315549 (2982)	total: 10.7s	remaining: 1m
4000:	learn: 524.4221897	test: 2616.9662204	best: 2616.9562452 (3997)	total: 14.3s	remaining: 57.3s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2612.528277
bestIteration = 4761

Shrink model to first 4762 iterations.
남은변수개수: 68
Learning rate set to 0.007498
0:	learn: 8622.4914207	test: 6449.4181631	best: 6449.4181631 (0)	total: 4.53ms	remaining: 1m 30s
1000:	learn: 1738.6237900	test: 2778.1543826	best: 2778.0579748 (999)	total: 3.58s	remaining: 1m 7s
2000:	learn: 1080.7784009	test: 2662.1518919	best: 2662.1518919 (2000)	total: 7.16s	remaining: 1m 4s
3000:	learn: 745.5518341	test: 2628.1392996	best: 2628.1392996 (3000)	total: 10.8s	remaining: 1m 1s
4000:	learn: 554.5086846	test: 2617.9443394	best: 2617.9292053 (3959)	total: 14.4s	remaining: 57.6s
5000:	learn: 402.8846890	test: 2612.5953346	best: 2612.5290757 (4993)	total: 18s	remaining: 54s
Stopped by overfitting detec

1000:	learn: 1735.1501516	test: 2757.1437090	best: 2757.0099849 (999)	total: 3.11s	remaining: 59.1s
2000:	learn: 1072.0984156	test: 2617.9612946	best: 2617.9612946 (2000)	total: 6.25s	remaining: 56.3s
3000:	learn: 743.6663784	test: 2582.7128186	best: 2582.7128186 (3000)	total: 9.42s	remaining: 53.3s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2574.50469
bestIteration = 3676

Shrink model to first 3677 iterations.
남은변수개수: 54
Learning rate set to 0.007498
0:	learn: 8619.0995025	test: 6444.9121010	best: 6444.9121010 (0)	total: 3.85ms	remaining: 1m 16s
1000:	learn: 1743.7841000	test: 2751.5692084	best: 2751.5692084 (1000)	total: 3.01s	remaining: 57.1s
2000:	learn: 1085.1260144	test: 2608.5967521	best: 2608.5967521 (2000)	total: 6.1s	remaining: 54.9s
3000:	learn: 752.1334490	test: 2582.3746229	best: 2582.3212574 (2951)	total: 9.18s	remaining: 52s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2579.402003
bestIteration = 3148

Shrink model to first 

1000:	learn: 1750.4739442	test: 2709.3675145	best: 2708.8836167 (996)	total: 2.24s	remaining: 42.6s
2000:	learn: 1097.2929165	test: 2607.5640429	best: 2607.5640429 (2000)	total: 4.5s	remaining: 40.5s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2581.510231
bestIteration = 2817

Shrink model to first 2818 iterations.
남은변수개수: 39
Learning rate set to 0.007498
0:	learn: 8620.5677775	test: 6447.9058130	best: 6447.9058130 (0)	total: 2.74ms	remaining: 54.9s
1000:	learn: 1745.5327846	test: 2674.1346280	best: 2674.1346280 (1000)	total: 2.23s	remaining: 42.3s
2000:	learn: 1098.9454504	test: 2576.1966281	best: 2576.1773948 (1999)	total: 4.45s	remaining: 40.1s
3000:	learn: 768.1065569	test: 2555.2090385	best: 2555.2090385 (3000)	total: 6.73s	remaining: 38.1s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2554.68066
bestIteration = 3016

Shrink model to first 3017 iterations.
남은변수개수: 38
Learning rate set to 0.007498
0:	learn: 8620.0489537	test: 6449.0846940

2000:	learn: 1189.2310995	test: 2643.0512179	best: 2641.9009647 (1964)	total: 3.19s	remaining: 28.7s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2641.900965
bestIteration = 1964

Shrink model to first 1965 iterations.
남은변수개수: 22
Learning rate set to 0.007498
0:	learn: 8618.5515429	test: 6443.2596604	best: 6443.2596604 (0)	total: 17ms	remaining: 5m 40s
1000:	learn: 1781.2854503	test: 2699.3013556	best: 2698.9077671 (999)	total: 1.57s	remaining: 29.8s
2000:	learn: 1200.8328768	test: 2617.7284713	best: 2617.0955747 (1976)	total: 3.17s	remaining: 28.5s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2613.207588
bestIteration = 2230

Shrink model to first 2231 iterations.
남은변수개수: 21
Learning rate set to 0.007498
0:	learn: 8618.8199066	test: 6445.7721462	best: 6445.7721462 (0)	total: 2.2ms	remaining: 44s
1000:	learn: 1787.3592758	test: 2672.9126072	best: 2672.9126072 (1000)	total: 1.53s	remaining: 29.1s
Stopped by overfitting detector  (100 iteration

Unnamed: 0,변수명,RFE순위
0,87,145
1,86,144
2,85,143
3,55,142
4,81,141
...,...,...
140,7,5
141,134,4
142,118,3
143,57,2


In [37]:
# Keep only the variables whose RFE rank ('RFE순위') lies in 1..optimal_rank
optimal_rank = 10
rank_window = list(range(1, optimal_rank + 1))
rank_mask = ranking['RFE순위'].isin(rank_window)
modeling_columns = list(ranking.loc[rank_mask, '변수명'])
print(modeling_columns)
train_x2 = train_x[modeling_columns]

# Train on the reduced feature set, then show the mean of the returned
# r2scores values rounded to 4 decimals (last expression = cell output)
final_models, r2scores = cb_final_model(train_x2, train_y)
r2_values = list(r2scores.values())
round(np.mean(r2_values), 4)

[59, 64, 140, 96, 101, 7, 134, 118, 57, 144]
Learning rate set to 0.007589
0:	learn: 8554.9873643	test: 6125.7340471	best: 6125.7340471 (0)	total: 1.65ms	remaining: 33s
1000:	learn: 1904.8448560	test: 2769.0729618	best: 2769.0729618 (1000)	total: 994ms	remaining: 18.9s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2752.487436
bestIteration = 1305

Shrink model to first 1306 iterations.
Learning rate set to 0.007589
0:	learn: 8510.1157526	test: 6389.8642469	best: 6389.8642469 (0)	total: 6.91ms	remaining: 2m 18s
1000:	learn: 1823.9870227	test: 2851.4641462	best: 2851.2277320 (998)	total: 1.01s	remaining: 19.1s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 2810.752039
bestIteration = 1490

Shrink model to first 1491 iterations.
Learning rate set to 0.007589
0:	learn: 7307.0896993	test: 10817.0398543	best: 10817.0398543 (0)	total: 1.58ms	remaining: 31.7s
1000:	learn: 2090.4419608	test: 2961.6026550	best: 2961.6026550 (1000)	total: 986ms	remaining: 

0.8694

#### (3) 투수 nonFA
 - 단순가중 // Feature selection(RFE)

In [38]:
# Load the pitcher non-FA training CSV (단순가중 variant) and drop the
# identifier columns
train_path = '../선수데이터(전처리완료)/모델링용ver8/단순가중/투수_단순가중_nonfa_train.csv'
train = pd.read_csv(train_path, encoding='cp949')
train = train.drop(columns=['ID', '선수명'])

# Split into X / y (Xy_split is defined elsewhere in this notebook)
train_x, train_y = Xy_split(train)

# Scale the features using the 'standardized_scaler' option
# NOTE(review): the section heading for this model lists only
# "Feature selection(RFE)" (no Z-scoring), yet standard scaling is applied
# here — confirm which one is intended.
train_x = scaler_transformation(dataset=train_x,
                                scaler_type='standardized_scaler')

# Derive the RFE variable ranking; the bare name displays it as cell output
ranking = get_cb_RFE_rank(train_x, train_y)
ranking

남은변수개수: 143
Learning rate set to 0.007431
0:	learn: 8587.8944283	test: 8914.9664910	best: 8914.9664910 (0)	total: 21.7ms	remaining: 7m 13s
1000:	learn: 2433.9402635	test: 4781.1656188	best: 4781.1656188 (1000)	total: 4.39s	remaining: 1m 23s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4726.899024
bestIteration = 1525

Shrink model to first 1526 iterations.
남은변수개수: 142
Learning rate set to 0.007431
0:	learn: 8589.7942075	test: 8912.8590394	best: 8912.8590394 (0)	total: 21.2ms	remaining: 7m 4s
1000:	learn: 2423.4626002	test: 4777.0283285	best: 4775.8495957 (996)	total: 4.39s	remaining: 1m 23s
2000:	learn: 1493.2781946	test: 4715.9126994	best: 4715.8240446 (1996)	total: 8.79s	remaining: 1m 19s
3000:	learn: 1029.8805295	test: 4661.5283629	best: 4661.4592943 (2992)	total: 13.2s	remaining: 1m 14s
4000:	learn: 746.1756591	test: 4641.5796339	best: 4641.3864630 (3970)	total: 17.6s	remaining: 1m 10s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4637.894

1000:	learn: 2448.0829101	test: 4792.6009442	best: 4792.0179065 (998)	total: 4.33s	remaining: 1m 22s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4739.761047
bestIteration = 1353

Shrink model to first 1354 iterations.
남은변수개수: 125
Learning rate set to 0.007431
0:	learn: 8589.3091792	test: 8913.3701391	best: 8913.3701391 (0)	total: 19.5ms	remaining: 6m 30s
1000:	learn: 2424.8260044	test: 4813.2563068	best: 4813.2563068 (1000)	total: 4.27s	remaining: 1m 21s
2000:	learn: 1517.4811552	test: 4731.0793780	best: 4731.0793780 (2000)	total: 8.56s	remaining: 1m 16s
3000:	learn: 1025.5511912	test: 4675.1658978	best: 4675.1246892 (2970)	total: 12.9s	remaining: 1m 12s
4000:	learn: 753.7189714	test: 4658.3095520	best: 4658.2277945 (3991)	total: 17.2s	remaining: 1m 8s
5000:	learn: 560.5945376	test: 4648.6197604	best: 4648.5616319 (4998)	total: 21.5s	remaining: 1m 4s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4642.283097
bestIteration = 5841

Shrink model 

0:	learn: 8589.5650109	test: 8908.4753177	best: 8908.4753177 (0)	total: 17.7ms	remaining: 5m 54s
1000:	learn: 2430.5074935	test: 4787.1818193	best: 4786.8368241 (998)	total: 4.12s	remaining: 1m 18s
2000:	learn: 1528.8817173	test: 4708.1316984	best: 4708.1316984 (2000)	total: 8.2s	remaining: 1m 13s
3000:	learn: 1024.4284157	test: 4657.3156065	best: 4657.3156065 (3000)	total: 12.3s	remaining: 1m 9s
4000:	learn: 740.9432525	test: 4635.0164951	best: 4635.0164951 (4000)	total: 16.4s	remaining: 1m 5s
5000:	learn: 550.8310434	test: 4624.6621906	best: 4624.2708688 (4978)	total: 20.6s	remaining: 1m 1s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4622.080973
bestIteration = 5441

Shrink model to first 5442 iterations.
남은변수개수: 109
Learning rate set to 0.007431
0:	learn: 8592.0996657	test: 8915.5074227	best: 8915.5074227 (0)	total: 4.93ms	remaining: 1m 38s
1000:	learn: 2439.3391142	test: 4828.9422110	best: 4828.9422110 (1000)	total: 3.98s	remaining: 1m 15s
Stopped by overfitt

1000:	learn: 2429.2104583	test: 4797.0950723	best: 4797.0950723 (1000)	total: 3.64s	remaining: 1m 9s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4746.913207
bestIteration = 1521

Shrink model to first 1522 iterations.
남은변수개수: 95
Learning rate set to 0.007431
0:	learn: 8594.9737971	test: 8912.7336076	best: 8912.7336076 (0)	total: 8.28ms	remaining: 2m 45s
1000:	learn: 2408.3630915	test: 4740.1473439	best: 4739.0302713 (995)	total: 3.67s	remaining: 1m 9s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4698.328742
bestIteration = 1406

Shrink model to first 1407 iterations.
남은변수개수: 94
Learning rate set to 0.007431
0:	learn: 8592.6054708	test: 8911.9187241	best: 8911.9187241 (0)	total: 8.06ms	remaining: 2m 41s
1000:	learn: 2438.2787024	test: 4844.2091668	best: 4844.2091668 (1000)	total: 3.63s	remaining: 1m 8s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4793.028196
bestIteration = 1338

Shrink model to first 1339 iterations.
남은

4000:	learn: 753.7763061	test: 4657.3037396	best: 4656.8346440 (3928)	total: 13.1s	remaining: 52.4s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4651.022527
bestIteration = 4506

Shrink model to first 4507 iterations.
남은변수개수: 81
Learning rate set to 0.007431
0:	learn: 8595.7824785	test: 8912.4994874	best: 8912.4994874 (0)	total: 21.3ms	remaining: 7m 6s
1000:	learn: 2377.3724894	test: 4720.2992675	best: 4720.0278467 (997)	total: 3.27s	remaining: 1m 1s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4685.386001
bestIteration = 1373

Shrink model to first 1374 iterations.
남은변수개수: 80
Learning rate set to 0.007431
0:	learn: 8593.6357074	test: 8918.8453220	best: 8918.8453220 (0)	total: 19.5ms	remaining: 6m 29s
1000:	learn: 2408.6040703	test: 4750.6888168	best: 4750.6888168 (1000)	total: 3.22s	remaining: 1m 1s
2000:	learn: 1493.2024312	test: 4673.1599185	best: 4673.1599185 (2000)	total: 6.44s	remaining: 57.9s
3000:	learn: 1032.4853400	test: 4614.744160

Stopped by overfitting detector  (100 iterations wait)

bestTest = 4605.18409
bestIteration = 5146

Shrink model to first 5147 iterations.
남은변수개수: 69
Learning rate set to 0.007431
0:	learn: 8590.1424273	test: 8915.5772597	best: 8915.5772597 (0)	total: 4.46ms	remaining: 1m 29s
1000:	learn: 2351.5422025	test: 4784.0369391	best: 4784.0369391 (1000)	total: 2.9s	remaining: 55.1s
2000:	learn: 1464.2896331	test: 4691.9327781	best: 4690.9891339 (1972)	total: 5.84s	remaining: 52.5s
3000:	learn: 1012.6171240	test: 4640.1015961	best: 4640.1015961 (3000)	total: 8.76s	remaining: 49.6s
4000:	learn: 721.1916659	test: 4613.5689489	best: 4613.5126846 (3988)	total: 11.7s	remaining: 46.9s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4608.359441
bestIteration = 4370

Shrink model to first 4371 iterations.
남은변수개수: 68
Learning rate set to 0.007431
0:	learn: 8594.7271342	test: 8919.0487743	best: 8919.0487743 (0)	total: 6.08ms	remaining: 2m 1s
1000:	learn: 2347.2914746	test: 4754.5508347

5000:	learn: 559.6038826	test: 4571.0025959	best: 4570.9781276 (4982)	total: 12.7s	remaining: 38.1s
6000:	learn: 431.7948328	test: 4563.8234788	best: 4563.8234788 (6000)	total: 15.3s	remaining: 35.6s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4560.858395
bestIteration = 6562

Shrink model to first 6563 iterations.
남은변수개수: 56
Learning rate set to 0.007431
0:	learn: 8588.0741215	test: 8910.3167913	best: 8910.3167913 (0)	total: 3.38ms	remaining: 1m 7s
1000:	learn: 2345.6495152	test: 4743.1970156	best: 4742.9765534 (998)	total: 2.45s	remaining: 46.5s
2000:	learn: 1508.6575518	test: 4644.2442896	best: 4644.1243831 (1999)	total: 4.91s	remaining: 44.2s
3000:	learn: 1046.3999116	test: 4591.2390616	best: 4591.1536030 (2999)	total: 7.41s	remaining: 42s
4000:	learn: 758.8066861	test: 4562.9204242	best: 4562.9204242 (4000)	total: 9.9s	remaining: 39.6s
5000:	learn: 561.6345509	test: 4546.1128294	best: 4546.1033156 (4999)	total: 12.4s	remaining: 37.3s
Stopped by overfitting d

1000:	learn: 2367.8830650	test: 4765.9803432	best: 4765.9803432 (1000)	total: 2.04s	remaining: 38.8s
2000:	learn: 1522.8572041	test: 4694.8811921	best: 4694.8811921 (2000)	total: 4.09s	remaining: 36.8s
3000:	learn: 1068.7134696	test: 4638.5170694	best: 4638.5044833 (2999)	total: 6.16s	remaining: 34.9s
4000:	learn: 767.7119644	test: 4608.2648207	best: 4608.2525490 (3997)	total: 8.21s	remaining: 32.8s
5000:	learn: 584.1120564	test: 4591.3071759	best: 4591.2067863 (4995)	total: 10.3s	remaining: 30.8s
6000:	learn: 457.9122239	test: 4583.5900503	best: 4583.5393798 (5997)	total: 12.3s	remaining: 28.8s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4583.276521
bestIteration = 6041

Shrink model to first 6042 iterations.
남은변수개수: 45
Learning rate set to 0.007431
0:	learn: 8593.6290925	test: 8914.1344150	best: 8914.1344150 (0)	total: 7.56ms	remaining: 2m 31s
1000:	learn: 2361.8312438	test: 4751.9956566	best: 4751.6814879 (999)	total: 2.03s	remaining: 38.5s
2000:	learn: 1520.7

5000:	learn: 611.4689974	test: 4567.6917619	best: 4567.6576590 (4999)	total: 8.64s	remaining: 25.9s
6000:	learn: 481.5085879	test: 4555.1033151	best: 4554.9897179 (5992)	total: 10.4s	remaining: 24.2s
7000:	learn: 384.0088227	test: 4548.9542779	best: 4548.9542779 (7000)	total: 12.1s	remaining: 22.5s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4546.854165
bestIteration = 7685

Shrink model to first 7686 iterations.
남은변수개수: 37
Learning rate set to 0.007431
0:	learn: 8590.1040082	test: 8907.9806184	best: 8907.9806184 (0)	total: 2.43ms	remaining: 48.6s
1000:	learn: 2366.1502861	test: 4774.9891912	best: 4773.9851914 (996)	total: 1.74s	remaining: 32.9s
2000:	learn: 1555.9249637	test: 4677.6289234	best: 4677.6289234 (2000)	total: 3.47s	remaining: 31.2s
3000:	learn: 1090.5205408	test: 4634.8906923	best: 4634.8906923 (3000)	total: 5.2s	remaining: 29.5s
4000:	learn: 808.2840020	test: 4600.8448224	best: 4600.6788398 (3987)	total: 6.95s	remaining: 27.8s
5000:	learn: 616.28227

4000:	learn: 811.8080455	test: 4496.0368331	best: 4495.8097755 (3986)	total: 5.94s	remaining: 23.7s
5000:	learn: 624.3372184	test: 4482.2874625	best: 4482.2664791 (4995)	total: 7.46s	remaining: 22.4s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4478.791515
bestIteration = 5575

Shrink model to first 5576 iterations.
남은변수개수: 28
Learning rate set to 0.007431
0:	learn: 8590.5007587	test: 8918.7642214	best: 8918.7642214 (0)	total: 9.33ms	remaining: 3m 6s
1000:	learn: 2366.1551103	test: 4717.4306542	best: 4717.4306542 (1000)	total: 1.43s	remaining: 27.1s
2000:	learn: 1533.3662460	test: 4623.2564729	best: 4623.2564729 (2000)	total: 2.92s	remaining: 26.3s
3000:	learn: 1081.9859327	test: 4576.3954633	best: 4576.3954633 (3000)	total: 4.37s	remaining: 24.7s
4000:	learn: 806.1479887	test: 4551.5256042	best: 4551.3590077 (3970)	total: 5.83s	remaining: 23.3s
5000:	learn: 633.0262293	test: 4537.9198182	best: 4537.9198182 (5000)	total: 7.29s	remaining: 21.9s
Stopped by overfitti

1000:	learn: 2365.7629099	test: 4582.3120920	best: 4582.3120920 (1000)	total: 1.11s	remaining: 21.2s
2000:	learn: 1725.2540250	test: 4435.9998468	best: 4435.8731456 (1992)	total: 2.24s	remaining: 20.2s
3000:	learn: 1337.6096416	test: 4384.5929866	best: 4384.4385074 (2997)	total: 3.37s	remaining: 19.1s
4000:	learn: 1072.8353106	test: 4359.4188446	best: 4359.0482480 (3960)	total: 4.49s	remaining: 18s
5000:	learn: 879.3576848	test: 4348.2241987	best: 4347.7207570 (4926)	total: 5.64s	remaining: 16.9s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4347.720757
bestIteration = 4926

Shrink model to first 4927 iterations.
남은변수개수: 16
Learning rate set to 0.007431
0:	learn: 8590.3894216	test: 8913.7975724	best: 8913.7975724 (0)	total: 1.75ms	remaining: 35s
1000:	learn: 2344.4969930	test: 4575.5416059	best: 4575.5416059 (1000)	total: 1.12s	remaining: 21.3s
2000:	learn: 1709.2232999	test: 4441.6920279	best: 4441.6920279 (2000)	total: 2.23s	remaining: 20.1s
3000:	learn: 1313.117

Stopped by overfitting detector  (100 iterations wait)

bestTest = 4526.298176
bestIteration = 3802

Shrink model to first 3803 iterations.
남은변수개수: 5
Learning rate set to 0.007431
0:	learn: 8589.9963810	test: 8909.7280105	best: 8909.7280105 (0)	total: 1.46ms	remaining: 29.1s
1000:	learn: 2598.0484012	test: 4929.3999867	best: 4929.3999867 (1000)	total: 921ms	remaining: 17.5s
2000:	learn: 2123.0769418	test: 4838.8062028	best: 4838.8062028 (2000)	total: 1.83s	remaining: 16.5s
3000:	learn: 1823.5397434	test: 4774.0729677	best: 4773.8702717 (2999)	total: 2.76s	remaining: 15.6s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4754.965302
bestIteration = 3834

Shrink model to first 3835 iterations.
남은변수개수: 4
Learning rate set to 0.007431
0:	learn: 8592.5236804	test: 8915.7575699	best: 8915.7575699 (0)	total: 1.44ms	remaining: 28.9s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 5264.156403
bestIteration = 758

Shrink model to first 759 iterations.
남은변수개수:

Unnamed: 0,변수명,RFE순위
0,141,143
1,81,142
2,80,141
3,64,140
4,59,139
...,...,...
138,58,5
139,139,4
140,0,3
141,138,2


In [39]:
# Keep only the features whose RFE rank lies in 1..optimal_rank (inclusive),
# read from the `ranking` table's 'RFE순위' (rank) and '변수명' (feature) columns.
optimal_rank = 10
top_rank_mask = ranking['RFE순위'].isin(range(1, optimal_rank + 1))
modeling_columns = list(ranking.loc[top_rank_mask, '변수명'])
print(modeling_columns)
train_x2 = train_x[modeling_columns]

# Train on the reduced feature set, then display the mean of the returned
# R2 scores rounded to 4 decimals (this last expression is the cell output).
# NOTE(review): r2scores is presumably keyed per CV fold — confirm in cb_final_model.
final_models, r2scores = cb_final_model(train_x2, train_y)
fold_scores = list(r2scores.values())
round(np.mean(fold_scores), 4)

[14, 68, 60, 87, 65, 58, 139, 0, 138, 142]
Learning rate set to 0.007522
0:	learn: 8598.6996538	test: 8952.3038353	best: 8952.3038353 (0)	total: 1.55ms	remaining: 31.1s
1000:	learn: 2433.3308726	test: 3616.7465184	best: 3616.6502537 (999)	total: 976ms	remaining: 18.5s
2000:	learn: 1849.7734115	test: 3424.4667928	best: 3423.8269290 (1998)	total: 1.93s	remaining: 17.4s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 3387.442465
bestIteration = 2514

Shrink model to first 2515 iterations.
Learning rate set to 0.007522
0:	learn: 8585.3035747	test: 8993.1114917	best: 8993.1114917 (0)	total: 1.49ms	remaining: 29.9s
1000:	learn: 2299.7004406	test: 4343.0656144	best: 4343.0656144 (1000)	total: 979ms	remaining: 18.6s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 4253.98811
bestIteration = 1899

Shrink model to first 1900 iterations.
Learning rate set to 0.007522
0:	learn: 9134.7491342	test: 6477.4887916	best: 6477.4887916 (0)	total: 1.57ms	remaining: 31.5

0.7807