# 패키지 불러오기

In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
import xgboost as xgb

from catboost import Pool, CatBoostRegressor
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

# Number of cross-validation folds passed to KFold by every model function in this notebook
n_splits = 5

# 모델 정의
 1. Random Forest
 2. Lightgbm
 3. Xgboost
 4. Catboost
 5. Knn
 6. Linear regression

In [2]:
def Xy_split(dataset):
    """Separate a modelling table into renamed features and the salary target.

    Every column except '연봉' is kept as a feature and renamed, in column
    order, to 'variable1', 'variable2', ...

    Args:
        dataset: DataFrame that contains a '연봉' column.

    Returns:
        A tuple ``(column_dict, X, y)`` where ``column_dict`` maps each new
        'variableN' name back to the original column name, ``X`` is the
        renamed feature frame and ``y`` is the '연봉' column.
    """
    features = dataset.drop(columns='연봉')
    original_names = features.columns
    new_names = [f'variable{pos}' for pos in range(1, len(original_names) + 1)]

    # new name -> original name, so results can be traced back to real columns
    column_dict = dict(zip(new_names, original_names))
    X = features.rename(columns=dict(zip(original_names, new_names)))
    y = dataset['연봉']

    return column_dict, X, y

In [3]:
def rf_model(data_x, data_y):
    """Cross-validate a default RandomForestRegressor and report per-fold RMSE.

    Args:
        data_x: Feature DataFrame; rows are selected positionally via ``.iloc``.
        data_y: Target values aligned row-for-row with ``data_x``.

    Returns:
        A one-column DataFrame named 'rf' holding the validation RMSE of each
        fold (rows 'cv1' .. 'cvK') followed by their mean (row '평균').
    """
    # KFold yields positional indices, so index the target positionally too;
    # label-based ``data_y[idx]`` is only right for a default RangeIndex.
    target = np.asarray(data_y)
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_rmse = []
    for tr_idx, val_idx in cv.split(data_x):
        X_train, y_train = data_x.iloc[tr_idx, :], target[tr_idx]
        X_valid, y_valid = data_x.iloc[val_idx, :], target[val_idx]

        model = RandomForestRegressor(random_state=42)
        model.fit(X_train, y_train)

        pred_valid = model.predict(X_valid)
        # sqrt(MSE) rather than ``squared=False``, which scikit-learn deprecated in 1.4
        fold_rmse.append(np.sqrt(mean_squared_error(y_valid, pred_valid)))

    # Row labels follow the number of folds actually run instead of assuming 5
    index = [f'cv{fold}' for fold in range(1, len(fold_rmse) + 1)] + ['평균']
    performance = fold_rmse + [np.mean(fold_rmse)]

    return pd.DataFrame({'rf': performance}, index=index)

In [4]:
def lgbm_model(data_x, data_y):
    """Cross-validate LightGBM with early stopping and report per-fold RMSE.

    Each fold trains for up to 20000 rounds and stops once the validation
    RMSE has not improved for 100 rounds; the fold score is the best
    validation RMSE recorded by the booster.

    Args:
        data_x: Feature DataFrame; rows are selected positionally via ``.iloc``.
        data_y: Target values aligned row-for-row with ``data_x``.

    Returns:
        A one-column DataFrame named 'lgbm' holding the best validation RMSE
        of each fold (rows 'cv1' .. 'cvK') followed by their mean (row '평균').
    """
    lgb_params = {"objective": "rmse",
                  "verbosity": -1}

    # KFold yields positional indices, so index the target positionally too;
    # label-based ``data_y[idx]`` is only right for a default RangeIndex.
    target = np.asarray(data_y)
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_rmse = []
    for step, (tr_idx, val_idx) in enumerate(cv.split(data_x), start=1):

        print(f'\n\n ============================ {step} ============================')

        X_train = data_x.iloc[tr_idx, :].values
        y_train = target[tr_idx]

        X_valid = data_x.iloc[val_idx, :].values
        y_valid = target[val_idx]

        lgb_dtrain = lgb.Dataset(data=X_train, label=y_train)
        lgb_dvalid = lgb.Dataset(data=X_valid, label=y_valid)

        # NOTE(review): the `early_stopping_rounds` / `verbose_eval` keywords were
        # removed from lgb.train in LightGBM 4.0 - confirm the installed version
        # and move to callbacks when upgrading.
        booster = lgb.train(lgb_params, lgb_dtrain, 20000, valid_sets=[lgb_dvalid], early_stopping_rounds=100, verbose_eval=1000)

        # Only the score is needed afterwards, so the booster itself is not kept
        fold_rmse.append(booster.best_score['valid_0']['rmse'])

    # Row labels follow the number of folds actually run instead of assuming 5
    index = [f'cv{fold}' for fold in range(1, len(fold_rmse) + 1)] + ['평균']
    performance = fold_rmse + [np.mean(fold_rmse)]

    return pd.DataFrame({'lgbm': performance}, index=index)

In [5]:
def xgb_model(data_x, data_y):
    """Cross-validate XGBoost with early stopping and report per-fold RMSE.

    Each fold trains for up to 20000 rounds with a 100-round early-stopping
    window; the fold score is the booster's ``best_score``.

    Args:
        data_x: Feature DataFrame; rows are selected positionally via ``.iloc``.
        data_y: Target values aligned row-for-row with ``data_x``.

    Returns:
        A one-column DataFrame named 'xgb' holding the best score of each fold
        (rows 'cv1' .. 'cvK') followed by their mean (row '평균').
    """
    xgb_final_param = {
        "objective": 'reg:squarederror',
        "random_state": 42,
        "verbosity": 0,
    }

    # KFold yields positional indices, so index the target positionally too;
    # label-based ``data_y[idx]`` is only right for a default RangeIndex.
    target = np.asarray(data_y)
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_rmse = []
    for tr_idx, val_idx in cv.split(data_x):

        X_train = data_x.iloc[tr_idx, :].values
        y_train = target[tr_idx]

        X_valid = data_x.iloc[val_idx, :].values
        y_valid = target[val_idx]

        xgb_dtrain = xgb.DMatrix(data=X_train, label=y_train)
        xgb_dvalid = xgb.DMatrix(data=X_valid, label=y_valid)

        # 'eval' is listed last in `evals`; the training log reports that
        # 'eval-rmse' is the metric used for early stopping.
        booster = xgb.train(params=xgb_final_param, dtrain=xgb_dtrain, num_boost_round=20000, early_stopping_rounds=100, verbose_eval=1000, evals=[(xgb_dtrain, 'train'), (xgb_dvalid, 'eval')])

        # Only the score is needed afterwards, so the booster itself is not kept
        fold_rmse.append(booster.best_score)

    # Row labels follow the number of folds actually run instead of assuming 5
    index = [f'cv{fold}' for fold in range(1, len(fold_rmse) + 1)] + ['평균']
    performance = fold_rmse + [np.mean(fold_rmse)]

    return pd.DataFrame({'xgb': performance}, index=index)

In [6]:
def cb_model(data_x, data_y):
    """Cross-validate CatBoost with early stopping and report per-fold RMSE.

    Each fold trains for up to 20000 iterations with a 100-iteration
    early-stopping window against the validation pool; the fold score is the
    model's best validation RMSE.

    Args:
        data_x: Feature DataFrame; rows are selected positionally via ``.iloc``.
        data_y: Target values aligned row-for-row with ``data_x``.

    Returns:
        A one-column DataFrame named 'cb' holding the best validation RMSE of
        each fold (rows 'cv1' .. 'cvK') followed by their mean (row '평균').
    """
    # No column is declared categorical
    cat_cols = []

    # KFold yields positional indices, so index the target positionally too;
    # label-based ``data_y[idx]`` is only right for a default RangeIndex.
    target = np.asarray(data_y)
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_rmse = []
    for tr_idx, val_idx in cv.split(data_x):

        X_train, y_train = data_x.iloc[tr_idx, :], target[tr_idx]
        X_valid, y_valid = data_x.iloc[val_idx, :], target[val_idx]

        cb_dtrain = Pool(data=X_train, label=y_train, cat_features=cat_cols)
        cb_dvalid = Pool(data=X_valid, label=y_valid, cat_features=cat_cols)

        regressor = CatBoostRegressor(iterations=20000, eval_metric='RMSE', loss_function='RMSE', verbose=0)
        regressor.fit(cb_dtrain, eval_set=cb_dvalid, early_stopping_rounds=100, verbose_eval=1000, use_best_model=True)

        # Only the score is needed afterwards, so the model itself is not kept
        fold_rmse.append(regressor.best_score_['validation']['RMSE'])

    # Row labels follow the number of folds actually run instead of assuming 5
    index = [f'cv{fold}' for fold in range(1, len(fold_rmse) + 1)] + ['평균']
    performance = fold_rmse + [np.mean(fold_rmse)]

    return pd.DataFrame({'cb': performance}, index=index)

In [7]:
def knn_model(data_x, data_y):
    """Cross-validate a default KNeighborsRegressor and report per-fold RMSE.

    Args:
        data_x: Feature DataFrame; rows are selected positionally via ``.iloc``.
        data_y: Target values aligned row-for-row with ``data_x``.

    Returns:
        A one-column DataFrame named 'knn' holding the validation RMSE of each
        fold (rows 'cv1' .. 'cvK') followed by their mean (row '평균').
    """
    # KFold yields positional indices, so index the target positionally too;
    # label-based ``data_y[idx]`` is only right for a default RangeIndex.
    target = np.asarray(data_y)
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_rmse = []
    for tr_idx, val_idx in cv.split(data_x):
        X_train, y_train = data_x.iloc[tr_idx, :], target[tr_idx]
        X_valid, y_valid = data_x.iloc[val_idx, :], target[val_idx]

        model = KNeighborsRegressor()
        model.fit(X_train, y_train)

        pred_valid = model.predict(X_valid)
        # sqrt(MSE) rather than ``squared=False``, which scikit-learn deprecated in 1.4
        fold_rmse.append(np.sqrt(mean_squared_error(y_valid, pred_valid)))

    # Row labels follow the number of folds actually run instead of assuming 5
    index = [f'cv{fold}' for fold in range(1, len(fold_rmse) + 1)] + ['평균']
    performance = fold_rmse + [np.mean(fold_rmse)]

    return pd.DataFrame({'knn': performance}, index=index)

In [8]:
def regression_model(data_x, data_y):
    """Cross-validate an ordinary LinearRegression and report per-fold RMSE.

    Args:
        data_x: Feature DataFrame; rows are selected positionally via ``.iloc``.
        data_y: Target values aligned row-for-row with ``data_x``.

    Returns:
        A one-column DataFrame named 'regression' holding the validation RMSE
        of each fold (rows 'cv1' .. 'cvK') followed by their mean (row '평균').
    """
    # KFold yields positional indices, so index the target positionally too;
    # label-based ``data_y[idx]`` is only right for a default RangeIndex.
    target = np.asarray(data_y)
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_rmse = []
    for tr_idx, val_idx in cv.split(data_x):
        X_train, y_train = data_x.iloc[tr_idx, :], target[tr_idx]
        X_valid, y_valid = data_x.iloc[val_idx, :], target[val_idx]

        model = LinearRegression()
        model.fit(X_train, y_train)

        pred_valid = model.predict(X_valid)
        # sqrt(MSE) rather than ``squared=False``, which scikit-learn deprecated in 1.4
        fold_rmse.append(np.sqrt(mean_squared_error(y_valid, pred_valid)))

    # Row labels follow the number of folds actually run instead of assuming 5
    index = [f'cv{fold}' for fold in range(1, len(fold_rmse) + 1)] + ['평균']
    performance = fold_rmse + [np.mean(fold_rmse)]

    return pd.DataFrame({'regression': performance}, index=index)

# train, test 분할 후 모델링

In [9]:
# Load the modelling tables: one CSV for hitters and one for pitchers
hitter = pd.read_csv('../선수데이터(전처리완료)/모델링용ver1/타자(모델링용).csv')
pitcher = pd.read_csv('../선수데이터(전처리완료)/모델링용ver1/투수(모델링용).csv')

In [10]:
# Rank columns taken from the filter-method files and appended to the RFE ranks
filter_rank_cols = ['fscore순위', 'mutual_info순위']

# Hitters: RFE rank table plus the two filter-method rank columns, joined side by side
# NOTE(review): concat(axis=1) aligns on the row index, not on '변수명' -
# presumably both files list the variables in the same order; verify.
hitter_score1 = pd.read_csv('../변수중요도데이터/RFE_타자.csv')
hitter_score2 = pd.read_csv('../변수중요도데이터/filter_타자.csv')
hitter_score = pd.concat([hitter_score1, hitter_score2[filter_rank_cols]], axis=1)

# Pitchers: same construction
pitcher_score1 = pd.read_csv('../변수중요도데이터/RFE_투수.csv')
pitcher_score2 = pd.read_csv('../변수중요도데이터/filter_투수.csv')
pitcher_score = pd.concat([pitcher_score1, pitcher_score2[filter_rank_cols]], axis=1)
pitcher_score

Unnamed: 0,변수명,실제변수명,rf순위,lgbm순위,xgb순위,cb순위,regression순위,fscore순위,mutual_info순위
0,variable1,데뷔년도,15,3,4,12,32,36.0,3.0
1,variable2,연도,11,19,9,10,31,47.0,5.0
2,variable3,평균자책점,30,29,2,19,64,59.0,48.0
3,variable4,경기,37,39,10,37,27,39.0,10.0
4,variable5,승리,68,44,39,52,45,6.0,15.0
...,...,...,...,...,...,...,...,...,...
75,variable76,팀명_롯데,46,59,60,40,18,73.0,74.0
76,variable77,팀명_삼성,42,71,70,45,17,57.0,75.0
77,variable78,팀명_우리/히어로즈/넥센/키움,78,79,69,75,13,67.0,67.0
78,variable79,팀명_한화,53,64,62,50,60,76.0,64.0


#### 1. 타자

In [11]:
# Collects one column per (model, feature count) for the f-score ranking
hitter_fscore = pd.DataFrame()

# Separate the renamed feature columns from the salary target
col_dict, X, y = Xy_split(hitter)

# Cross-validation routine of every model, keyed by the column name it reports
model_fns = {'rf': rf_model, 'lgbm': lgbm_model, 'xgb': xgb_model,
             'cb': cb_model, 'knn': knn_model, 'regression': regression_model}

# Evaluate the top 5, 10, 15, ... features; starting at 5 skips the empty feature set
for n_features in range(5, len(col_dict)+1, 5):

    # Variables whose f-score rank is one of 1..n_features
    top_ranks = list(range(1, n_features+1))
    is_selected = hitter_score['fscore순위'].isin(top_ranks)
    features = list(hitter_score.loc[is_selected, '변수명'])

    # Run every model on the selected features and suffix its column with the feature count
    X_tmp = X[features]
    tmp = pd.concat([fn(X_tmp, y).rename(columns={name: f'{name}{n_features}'})
                     for name, fn in model_fns.items()], axis=1)

    # Appended every iteration so partial results survive an interrupted run
    hitter_fscore = pd.concat([hitter_fscore, tmp], axis=1)



Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[40]	valid_0's rmse: 18718.3


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[18]	valid_0's rmse: 14828


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[97]	valid_0's rmse: 17008.7


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[42]	valid_0's rmse: 17721.7


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[21]	valid_0's rmse: 19588
[0]	train-rmse:27682.33594	eval-rmse:30439.45508
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[11]	train-rmse:9004.83301	eval-rmse:19012.75195

[0]	train-rmse:29058.82422	eval-rmse:24552.20898
Multiple eval metrics have been passed: 'eval-

Early stopping, best iteration is:
[203]	valid_0's rmse: 18762.1
[0]	train-rmse:27611.22266	eval-rmse:30783.54883
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[10]	train-rmse:7705.31494	eval-rmse:19231.68945

[0]	train-rmse:28910.21094	eval-rmse:24539.82031
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[14]	train-rmse:7426.54688	eval-rmse:15483.23242

[0]	train-rmse:27997.53320	eval-rmse:28204.55859
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[7]	train-rmse:9268.52832	eval-rmse:17708.87500

[0]	train-rmse:28080.71289	eval-rmse:29025.40430
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopp

Stopping. Best iteration:
[10]	train-rmse:6775.36865	eval-rmse:16826.67383

[0]	train-rmse:28670.40234	eval-rmse:23961.64844
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[9]	train-rmse:7408.14990	eval-rmse:13902.88672

[0]	train-rmse:27843.46484	eval-rmse:28187.60742
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[8]	train-rmse:7179.73389	eval-rmse:15549.32227

[0]	train-rmse:27681.60547	eval-rmse:29759.74805
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[18]	train-rmse:4854.25830	eval-rmse:15230.90234

[0]	train-rmse:27105.89453	eval-rmse:31107.49805
Multiple eval metrics have been passed: 'eval-rmse' will be used for e

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[22]	train-rmse:4028.32812	eval-rmse:16625.96094

[0]	train-rmse:28702.12305	eval-rmse:24034.42383
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[11]	train-rmse:6050.20312	eval-rmse:12319.03418

[0]	train-rmse:27797.55664	eval-rmse:28144.36523
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[7]	train-rmse:7742.41846	eval-rmse:15939.64648

[0]	train-rmse:27692.81641	eval-rmse:29765.54297
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[18]	train-rmse:4739.03418	eval-rmse:15334.72363

[0]	train-rmse:27060.39648	eval-rmse:31204.33008
Multiple ev

[1000]	valid_0's rmse: 10620
Early stopping, best iteration is:
[1286]	valid_0's rmse: 10616.9


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[357]	valid_0's rmse: 14706.6


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[34]	valid_0's rmse: 15357.9


Training until validation scores don't improve for 100 rounds
[1000]	valid_0's rmse: 14666.8
[2000]	valid_0's rmse: 14658.4
Early stopping, best iteration is:
[2335]	valid_0's rmse: 14658
[0]	train-rmse:27132.45117	eval-rmse:30063.53320
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[116]	train-rmse:414.60873	eval-rmse:15357.56152

[0]	train-rmse:28457.08984	eval-rmse:23666.64258
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 roun

Stopped by overfitting detector  (100 iterations wait)

bestTest = 10911.72968
bestIteration = 3740

Shrink model to first 3741 iterations.
Learning rate set to 0.008942
0:	learn: 33034.4306861	test: 32601.9475125	best: 32601.9475125 (0)	total: 5.44ms	remaining: 1m 48s
1000:	learn: 8637.1000719	test: 14206.2344423	best: 14206.2344423 (1000)	total: 6.2s	remaining: 1m 57s
2000:	learn: 5645.3496735	test: 13574.6076287	best: 13574.6076287 (2000)	total: 12.1s	remaining: 1m 49s
3000:	learn: 4011.2140616	test: 13388.0446352	best: 13388.0446352 (3000)	total: 17.7s	remaining: 1m 40s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 13353.72151
bestIteration = 3223

Shrink model to first 3224 iterations.
Learning rate set to 0.008942
0:	learn: 32876.6129587	test: 33258.3836202	best: 33258.3836202 (0)	total: 5.33ms	remaining: 1m 46s
1000:	learn: 8171.5945141	test: 14781.3821897	best: 14781.1768234 (997)	total: 7.77s	remaining: 2m 27s
2000:	learn: 5545.6849650	test: 14513.0053201	

[0]	train-rmse:28453.91992	eval-rmse:23906.81445
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[105]	train-rmse:564.78918	eval-rmse:11304.96973

[0]	train-rmse:27545.58398	eval-rmse:27476.15625
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[13]	train-rmse:4131.83008	eval-rmse:14104.52832

[0]	train-rmse:27435.97461	eval-rmse:29598.56445
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[23]	train-rmse:3284.10791	eval-rmse:14907.64648

[0]	train-rmse:26944.20703	eval-rmse:31302.85156
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.


4000:	learn: 3119.6931931	test: 13039.2140395	best: 13039.1016384 (3998)	total: 24.7s	remaining: 1m 38s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 13004.10633
bestIteration = 4695

Shrink model to first 4696 iterations.
Learning rate set to 0.008942
0:	learn: 32889.8910150	test: 33264.2226413	best: 33264.2226413 (0)	total: 5.74ms	remaining: 1m 54s
1000:	learn: 8097.5277319	test: 14579.2134528	best: 14578.5741013 (998)	total: 5.75s	remaining: 1m 49s
2000:	learn: 5523.4987694	test: 14246.2194535	best: 14246.2073148 (1999)	total: 11.3s	remaining: 1m 41s
3000:	learn: 4082.1922353	test: 14094.7968551	best: 14094.7968551 (3000)	total: 16.9s	remaining: 1m 35s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 14065.5345
bestIteration = 3217

Shrink model to first 3218 iterations.
Learning rate set to 0.008942
0:	learn: 32090.7304479	test: 36228.6462566	best: 36228.6462566 (0)	total: 8.2ms	remaining: 2m 43s
1000:	learn: 8504.7674348	test: 15562.5159229	b

Stopping. Best iteration:
[44]	train-rmse:1646.20068	eval-rmse:14203.32227

[0]	train-rmse:27435.97461	eval-rmse:29598.56445
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[167]	train-rmse:210.74344	eval-rmse:14685.56250

[0]	train-rmse:26923.01367	eval-rmse:31265.57031
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[25]	train-rmse:2812.61645	eval-rmse:14593.20019

Learning rate set to 0.008942
0:	learn: 32570.9605899	test: 34507.7670092	best: 34507.7670092 (0)	total: 4.91ms	remaining: 1m 38s
1000:	learn: 8752.6200474	test: 14999.1606871	best: 14999.1606871 (1000)	total: 5.6s	remaining: 1m 46s
2000:	learn: 6016.0321630	test: 14396.2275137	best: 14396.0432231 (1998)	total: 11.2s	remaining: 1m 40s
3000:	learn: 4384.0133637	test: 14193.8355801	bes

4000:	learn: 3074.5137065	test: 13632.3025481	best: 13632.3025481 (4000)	total: 25.2s	remaining: 1m 40s
5000:	learn: 2462.7570916	test: 13574.5012649	best: 13574.2388890 (4998)	total: 31.4s	remaining: 1m 34s
6000:	learn: 1996.8916799	test: 13525.4767155	best: 13525.3893729 (5999)	total: 37.6s	remaining: 1m 27s
7000:	learn: 1647.8019193	test: 13495.6208153	best: 13495.5091064 (6982)	total: 43.9s	remaining: 1m 21s
8000:	learn: 1378.8203872	test: 13478.2036420	best: 13477.5532401 (7954)	total: 50.9s	remaining: 1m 16s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 13477.55324
bestIteration = 7954

Shrink model to first 7955 iterations.
Learning rate set to 0.008942
0:	learn: 32092.0473681	test: 36222.5147428	best: 36222.5147428 (0)	total: 7.14ms	remaining: 2m 22s
1000:	learn: 8354.9481579	test: 15272.0740879	best: 15270.4116949 (995)	total: 6.92s	remaining: 2m 11s
2000:	learn: 5535.2153348	test: 14665.2668870	best: 14664.3835151 (1993)	total: 13.2s	remaining: 1m 59s
300

In [12]:
# Collects one column per (model, feature count) for the mutual-information ranking
hitter_mutual_info = pd.DataFrame()

# Separate the renamed feature columns from the salary target
col_dict, X, y = Xy_split(hitter)

# Cross-validation routine of every model, keyed by the column name it reports
model_fns = {'rf': rf_model, 'lgbm': lgbm_model, 'xgb': xgb_model,
             'cb': cb_model, 'knn': knn_model, 'regression': regression_model}

# Evaluate the top 5, 10, 15, ... features; starting at 5 skips the empty feature set
for n_features in range(5, len(col_dict)+1, 5):

    # Variables whose mutual-information rank is one of 1..n_features
    top_ranks = list(range(1, n_features+1))
    is_selected = hitter_score['mutual_info순위'].isin(top_ranks)
    features = list(hitter_score.loc[is_selected, '변수명'])

    # Run every model on the selected features and suffix its column with the feature count
    X_tmp = X[features]
    tmp = pd.concat([fn(X_tmp, y).rename(columns={name: f'{name}{n_features}'})
                     for name, fn in model_fns.items()], axis=1)

    # Appended every iteration so partial results survive an interrupted run
    hitter_mutual_info = pd.concat([hitter_mutual_info, tmp], axis=1)



Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[210]	valid_0's rmse: 15794.4


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[30]	valid_0's rmse: 12993.2


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[25]	valid_0's rmse: 17132


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[72]	valid_0's rmse: 14991.4


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[36]	valid_0's rmse: 17488.9
[0]	train-rmse:27600.57227	eval-rmse:30053.10156
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[15]	train-rmse:6669.93262	eval-rmse:14296.98731

[0]	train-rmse:28765.30078	eval-rmse:23367.40234
Multiple eval metrics have been passed: 'ev

1000:	learn: 9751.5134688	test: 14630.8191642	best: 14628.3257898 (956)	total: 4.96s	remaining: 1m 34s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 14528.94287
bestIteration = 1395

Shrink model to first 1396 iterations.
Learning rate set to 0.008942
0:	learn: 32083.0045469	test: 36209.8558889	best: 36209.8558889 (0)	total: 3.77ms	remaining: 1m 15s
1000:	learn: 10151.5137974	test: 14232.1939736	best: 14231.1565018 (999)	total: 3.75s	remaining: 1m 11s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 13921.43661
bestIteration = 1565

Shrink model to first 1566 iterations.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[52]	valid_0's rmse: 16875.8


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[31]	valid_0's rmse: 12932.1


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[77]	valid_0's rmse: 15853.1

1000:	learn: 9764.0725712	test: 14467.8317341	best: 14467.8317341 (1000)	total: 3.4s	remaining: 1m 4s
2000:	learn: 7168.2720681	test: 13991.4353940	best: 13989.5604851 (1989)	total: 6.52s	remaining: 58.7s
3000:	learn: 5581.9710076	test: 13880.4954451	best: 13879.7706522 (2929)	total: 9.65s	remaining: 54.7s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 13879.77065
bestIteration = 2929

Shrink model to first 2930 iterations.
Learning rate set to 0.008942
0:	learn: 32865.7806225	test: 33247.1670674	best: 33247.1670674 (0)	total: 3.68ms	remaining: 1m 13s
1000:	learn: 9558.7587389	test: 15339.6852317	best: 15338.8246963 (999)	total: 4.1s	remaining: 1m 17s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 15274.7849
bestIteration = 1217

Shrink model to first 1218 iterations.
Learning rate set to 0.008942
0:	learn: 32083.5440926	test: 36237.8051291	best: 36237.8051291 (0)	total: 3.96ms	remaining: 1m 19s
1000:	learn: 9801.3937373	test: 15620.2083571	best:

1000:	learn: 9640.1637689	test: 15495.4173927	best: 15495.4173927 (1000)	total: 3.8s	remaining: 1m 12s
2000:	learn: 6827.3504008	test: 14889.8681069	best: 14889.6693124 (1999)	total: 7.5s	remaining: 1m 7s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 14721.471
bestIteration = 2581

Shrink model to first 2582 iterations.
Learning rate set to 0.008942
0:	learn: 34160.1262425	test: 27584.4643222	best: 27584.4643222 (0)	total: 4.19ms	remaining: 1m 23s
1000:	learn: 10121.3180375	test: 12329.0557755	best: 12328.3694120 (999)	total: 3.98s	remaining: 1m 15s
2000:	learn: 7150.7006086	test: 11898.7269959	best: 11898.4502692 (1977)	total: 7.69s	remaining: 1m 9s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 11884.38061
bestIteration = 2059

Shrink model to first 2060 iterations.
Learning rate set to 0.008942
0:	learn: 33040.1289238	test: 32610.0070478	best: 32610.0070478 (0)	total: 3.99ms	remaining: 1m 19s
1000:	learn: 9402.7961319	test: 14414.4493214	best

[0]	train-rmse:27587.86719	eval-rmse:27235.95508
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[6]	train-rmse:7982.71338	eval-rmse:14879.65039

[0]	train-rmse:27524.79492	eval-rmse:29380.70312
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[262]	train-rmse:58.61169	eval-rmse:14991.30078

[0]	train-rmse:27048.10742	eval-rmse:31448.21680
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[17]	train-rmse:3771.98389	eval-rmse:15578.15723

Learning rate set to 0.008942
0:	learn: 32566.8980296	test: 34478.1991309	best: 34478.1991309 (0)	total: 13.9ms	remaining: 4m 37s
1000:	learn: 9459.7702928	test: 15840.3078383	best: 15840.3078383

2000:	learn: 6564.7724025	test: 15178.7014789	best: 15177.6610067 (1998)	total: 10.6s	remaining: 1m 35s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 15029.05103
bestIteration = 2739

Shrink model to first 2740 iterations.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[111]	valid_0's rmse: 16695.6


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[192]	valid_0's rmse: 11570.9


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[471]	valid_0's rmse: 14732


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[31]	valid_0's rmse: 15520.8


Training until validation scores don't improve for 100 rounds
[1000]	valid_0's rmse: 15541.9
[2000]	valid_0's rmse: 15530.7
[3000]	valid_0's rmse: 15529.3
Early stopping, best iteration is:
[3564]	valid_0's rmse: 15529.2
[0]	train-rmse:27196

4000:	learn: 3480.4380269	test: 14722.7363081	best: 14721.5827092 (3993)	total: 21.5s	remaining: 1m 26s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 14715.33732
bestIteration = 4038

Shrink model to first 4039 iterations.
Learning rate set to 0.008942
0:	learn: 34158.4061850	test: 27582.1479975	best: 27582.1479975 (0)	total: 5.85ms	remaining: 1m 56s
1000:	learn: 9492.8004364	test: 11968.6042998	best: 11968.6042998 (1000)	total: 5.8s	remaining: 1m 50s
2000:	learn: 6314.2125176	test: 11402.1500087	best: 11402.1500087 (2000)	total: 11.7s	remaining: 1m 45s
3000:	learn: 4603.1635613	test: 11202.1856829	best: 11202.1856829 (3000)	total: 17.4s	remaining: 1m 38s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 11161.70229
bestIteration = 3364

Shrink model to first 3365 iterations.
Learning rate set to 0.008942
0:	learn: 33030.7855362	test: 32612.4300313	best: 32612.4300313 (0)	total: 6.09ms	remaining: 2m 1s
1000:	learn: 9095.5008698	test: 14595.0380843	



Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[161]	valid_0's rmse: 15562.9


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[183]	valid_0's rmse: 11106.9


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[603]	valid_0's rmse: 13909.1


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[35]	valid_0's rmse: 14878.6


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[826]	valid_0's rmse: 15151.2
[0]	train-rmse:27116.97266	eval-rmse:30039.31641
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[42]	train-rmse:1760.45227	eval-rmse:14827.63769

[0]	train-rmse:28448.32812	eval-rmse:23985.80273
Multiple eval metrics have been passed

3000:	learn: 4206.9847561	test: 14079.5366341	best: 14078.4164673 (2997)	total: 15.8s	remaining: 1m 29s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 14043.16223
bestIteration = 3268

Shrink model to first 3269 iterations.
Learning rate set to 0.008942
0:	learn: 34176.5160084	test: 27600.1531002	best: 27600.1531002 (0)	total: 5.54ms	remaining: 1m 50s
1000:	learn: 8832.4914614	test: 11839.0944636	best: 11835.6965165 (995)	total: 5.96s	remaining: 1m 53s
2000:	learn: 5751.3130180	test: 11237.1502115	best: 11237.1296589 (1998)	total: 11.6s	remaining: 1m 44s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 11170.58981
bestIteration = 2557

Shrink model to first 2558 iterations.
Learning rate set to 0.008942
0:	learn: 33057.0830147	test: 32639.3073168	best: 32639.3073168 (0)	total: 5.72ms	remaining: 1m 54s
1000:	learn: 8470.1082866	test: 13654.2247146	best: 13653.8172223 (998)	total: 6.1s	remaining: 1m 55s
2000:	learn: 5551.2845620	test: 12948.8049876	b

2000:	learn: 5644.5506539	test: 14621.8523302	best: 14620.2640065 (1996)	total: 12.2s	remaining: 1m 49s
3000:	learn: 4146.1010305	test: 14378.4546906	best: 14376.8702323 (2992)	total: 18.3s	remaining: 1m 43s
4000:	learn: 3092.8524060	test: 14304.2072090	best: 14304.1931443 (3998)	total: 24.5s	remaining: 1m 38s
5000:	learn: 2368.2484559	test: 14257.9655367	best: 14257.9655367 (5000)	total: 30.9s	remaining: 1m 32s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 14246.07029
bestIteration = 5386

Shrink model to first 5387 iterations.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[159]	valid_0's rmse: 15166.5


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[181]	valid_0's rmse: 11672.8


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[549]	valid_0's rmse: 13870.2


Training until validation scores don't improve for 100 

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[55]	train-rmse:1461.91467	eval-rmse:13464.80176

Learning rate set to 0.008942
0:	learn: 32585.1885039	test: 34516.7998474	best: 34516.7998474 (0)	total: 6.01ms	remaining: 2m
1000:	learn: 8618.6322410	test: 14939.0509989	best: 14939.0509989 (1000)	total: 5.15s	remaining: 1m 37s
2000:	learn: 5729.0006922	test: 14277.1865377	best: 14277.1865377 (2000)	total: 10.6s	remaining: 1m 35s
3000:	learn: 4166.5551861	test: 14094.2673214	best: 14093.3181133 (2989)	total: 16.5s	remaining: 1m 33s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 14086.72229
bestIteration = 3186

Shrink model to first 3187 iterations.
Learning rate set to 0.008942
0:	learn: 34180.5600606	test: 27603.4899104	best: 27603.4899104 (0)	total: 6.18ms	remaining: 2m 3s
1000:	learn: 8773.4499589	test: 11834.2514931	best: 11834.2514931 (1000)	total: 6.18s	remaining: 1m 57s
2000:	learn: 5561.6193418	test: 11183.8627760	best: 111

#### 2. 투수

In [13]:
# Collects one column per (model, feature count) for the f-score ranking
pitcher_fscore = pd.DataFrame()

# Separate the renamed feature columns from the salary target
col_dict, X, y = Xy_split(pitcher)

# Cross-validation routine of every model, keyed by the column name it reports
model_fns = {'rf': rf_model, 'lgbm': lgbm_model, 'xgb': xgb_model,
             'cb': cb_model, 'knn': knn_model, 'regression': regression_model}

# Evaluate the top 5, 10, 15, ... features; starting at 5 skips the empty feature set
for n_features in range(5, len(col_dict)+1, 5):

    # Variables whose f-score rank is one of 1..n_features
    top_ranks = list(range(1, n_features+1))
    is_selected = pitcher_score['fscore순위'].isin(top_ranks)
    features = list(pitcher_score.loc[is_selected, '변수명'])

    # Run every model on the selected features and suffix its column with the feature count
    X_tmp = X[features]
    tmp = pd.concat([fn(X_tmp, y).rename(columns={name: f'{name}{n_features}'})
                     for name, fn in model_fns.items()], axis=1)

    # Appended every iteration so partial results survive an interrupted run
    pitcher_fscore = pd.concat([pitcher_fscore, tmp], axis=1)



Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[30]	valid_0's rmse: 13853.7


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[85]	valid_0's rmse: 13857.9


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[66]	valid_0's rmse: 15137.6


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[50]	valid_0's rmse: 14094.8


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[24]	valid_0's rmse: 12824.3
[0]	train-rmse:20275.16211	eval-rmse:23433.47461
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[14]	train-rmse:5750.51660	eval-rmse:14286.00977

[0]	train-rmse:20307.83984	eval-rmse:24936.39258
Multiple eval metrics have been passed: 'e

Early stopping, best iteration is:
[109]	valid_0's rmse: 12666.4


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[124]	valid_0's rmse: 14950.9


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[65]	valid_0's rmse: 12991


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[32]	valid_0's rmse: 12716.5
[0]	train-rmse:20253.81641	eval-rmse:22875.79688
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[6]	train-rmse:6770.05469	eval-rmse:14815.89941

[0]	train-rmse:20188.74609	eval-rmse:25153.30859
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[45]	train-rmse:2262.15576	eval-rmse:14249.99707

[0]	train-rmse:

Early stopping, best iteration is:
[307]	valid_0's rmse: 14885.6


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[41]	valid_0's rmse: 13014.2


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[31]	valid_0's rmse: 12339.2
[0]	train-rmse:20195.26758	eval-rmse:24583.82227
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[12]	train-rmse:4287.60889	eval-rmse:17981.58789

[0]	train-rmse:20131.44141	eval-rmse:25537.37695
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[28]	train-rmse:3110.05347	eval-rmse:14272.56641

[0]	train-rmse:20622.78320	eval-rmse:21908.99609
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will t

Early stopping, best iteration is:
[107]	valid_0's rmse: 12084.3


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[271]	valid_0's rmse: 13513.9


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[25]	valid_0's rmse: 13289


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[99]	valid_0's rmse: 12091.8
[0]	train-rmse:20030.54883	eval-rmse:24459.59570
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[19]	train-rmse:3441.82300	eval-rmse:18231.65625

[0]	train-rmse:19842.04102	eval-rmse:25158.60156
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[30]	train-rmse:2705.89356	eval-rmse:13941.47168

[0]	train-rmse



Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[47]	valid_0's rmse: 14385.8


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[131]	valid_0's rmse: 12113


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[467]	valid_0's rmse: 12600.7


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[35]	valid_0's rmse: 12463.8


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[191]	valid_0's rmse: 11068.9
[0]	train-rmse:19807.91211	eval-rmse:22404.45898
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[16]	train-rmse:3054.02393	eval-rmse:13590.86133

[0]	train-rmse:19712.84766	eval-rmse:24728.51953
Multiple eval metrics have been passed: '

1000:	learn: 6979.6029849	test: 12358.1913002	best: 12358.1913002 (1000)	total: 4.23s	remaining: 1m 20s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12194.54681
bestIteration = 1842

Shrink model to first 1843 iterations.
Learning rate set to 0.008636
0:	learn: 24891.4007713	test: 20558.4068117	best: 20558.4068117 (0)	total: 4.24ms	remaining: 1m 24s
1000:	learn: 6979.5641976	test: 10319.6943368	best: 10319.6943368 (1000)	total: 4.27s	remaining: 1m 21s
2000:	learn: 4920.5177819	test: 10058.6424170	best: 10058.6424170 (2000)	total: 8.19s	remaining: 1m 13s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 9935.990863
bestIteration = 2882

Shrink model to first 2883 iterations.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[48]	valid_0's rmse: 14173.8


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[90]	valid_0's rmse: 11847.8


Training until valid

2000:	learn: 5053.2269509	test: 12929.6725504	best: 12929.6499037 (1999)	total: 9.43s	remaining: 1m 24s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12820.19283
bestIteration = 2596

Shrink model to first 2597 iterations.
Learning rate set to 0.008636
0:	learn: 24060.2846357	test: 24215.6753230	best: 24215.6753230 (0)	total: 4.76ms	remaining: 1m 35s
1000:	learn: 7391.2336468	test: 13433.0119211	best: 13433.0119211 (1000)	total: 5.1s	remaining: 1m 36s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 13289.79138
bestIteration = 1451

Shrink model to first 1452 iterations.
Learning rate set to 0.008636
0:	learn: 24500.2836301	test: 22286.7590201	best: 22286.7590201 (0)	total: 5.3ms	remaining: 1m 45s
1000:	learn: 7055.6366654	test: 13289.5038133	best: 13285.3441404 (996)	total: 5.02s	remaining: 1m 35s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 13151.20106
bestIteration = 1294

Shrink model to first 1295 iterations.
Learning ra

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[23]	train-rmse:2451.69409	eval-rmse:11903.44922

Learning rate set to 0.008635
0:	learn: 23614.3069245	test: 25880.4565780	best: 25880.4565780 (0)	total: 4.73ms	remaining: 1m 34s
1000:	learn: 6527.8769842	test: 12554.1148463	best: 12554.1148463 (1000)	total: 4.34s	remaining: 1m 22s
2000:	learn: 4176.1254276	test: 12242.5841592	best: 12238.8324150 (1978)	total: 8.68s	remaining: 1m 18s
3000:	learn: 2983.1502360	test: 12101.7523835	best: 12101.6624972 (2998)	total: 13.4s	remaining: 1m 16s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12067.21257
bestIteration = 3847

Shrink model to first 3848 iterations.
Learning rate set to 0.008636
0:	learn: 23301.1617660	test: 27011.4945770	best: 27011.4945770 (0)	total: 5ms	remaining: 1m 40s
1000:	learn: 6884.8192406	test: 13170.7404841	best: 13170.7404841 (1000)	total: 5.21s	remaining: 1m 38s
2000:	learn: 4667.6753655	test: 12501.7571098	best: 1

Early stopping, best iteration is:
[105]	valid_0's rmse: 11810.6


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[885]	valid_0's rmse: 11801


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[178]	valid_0's rmse: 12771.2


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[173]	valid_0's rmse: 10599.4
[0]	train-rmse:19843.97461	eval-rmse:22357.60156
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[23]	train-rmse:2182.38501	eval-rmse:12472.86719

[0]	train-rmse:19650.34766	eval-rmse:24073.76562
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[53]	train-rmse:1321.89575	eval-rmse:13154.27637

[0]	train-rm

In [14]:
# Accumulator for the score tables of every feature-count setting
# (pitcher data, variables ranked by mutual information).
pitcher_mutual_info = pd.DataFrame()

# Split the pitcher data into predictors X and target y.
# NOTE: this is an X/y split, not a train/test split; Xy_split also returns
# the mapping from the renamed 'variableN' columns to the original names.
col_dict, X, y = Xy_split(pitcher)

# Evaluate the models on the top 5, 10, 15, ... ranked variables.
for n_features in range(5, len(col_dict) + 1, 5):

    # Variables whose mutual-information rank falls within 1..n_features.
    features = pitcher_score.loc[
        pitcher_score['mutual_info순위'].isin(list(range(1, n_features + 1))),
        '변수명',
    ].tolist()

    # Run each model on the reduced feature set and tag its score column
    # with the number of features used (e.g. 'rf' -> 'rf10').
    X_tmp = X[features]
    tmp = pd.concat(
        [
            rf_model(X_tmp, y).rename(columns={'rf': f'rf{n_features}'}),
            lgbm_model(X_tmp, y).rename(columns={'lgbm': f'lgbm{n_features}'}),
            xgb_model(X_tmp, y).rename(columns={'xgb': f'xgb{n_features}'}),
            cb_model(X_tmp, y).rename(columns={'cb': f'cb{n_features}'}),
            knn_model(X_tmp, y).rename(columns={'knn': f'knn{n_features}'}),
            regression_model(X_tmp, y).rename(columns={'regression': f'regression{n_features}'}),
        ],
        axis=1,
    )

    # Append this setting's columns to the running result table.
    pitcher_mutual_info = pd.concat([pitcher_mutual_info, tmp], axis=1)



Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[567]	valid_0's rmse: 13151.1


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[30]	valid_0's rmse: 14828.1


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[151]	valid_0's rmse: 13743.9


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[158]	valid_0's rmse: 12413.2


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[207]	valid_0's rmse: 10965.8
[0]	train-rmse:20258.26562	eval-rmse:23553.86328
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[186]	train-rmse:650.10754	eval-rmse:13104.29102

[0]	train-rmse:20155.48242	eval-rmse:24417.89844
Multiple eval metrics have been passed

2000:	learn: 5572.1408443	test: 11820.8917286	best: 11818.6205690 (1987)	total: 4.3s	remaining: 38.7s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 11811.59724
bestIteration = 2037

Shrink model to first 2038 iterations.
Learning rate set to 0.008636
0:	learn: 24055.3702356	test: 24161.4301350	best: 24161.4301350 (0)	total: 2.24ms	remaining: 44.9s
1000:	learn: 7616.1202732	test: 12812.0505791	best: 12812.0505791 (1000)	total: 2.38s	remaining: 45.2s
2000:	learn: 5484.9870803	test: 12203.5893265	best: 12203.5893265 (2000)	total: 4.43s	remaining: 39.9s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12203.58933
bestIteration = 2000

Shrink model to first 2001 iterations.
Learning rate set to 0.008636
0:	learn: 24505.0070661	test: 22272.1920953	best: 22272.1920953 (0)	total: 2.38ms	remaining: 47.6s
1000:	learn: 7429.8623574	test: 12344.6003849	best: 12344.6003849 (1000)	total: 2.39s	remaining: 45.3s
Stopped by overfitting detector  (100 iterations wa

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[219]	train-rmse:100.53456	eval-rmse:11306.63086

Learning rate set to 0.008635
0:	learn: 23610.7841567	test: 25880.1271887	best: 25880.1271887 (0)	total: 2.84ms	remaining: 56.7s
1000:	learn: 7479.3680262	test: 13403.0314810	best: 13402.8024350 (997)	total: 2.72s	remaining: 51.7s
2000:	learn: 5229.8977852	test: 13071.3937686	best: 13071.1352559 (1999)	total: 5.59s	remaining: 50.3s
3000:	learn: 3935.0142210	test: 12884.6700547	best: 12884.6700547 (3000)	total: 8.42s	remaining: 47.7s
4000:	learn: 3203.4795429	test: 12748.7032746	best: 12748.4066668 (3999)	total: 11.3s	remaining: 45.2s
5000:	learn: 2602.6878239	test: 12639.4700368	best: 12639.1474656 (4998)	total: 14.2s	remaining: 42.4s
6000:	learn: 2164.8953942	test: 12584.9161652	best: 12584.6115381 (5996)	total: 17.1s	remaining: 39.8s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12576.44897
bestIteration = 6120

Shrink model to fir

1000:	learn: 7525.6147036	test: 11662.7033626	best: 11662.7009554 (999)	total: 3.26s	remaining: 1m 1s
2000:	learn: 5361.5719212	test: 11352.7872010	best: 11352.5191277 (1997)	total: 6.17s	remaining: 55.5s
3000:	learn: 3822.5509564	test: 11102.3318878	best: 11102.0256260 (2996)	total: 9.11s	remaining: 51.6s
4000:	learn: 2938.2926091	test: 10975.6667236	best: 10975.4753525 (3999)	total: 12.1s	remaining: 48.3s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 10964.80475
bestIteration = 4131

Shrink model to first 4132 iterations.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[504]	valid_0's rmse: 13635.6


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[140]	valid_0's rmse: 12166.4


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[340]	valid_0's rmse: 12397


Training until validation scores don't improve for 100 rounds


1000:	learn: 7617.6180032	test: 13077.6530609	best: 13077.4843737 (998)	total: 4.07s	remaining: 1m 17s
2000:	learn: 5425.4949245	test: 12401.3610384	best: 12401.3610384 (2000)	total: 7.72s	remaining: 1m 9s
3000:	learn: 3870.2512660	test: 12112.8334756	best: 12112.6680405 (2999)	total: 11.6s	remaining: 1m 5s
4000:	learn: 3022.8292373	test: 11999.0882384	best: 11998.9922079 (3999)	total: 15.4s	remaining: 1m 1s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 11981.1603
bestIteration = 4293

Shrink model to first 4294 iterations.
Learning rate set to 0.008636
0:	learn: 24055.6720618	test: 24157.8073464	best: 24157.8073464 (0)	total: 4.21ms	remaining: 1m 24s
1000:	learn: 7526.3199812	test: 13634.8152374	best: 13634.8152374 (1000)	total: 4.24s	remaining: 1m 20s
2000:	learn: 5434.9971793	test: 13207.5861330	best: 13207.5861330 (2000)	total: 8.11s	remaining: 1m 12s
3000:	learn: 3891.3466994	test: 13047.9955828	best: 13047.4372799 (2997)	total: 12.1s	remaining: 1m 8s
Stopped 

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[20]	train-rmse:2400.25659	eval-rmse:12514.47754

[0]	train-rmse:19610.28125	eval-rmse:24599.24609
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[30]	train-rmse:2041.58643	eval-rmse:15039.34863

[0]	train-rmse:20349.57812	eval-rmse:22040.23047
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[26]	train-rmse:2325.59863	eval-rmse:12511.99902

[0]	train-rmse:20597.58008	eval-rmse:20878.72070
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[6]	train-rmse:5922.53906	eval-rmse:15109.42285

[0]	train-rmse:20845.93164	eval-rmse:18979.32227
Multiple ev

1000:	learn: 7037.8520844	test: 11536.2264602	best: 11536.2264602 (1000)	total: 5.23s	remaining: 1m 39s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 11473.55741
bestIteration = 1308

Shrink model to first 1309 iterations.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[195]	valid_0's rmse: 14329.4


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[110]	valid_0's rmse: 12316.9


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[477]	valid_0's rmse: 12496.9


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[430]	valid_0's rmse: 13638.5


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[184]	valid_0's rmse: 11235
[0]	train-rmse:19895.98242	eval-rmse:22354.90820
Multiple eval metrics have been passed: 'eval-rmse' will be used

3000:	learn: 3333.6017918	test: 12298.6635233	best: 12295.7860400 (2985)	total: 17.5s	remaining: 1m 39s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12295.78604
bestIteration = 2985

Shrink model to first 2986 iterations.
Learning rate set to 0.008636
0:	learn: 24068.2682791	test: 24175.0937672	best: 24175.0937672 (0)	total: 7.26ms	remaining: 2m 25s
1000:	learn: 6978.7282390	test: 12916.9441548	best: 12916.9441548 (1000)	total: 6.58s	remaining: 2m 4s
2000:	learn: 5036.1707846	test: 12510.5769490	best: 12510.5769490 (2000)	total: 12.6s	remaining: 1m 52s
3000:	learn: 3601.9561128	test: 12272.8244333	best: 12271.8152083 (2987)	total: 18.9s	remaining: 1m 46s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 12203.06396
bestIteration = 3334

Shrink model to first 3335 iterations.
Learning rate set to 0.008636
0:	learn: 24516.9780775	test: 22279.9702920	best: 22279.9702920 (0)	total: 5.59ms	remaining: 1m 51s
1000:	learn: 6705.6709907	test: 12922.5337456

Early stopping, best iteration is:
[206]	valid_0's rmse: 10881.8
[0]	train-rmse:19845.89844	eval-rmse:22378.34180
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[38]	train-rmse:1345.67090	eval-rmse:12345.56152

[0]	train-rmse:19703.69727	eval-rmse:24074.15625
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[63]	train-rmse:910.00220	eval-rmse:12016.71973

[0]	train-rmse:20414.95117	eval-rmse:21898.33398
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 100 rounds.
Stopping. Best iteration:
[11]	train-rmse:3676.84009	eval-rmse:12486.96777

[0]	train-rmse:20690.16016	eval-rmse:21163.26367
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopp

1000:	learn: 6620.5411240	test: 12659.0111984	best: 12659.0111984 (1000)	total: 6.07s	remaining: 1m 55s
2000:	learn: 4542.4598355	test: 12194.0477884	best: 12193.2803658 (1998)	total: 12.2s	remaining: 1m 49s
3000:	learn: 3341.4919984	test: 12011.1423858	best: 12011.1423858 (3000)	total: 18.8s	remaining: 1m 46s
4000:	learn: 2597.7253228	test: 11934.9777782	best: 11933.9421171 (3996)	total: 25.2s	remaining: 1m 40s
5000:	learn: 2075.3734055	test: 11868.9830118	best: 11868.9604596 (4999)	total: 32s	remaining: 1m 36s
Stopped by overfitting detector  (100 iterations wait)

bestTest = 11852.05179
bestIteration = 5456

Shrink model to first 5457 iterations.
Learning rate set to 0.008636
0:	learn: 24887.7699333	test: 20571.4556697	best: 20571.4556697 (0)	total: 6.01ms	remaining: 2m
1000:	learn: 6558.0397248	test: 10800.0618483	best: 10800.0618483 (1000)	total: 7.75s	remaining: 2m 27s
2000:	learn: 4358.7050800	test: 10433.7690776	best: 10432.6451255 (1996)	total: 14.9s	remaining: 2m 13s
3000:	le

# 체크

In [45]:
def checking_hitter(data, model_name, max_features=85, step=5):
    """Extract one model's scores per feature count from a one-row table.

    Args:
        data: DataFrame with exactly one row and a column named
            ``f'{model_name}{k}'`` for every feature count ``k`` in
            ``step, 2*step, ..., max_features``.
        model_name: Column prefix identifying the model, e.g. ``'rf'``.
        max_features: Largest feature count to include. Defaults to 85,
            the value previously hard-coded for the hitter data.
        step: Spacing between consecutive feature counts; it is also the
            smallest count. Defaults to 5.

    Returns:
        A new one-row DataFrame whose index is ``[model_name]`` and whose
        columns are relabelled ``'5개', '10개', ...``. ``data`` itself is
        not modified.

    Raises:
        KeyError: If one of the expected columns is missing from ``data``.
        ValueError: If ``data`` does not have exactly one row.
    """
    # Compute the feature counts once so selection and relabelling cannot
    # drift apart.
    counts = range(step, max_features + 1, step)

    # Column selection with a list yields a new frame, so relabelling below
    # does not touch the caller's data.
    selected = data[[f'{model_name}{k}' for k in counts]]
    selected.index = [model_name]
    selected.columns = [f'{k}개' for k in counts]

    return selected

def checking_pitcher(data, model_name, max_features=80, step=5):
    """Extract one model's scores per feature count from a one-row table.

    Args:
        data: DataFrame with exactly one row and a column named
            ``f'{model_name}{k}'`` for every feature count ``k`` in
            ``step, 2*step, ..., max_features``.
        model_name: Column prefix identifying the model, e.g. ``'rf'``.
        max_features: Largest feature count to include. Defaults to 80,
            the value previously hard-coded for the pitcher data.
        step: Spacing between consecutive feature counts; it is also the
            smallest count. Defaults to 5.

    Returns:
        A new one-row DataFrame whose index is ``[model_name]`` and whose
        columns are relabelled ``'5개', '10개', ...``. ``data`` itself is
        not modified.

    Raises:
        KeyError: If one of the expected columns is missing from ``data``.
        ValueError: If ``data`` does not have exactly one row.
    """
    # Compute the feature counts once so selection and relabelling cannot
    # drift apart.
    counts = range(step, max_features + 1, step)

    # Column selection with a list yields a new frame, so relabelling below
    # does not touch the caller's data.
    selected = data[[f'{model_name}{k}' for k in counts]]
    selected.index = [model_name]
    selected.columns = [f'{k}개' for k in counts]

    return selected

#### 1. f score

In [43]:
# Keep only the '평균' row of the hitter F-score results, as a one-row frame.
hitter_fscore2 = hitter_fscore.loc['평균'].to_frame().T

# One single-row table per model, indexed by the model name.
(hitter_fscore2_rf,
 hitter_fscore2_lgbm,
 hitter_fscore2_xgb,
 hitter_fscore2_cb,
 hitter_fscore2_knn,
 hitter_fscore2_regression) = (
    checking_hitter(hitter_fscore2, model)
    for model in ('rf', 'lgbm', 'xgb', 'cb', 'knn', 'regression')
)

# Stack the per-model rows and flip the table so that rows are feature
# counts and columns are models.
pd.concat([
    hitter_fscore2_rf,
    hitter_fscore2_lgbm,
    hitter_fscore2_xgb,
    hitter_fscore2_cb,
    hitter_fscore2_knn,
    hitter_fscore2_regression,
]).T

Unnamed: 0,rf,lgbm,xgb,cb,knn,regression
5개,17504.443466,17572.908469,17523.007813,16792.186713,22729.939301,19744.975837
10개,17384.146584,17516.841164,17373.697266,16498.246991,22211.356897,19695.015799
15개,17256.713008,17458.964254,17795.922656,16534.807437,21646.499918,19674.126566
20개,16342.094729,16508.444214,16064.032227,15835.718819,21780.241481,19626.691185
25개,15862.850234,15951.537868,15754.557031,15207.942964,21715.468591,18580.153585
30개,15719.415478,15593.264999,15597.358398,14940.133895,21593.349221,18564.962772
35개,15711.516651,15565.42926,15457.283985,14866.738968,21924.033075,18572.448875
40개,15886.670888,15732.816678,15779.917969,14966.948011,21905.91718,18589.769417
45개,14584.438314,14261.385804,14022.515039,13398.279509,21871.255391,18529.15001
50개,14753.558237,14399.954235,14233.694336,13513.445483,21937.493563,18542.159901


In [46]:
# Keep only the '평균' row of the pitcher F-score results, as a one-row frame.
pitcher_fscore2 = pitcher_fscore.loc['평균'].to_frame().T

# One single-row table per model, indexed by the model name.
(pitcher_fscore2_rf,
 pitcher_fscore2_lgbm,
 pitcher_fscore2_xgb,
 pitcher_fscore2_cb,
 pitcher_fscore2_knn,
 pitcher_fscore2_regression) = (
    checking_pitcher(pitcher_fscore2, model)
    for model in ('rf', 'lgbm', 'xgb', 'cb', 'knn', 'regression')
)

# Stack the per-model rows and flip the table so that rows are feature
# counts and columns are models.
pd.concat([
    pitcher_fscore2_rf,
    pitcher_fscore2_lgbm,
    pitcher_fscore2_xgb,
    pitcher_fscore2_cb,
    pitcher_fscore2_knn,
    pitcher_fscore2_regression,
]).T

Unnamed: 0,rf,lgbm,xgb,cb,knn,regression
5개,14200.773814,13953.652358,14247.026758,13209.005549,18582.647833,16662.689687
10개,13939.150218,13805.609981,14571.135352,13295.251268,17042.646868,16640.130027
15개,13923.47014,13656.879787,13792.245898,13270.39236,16210.670576,16539.882279
20개,13842.172851,13780.254225,15084.318164,13192.574866,16363.314061,16534.323791
25개,13534.878823,13555.887292,14530.212305,13129.778835,16326.201579,16545.739711
30개,13678.31797,13585.027325,14869.838672,13148.320258,16154.388261,16556.956625
35개,13765.28138,13205.007102,14899.442774,12904.336198,16194.723671,16521.589467
40개,13640.222566,12575.445107,14373.441992,12482.826625,16096.178223,16538.390085
45개,13191.274131,12526.413378,13335.406055,12275.047161,16051.464888,16573.105078
50개,13203.912155,12333.403512,13731.703516,12062.36517,16049.217392,16589.530388


#### 2. Mutual information

In [47]:
# Keep only the '평균' row of the hitter mutual-information results,
# as a one-row frame.
hitter_mutual_info2 = hitter_mutual_info.loc['평균'].to_frame().T

# One single-row table per model, indexed by the model name.
(hitter_mutual_info2_rf,
 hitter_mutual_info2_lgbm,
 hitter_mutual_info2_xgb,
 hitter_mutual_info2_cb,
 hitter_mutual_info2_knn,
 hitter_mutual_info2_regression) = (
    checking_hitter(hitter_mutual_info2, model)
    for model in ('rf', 'lgbm', 'xgb', 'cb', 'knn', 'regression')
)

# Stack the per-model rows and flip the table so that rows are feature
# counts and columns are models.
pd.concat([
    hitter_mutual_info2_rf,
    hitter_mutual_info2_lgbm,
    hitter_mutual_info2_xgb,
    hitter_mutual_info2_cb,
    hitter_mutual_info2_knn,
    hitter_mutual_info2_regression,
]).T

Unnamed: 0,rf,lgbm,xgb,cb,knn,regression
5개,14127.943981,15679.957807,14001.944336,13862.149463,23577.809607,19805.489389
10개,14430.389486,15412.435099,14063.790039,13731.989715,21637.517561,19744.612356
15개,14765.978685,15612.088656,14277.377148,14325.790119,21681.569233,19429.979153
20개,14953.502659,15663.522141,14717.484571,14554.115115,21634.075464,19449.079873
25개,14560.25089,15045.451019,14326.545508,14014.330058,21584.503264,19478.657447
30개,14660.309153,14867.674411,14164.676953,13907.436131,21596.823469,19486.065786
35개,14896.956575,15018.075318,14319.364063,14129.109801,21884.762445,19495.323817
40개,14762.131068,14854.681961,14639.901563,14045.097952,21863.272347,19453.775309
45개,15004.250514,15078.539261,14285.086133,14041.611912,21861.648926,19242.92809
50개,14917.846715,14809.701882,14153.92168,13799.60831,21861.058478,19215.767304


In [48]:
# Keep only the '평균' row of the pitcher mutual-information results,
# as a one-row frame.
pitcher_mutual_info2 = pitcher_mutual_info.loc['평균'].to_frame().T

# One single-row table per model, indexed by the model name.
(pitcher_mutual_info2_rf,
 pitcher_mutual_info2_lgbm,
 pitcher_mutual_info2_xgb,
 pitcher_mutual_info2_cb,
 pitcher_mutual_info2_knn,
 pitcher_mutual_info2_regression) = (
    checking_pitcher(pitcher_mutual_info2, model)
    for model in ('rf', 'lgbm', 'xgb', 'cb', 'knn', 'regression')
)

# Stack the per-model rows and flip the table so that rows are feature
# counts and columns are models.
pd.concat([
    pitcher_mutual_info2_rf,
    pitcher_mutual_info2_lgbm,
    pitcher_mutual_info2_xgb,
    pitcher_mutual_info2_cb,
    pitcher_mutual_info2_knn,
    pitcher_mutual_info2_regression,
]).T

Unnamed: 0,rf,lgbm,xgb,cb,knn,regression
5개,12827.014204,13020.414969,12947.384375,11612.683271,18464.750746,17392.125659
10개,12417.424017,12605.752339,11762.779688,11881.605165,17078.094982,17417.17858
15개,12868.331323,12680.827645,12155.100195,12246.201442,17030.677572,17374.550683
20개,12613.044462,12580.893984,12691.732617,12060.213748,16189.588957,17291.66797
25개,12818.749644,12742.491232,12809.437695,11993.965582,16124.110698,17312.034888
30개,13102.969994,12475.312815,12873.611133,12255.053437,16028.404512,17279.748185
35개,13132.36168,13083.12564,13482.025,12416.649935,16032.706948,17252.737025
40개,13344.404636,12684.949984,12962.042187,12351.787456,16036.054096,17281.067544
45개,13437.880321,12728.130734,13274.270508,12562.009122,16038.412796,17316.755316
50개,13539.778845,12532.089748,13318.447461,12560.77777,16047.574195,17351.003734
