In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings(action = 'ignore')
%matplotlib inline

# 데이터 분할
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold

# 모델
from xgboost import XGBClassifier 
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# 파라미터 최적화
from bayes_opt import BayesianOptimization

# 평가지표
from sklearn.metrics import log_loss

In [2]:
# Customer-level training table; its LABEL column is the classification target
# shared by every feature set below.
target = pd.read_csv(
    'cust_train.csv',
    encoding='UTF-8',
)
label = target.LABEL

# pd w2v bayesian opt

In [18]:
# pd word2vec features for the train and test customers
# (train is assumed to be row-aligned with `label` — confirm).
train = pd.read_csv('pd_w2v_train.csv', encoding='UTF-8')
test = pd.read_csv('pd_w2v_test.csv', encoding='UTF-8')

# 70/30 hold-out split, stratified on the label, with a fixed seed.
split_options = dict(test_size=0.3, random_state=516, stratify=label)
X_train, X_val, y_train, y_val = train_test_split(train, label, **split_options)

In [19]:
# Search space for the optimiser: (lower, upper) bound per LightGBM hyper-parameter.
# NOTE(review): learning rates near the top of this range scored far worse in the
# logged run (e.g. ~1.10 -> -5.896); consider a tighter upper bound.
pbounds = dict(
    n_estimators=(10, 350),
    learning_rate=(0.03, 1.3),
    max_depth=(3, 10),
    num_leaves=(10, 200),
    min_child_samples=(20, 100),
    min_child_weight=(1, 10),
    subsample=(0.75, 0.95),
    colsample_bytree=(0.75, 0.95),
)


def lgbm_opt(n_estimators, learning_rate, max_depth, num_leaves, min_child_samples, min_child_weight, subsample, colsample_bytree):
    """Objective for the Bayesian optimiser: mean 4-fold negative log loss.

    Each argument is one candidate hyper-parameter value. The optimiser
    proposes floats, so the integer-valued LightGBM parameters are rounded
    before the model is built. The model is cross-validated on the global
    ``X_train`` / ``y_train`` split.

    Returns:
        The mean ``neg_log_loss`` over the folds (higher is better).
    """
    # NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0
    # (default 0), so this dimension is probably a no-op as written — confirm.
    rounded = {
        name: int(round(value))
        for name, value in (
            ('n_estimators', n_estimators),
            ('max_depth', max_depth),
            ('num_leaves', num_leaves),
            ('min_child_samples', min_child_samples),
        )
    }

    model = LGBMClassifier(
        learning_rate=learning_rate,
        min_child_weight=min_child_weight,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        n_jobs=-1,
        **rounded,
    )

    # Same shuffled, seeded folds for every candidate so scores are comparable.
    folds = StratifiedKFold(n_splits=4, shuffle=True, random_state=516)
    fold_scores = cross_val_score(
        model, X_train, y_train,
        cv=folds, scoring='neg_log_loss', n_jobs=-1,
    )

    return np.mean(fold_scores)

# Bayesian optimisation of lgbm_opt over pbounds: 40 initial probes, then 40 guided steps.
BO_lgbm = BayesianOptimization(f = lgbm_opt, pbounds = pbounds, random_state = 516)

BO_lgbm.maximize(init_points=40, n_iter=40)

# Best point found. The optimiser reports every dimension as a float, so the
# integer-valued parameters are rounded exactly as lgbm_opt rounds them.
max_params = BO_lgbm.max['params']
for int_param in ('n_estimators', 'max_depth', 'num_leaves', 'min_child_samples'):
    max_params[int_param] = int(round(max_params[int_param]))
display(max_params)

# Step 9. Refit on the training split with the maximising hyper-parameters.
lgbm_tun = LGBMClassifier(**max_params)
lgbm_tun.fit(X_train, y_train)

# cross_val_score clones and refits the estimator inside the validation split,
# so these scores describe the hyper-parameters, not the lgbm_tun fitted above.
skfold = StratifiedKFold(n_splits = 4, shuffle = True, random_state = 516)
scores = cross_val_score(lgbm_tun, X_val, y_val, cv = skfold, scoring = 'neg_log_loss', n_jobs=-1)

print(scores)
print(f'최대성능: {max(scores)}\n평균성능: {np.mean(scores)}')

# Hold-out log loss of the model actually used for prediction (positive, lower is better).
holdout_loss = log_loss(y_val, lgbm_tun.predict_proba(X_val), labels = lgbm_tun.classes_)
print(f'hold-out log loss: {holdout_loss}')

|   iter    |  target   | colsam... | learni... | max_depth | min_ch... | min_ch... | n_esti... | num_le... | subsample |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m-1.696   [0m | [0m 0.9123  [0m | [0m 0.9297  [0m | [0m 3.781   [0m | [0m 65.34   [0m | [0m 8.135   [0m | [0m 294.8   [0m | [0m 113.0   [0m | [0m 0.789   [0m |
| [0m 2       [0m | [0m-5.896   [0m | [0m 0.7878  [0m | [0m 1.103   [0m | [0m 5.745   [0m | [0m 80.65   [0m | [0m 2.584   [0m | [0m 212.1   [0m | [0m 62.69   [0m | [0m 0.8719  [0m |
| [0m 3       [0m | [0m-2.458   [0m | [0m 0.8565  [0m | [0m 0.9781  [0m | [0m 6.776   [0m | [0m 20.55   [0m | [0m 5.861   [0m | [0m 240.8   [0m | [0m 67.81   [0m | [0m 0.8451  [0m |
| [0m 4       [0m | [0m-1.955   [0m | [0m 0.8397  [0m | [0m 0.5528  [0m | [0m 9.905   [0m | [0m 29.23   [0m | [0m 9.431   [0m | [0m 198

| [95m 41      [0m | [95m-1.298   [0m | [95m 0.75    [0m | [95m 0.03    [0m | [95m 6.156   [0m | [95m 59.37   [0m | [95m 10.0    [0m | [95m 313.4   [0m | [95m 94.79   [0m | [95m 0.8206  [0m |
| [0m 42      [0m | [0m-1.354   [0m | [0m 0.8655  [0m | [0m 0.3229  [0m | [0m 6.947   [0m | [0m 49.47   [0m | [0m 2.985   [0m | [0m 35.99   [0m | [0m 125.6   [0m | [0m 0.7527  [0m |
| [0m 43      [0m | [0m-1.443   [0m | [0m 0.8166  [0m | [0m 0.2888  [0m | [0m 5.91    [0m | [0m 24.86   [0m | [0m 2.176   [0m | [0m 230.8   [0m | [0m 151.2   [0m | [0m 0.8495  [0m |
| [0m 44      [0m | [0m-1.494   [0m | [0m 0.8348  [0m | [0m 0.6267  [0m | [0m 5.001   [0m | [0m 57.77   [0m | [0m 1.0     [0m | [0m 106.5   [0m | [0m 105.6   [0m | [0m 0.8654  [0m |
| [0m 45      [0m | [0m-1.471   [0m | [0m 0.8177  [0m | [0m 0.5701  [0m | [0m 4.173   [0m | [0m 64.35   [0m | [0m 9.235   [0m | [0m 323.9   [0m | [0m 131.7   [0m | 

{'colsample_bytree': 0.75,
 'learning_rate': 0.03,
 'max_depth': 6,
 'min_child_samples': 59,
 'min_child_weight': 10.0,
 'n_estimators': 313,
 'num_leaves': 95,
 'subsample': 0.8206383495380686}

[-1.3059556  -1.30588738 -1.30921729 -1.30877038]
최대성능: -1.3058873815455645
평균성능: -1.3074576653509726


In [None]:
# Export class probabilities for the test customers, keyed by a hard-coded
# CLNT_ID range. The six column names assume predict_proba's column order is
# F20..M40 — confirm against lgbm_tun.classes_.
clnt_te = pd.DataFrame({'CLNT_ID': list(range(263104, 375864))})
pd_w2v_pred = pd.concat(
    [clnt_te, pd.DataFrame(lgbm_tun.predict_proba(test))],
    axis=1,
)
pd_w2v_pred.columns = ['CLNT_ID', 'F20', 'F30', 'F40', 'M20', 'M30', 'M40']
pd_w2v_pred.to_csv('pd_w2v_pred.csv', index=False)

# kwd w2v bayesian opt

In [8]:
# kwd word2vec features. The first column of each file is dropped
# (presumably a saved index/ID column — confirm).
train = pd.read_csv('kwd_w2v_train.csv', encoding='UTF-8').iloc[:, 1:]
test = pd.read_csv('kwd_w2v_test.csv', encoding='UTF-8').iloc[:, 1:]

# 70/30 hold-out split, stratified on the label, with a fixed seed.
split_options = dict(test_size=0.3, random_state=516, stratify=label)
X_train, X_val, y_train, y_val = train_test_split(train, label, **split_options)

In [9]:
# Search space for the optimiser: (lower, upper) bound per LightGBM hyper-parameter.
pbounds = dict(
    n_estimators=(50, 500),
    learning_rate=(0.01, 1.0),
    max_depth=(2, 10),
    num_leaves=(10, 200),
    min_child_samples=(20, 100),
    min_child_weight=(1, 15),
    subsample=(0.75, 0.95),
    colsample_bytree=(0.75, 0.95),
)


def lgbm_opt(n_estimators, learning_rate, max_depth, num_leaves, min_child_samples, min_child_weight, subsample, colsample_bytree):
    """Objective for the Bayesian optimiser: mean 4-fold negative log loss.

    Each argument is one candidate hyper-parameter value. The optimiser
    proposes floats, so the integer-valued LightGBM parameters are rounded
    before the model is built. The model is cross-validated on the global
    ``X_train`` / ``y_train`` split.

    Returns:
        The mean ``neg_log_loss`` over the folds (higher is better).
    """
    # NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0
    # (default 0), so this dimension is probably a no-op as written — confirm.
    rounded = {
        name: int(round(value))
        for name, value in (
            ('n_estimators', n_estimators),
            ('max_depth', max_depth),
            ('num_leaves', num_leaves),
            ('min_child_samples', min_child_samples),
        )
    }

    model = LGBMClassifier(
        learning_rate=learning_rate,
        min_child_weight=min_child_weight,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        n_jobs=-1,
        **rounded,
    )

    # Same shuffled, seeded folds for every candidate so scores are comparable.
    folds = StratifiedKFold(n_splits=4, shuffle=True, random_state=516)
    fold_scores = cross_val_score(
        model, X_train, y_train,
        cv=folds, scoring='neg_log_loss', n_jobs=-1,
    )

    return np.mean(fold_scores)

# Bayesian optimisation of lgbm_opt over pbounds: 40 initial probes, then 40 guided steps.
BO_lgbm = BayesianOptimization(f = lgbm_opt, pbounds = pbounds, random_state = 516)

BO_lgbm.maximize(init_points=40, n_iter=40)

# Best point found. The optimiser reports every dimension as a float, so the
# integer-valued parameters are rounded exactly as lgbm_opt rounds them.
max_params = BO_lgbm.max['params']
for int_param in ('n_estimators', 'max_depth', 'num_leaves', 'min_child_samples'):
    max_params[int_param] = int(round(max_params[int_param]))
display(max_params)

# Step 9. Refit on the training split with the maximising hyper-parameters.
lgbm_tun = LGBMClassifier(**max_params)
lgbm_tun.fit(X_train, y_train)

# cross_val_score clones and refits the estimator inside the validation split,
# so these scores describe the hyper-parameters, not the lgbm_tun fitted above.
skfold = StratifiedKFold(n_splits = 4, shuffle = True, random_state = 516)
scores = cross_val_score(lgbm_tun, X_val, y_val, cv = skfold, scoring = 'neg_log_loss', n_jobs=-1)

print(scores)
print(f'최대성능: {max(scores)}\n평균성능: {np.mean(scores)}')

# Hold-out log loss of the model actually used for prediction (positive, lower is better).
holdout_loss = log_loss(y_val, lgbm_tun.predict_proba(X_val), labels = lgbm_tun.classes_)
print(f'hold-out log loss: {holdout_loss}')

|   iter    |  target   | colsam... | learni... | max_depth | min_ch... | min_ch... | n_esti... | num_le... | subsample |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m-1.364   [0m | [0m 0.9123  [0m | [0m 0.7113  [0m | [0m 2.892   [0m | [0m 65.34   [0m | [0m 12.1    [0m | [0m 426.9   [0m | [0m 113.0   [0m | [0m 0.789   [0m |
| [0m 2       [0m | [0m-1.734   [0m | [0m 0.7878  [0m | [0m 0.8463  [0m | [0m 5.137   [0m | [0m 80.65   [0m | [0m 3.463   [0m | [0m 317.5   [0m | [0m 62.69   [0m | [0m 0.8719  [0m |
| [0m 3       [0m | [0m-1.87    [0m | [0m 0.8565  [0m | [0m 0.7491  [0m | [0m 6.316   [0m | [0m 20.55   [0m | [0m 8.562   [0m | [0m 355.5   [0m | [0m 67.81   [0m | [0m 0.8451  [0m |
| [0m 4       [0m | [0m-1.739   [0m | [0m 0.8397  [0m | [0m 0.4175  [0m | [0m 9.892   [0m | [0m 29.23   [0m | [0m 14.12   [0m | [0m 299

| [0m 41      [0m | [0m-1.524   [0m | [0m 0.7672  [0m | [0m 0.7231  [0m | [0m 6.951   [0m | [0m 57.32   [0m | [0m 12.06   [0m | [0m 413.1   [0m | [0m 12.69   [0m | [0m 0.7878  [0m |
| [0m 42      [0m | [0m-1.275   [0m | [0m 0.7606  [0m | [0m 0.4312  [0m | [0m 4.009   [0m | [0m 60.52   [0m | [0m 9.797   [0m | [0m 191.5   [0m | [0m 10.68   [0m | [0m 0.934   [0m |
| [0m 43      [0m | [0m-1.267   [0m | [0m 0.95    [0m | [0m 0.01    [0m | [0m 7.308   [0m | [0m 78.67   [0m | [0m 1.0     [0m | [0m 163.3   [0m | [0m 177.5   [0m | [0m 0.75    [0m |
| [0m 44      [0m | [0m-2.026   [0m | [0m 0.8561  [0m | [0m 0.8537  [0m | [0m 7.369   [0m | [0m 56.04   [0m | [0m 1.051   [0m | [0m 159.2   [0m | [0m 184.4   [0m | [0m 0.9148  [0m |
| [0m 45      [0m | [0m-1.263   [0m | [0m 0.95    [0m | [0m 0.01    [0m | [0m 7.529   [0m | [0m 76.45   [0m | [0m 1.0     [0m | [0m 163.4   [0m | [0m 156.0   [0m | [0m 0.75 

{'colsample_bytree': 0.8432589523901574,
 'learning_rate': 0.051055588083552005,
 'max_depth': 4,
 'min_child_samples': 51,
 'min_child_weight': 12.505756351754851,
 'n_estimators': 428,
 'num_leaves': 11,
 'subsample': 0.9335077173704511}

[-1.23775659 -1.23450612 -1.23108711 -1.23705596]
최대성능: -1.2310871116283506
평균성능: -1.2351014472670574


In [10]:
# Export class probabilities for the test customers, keyed by a hard-coded
# CLNT_ID range. The six column names assume predict_proba's column order is
# F20..M40 — confirm against lgbm_tun.classes_.
clnt_te = pd.DataFrame({'CLNT_ID': list(range(263104, 375864))})
kwd_w2v_pred = pd.concat(
    [clnt_te, pd.DataFrame(lgbm_tun.predict_proba(test))],
    axis=1,
)
kwd_w2v_pred.columns = ['CLNT_ID', 'F20', 'F30', 'F40', 'M20', 'M30', 'M40']
kwd_w2v_pred.to_csv('kwd2_w2v_pred.csv', index=False)

# brand w2v bayesian opt

In [None]:
# brand word2vec features for the train and test customers
# (train is assumed to be row-aligned with `label` — confirm).
train = pd.read_csv('brand_w2v_train.csv', encoding='UTF-8')
test = pd.read_csv('brand_w2v_test.csv', encoding='UTF-8')

# 70/30 hold-out split, stratified on the label, with a fixed seed.
split_options = dict(test_size=0.3, random_state=516, stratify=label)
X_train, X_val, y_train, y_val = train_test_split(train, label, **split_options)

In [None]:
# Search space for the optimiser: (lower, upper) bound per LightGBM hyper-parameter.
pbounds = dict(
    n_estimators=(10, 350),
    learning_rate=(0.03, 1.3),
    max_depth=(3, 10),
    num_leaves=(10, 200),
    min_child_samples=(20, 100),
    min_child_weight=(1, 10),
    subsample=(0.75, 0.95),
    colsample_bytree=(0.75, 0.95),
)


def lgbm_opt(n_estimators, learning_rate, max_depth, num_leaves, min_child_samples, min_child_weight, subsample, colsample_bytree):
    """Objective for the Bayesian optimiser: mean 4-fold negative log loss.

    Each argument is one candidate hyper-parameter value. The optimiser
    proposes floats, so the integer-valued LightGBM parameters are rounded
    before the model is built. The model is cross-validated on the global
    ``X_train`` / ``y_train`` split.

    Returns:
        The mean ``neg_log_loss`` over the folds (higher is better).
    """
    # NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0
    # (default 0), so this dimension is probably a no-op as written — confirm.
    rounded = {
        name: int(round(value))
        for name, value in (
            ('n_estimators', n_estimators),
            ('max_depth', max_depth),
            ('num_leaves', num_leaves),
            ('min_child_samples', min_child_samples),
        )
    }

    model = LGBMClassifier(
        learning_rate=learning_rate,
        min_child_weight=min_child_weight,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        n_jobs=-1,
        **rounded,
    )

    # Same shuffled, seeded folds for every candidate so scores are comparable.
    folds = StratifiedKFold(n_splits=4, shuffle=True, random_state=516)
    fold_scores = cross_val_score(
        model, X_train, y_train,
        cv=folds, scoring='neg_log_loss', n_jobs=-1,
    )

    return np.mean(fold_scores)

# Bayesian optimisation of lgbm_opt over pbounds: 40 initial probes, then 40 guided steps.
BO_lgbm = BayesianOptimization(f = lgbm_opt, pbounds = pbounds, random_state = 516)

BO_lgbm.maximize(init_points=40, n_iter=40)

# Best point found. The optimiser reports every dimension as a float, so the
# integer-valued parameters are rounded exactly as lgbm_opt rounds them.
max_params = BO_lgbm.max['params']
for int_param in ('n_estimators', 'max_depth', 'num_leaves', 'min_child_samples'):
    max_params[int_param] = int(round(max_params[int_param]))
display(max_params)

# Step 9. Refit on the training split with the maximising hyper-parameters.
lgbm_tun = LGBMClassifier(**max_params)
lgbm_tun.fit(X_train, y_train)

# cross_val_score clones and refits the estimator inside the validation split,
# so these scores describe the hyper-parameters, not the lgbm_tun fitted above.
skfold = StratifiedKFold(n_splits = 4, shuffle = True, random_state = 516)
scores = cross_val_score(lgbm_tun, X_val, y_val, cv = skfold, scoring = 'neg_log_loss', n_jobs=-1)

print(scores)
print(f'최대성능: {max(scores)}\n평균성능: {np.mean(scores)}')

# Hold-out log loss of the model actually used for prediction (positive, lower is better).
holdout_loss = log_loss(y_val, lgbm_tun.predict_proba(X_val), labels = lgbm_tun.classes_)
print(f'hold-out log loss: {holdout_loss}')

In [None]:
# Export class probabilities for the test customers from the tuned brand model.
# Predicts with lgbm_tun, the model fitted in this section; `lgbm_clf` is not
# defined anywhere in the notebook and raised NameError.
# The six column names assume predict_proba's column order is F20..M40 —
# confirm against lgbm_tun.classes_.
w2v_pred = pd.DataFrame(lgbm_tun.predict_proba(test))
clnt_te = pd.DataFrame({'CLNT_ID': list(range(263104, 375864))})
w2v_pred = pd.concat([clnt_te, w2v_pred], axis=1)
w2v_pred.columns = ['CLNT_ID', 'F20', 'F30', 'F40', 'M20', 'M30', 'M40']
# Section-specific file name so predictions from other feature sets are not overwritten.
w2v_pred.to_csv('brand_w2v_pred.csv', index=False)

# clac1 w2v bayesian opt

In [None]:
# clac1 word2vec features for the train and test customers
# (train is assumed to be row-aligned with `label` — confirm).
train = pd.read_csv('clac1_w2v_train.csv', encoding='UTF-8')
test = pd.read_csv('clac1_w2v_test.csv', encoding='UTF-8')

# 70/30 hold-out split, stratified on the label, with a fixed seed.
split_options = dict(test_size=0.3, random_state=516, stratify=label)
X_train, X_val, y_train, y_val = train_test_split(train, label, **split_options)

In [None]:
# Search space for the optimiser: (lower, upper) bound per LightGBM hyper-parameter.
pbounds = dict(
    n_estimators=(10, 350),
    learning_rate=(0.03, 1.3),
    max_depth=(3, 10),
    num_leaves=(10, 200),
    min_child_samples=(20, 100),
    min_child_weight=(1, 10),
    subsample=(0.75, 0.95),
    colsample_bytree=(0.75, 0.95),
)


def lgbm_opt(n_estimators, learning_rate, max_depth, num_leaves, min_child_samples, min_child_weight, subsample, colsample_bytree):
    """Objective for the Bayesian optimiser: mean 4-fold negative log loss.

    Each argument is one candidate hyper-parameter value. The optimiser
    proposes floats, so the integer-valued LightGBM parameters are rounded
    before the model is built. The model is cross-validated on the global
    ``X_train`` / ``y_train`` split.

    Returns:
        The mean ``neg_log_loss`` over the folds (higher is better).
    """
    # NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0
    # (default 0), so this dimension is probably a no-op as written — confirm.
    rounded = {
        name: int(round(value))
        for name, value in (
            ('n_estimators', n_estimators),
            ('max_depth', max_depth),
            ('num_leaves', num_leaves),
            ('min_child_samples', min_child_samples),
        )
    }

    model = LGBMClassifier(
        learning_rate=learning_rate,
        min_child_weight=min_child_weight,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        n_jobs=-1,
        **rounded,
    )

    # Same shuffled, seeded folds for every candidate so scores are comparable.
    folds = StratifiedKFold(n_splits=4, shuffle=True, random_state=516)
    fold_scores = cross_val_score(
        model, X_train, y_train,
        cv=folds, scoring='neg_log_loss', n_jobs=-1,
    )

    return np.mean(fold_scores)

# Bayesian optimisation of lgbm_opt over pbounds: 40 initial probes, then 40 guided steps.
BO_lgbm = BayesianOptimization(f = lgbm_opt, pbounds = pbounds, random_state = 516)

BO_lgbm.maximize(init_points=40, n_iter=40)

# Best point found. The optimiser reports every dimension as a float, so the
# integer-valued parameters are rounded exactly as lgbm_opt rounds them.
max_params = BO_lgbm.max['params']
for int_param in ('n_estimators', 'max_depth', 'num_leaves', 'min_child_samples'):
    max_params[int_param] = int(round(max_params[int_param]))
display(max_params)

# Step 9. Refit on the training split with the maximising hyper-parameters.
lgbm_tun = LGBMClassifier(**max_params)
lgbm_tun.fit(X_train, y_train)

# cross_val_score clones and refits the estimator inside the validation split,
# so these scores describe the hyper-parameters, not the lgbm_tun fitted above.
skfold = StratifiedKFold(n_splits = 4, shuffle = True, random_state = 516)
scores = cross_val_score(lgbm_tun, X_val, y_val, cv = skfold, scoring = 'neg_log_loss', n_jobs=-1)

print(scores)
print(f'최대성능: {max(scores)}\n평균성능: {np.mean(scores)}')

# Hold-out log loss of the model actually used for prediction (positive, lower is better).
holdout_loss = log_loss(y_val, lgbm_tun.predict_proba(X_val), labels = lgbm_tun.classes_)
print(f'hold-out log loss: {holdout_loss}')

In [None]:
# Export class probabilities for the test customers from the tuned clac1 model.
# Predicts with lgbm_tun, the model fitted in this section; `lgbm_clf` is not
# defined anywhere in the notebook and raised NameError.
# The six column names assume predict_proba's column order is F20..M40 —
# confirm against lgbm_tun.classes_.
w2v_pred = pd.DataFrame(lgbm_tun.predict_proba(test))
clnt_te = pd.DataFrame({'CLNT_ID': list(range(263104, 375864))})
w2v_pred = pd.concat([clnt_te, w2v_pred], axis=1)
w2v_pred.columns = ['CLNT_ID', 'F20', 'F30', 'F40', 'M20', 'M30', 'M40']
# Section-specific file name so predictions from other feature sets are not overwritten.
w2v_pred.to_csv('clac1_w2v_pred.csv', index=False)

# clac2 w2v bayesian opt

In [None]:
# clac2 word2vec features for the train and test customers
# (train is assumed to be row-aligned with `label` — confirm).
train = pd.read_csv('clac2_w2v_train.csv', encoding='UTF-8')
test = pd.read_csv('clac2_w2v_test.csv', encoding='UTF-8')

# 70/30 hold-out split, stratified on the label, with a fixed seed.
split_options = dict(test_size=0.3, random_state=516, stratify=label)
X_train, X_val, y_train, y_val = train_test_split(train, label, **split_options)

In [None]:
# Search space for the optimiser: (lower, upper) bound per LightGBM hyper-parameter.
pbounds = dict(
    n_estimators=(10, 350),
    learning_rate=(0.03, 1.3),
    max_depth=(3, 10),
    num_leaves=(10, 200),
    min_child_samples=(20, 100),
    min_child_weight=(1, 10),
    subsample=(0.75, 0.95),
    colsample_bytree=(0.75, 0.95),
)


def lgbm_opt(n_estimators, learning_rate, max_depth, num_leaves, min_child_samples, min_child_weight, subsample, colsample_bytree):
    """Objective for the Bayesian optimiser: mean 4-fold negative log loss.

    Each argument is one candidate hyper-parameter value. The optimiser
    proposes floats, so the integer-valued LightGBM parameters are rounded
    before the model is built. The model is cross-validated on the global
    ``X_train`` / ``y_train`` split.

    Returns:
        The mean ``neg_log_loss`` over the folds (higher is better).
    """
    # NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0
    # (default 0), so this dimension is probably a no-op as written — confirm.
    rounded = {
        name: int(round(value))
        for name, value in (
            ('n_estimators', n_estimators),
            ('max_depth', max_depth),
            ('num_leaves', num_leaves),
            ('min_child_samples', min_child_samples),
        )
    }

    model = LGBMClassifier(
        learning_rate=learning_rate,
        min_child_weight=min_child_weight,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        n_jobs=-1,
        **rounded,
    )

    # Same shuffled, seeded folds for every candidate so scores are comparable.
    folds = StratifiedKFold(n_splits=4, shuffle=True, random_state=516)
    fold_scores = cross_val_score(
        model, X_train, y_train,
        cv=folds, scoring='neg_log_loss', n_jobs=-1,
    )

    return np.mean(fold_scores)

# Bayesian optimisation of lgbm_opt over pbounds: 40 initial probes, then 40 guided steps.
BO_lgbm = BayesianOptimization(f = lgbm_opt, pbounds = pbounds, random_state = 516)

BO_lgbm.maximize(init_points=40, n_iter=40)

# Best point found. The optimiser reports every dimension as a float, so the
# integer-valued parameters are rounded exactly as lgbm_opt rounds them.
max_params = BO_lgbm.max['params']
for int_param in ('n_estimators', 'max_depth', 'num_leaves', 'min_child_samples'):
    max_params[int_param] = int(round(max_params[int_param]))
display(max_params)

# Step 9. Refit on the training split with the maximising hyper-parameters.
lgbm_tun = LGBMClassifier(**max_params)
lgbm_tun.fit(X_train, y_train)

# cross_val_score clones and refits the estimator inside the validation split,
# so these scores describe the hyper-parameters, not the lgbm_tun fitted above.
skfold = StratifiedKFold(n_splits = 4, shuffle = True, random_state = 516)
scores = cross_val_score(lgbm_tun, X_val, y_val, cv = skfold, scoring = 'neg_log_loss', n_jobs=-1)

print(scores)
print(f'최대성능: {max(scores)}\n평균성능: {np.mean(scores)}')

# Hold-out log loss of the model actually used for prediction (positive, lower is better).
holdout_loss = log_loss(y_val, lgbm_tun.predict_proba(X_val), labels = lgbm_tun.classes_)
print(f'hold-out log loss: {holdout_loss}')

In [None]:
# Export class probabilities for the test customers from the tuned clac2 model.
# Predicts with lgbm_tun, the model fitted in this section; `lgbm_clf` is not
# defined anywhere in the notebook and raised NameError.
# The six column names assume predict_proba's column order is F20..M40 —
# confirm against lgbm_tun.classes_.
w2v_pred = pd.DataFrame(lgbm_tun.predict_proba(test))
clnt_te = pd.DataFrame({'CLNT_ID': list(range(263104, 375864))})
w2v_pred = pd.concat([clnt_te, w2v_pred], axis=1)
w2v_pred.columns = ['CLNT_ID', 'F20', 'F30', 'F40', 'M20', 'M30', 'M40']
# Section-specific file name so predictions from other feature sets are not overwritten.
w2v_pred.to_csv('clac2_w2v_pred.csv', index=False)

# clac3 w2v bayesian opt

In [3]:
# clac3 word2vec features for the train and test customers
# (train is assumed to be row-aligned with `label` — confirm).
train = pd.read_csv('clac3_w2v_train.csv', encoding='UTF-8')
test = pd.read_csv('clac3_w2v_test.csv', encoding='UTF-8')

# 70/30 hold-out split, stratified on the label, with a fixed seed.
split_options = dict(test_size=0.3, random_state=516, stratify=label)
X_train, X_val, y_train, y_val = train_test_split(train, label, **split_options)

In [None]:
# Search space for the optimiser: (lower, upper) bound per LightGBM hyper-parameter.
# NOTE(review): learning rates near the top of this range scored far worse in the
# logged run (e.g. ~0.85 -> -16.8, 1.0 -> -16.75); consider a tighter upper bound.
pbounds = dict(
    n_estimators=(100, 400),
    learning_rate=(0.01, 1.0),
    max_depth=(3, 13),
    num_leaves=(10, 200),
    min_child_samples=(10, 100),
    min_child_weight=(1, 20),
    subsample=(0.75, 0.95),
    colsample_bytree=(0.75, 0.95),
)


def lgbm_opt(n_estimators, learning_rate, max_depth, num_leaves, min_child_samples, min_child_weight, subsample, colsample_bytree):
    """Objective for the Bayesian optimiser: mean 4-fold negative log loss.

    Each argument is one candidate hyper-parameter value. The optimiser
    proposes floats, so the integer-valued LightGBM parameters are rounded
    before the model is built. The model is cross-validated on the global
    ``X_train`` / ``y_train`` split.

    Returns:
        The mean ``neg_log_loss`` over the folds (higher is better).
    """
    # NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0
    # (default 0), so this dimension is probably a no-op as written — confirm.
    rounded = {
        name: int(round(value))
        for name, value in (
            ('n_estimators', n_estimators),
            ('max_depth', max_depth),
            ('num_leaves', num_leaves),
            ('min_child_samples', min_child_samples),
        )
    }

    model = LGBMClassifier(
        learning_rate=learning_rate,
        min_child_weight=min_child_weight,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        n_jobs=-1,
        **rounded,
    )

    # Same shuffled, seeded folds for every candidate so scores are comparable.
    folds = StratifiedKFold(n_splits=4, shuffle=True, random_state=516)
    fold_scores = cross_val_score(
        model, X_train, y_train,
        cv=folds, scoring='neg_log_loss', n_jobs=-1,
    )

    return np.mean(fold_scores)

# Bayesian optimisation of lgbm_opt over pbounds: 40 initial probes, then 40 guided steps.
BO_lgbm = BayesianOptimization(f = lgbm_opt, pbounds = pbounds, random_state = 516)

BO_lgbm.maximize(init_points=40, n_iter=40)

# Best point found. The optimiser reports every dimension as a float, so the
# integer-valued parameters are rounded exactly as lgbm_opt rounds them.
max_params = BO_lgbm.max['params']
for int_param in ('n_estimators', 'max_depth', 'num_leaves', 'min_child_samples'):
    max_params[int_param] = int(round(max_params[int_param]))
display(max_params)

# Step 9. Refit on the training split with the maximising hyper-parameters.
lgbm_tun = LGBMClassifier(**max_params)
lgbm_tun.fit(X_train, y_train)

# cross_val_score clones and refits the estimator inside the validation split,
# so these scores describe the hyper-parameters, not the lgbm_tun fitted above.
skfold = StratifiedKFold(n_splits = 4, shuffle = True, random_state = 516)
scores = cross_val_score(lgbm_tun, X_val, y_val, cv = skfold, scoring = 'neg_log_loss', n_jobs=-1)

print(scores)
print(f'최대성능: {max(scores)}\n평균성능: {np.mean(scores)}')

# Hold-out log loss of the model actually used for prediction (positive, lower is better).
holdout_loss = log_loss(y_val, lgbm_tun.predict_proba(X_val), labels = lgbm_tun.classes_)
print(f'hold-out log loss: {holdout_loss}')

|   iter    |  target   | colsam... | learni... | max_depth | min_ch... | min_ch... | n_esti... | num_le... | subsample |
-------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m-1.388   [0m | [0m 0.9123  [0m | [0m 0.7113  [0m | [0m 4.115   [0m | [0m 61.01   [0m | [0m 16.06   [0m | [0m 351.3   [0m | [0m 113.0   [0m | [0m 0.789   [0m |
| [0m 2       [0m | [0m-16.8    [0m | [0m 0.7878  [0m | [0m 0.8463  [0m | [0m 6.922   [0m | [0m 78.23   [0m | [0m 4.343   [0m | [0m 278.3   [0m | [0m 62.69   [0m | [0m 0.8719  [0m |
| [0m 3       [0m | [0m-1.834   [0m | [0m 0.8565  [0m | [0m 0.7491  [0m | [0m 8.395   [0m | [0m 10.62   [0m | [0m 11.26   [0m | [0m 303.7   [0m | [0m 67.81   [0m | [0m 0.8451  [0m |
| [0m 4       [0m | [0m-1.663   [0m | [0m 0.8397  [0m | [0m 0.4175  [0m | [0m 12.86   [0m | [0m 20.38   [0m | [0m 18.8    [0m | [0m 266

| [0m 41      [0m | [0m-1.272   [0m | [0m 0.75    [0m | [0m 0.01    [0m | [0m 3.235   [0m | [0m 60.71   [0m | [0m 20.0    [0m | [0m 371.8   [0m | [0m 84.63   [0m | [0m 0.8548  [0m |
| [0m 42      [0m | [0m-1.381   [0m | [0m 0.8655  [0m | [0m 0.2383  [0m | [0m 8.639   [0m | [0m 43.16   [0m | [0m 5.19    [0m | [0m 122.9   [0m | [0m 125.6   [0m | [0m 0.7527  [0m |
| [0m 43      [0m | [0m-1.536   [0m | [0m 0.8239  [0m | [0m 0.4002  [0m | [0m 10.01   [0m | [0m 60.86   [0m | [0m 9.001   [0m | [0m 169.2   [0m | [0m 116.0   [0m | [0m 0.8351  [0m |
| [0m 44      [0m | [0m-16.75   [0m | [0m 0.95    [0m | [0m 1.0     [0m | [0m 3.0     [0m | [0m 50.86   [0m | [0m 20.0    [0m | [0m 304.3   [0m | [0m 10.0    [0m | [0m 0.75    [0m |
| [0m 45      [0m | [0m-1.261   [0m | [0m 0.7524  [0m | [0m 0.01    [0m | [0m 6.052   [0m | [0m 72.58   [0m | [0m 16.3    [0m | [0m 358.1   [0m | [0m 36.0    [0m | [0m 0.95 

In [6]:
# Re-display the tuned parameters and the validation-fold scores computed earlier.
display(max_params)
print(scores)
best_score = max(scores)
mean_score = np.mean(scores)
print(f'최대성능: {best_score}\n평균성능: {mean_score}')

{'colsample_bytree': 0.8432589523901574,
 'learning_rate': 0.051055588083552005,
 'max_depth': 5,
 'min_child_samples': 45,
 'min_child_weight': 16.614955048810156,
 'n_estimators': 352,
 'num_leaves': 11,
 'subsample': 0.9335077173704511}

[-1.27318992 -1.27103356 -1.27066252 -1.27162127]
최대성능: -1.2706625238476337
평균성능: -1.2716268163090936


In [7]:
# Export class probabilities for the test customers, keyed by a hard-coded
# CLNT_ID range. The six column names assume predict_proba's column order is
# F20..M40 — confirm against lgbm_tun.classes_.
clnt_te = pd.DataFrame({'CLNT_ID': list(range(263104, 375864))})
clac3_w2v_pred = pd.concat(
    [clnt_te, pd.DataFrame(lgbm_tun.predict_proba(test))],
    axis=1,
)
clac3_w2v_pred.columns = ['CLNT_ID', 'F20', 'F30', 'F40', 'M20', 'M30', 'M40']
clac3_w2v_pred.to_csv('clac3_w2v_pred.csv', index=False)