In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# A notebook has no real __file__, so the current working directory stands in
# for it. The name is kept because later cells build paths from it as well.
__file__ = os.getcwd()

# The data folders are looked up under the parent of the working directory.
# os.path.dirname replaces the manual split on '/', which returned the wrong
# directory with Windows separators and an empty string when the working
# directory sits directly under the filesystem root.
DATA_ROOT = os.path.dirname(os.path.realpath(__file__))
path_data_train = os.path.join(DATA_ROOT, 'preprocess', 'train_preprocess_2.csv')
path_label = os.path.join(DATA_ROOT, 'raw', 'train_label.csv')

In [3]:
# Read the preprocessed training features and the raw training labels,
# both with pandas' pure-Python CSV parser.
X_train, y_train = (
    pd.read_csv(csv_path, engine = 'python')
    for csv_path in (path_data_train, path_label)
)

In [4]:
# Churn flag: 0 = retained customer, 1 = churned customer.
# A customer is flagged as churned when survival_time is below 64.
# The column is built in one vectorized assignment; the chained form
# y_train['leave'][mask] = 1 writes through a possibly temporary object,
# raises SettingWithCopyWarning and does nothing under pandas copy-on-write.
y_train['leave'] = (y_train['survival_time'] < 64).astype('int64')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [5]:
# Payment flag: 0 = no payment record, 1 = at least some payment
# (amount_spent greater than zero).
# The column is built in one vectorized assignment; the chained form
# y_train['amount'][mask] = 1 writes through a possibly temporary object,
# raises SettingWithCopyWarning and does nothing under pandas copy-on-write.
y_train['amount'] = (y_train['amount_spent'] > 0).astype('int64')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [6]:
from sklearn.model_selection import StratifiedShuffleSplit

# Hold out 25% of the labelled rows, stratified on the churn flag.
# random_state pins the shuffle so the partition is reproducible.
split = StratifiedShuffleSplit(n_splits = 1, test_size = 0.25, random_state = 42)

# n_splits is 1, so the generator yields exactly one (train, test) index pair.
train_index, test_index = next(split.split(y_train, y_train['leave']))

# split() yields positional row numbers, so rows are selected with .iloc;
# .loc is label-based and only matches while y_train keeps a default RangeIndex.
strat_y_train = y_train.iloc[train_index]
strat_y_test = y_train.iloc[test_index]

In [7]:
# Order both label partitions by account id (ascending is the default).
# y_train is rebound to the training partition; y_test is the held-out one.
# NOTE(review): later cells fit these labels against feature rows by position;
# presumably the preprocessed CSVs are already in acc_id order, confirm.
y_train = strat_y_train.sort_values(by = 'acc_id')
y_test = strat_y_test.sort_values(by = 'acc_id')

# 모델링

모델링 과정에서는 교차검증으로 얻은 최적 hyperparameter를 그대로 사용하지 않고 그 근처의 값을 사용하였습니다. 실제 평가 척도인 기대 이익을 기준으로 최적화했을 때 더 나은 결과를 보인 hyperparameter를 최종 값으로 선택하였기 때문입니다.

## 이탈 여부

### RandomForest

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Candidate settings for the churn random forest: 3 x 4 = 12 combinations.
param_grid = [
    {
        'n_estimators' : [300, 500, 750],
        'max_features' : [14, 15, 16, 17],
    }
]

# Base estimator with a fixed seed.
rf = RandomForestClassifier(random_state = 42)

# Exhaustive 5-fold search scored by accuracy.
grid_search = GridSearchCV(
    estimator = rf,
    param_grid = param_grid,
    scoring = 'accuracy',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Column 0 of X_train is skipped (presumably the acc_id key); the target is
# the churn flag.
grid_search.fit(X_train.iloc[:, 1:], y_train['leave'])

Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  60 | elapsed:  2.1min remaining:  2.4min
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:  3.8min finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=RandomForestClassifier(bootstrap=True, class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators='warn', n_jobs=None,
                                              oob_score=False, random_state=42,
                                              verbose=0, warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid=[{'m

In [9]:
# Best mean cross-validated accuracy, then the parameters that achieved it.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

0.7696333333333333
{'max_features': 16, 'n_estimators': 750}


In [10]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final churn random forest. The hyperparameters are hand-picked near the
# cross-validated optimum rather than copied from it (see the modelling note).
rf_leave_clf = RandomForestClassifier(random_state = 42, max_features = 15, n_estimators = 750, n_jobs = -1)
rf_leave_clf.fit(X_train.iloc[:, 1:], y_train['leave'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(rf_leave_clf, './rf_leave_clf.pkl')



['./rf_leave_clf.pkl']

### XGBoost

In [11]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV

# Candidate settings for the churn XGBoost model: 3 x 4 x 3 = 36 combinations.
param_grid = [
    {
        'learning_rate' : [0.001, 0.01, 0.1],
        'max_depth' : [13, 14, 15, 16],
        'gamma' : [0, 1, 3],
    }
]

# Base estimator with a fixed seed.
xgb = XGBClassifier(random_state = 42)

# Exhaustive 5-fold search scored by accuracy.
grid_search = GridSearchCV(
    estimator = xgb,
    param_grid = param_grid,
    scoring = 'accuracy',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Column 0 of X_train is skipped; the target is the churn flag.
grid_search.fit(X_train.iloc[:, 1:], y_train['leave'])

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  98 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:  6.9min finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=XGBClassifier(base_score=0.5, booster='gbtree',
                                     colsample_bylevel=1, colsample_bynode=1,
                                     colsample_bytree=1, gamma=0,
                                     learning_rate=0.1, max_delta_step=0,
                                     max_depth=3, min_child_weight=1,
                                     missing=None, n_estimators=100, n_jobs=1,
                                     nthread=None, objective='binary:logistic',
                                     random_state=42, reg_alpha=0, reg_lambda=1,
                                     scale_pos_weight=1, seed=None, silent=None,
                                     subsample=1, verbosity=1),
             iid='warn', n_jobs=-1,
             param_grid=[{'gamma': [0, 1, 3],
                          'learning_rate': [0.001, 0.01, 0.1],
                          'max_depth': [13, 14, 15, 16]}],


In [12]:
# Best mean cross-validated accuracy, then the parameters that achieved it.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

0.7633333333333333
{'gamma': 0, 'learning_rate': 0.1, 'max_depth': 14}


In [13]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final churn XGBoost classifier. The hyperparameters are hand-picked near the
# cross-validated optimum rather than copied from it (see the modelling note).
xgb_leave_clf = XGBClassifier(random_state = 42, learning_rate = 0.1, gamma = 3, max_depth = 14, n_jobs = -1)
xgb_leave_clf.fit(X_train.iloc[:, 1:], y_train['leave'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(xgb_leave_clf, './xgb_leave_clf.pkl')

['./xgb_leave_clf.pkl']

### Ensemble

In [14]:
from sklearn.ensemble import VotingClassifier

# Soft-voting combination of the churn random forest and churn XGBoost models.
ensemble_leave_clf = VotingClassifier(
    voting = 'soft',
    estimators = [
        ('rf', rf_leave_clf),
        ('xgb', xgb_leave_clf),
    ],
    n_jobs = -1,
)

# Column 0 of X_train is skipped; the target is the churn flag.
ensemble_leave_clf.fit(X_train.iloc[:, 1:], y_train['leave'])

VotingClassifier(estimators=[('rf',
                              RandomForestClassifier(bootstrap=True,
                                                     class_weight=None,
                                                     criterion='gini',
                                                     max_depth=None,
                                                     max_features=15,
                                                     max_leaf_nodes=None,
                                                     min_impurity_decrease=0.0,
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                                     min_samples_split=2,
                                                     min_weight_fraction_leaf=0.0,
                                                     n_estimators=750,
                                                     n_jobs=-1, oob_score=False,

In [15]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Persist the fitted churn ensemble; dump() returns the list of files written.
joblib.dump(ensemble_leave_clf, './ensemble_leave_clf.pkl')

['./ensemble_leave_clf.pkl']

## 결제 여부

### RandomForest

In [16]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Candidate settings for the payment random forest: 3 x 4 = 12 combinations.
param_grid = [
    {
        'n_estimators' : [300, 500, 750],
        'max_features' : [11, 12, 13, 14],
    }
]

# Base estimator with a fixed seed.
rf = RandomForestClassifier(random_state = 42)

# Exhaustive 5-fold search scored by accuracy.
grid_search = GridSearchCV(
    estimator = rf,
    param_grid = param_grid,
    scoring = 'accuracy',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Column 0 of X_train is skipped; the target is the payment flag.
grid_search.fit(X_train.iloc[:, 1:], y_train['amount'])

Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  60 | elapsed:  1.8min remaining:  2.0min
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:  3.2min finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=RandomForestClassifier(bootstrap=True, class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators='warn', n_jobs=None,
                                              oob_score=False, random_state=42,
                                              verbose=0, warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid=[{'m

In [17]:
# Best mean cross-validated accuracy, then the parameters that achieved it.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

0.8580333333333333
{'max_features': 13, 'n_estimators': 300}


In [18]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final payment random forest. The hyperparameters are hand-picked near the
# cross-validated optimum rather than copied from it (see the modelling note).
rf_amount_clf = RandomForestClassifier(random_state = 42, max_features = 11, n_estimators = 750, n_jobs = -1)
rf_amount_clf.fit(X_train.iloc[:, 1:], y_train['amount'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(rf_amount_clf, './rf_amount_clf.pkl')

['./rf_amount_clf.pkl']

### XGBoost 

In [19]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV

# Candidate settings for the payment XGBoost model: 3 x 4 x 3 = 36 combinations.
param_grid = [
    {
        'learning_rate' : [0.001, 0.01, 0.1],
        'max_depth' : [10, 11, 12, 13],
        'gamma' : [0, 1, 3],
    }
]

# Base estimator with a fixed seed.
xgb = XGBClassifier(random_state = 42)

# Exhaustive 5-fold search scored by accuracy.
grid_search = GridSearchCV(
    estimator = xgb,
    param_grid = param_grid,
    scoring = 'accuracy',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Column 0 of X_train is skipped; the target is the payment flag.
grid_search.fit(X_train.iloc[:, 1:], y_train['amount'])

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  98 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:  5.1min finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=XGBClassifier(base_score=0.5, booster='gbtree',
                                     colsample_bylevel=1, colsample_bynode=1,
                                     colsample_bytree=1, gamma=0,
                                     learning_rate=0.1, max_delta_step=0,
                                     max_depth=3, min_child_weight=1,
                                     missing=None, n_estimators=100, n_jobs=1,
                                     nthread=None, objective='binary:logistic',
                                     random_state=42, reg_alpha=0, reg_lambda=1,
                                     scale_pos_weight=1, seed=None, silent=None,
                                     subsample=1, verbosity=1),
             iid='warn', n_jobs=-1,
             param_grid=[{'gamma': [0, 1, 3],
                          'learning_rate': [0.001, 0.01, 0.1],
                          'max_depth': [10, 11, 12, 13]}],


In [20]:
# Best mean cross-validated accuracy, then the parameters that achieved it.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

0.8575
{'gamma': 0, 'learning_rate': 0.1, 'max_depth': 11}


In [21]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final payment XGBoost classifier. The hyperparameters are hand-picked near the
# cross-validated optimum rather than copied from it (see the modelling note).
xgb_amount_clf = XGBClassifier(random_state = 42, learning_rate = 0.1, gamma = 3, max_depth = 10, n_jobs = -1)
xgb_amount_clf.fit(X_train.iloc[:, 1:], y_train['amount'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(xgb_amount_clf, './xgb_amount_clf.pkl')

['./xgb_amount_clf.pkl']

### Ensemble

In [22]:
from sklearn.ensemble import VotingClassifier

# Soft-voting combination of the payment random forest and payment XGBoost models.
ensemble_amount_clf = VotingClassifier(
    voting = 'soft',
    estimators = [
        ('rf', rf_amount_clf),
        ('xgb', xgb_amount_clf),
    ],
    n_jobs = -1,
)

# Column 0 of X_train is skipped; the target is the payment flag.
ensemble_amount_clf.fit(X_train.iloc[:, 1:], y_train['amount'])

VotingClassifier(estimators=[('rf',
                              RandomForestClassifier(bootstrap=True,
                                                     class_weight=None,
                                                     criterion='gini',
                                                     max_depth=None,
                                                     max_features=11,
                                                     max_leaf_nodes=None,
                                                     min_impurity_decrease=0.0,
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                                     min_samples_split=2,
                                                     min_weight_fraction_leaf=0.0,
                                                     n_estimators=750,
                                                     n_jobs=-1, oob_score=False,

In [23]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Persist the fitted payment ensemble; dump() returns the list of files written.
joblib.dump(ensemble_amount_clf, './ensemble_amount_clf.pkl')

['./ensemble_amount_clf.pkl']

## 추가 생존 기간

In [24]:
# Attach the label columns to the feature rows by account id. A left join
# keeps every feature row, including any without a matching label.
train = X_train.merge(y_train, how = 'left', on = 'acc_id')

In [25]:
# Rows flagged as churned: used to fit the survival-time regressors.
train_leave = train.loc[train['leave'] == 1]

# Rows flagged as both churned and paying: used to fit the amount regressors.
train_amount = train.loc[
    (train['leave'] == 1)
    & (train['amount'] == 1)
]

### RandomForest

In [26]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Candidate settings for the survival-time random forest: 3 x 4 = 12 combinations.
param_grid = [
    {
        'n_estimators' : [300, 500, 750],
        'max_features' : [16, 17, 18, 19],
    }
]

# Base estimator with a fixed seed.
rf = RandomForestRegressor(random_state = 42)

# Exhaustive 5-fold search scored by negated mean squared error.
grid_search = GridSearchCV(
    estimator = rf,
    param_grid = param_grid,
    scoring = 'neg_mean_squared_error',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Features: everything except the first column and the last four columns
# (presumably acc_id and the label columns added by the merge; confirm).
grid_search.fit(train_leave.iloc[:, 1:-4], train_leave['survival_time'])

Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  60 | elapsed:  1.1min remaining:  1.2min
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:  1.9min finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=RandomForestRegressor(bootstrap=True, criterion='mse',
                                             max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators='warn', n_jobs=None,
                                             oob_score=False, random_state=42,
                                             verbose=0, warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid=[{'max_features': [16, 17, 18, 19],


In [27]:
# Best mean cross-validated score (negated MSE), then the winning parameters.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

-201.72938098608364
{'max_features': 17, 'n_estimators': 750}


In [28]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final survival-time random forest. The hyperparameters are hand-picked near
# the cross-validated optimum rather than copied from it (see the modelling note).
rf_leave_reg = RandomForestRegressor(random_state = 42, max_features = 18, n_estimators = 750, n_jobs = -1)
rf_leave_reg.fit(train_leave.iloc[:, 1:-4], train_leave['survival_time'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(rf_leave_reg, './rf_leave_reg.pkl')

['./rf_leave_reg.pkl']

### XGBoost

In [29]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV

# Candidate settings for the survival-time XGBoost model: 3 x 4 x 3 = 36 combinations.
param_grid = [
    {
        'learning_rate' : [0.001, 0.01, 0.1],
        'max_depth' : [7, 8, 9, 10],
        'gamma' : [1, 3, 5],
    }
]

# Base estimator with a fixed seed.
xgb = XGBRegressor(random_state = 42)

# Exhaustive 5-fold search scored by negated mean squared error.
grid_search = GridSearchCV(
    estimator = xgb,
    param_grid = param_grid,
    scoring = 'neg_mean_squared_error',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Features: everything except the first column and the last four columns.
grid_search.fit(train_leave.iloc[:, 1:-4], train_leave['survival_time'])

Fitting 5 folds for each of 36 candidates, totalling 180 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  98 tasks      | elapsed:   53.6s
[Parallel(n_jobs=-1)]: Done 180 out of 180 | elapsed:  1.5min finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \




GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=XGBRegressor(base_score=0.5, booster='gbtree',
                                    colsample_bylevel=1, colsample_bynode=1,
                                    colsample_bytree=1, gamma=0,
                                    importance_type='gain', learning_rate=0.1,
                                    max_delta_step=0, max_depth=3,
                                    min_child_weight=1, missing=None,
                                    n_estimators=100, n_jobs=1, nthread=None,
                                    objective='reg:linear', random_state=42,
                                    reg_alpha=0, reg_lambda=1,
                                    scale_pos_weight=1, seed=None, silent=None,
                                    subsample=1, verbosity=1),
             iid='warn', n_jobs=-1,
             param_grid=[{'gamma': [1, 3, 5],
                          'learning_rate': [0.001, 0.01, 0.1],
              

In [30]:
# Best mean cross-validated score (negated MSE), then the winning parameters.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

-208.996915600758
{'gamma': 3, 'learning_rate': 0.1, 'max_depth': 9}


In [31]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final survival-time XGBoost regressor. The hyperparameters are hand-picked near
# the cross-validated optimum rather than copied from it (see the modelling note).
xgb_leave_reg = XGBRegressor(random_state = 42, learning_rate = 0.1, gamma = 5, max_depth = 8, n_jobs = -1)
xgb_leave_reg.fit(train_leave.iloc[:, 1:-4], train_leave['survival_time'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(xgb_leave_reg, './xgb_leave_reg.pkl')



['./xgb_leave_reg.pkl']

### ExtraTrees

In [32]:
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import GridSearchCV

# Candidate settings for the survival-time extra-trees model: 3 x 4 = 12 combinations.
param_grid = [
    {
        'n_estimators' : [300, 500, 750],
        'max_features' : [19, 20, 21, 22],
    }
]

# Base estimator with a fixed seed and bootstrap sampling switched on.
extree = ExtraTreesRegressor(bootstrap = True, random_state = 42)

# Exhaustive 5-fold search scored by negated mean squared error.
grid_search = GridSearchCV(
    estimator = extree,
    param_grid = param_grid,
    scoring = 'neg_mean_squared_error',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Features: everything except the first column and the last four columns.
grid_search.fit(train_leave.iloc[:, 1:-4], train_leave['survival_time'])

Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  60 | elapsed:   36.7s remaining:   42.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:   58.8s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=ExtraTreesRegressor(bootstrap=True, criterion='mse',
                                           max_depth=None, max_features='auto',
                                           max_leaf_nodes=None,
                                           min_impurity_decrease=0.0,
                                           min_impurity_split=None,
                                           min_samples_leaf=1,
                                           min_samples_split=2,
                                           min_weight_fraction_leaf=0.0,
                                           n_estimators='warn', n_jobs=None,
                                           oob_score=False, random_state=42,
                                           verbose=0, warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid=[{'max_features': [19, 20, 21, 22],
                          'n_estimators': [300, 500, 750]}],
      

In [33]:
# Best mean cross-validated score (negated MSE), then the winning parameters.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

-203.39616221817192
{'max_features': 21, 'n_estimators': 750}


In [34]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final survival-time extra-trees regressor. The hyperparameters are hand-picked
# near the cross-validated optimum rather than copied from it (see the modelling note).
extree_leave_reg = ExtraTreesRegressor(random_state = 42, bootstrap = True, max_features = 22, n_estimators = 750, n_jobs = -1)
extree_leave_reg.fit(train_leave.iloc[:, 1:-4], train_leave['survival_time'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(extree_leave_reg, './extree_leave_reg.pkl')

['./extree_leave_reg.pkl']

### ElasticNet

In [35]:
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV

# Candidate settings for the survival-time elastic net: 5 x 9 = 45 combinations.
param_grid = [
    {
        'alpha' : [0, 0.1, 1, 3, 5],
        'l1_ratio' : [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    }
]

# Base estimator with a fixed seed.
lr = ElasticNet(random_state = 42)

# Exhaustive 5-fold search scored by negated mean squared error.
grid_search = GridSearchCV(
    estimator = lr,
    param_grid = param_grid,
    scoring = 'neg_mean_squared_error',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Features: everything except the first column and the last four columns.
grid_search.fit(train_leave.iloc[:, 1:-4], train_leave['survival_time'])

Fitting 5 folds for each of 45 candidates, totalling 225 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  98 tasks      | elapsed:   10.7s
[Parallel(n_jobs=-1)]: Done 225 out of 225 | elapsed:   11.7s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True,
                                  l1_ratio=0.5, max_iter=1000, normalize=False,
                                  positive=False, precompute=False,
                                  random_state=42, selection='cyclic',
                                  tol=0.0001, warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid=[{'alpha': [0, 0.1, 1, 3, 5],
                          'l1_ratio': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8,
                                       0.9]}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=2)

In [36]:
# Best mean cross-validated score (negated MSE), then the winning parameters.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

-238.25250958102515
{'alpha': 0.1, 'l1_ratio': 0.4}


In [37]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final survival-time elastic net. The hyperparameters are hand-picked near the
# cross-validated optimum rather than copied from it (see the modelling note).
lr_leave_reg = ElasticNet(random_state = 42, alpha = 0.1, l1_ratio = 0.8)
lr_leave_reg.fit(train_leave.iloc[:, 1:-4], train_leave['survival_time'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(lr_leave_reg, './lr_leave_reg.pkl')

['./lr_leave_reg.pkl']

## 일 평균 결제 금액

### RandomForest

In [38]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Candidate settings for the amount_spent random forest: 3 x 4 = 12 combinations.
param_grid = [
    {
        'n_estimators' : [300, 500, 750],
        'max_features' : [5, 6, 7, 8],
    }
]

# Base estimator with a fixed seed.
rf = RandomForestRegressor(random_state = 42)

# Exhaustive 5-fold search scored by negated mean squared error.
grid_search = GridSearchCV(
    estimator = rf,
    param_grid = param_grid,
    scoring = 'neg_mean_squared_error',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Features: everything except the first column and the last four columns
# (presumably acc_id and the label columns added by the merge; confirm).
grid_search.fit(train_amount.iloc[:, 1:-4], train_amount['amount_spent'])

Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  60 | elapsed:   14.2s remaining:   16.2s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:   29.3s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=RandomForestRegressor(bootstrap=True, criterion='mse',
                                             max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators='warn', n_jobs=None,
                                             oob_score=False, random_state=42,
                                             verbose=0, warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid=[{'max_features': [5, 6, 7, 8],
    

In [39]:
# Best mean cross-validated score (negated MSE), then the winning parameters.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

-1.9007880353495359
{'max_features': 6, 'n_estimators': 500}


In [40]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final amount_spent random forest. The hyperparameters are hand-picked near the
# cross-validated optimum rather than copied from it (see the modelling note).
rf_amount_reg = RandomForestRegressor(random_state = 42, max_features = 7, n_estimators = 500, n_jobs = -1)
rf_amount_reg.fit(train_amount.iloc[:, 1:-4], train_amount['amount_spent'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(rf_amount_reg, './rf_amount_reg.pkl')

['./rf_amount_reg.pkl']

### XGBoost

In [41]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV

# Candidate settings for the amount_spent XGBoost model: 3 x 4 x 4 = 48 combinations.
param_grid = [
    {
        'learning_rate' : [0.001, 0.01, 0.1],
        'max_depth' : [1, 2, 3, 4],
        'gamma' : [0, 1, 3, 5],
    }
]

# Base estimator with a fixed seed.
xgb = XGBRegressor(random_state = 42)

# Exhaustive 5-fold search scored by negated mean squared error.
grid_search = GridSearchCV(
    estimator = xgb,
    param_grid = param_grid,
    scoring = 'neg_mean_squared_error',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Features: everything except the first column and the last four columns.
grid_search.fit(train_amount.iloc[:, 1:-4], train_amount['amount_spent'])

Fitting 5 folds for each of 48 candidates, totalling 240 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  98 tasks      | elapsed:    7.6s
[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed:   17.4s finished
  if getattr(data, 'base', None) is not None and \
  data.base is not None and isinstance(data, np.ndarray) \




GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=XGBRegressor(base_score=0.5, booster='gbtree',
                                    colsample_bylevel=1, colsample_bynode=1,
                                    colsample_bytree=1, gamma=0,
                                    importance_type='gain', learning_rate=0.1,
                                    max_delta_step=0, max_depth=3,
                                    min_child_weight=1, missing=None,
                                    n_estimators=100, n_jobs=1, nthread=None,
                                    objective='reg:linear', random_state=42,
                                    reg_alpha=0, reg_lambda=1,
                                    scale_pos_weight=1, seed=None, silent=None,
                                    subsample=1, verbosity=1),
             iid='warn', n_jobs=-1,
             param_grid=[{'gamma': [0, 1, 3, 5],
                          'learning_rate': [0.001, 0.01, 0.1],
           

In [42]:
# Best mean cross-validated score (negated MSE), then the winning parameters.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

-1.9764363700601124
{'gamma': 0, 'learning_rate': 0.01, 'max_depth': 2}


In [43]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final amount_spent XGBoost regressor. The hyperparameters are hand-picked near
# the cross-validated optimum rather than copied from it (see the modelling note).
xgb_amount_reg = XGBRegressor(random_state = 42, learning_rate = 0.01, gamma = 3, max_depth = 2, n_jobs = -1)
xgb_amount_reg.fit(train_amount.iloc[:, 1:-4], train_amount['amount_spent'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(xgb_amount_reg, './xgb_amount_reg.pkl')



['./xgb_amount_reg.pkl']

### ExtraTrees

In [44]:
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import GridSearchCV

# Candidate settings for the amount_spent extra-trees model: 3 x 4 = 12 combinations.
param_grid = [
    {
        'n_estimators' : [300, 500, 750],
        'max_features' : [6, 7, 8, 9],
    }
]

# Base estimator with a fixed seed and bootstrap sampling switched on.
extree = ExtraTreesRegressor(bootstrap = True, random_state = 42)

# Exhaustive 5-fold search scored by negated mean squared error.
grid_search = GridSearchCV(
    estimator = extree,
    param_grid = param_grid,
    scoring = 'neg_mean_squared_error',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Features: everything except the first column and the last four columns.
grid_search.fit(train_amount.iloc[:, 1:-4], train_amount['amount_spent'])

Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  60 | elapsed:    8.1s remaining:    9.2s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:   13.9s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=ExtraTreesRegressor(bootstrap=True, criterion='mse',
                                           max_depth=None, max_features='auto',
                                           max_leaf_nodes=None,
                                           min_impurity_decrease=0.0,
                                           min_impurity_split=None,
                                           min_samples_leaf=1,
                                           min_samples_split=2,
                                           min_weight_fraction_leaf=0.0,
                                           n_estimators='warn', n_jobs=None,
                                           oob_score=False, random_state=42,
                                           verbose=0, warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid=[{'max_features': [6, 7, 8, 9],
                          'n_estimators': [300, 500, 750]}],
          

In [45]:
# Best mean cross-validated score (negated MSE), then the winning parameters.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

-1.9511252908936363
{'max_features': 7, 'n_estimators': 750}


In [46]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final amount_spent extra-trees regressor. The hyperparameters are hand-picked
# near the cross-validated optimum rather than copied from it (see the modelling note).
extree_amount_reg = ExtraTreesRegressor(random_state = 42, bootstrap = True, max_features = 7, n_estimators = 300, n_jobs = -1)
extree_amount_reg.fit(train_amount.iloc[:, 1:-4], train_amount['amount_spent'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(extree_amount_reg, './extree_amount_reg.pkl')

['./extree_amount_reg.pkl']

### ElasticNet

In [47]:
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV

# Candidate settings for the amount_spent elastic net: 5 x 9 = 45 combinations.
param_grid = [
    {
        'alpha' : [0, 0.1, 1, 3, 5],
        'l1_ratio' : [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
    }
]

# Base estimator with a fixed seed.
lr = ElasticNet(random_state = 42)

# Exhaustive 5-fold search scored by negated mean squared error.
grid_search = GridSearchCV(
    estimator = lr,
    param_grid = param_grid,
    scoring = 'neg_mean_squared_error',
    cv = 5,
    n_jobs = -1,
    verbose = 2,
)

# Features: everything except the first column and the last four columns.
grid_search.fit(train_amount.iloc[:, 1:-4], train_amount['amount_spent'])

Fitting 5 folds for each of 45 candidates, totalling 225 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done  98 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-1)]: Done 225 out of 225 | elapsed:    4.0s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True,
                                  l1_ratio=0.5, max_iter=1000, normalize=False,
                                  positive=False, precompute=False,
                                  random_state=42, selection='cyclic',
                                  tol=0.0001, warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid=[{'alpha': [0, 0.1, 1, 3, 5],
                          'l1_ratio': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8,
                                       0.9]}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=2)

In [48]:
# Best mean cross-validated score (negated MSE), then the winning parameters.
print(grid_search.best_score_, grid_search.best_params_, sep = '\n')

-2.0195345870598507
{'alpha': 0.1, 'l1_ratio': 0.3}


In [49]:
# joblib is imported directly: the sklearn.externals.joblib alias is deprecated
# since scikit-learn 0.21 and removed in 0.23.
import joblib

# Final amount_spent elastic net. The hyperparameters are hand-picked near the
# cross-validated optimum rather than copied from it (see the modelling note).
lr_amount_reg = ElasticNet(random_state = 42, alpha = 0.1, l1_ratio = 0.2)
lr_amount_reg.fit(train_amount.iloc[:, 1:-4], train_amount['amount_spent'])

# Persist the fitted model; dump() returns the list of files written.
joblib.dump(lr_amount_reg, './lr_amount_reg.pkl')

['./lr_amount_reg.pkl']

# 테스트

In [50]:
# The preprocessed test features are looked up in the 'preprocess' folder under
# the parent of the directory held in __file__ (bound to os.getcwd() earlier in
# this notebook). os.path.dirname replaces the manual split on '/', which
# returned the wrong directory with Windows separators and an empty string
# for a directory directly under the filesystem root.
path_data_test = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'preprocess', 'test_preprocess_2.csv')

In [51]:
# Read the preprocessed held-out features with pandas' pure-Python CSV parser.
X_test = pd.read_csv(filepath_or_buffer = path_data_test, engine = 'python')

## 이탈 여부

### RandomForest

In [52]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix

# Churn predictions of the random forest on the held-out features
# (column 0 of X_test is skipped).
y_pred_rf = rf_leave_clf.predict(X_test.iloc[:, 1:])

# One "<label> <value>" line per metric.
print('\n'.join(
    ' '.join((tag, str(score_fn(y_test['leave'], y_pred_rf))))
    for tag, score_fn in (
        ('acc :', accuracy_score),
        ('recall :', recall_score),
        ('precision :', precision_score),
        ('f1 score :', f1_score),
    )
))

# Last expression of the cell, so the confusion matrix is displayed.
confusion_matrix(y_test['leave'], y_pred_rf)

acc : 0.7726
recall : 0.7065096645189958
precision : 0.7694168884587467
f1 score : 0.7366226546212647


array([[4546,  953],
       [1321, 3180]])

### XGBoost

In [53]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix

# Churn ('leave') predictions of the XGBoost classifier on the evaluation rows.
y_pred_xgb = xgb_leave_clf.predict(X_test.iloc[:, 1:])
# Print each metric as "<label> : <value>", in the same order as before.
for _label, _metric in (('acc', accuracy_score), ('recall', recall_score),
                        ('precision', precision_score), ('f1 score', f1_score)):
    print(_label, ':', _metric(y_test['leave'], y_pred_xgb))
# Last expression of the cell, so the notebook renders the matrix as its result.
confusion_matrix(y_test['leave'], y_pred_xgb)

acc : 0.7631
recall : 0.705620973117085
precision : 0.75260663507109
f1 score : 0.7283568398119481


array([[4455, 1044],
       [1325, 3176]])

### Ensemble

In [54]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix

# Churn ('leave') predictions of the ensemble classifier on the evaluation rows.
y_pred_ensemble = ensemble_leave_clf.predict(X_test.iloc[:, 1:])
# Print each metric as "<label> : <value>", in the same order as before.
for _label, _metric in (('acc', accuracy_score), ('recall', recall_score),
                        ('precision', precision_score), ('f1 score', f1_score)):
    print(_label, ':', _metric(y_test['leave'], y_pred_ensemble))
# Last expression of the cell, so the notebook renders the matrix as its result.
confusion_matrix(y_test['leave'], y_pred_ensemble)

acc : 0.7706
recall : 0.7073983559209065
precision : 0.7652006729151646
f1 score : 0.7351650888940199


array([[4522,  977],
       [1317, 3184]])

## 결제 여부

### RandomForest

In [55]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix

# Payment-flag ('amount') predictions of the random-forest classifier on the evaluation rows.
y_pred_rf = rf_amount_clf.predict(X_test.iloc[:, 1:])
# Print each metric as "<label> : <value>", in the same order as before.
for _label, _metric in (('acc', accuracy_score), ('recall', recall_score),
                        ('precision', precision_score), ('f1 score', f1_score)):
    print(_label, ':', _metric(y_test['amount'], y_pred_rf))
# Last expression of the cell, so the notebook renders the matrix as its result.
confusion_matrix(y_test['amount'], y_pred_rf)

acc : 0.8574
recall : 0.921885753613214
precision : 0.8464454976303317
f1 score : 0.8825564157469937


array([[3216,  972],
       [ 454, 5358]])

### XGBoost

In [56]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix

# Payment-flag ('amount') predictions of the XGBoost classifier on the evaluation rows.
y_pred_xgb = xgb_amount_clf.predict(X_test.iloc[:, 1:])
# Print each metric as "<label> : <value>", in the same order as before.
for _label, _metric in (('acc', accuracy_score), ('recall', recall_score),
                        ('precision', precision_score), ('f1 score', f1_score)):
    print(_label, ':', _metric(y_test['amount'], y_pred_xgb))
# Last expression of the cell, so the notebook renders the matrix as its result.
confusion_matrix(y_test['amount'], y_pred_xgb)

acc : 0.8561
recall : 0.9091534755677908
precision : 0.8529459241323648
f1 score : 0.8801532439410343


array([[3277,  911],
       [ 528, 5284]])

### Ensemble

In [57]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix

# Payment-flag ('amount') predictions of the ensemble classifier on the evaluation rows.
y_pred_ensemble = ensemble_amount_clf.predict(X_test.iloc[:, 1:])
# Print each metric as "<label> : <value>", in the same order as before.
for _label, _metric in (('acc', accuracy_score), ('recall', recall_score),
                        ('precision', precision_score), ('f1 score', f1_score)):
    print(_label, ':', _metric(y_test['amount'], y_pred_ensemble))
# Last expression of the cell, so the notebook renders the matrix as its result.
confusion_matrix(y_test['amount'], y_pred_ensemble)

acc : 0.8572
recall : 0.9172401927047488
precision : 0.849155782096209
f1 score : 0.8818858560794044


array([[3241,  947],
       [ 481, 5331]])

## 추가 생존 기간

### RandomForest

In [58]:
from sklearn.metrics import mean_squared_error

# Survival-time regression, random forest: MSE against y_test['survival_time'].
# Column 0 of X_test is excluded from the inputs (it is used as acc_id elsewhere in the notebook).
# NOTE(review): y_pred_rf is rebound here after holding classifier predictions in earlier cells.
y_pred_rf = rf_leave_reg.predict(X_test.iloc[:, 1:])
print('mse :', mean_squared_error(y_test['survival_time'], y_pred_rf))

mse : 823.7027374009992


### XGBoost

In [59]:
from sklearn.metrics import mean_squared_error

# Survival-time regression, XGBoost: MSE against y_test['survival_time'].
# Column 0 of X_test is excluded from the inputs (it is used as acc_id elsewhere in the notebook).
y_pred_xgb = xgb_leave_reg.predict(X_test.iloc[:, 1:])
print('mse :', mean_squared_error(y_test['survival_time'], y_pred_xgb))

mse : 830.4630963338064


### ExtraTrees

In [60]:
from sklearn.metrics import mean_squared_error

# Survival-time regression, ExtraTrees: MSE against y_test['survival_time'].
# Column 0 of X_test is excluded from the inputs (it is used as acc_id elsewhere in the notebook).
y_pred_extree = extree_leave_reg.predict(X_test.iloc[:, 1:])
print('mse :', mean_squared_error(y_test['survival_time'], y_pred_extree))

mse : 840.4614706520662


### ElasticNet

In [61]:
from sklearn.metrics import mean_squared_error

# Survival-time regression, ElasticNet: MSE against y_test['survival_time'].
# Column 0 of X_test is excluded from the inputs (it is used as acc_id elsewhere in the notebook).
y_pred_lr = lr_leave_reg.predict(X_test.iloc[:, 1:])
print('mse :', mean_squared_error(y_test['survival_time'], y_pred_lr))

mse : 835.941754613301


## 일 평균 결제 금액

### RandomForest

In [62]:
from sklearn.metrics import mean_squared_error

# Daily-spend regression, random forest: MSE against y_test['amount_spent'].
# Column 0 of X_test is excluded from the inputs (it is used as acc_id elsewhere in the notebook).
y_pred_rf = rf_amount_reg.predict(X_test.iloc[:, 1:])
print('mse :', mean_squared_error(y_test['amount_spent'], y_pred_rf))

mse : 0.8968534380831418


### XGBoost

In [63]:
from sklearn.metrics import mean_squared_error

# Daily-spend regression, XGBoost: MSE against y_test['amount_spent'].
# Column 0 of X_test is excluded from the inputs (it is used as acc_id elsewhere in the notebook).
y_pred_xgb = xgb_amount_reg.predict(X_test.iloc[:, 1:])
print('mse :', mean_squared_error(y_test['amount_spent'], y_pred_xgb))

mse : 0.7044065196327494


### ExtraTrees

In [64]:
from sklearn.metrics import mean_squared_error

# Daily-spend regression, ExtraTrees: MSE against y_test['amount_spent'].
# Column 0 of X_test is excluded from the inputs (it is used as acc_id elsewhere in the notebook).
y_pred_extree = extree_amount_reg.predict(X_test.iloc[:, 1:])
print('mse :', mean_squared_error(y_test['amount_spent'], y_pred_extree))

mse : 0.8328581870360369


### ElasticNet

In [65]:
from sklearn.metrics import mean_squared_error

# Daily-spend regression, ElasticNet: MSE against y_test['amount_spent'].
# NOTE(review): lr_amount_reg was fitted on train_amount, while this score uses
# every row of X_test — confirm the two populations are meant to differ.
y_pred_lr = lr_amount_reg.predict(X_test.iloc[:, 1:])
print('mse :', mean_squared_error(y_test['amount_spent'], y_pred_lr))

mse : 0.7004694475856131


# 베이지안 최적화

In [66]:
# Use the standalone joblib package: the copy vendored under sklearn.externals
# was deprecated in scikit-learn 0.21 and removed in 0.23.
try:
    import joblib
except ImportError:  # environment that only ships the vendored copy
    from sklearn.externals import joblib

# Reload the fitted models from the working directory so this section can run
# without re-training. joblib.load unpickles arbitrary Python objects, so only
# point it at .pkl files produced by this notebook.

# Classifiers: churn flag and payment flag.
ensemble_leave_clf = joblib.load('./ensemble_leave_clf.pkl')

ensemble_amount_clf = joblib.load('./ensemble_amount_clf.pkl')

# Survival-time regressors (blended with w1..w4 in predict()).
rf_leave_reg = joblib.load('./rf_leave_reg.pkl')
xgb_leave_reg = joblib.load('./xgb_leave_reg.pkl')
extree_leave_reg = joblib.load('./extree_leave_reg.pkl')
lr_leave_reg = joblib.load('./lr_leave_reg.pkl')

# Daily-spend regressors (blended with w5..w8 in predict()).
rf_amount_reg = joblib.load('./rf_amount_reg.pkl')
xgb_amount_reg = joblib.load('./xgb_amount_reg.pkl')
extree_amount_reg = joblib.load('./extree_amount_reg.pkl')
lr_amount_reg = joblib.load('./lr_amount_reg.pkl')



In [67]:
# Write the ground-truth file that black_box_function hands to score_function as './true.csv'.
# Only the first three label columns are kept
# (assumes they are acc_id, survival_time, amount_spent — TODO confirm against train_label.csv).
y_test.iloc[:, :3].to_csv('./true.csv', index = False)

In [68]:
def predict(model1, model2, model3, model4, model5, model6, model7, model8, model9, model10, data,
            w1, w2, w3, w4, w5, w6, w7, w8):
    '''
    Combine two classifiers and two weighted regressor blends into one prediction table.

    model1 : churn classifier; rows it labels 0 get the maximum survival time (64)
    model2 : payment classifier; rows it labels 0 get an amount of 0
    model3~6 : survival-time regressors, blended with weights w1~w4
    model7~10 : daily-spend regressors, blended with weights w5~w8
    data : DataFrame whose first column is the account id; every remaining
           column is passed unchanged to each model's predict()
    w1~w4 : weights of the survival-time regressors (used as given, not normalised)
    w5~w8 : weights of the daily-spend regressors (used as given, not normalised)

    Returns a DataFrame with columns
        acc_id        : int32, taken from the first column of data
        survival_time : int32, rounded and limited to the range [1, 64]
        amount_spent  : float, never negative
    '''

    # Slice the feature columns once instead of once per model call.
    features = data.iloc[:, 1:]

    leave_flag = model1.predict(features)
    pay_flag = model2.predict(features)

    # Weighted sums, accumulated in the order model3..model6 and model7..model10.
    survival = sum(weight * model.predict(features)
                   for weight, model in zip((w1, w2, w3, w4), (model3, model4, model5, model6)))
    spend = sum(weight * model.predict(features)
                for weight, model in zip((w5, w6, w7, w8), (model7, model8, model9, model10)))
    survival = np.ravel(np.asarray(survival, dtype = float))
    spend = np.ravel(np.asarray(spend, dtype = float))

    # The classifiers override the regressors: flag 0 means 64 days / no spend.
    survival[leave_flag == 0] = 64
    spend[pay_flag == 0] = 0

    # Keep both targets inside their valid ranges.
    survival = np.clip(survival, 1, 64)
    spend[spend <= 0] = 0

    # Build the frame column by column so acc_id is never converted to float.
    df = pd.DataFrame({'acc_id' : data.iloc[:, 0].values,
                       'survival_time' : survival,
                       'amount_spent' : spend},
                      columns = ['acc_id', 'survival_time', 'amount_spent'])
    df['acc_id'] = df['acc_id'].astype('int32')
    df['survival_time'] = df['survival_time'].round().astype('int32')

    return df

In [69]:
from score_function import score_function

def black_box_function(w1, w2, w3, w4, w5, w6, w7, w8):
    '''
    Objective for the Bayesian optimiser: score one set of blending weights.

    w1~w4 weight the survival-time regressors and w5~w8 the daily-spend
    regressors. The blended prediction for X_test is written to './y_pred.csv'
    and scored by score_function against './true.csv'; that score is returned.

    NOTE(review): './true.csv' is written from y_test, the same rows the test
    section reports metrics on — confirm the weights are not also evaluated there.
    '''
    survival_regressors = (rf_leave_reg, xgb_leave_reg, extree_leave_reg, lr_leave_reg)
    spend_regressors = (rf_amount_reg, xgb_amount_reg, extree_amount_reg, lr_amount_reg)

    # Arguments are passed positionally, in predict()'s order: model1..model10, data, w1..w8.
    blended = predict(ensemble_leave_clf, ensemble_amount_clf,
                      *survival_regressors, *spend_regressors,
                      X_test,
                      w1, w2, w3, w4, w5, w6, w7, w8)

    # score_function takes file paths, so the prediction goes through disk.
    blended.to_csv('./y_pred.csv', index = False)
    return score_function('./y_pred.csv', './true.csv')

In [70]:
from bayes_opt import BayesianOptimization

# Search range per weight: w1..w4 (survival-time blend) in (0, 1),
# w5..w8 (daily-spend blend) in (0.05, 1.5).
pbounds = dict(
    [('w%d' % i, (0, 1)) for i in range(1, 5)]
    + [('w%d' % i, (0.05, 1.5)) for i in range(5, 9)]
)

# Optimiser over the eight blending weights: it maximises the score returned by
# black_box_function inside pbounds. The seed is fixed at 42.
optimizer = BayesianOptimization(
    f = black_box_function,
    pbounds = pbounds,
    random_state = 42,
)

In [71]:
# Run the search with init_points = 50 and n_iter = 500 (the recorded log below
# reaches iteration 531). Every evaluation calls black_box_function, which runs
# all ten models on X_test and rewrites ./y_pred.csv, so this cell is slow.
optimizer.maximize(
    init_points = 50,
    n_iter = 500
)

|   iter    |  target   |    w1     |    w2     |    w3     |    w4     |    w5     |    w6     |    w7     |    w8     |
-------------------------------------------------------------------------------------------------------------------------
1681.2155036820757
| [0m 1       [0m | [0m 1.681e+0[0m | [0m 0.3745  [0m | [0m 0.9507  [0m | [0m 0.732   [0m | [0m 0.5987  [0m | [0m 0.2762  [0m | [0m 0.2762  [0m | [0m 0.1342  [0m | [0m 1.306   [0m |
2017.3409292846748
| [95m 2       [0m | [95m 2.017e+0[0m | [95m 0.6011  [0m | [95m 0.7081  [0m | [95m 0.02058 [0m | [95m 0.9699  [0m | [95m 1.257   [0m | [95m 0.3579  [0m | [95m 0.3136  [0m | [95m 0.3159  [0m |
3965.5452909079777
| [95m 3       [0m | [95m 3.966e+0[0m | [95m 0.3042  [0m | [95m 0.5248  [0m | [95m 0.4319  [0m | [95m 0.2912  [0m | [95m 0.9372  [0m | [95m 0.2523  [0m | [95m 0.4736  [0m | [95m 0.5812  [0m |
3020.3969795688736
| [0m 4       [0m | [0m 3.02e+03[0m | [0m 0.4561 

5487.981055590978
| [0m 37      [0m | [0m 5.488e+0[0m | [0m 0.2865  [0m | [0m 0.5908  [0m | [0m 0.0305  [0m | [0m 0.03735 [0m | [0m 1.243   [0m | [0m 0.5723  [0m | [0m 0.2342  [0m | [0m 0.8073  [0m |
3619.2252774975127
| [0m 38      [0m | [0m 3.619e+0[0m | [0m 0.77    [0m | [0m 0.2158  [0m | [0m 0.6229  [0m | [0m 0.08535 [0m | [0m 0.1249  [0m | [0m 0.8205  [0m | [0m 0.8339  [0m | [0m 0.9743  [0m |
2027.9800896917404
| [0m 39      [0m | [0m 2.028e+0[0m | [0m 0.7261  [0m | [0m 0.9759  [0m | [0m 0.5163  [0m | [0m 0.323   [0m | [0m 1.203   [0m | [0m 0.4427  [0m | [0m 0.6865  [0m | [0m 0.1638  [0m |
1764.6129353090694
| [0m 40      [0m | [0m 1.765e+0[0m | [0m 0.02535 [0m | [0m 0.9626  [0m | [0m 0.836   [0m | [0m 0.696   [0m | [0m 0.643   [0m | [0m 0.3013  [0m | [0m 0.2768  [0m | [0m 0.4129  [0m |
2482.869637863869
| [0m 41      [0m | [0m 2.483e+0[0m | [0m 0.5492  [0m | [0m 0.7146  [0m | [0m 0.6602  [0m

5910.3626508223415
| [0m 74      [0m | [0m 5.91e+03[0m | [0m 0.0347  [0m | [0m 0.2602  [0m | [0m 0.1237  [0m | [0m 0.1116  [0m | [0m 0.5443  [0m | [0m 0.6747  [0m | [0m 1.308   [0m | [0m 0.7736  [0m |
5924.537392843931
| [0m 75      [0m | [0m 5.925e+0[0m | [0m 0.03645 [0m | [0m 0.3067  [0m | [0m 0.09293 [0m | [0m 0.1252  [0m | [0m 0.5837  [0m | [0m 0.6502  [0m | [0m 1.258   [0m | [0m 0.8015  [0m |
5924.729933797689
| [0m 76      [0m | [0m 5.925e+0[0m | [0m 0.04483 [0m | [0m 0.2828  [0m | [0m 0.1378  [0m | [0m 0.1256  [0m | [0m 0.5837  [0m | [0m 0.6413  [0m | [0m 1.273   [0m | [0m 0.7834  [0m |
5937.686650841916
| [95m 77      [0m | [95m 5.938e+0[0m | [95m 0.03605 [0m | [95m 0.3034  [0m | [95m 0.1105  [0m | [95m 0.08126 [0m | [95m 0.6253  [0m | [95m 0.6913  [0m | [95m 1.276   [0m | [95m 0.7769  [0m |
5920.734248582808
| [0m 78      [0m | [0m 5.921e+0[0m | [0m 0.03504 [0m | [0m 0.295   [0m | [0m 0.11

6221.894696294243
| [95m 111     [0m | [95m 6.222e+0[0m | [95m 0.0     [0m | [95m 0.517   [0m | [95m 0.0     [0m | [95m 0.04654 [0m | [95m 0.7054  [0m | [95m 0.8024  [0m | [95m 1.5     [0m | [95m 1.125   [0m |
6220.498132739546
| [0m 112     [0m | [0m 6.22e+03[0m | [0m 0.0     [0m | [0m 0.5176  [0m | [0m 0.0     [0m | [0m 0.04544 [0m | [0m 0.7045  [0m | [0m 0.8045  [0m | [0m 1.5     [0m | [0m 1.123   [0m |
6218.525731158271
| [0m 113     [0m | [0m 6.219e+0[0m | [0m 0.0     [0m | [0m 0.4996  [0m | [0m 0.0     [0m | [0m 0.05414 [0m | [0m 0.7174  [0m | [0m 0.7769  [0m | [0m 1.5     [0m | [0m 1.144   [0m |
6224.35840753046
| [95m 114     [0m | [95m 6.224e+0[0m | [95m 0.0     [0m | [95m 0.5166  [0m | [95m 0.0     [0m | [95m 0.04857 [0m | [95m 0.71    [0m | [95m 0.7996  [0m | [95m 1.5     [0m | [95m 1.132   [0m |
6242.428630253643
| [95m 115     [0m | [95m 6.242e+0[0m | [95m 0.0     [0m | [95m 0.5317  [0m

6397.618244893149
| [95m 148     [0m | [95m 6.398e+0[0m | [95m 0.0     [0m | [95m 0.5684  [0m | [95m 0.0     [0m | [95m 0.0     [0m | [95m 1.123   [0m | [95m 1.016   [0m | [95m 1.5     [0m | [95m 1.5     [0m |
6417.787603879385
| [95m 149     [0m | [95m 6.418e+0[0m | [95m 0.0     [0m | [95m 0.538   [0m | [95m 0.0     [0m | [95m 0.0     [0m | [95m 1.237   [0m | [95m 1.104   [0m | [95m 1.5     [0m | [95m 1.5     [0m |
6409.619330088153
| [0m 150     [0m | [0m 6.41e+03[0m | [0m 0.0     [0m | [0m 0.5473  [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.203   [0m | [0m 1.078   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6419.710287556044
| [95m 151     [0m | [95m 6.42e+03[0m | [95m 0.0     [0m | [95m 0.5177  [0m | [95m 0.0     [0m | [95m 0.0     [0m | [95m 1.298   [0m | [95m 1.155   [0m | [95m 1.5     [0m | [95m 1.5     [0m |
6423.613172072432
| [95m 152     [0m | [95m 6.424e+0[0m | [95m 0.0     [0m | [95m 0

6430.118388634056
| [0m 185     [0m | [0m 6.43e+03[0m | [0m 0.0     [0m | [0m 0.5203  [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.364   [0m | [0m 1.112   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6431.265633336163
| [0m 186     [0m | [0m 6.431e+0[0m | [0m 0.0     [0m | [0m 0.52    [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.363   [0m | [0m 1.112   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6429.024486344002
| [0m 187     [0m | [0m 6.429e+0[0m | [0m 0.0     [0m | [0m 0.5187  [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.364   [0m | [0m 1.114   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6428.607255930192
| [0m 188     [0m | [0m 6.429e+0[0m | [0m 0.0     [0m | [0m 0.522   [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.365   [0m | [0m 1.111   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6428.114615792154
| [0m 189     [0m | [0m 6.428e+0[0m | [0m 0.0     [0m | [0m 0.5176  [0m | [0m 0.0     [0m | 

6415.031147978366
| [0m 223     [0m | [0m 6.415e+0[0m | [0m 0.0     [0m | [0m 0.5779  [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.266   [0m | [0m 1.364   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6430.241845220493
| [0m 224     [0m | [0m 6.43e+03[0m | [0m 0.0     [0m | [0m 0.5413  [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.349   [0m | [0m 1.146   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6226.313694231054
| [0m 225     [0m | [0m 6.226e+0[0m | [0m 0.04515 [0m | [0m 0.5317  [0m | [0m 0.05658 [0m | [0m 0.0209  [0m | [0m 0.7242  [0m | [0m 0.919   [0m | [0m 1.282   [0m | [0m 1.451   [0m |
6096.975504435761
| [0m 226     [0m | [0m 6.097e+0[0m | [0m 0.01294 [0m | [0m 0.3633  [0m | [0m 0.05031 [0m | [0m 0.1404  [0m | [0m 0.633   [0m | [0m 0.7625  [0m | [0m 1.48    [0m | [0m 0.9001  [0m |
6430.758394075007
| [0m 227     [0m | [0m 6.431e+0[0m | [0m 0.0     [0m | [0m 0.5414  [0m | [0m 0.0     [0m | 

6001.409030108353
| [0m 261     [0m | [0m 6.001e+0[0m | [0m 0.03199 [0m | [0m 0.3402  [0m | [0m 0.1868  [0m | [0m 0.2565  [0m | [0m 0.5285  [0m | [0m 0.8887  [0m | [0m 1.331   [0m | [0m 1.434   [0m |
6413.421112261094
| [0m 262     [0m | [0m 6.413e+0[0m | [0m 0.0     [0m | [0m 0.563   [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.284   [0m | [0m 1.325   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
5978.245911160784
| [0m 263     [0m | [0m 5.978e+0[0m | [0m 0.02176 [0m | [0m 0.2115  [0m | [0m 0.1641  [0m | [0m 0.1771  [0m | [0m 0.8557  [0m | [0m 0.5824  [0m | [0m 1.205   [0m | [0m 0.7755  [0m |
6239.889190248301
| [0m 264     [0m | [0m 6.24e+03[0m | [0m 0.05245 [0m | [0m 0.1422  [0m | [0m 0.1556  [0m | [0m 0.2621  [0m | [0m 0.6741  [0m | [0m 0.9494  [0m | [0m 1.385   [0m | [0m 1.49    [0m |
5683.131547672131
| [0m 265     [0m | [0m 5.683e+0[0m | [0m 0.06664 [0m | [0m 0.4403  [0m | [0m 0.0     [0m | 

6421.6653215481965
| [0m 299     [0m | [0m 6.422e+0[0m | [0m 0.0     [0m | [0m 0.5311  [0m | [0m 0.0     [0m | [0m 3.42e-10[0m | [0m 1.281   [0m | [0m 1.353   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6057.583683898717
| [0m 300     [0m | [0m 6.058e+0[0m | [0m 0.1317  [0m | [0m 0.1172  [0m | [0m 0.09874 [0m | [0m 0.1336  [0m | [0m 0.6408  [0m | [0m 0.7475  [0m | [0m 1.39    [0m | [0m 0.9842  [0m |
5982.325298121465
| [0m 301     [0m | [0m 5.982e+0[0m | [0m 0.02004 [0m | [0m 0.2089  [0m | [0m 0.1657  [0m | [0m 0.1788  [0m | [0m 0.8643  [0m | [0m 0.5827  [0m | [0m 1.203   [0m | [0m 0.7779  [0m |
6422.974878747821
| [0m 302     [0m | [0m 6.423e+0[0m | [0m 0.0     [0m | [0m 0.5342  [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.289   [0m | [0m 1.354   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
5968.944594437808
| [0m 303     [0m | [0m 5.969e+0[0m | [0m 0.02964 [0m | [0m 0.1983  [0m | [0m 0.05023 [0m |

5715.180502545953
| [0m 337     [0m | [0m 5.715e+0[0m | [0m 0.08095 [0m | [0m 0.3134  [0m | [0m 0.1954  [0m | [0m 0.02371 [0m | [0m 0.4261  [0m | [0m 0.4453  [0m | [0m 1.127   [0m | [0m 0.7372  [0m |
5913.390622917189
| [0m 338     [0m | [0m 5.913e+0[0m | [0m 0.05034 [0m | [0m 0.2617  [0m | [0m 0.157   [0m | [0m 0.1181  [0m | [0m 0.6563  [0m | [0m 0.5962  [0m | [0m 1.258   [0m | [0m 0.7273  [0m |
6426.387979317888
| [0m 339     [0m | [0m 6.426e+0[0m | [0m 0.0     [0m | [0m 0.5282  [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.296   [0m | [0m 1.139   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6421.9576380133
| [0m 340     [0m | [0m 6.422e+0[0m | [0m 0.0     [0m | [0m 0.5343  [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.271   [0m | [0m 1.121   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6204.36860038605
| [0m 341     [0m | [0m 6.204e+0[0m | [0m 0.05428 [0m | [0m 0.385   [0m | [0m 0.07022 [0m | [0

6388.7626808463365
| [0m 375     [0m | [0m 6.389e+0[0m | [0m 0.0     [0m | [0m 0.4562  [0m | [0m 0.0     [0m | [0m 0.09963 [0m | [0m 1.166   [0m | [0m 1.5     [0m | [0m 1.394   [0m | [0m 1.436   [0m |
6392.257895659341
| [0m 376     [0m | [0m 6.392e+0[0m | [0m 0.04948 [0m | [0m 0.5716  [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.212   [0m | [0m 1.23    [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6419.972348429497
| [0m 377     [0m | [0m 6.42e+03[0m | [0m 0.0     [0m | [0m 0.5817  [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.34    [0m | [0m 1.425   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
6034.484200250484
| [0m 378     [0m | [0m 6.034e+0[0m | [0m 0.06528 [0m | [0m 0.2175  [0m | [0m 0.04074 [0m | [0m 0.1863  [0m | [0m 0.7182  [0m | [0m 0.5797  [0m | [0m 1.43    [0m | [0m 0.8757  [0m |
6033.205430614142
| [0m 379     [0m | [0m 6.033e+0[0m | [0m 0.05869 [0m | [0m 0.637   [0m | [0m 0.1783  [0m |

6422.056118941074
| [0m 413     [0m | [0m 6.422e+0[0m | [0m 0.0     [0m | [0m 0.5744  [0m | [0m 0.0     [0m | [0m 0.0     [0m | [0m 1.321   [0m | [0m 1.405   [0m | [0m 1.5     [0m | [0m 1.5     [0m |
5906.104040133306
| [0m 414     [0m | [0m 5.906e+0[0m | [0m 0.1888  [0m | [0m 0.3865  [0m | [0m 0.1864  [0m | [0m 0.1584  [0m | [0m 1.056   [0m | [0m 1.299   [0m | [0m 1.345   [0m | [0m 1.453   [0m |
6359.675012442566
| [0m 415     [0m | [0m 6.36e+03[0m | [0m 0.0     [0m | [0m 0.468   [0m | [0m 0.05425 [0m | [0m 0.1197  [0m | [0m 1.074   [0m | [0m 1.5     [0m | [0m 1.5     [0m | [0m 1.463   [0m |
3326.8408828771626
| [0m 416     [0m | [0m 3.327e+0[0m | [0m 0.4249  [0m | [0m 0.05337 [0m | [0m 0.3543  [0m | [0m 0.8846  [0m | [0m 0.6297  [0m | [0m 0.7303  [0m | [0m 0.5159  [0m | [0m 0.4966  [0m |
6346.842804802672
| [0m 417     [0m | [0m 6.347e+0[0m | [0m 0.0     [0m | [0m 0.434   [0m | [0m 0.02537 [0m |

6335.212100861771
| [0m 451     [0m | [0m 6.335e+0[0m | [0m 0.04905 [0m | [0m 0.4575  [0m | [0m 0.02695 [0m | [0m 0.04825 [0m | [0m 0.9838  [0m | [0m 1.165   [0m | [0m 1.334   [0m | [0m 1.421   [0m |
6217.61559660244
| [0m 452     [0m | [0m 6.218e+0[0m | [0m 0.02086 [0m | [0m 0.2688  [0m | [0m 0.002204[0m | [0m 0.3151  [0m | [0m 1.023   [0m | [0m 0.8228  [0m | [0m 1.341   [0m | [0m 1.08    [0m |
1951.5640494811917
| [0m 453     [0m | [0m 1.952e+0[0m | [0m 0.3255  [0m | [0m 0.6124  [0m | [0m 0.8345  [0m | [0m 0.6411  [0m | [0m 0.1297  [0m | [0m 0.4178  [0m | [0m 0.7291  [0m | [0m 1.3     [0m |
6388.097548529246
| [0m 454     [0m | [0m 6.388e+0[0m | [0m 0.0     [0m | [0m 0.3207  [0m | [0m 0.0     [0m | [0m 0.1936  [0m | [0m 1.278   [0m | [0m 1.5     [0m | [0m 1.418   [0m | [0m 1.436   [0m |
6203.476670691796
| [0m 455     [0m | [0m 6.203e+0[0m | [0m 0.1282  [0m | [0m 0.3608  [0m | [0m 0.04466 [0m | 

6285.223814455634
| [0m 489     [0m | [0m 6.285e+0[0m | [0m 0.02814 [0m | [0m 0.3037  [0m | [0m 0.1903  [0m | [0m 0.09117 [0m | [0m 0.9359  [0m | [0m 0.964   [0m | [0m 1.492   [0m | [0m 1.066   [0m |
6396.02581598958
| [0m 490     [0m | [0m 6.396e+0[0m | [0m 0.000296[0m | [0m 0.3244  [0m | [0m 0.0     [0m | [0m 0.2096  [0m | [0m 1.225   [0m | [0m 1.499   [0m | [0m 1.473   [0m | [0m 1.413   [0m |
6031.9180938075615
| [0m 491     [0m | [0m 6.032e+0[0m | [0m 0.05377 [0m | [0m 0.224   [0m | [0m 0.3499  [0m | [0m 0.1613  [0m | [0m 1.005   [0m | [0m 0.9905  [0m | [0m 1.259   [0m | [0m 0.866   [0m |
6294.406246013111
| [0m 492     [0m | [0m 6.294e+0[0m | [0m 0.07003 [0m | [0m 0.1029  [0m | [0m 0.333   [0m | [0m 0.07009 [0m | [0m 0.9334  [0m | [0m 0.8921  [0m | [0m 1.467   [0m | [0m 1.26    [0m |
6416.071256949695
| [0m 493     [0m | [0m 6.416e+0[0m | [0m 0.0     [0m | [0m 0.4676  [0m | [0m 0.0     [0m | 

6381.234010902029
| [0m 527     [0m | [0m 6.381e+0[0m | [0m 0.000118[0m | [0m 0.3255  [0m | [0m 0.001041[0m | [0m 0.1947  [0m | [0m 1.144   [0m | [0m 1.499   [0m | [0m 1.498   [0m | [0m 1.423   [0m |
1708.2048845616675
| [0m 528     [0m | [0m 1.708e+0[0m | [0m 0.8909  [0m | [0m 0.04281 [0m | [0m 0.9083  [0m | [0m 0.7748  [0m | [0m 0.3755  [0m | [0m 0.251   [0m | [0m 0.8854  [0m | [0m 1.311   [0m |
6414.881965476896
| [0m 529     [0m | [0m 6.415e+0[0m | [0m 0.0     [0m | [0m 0.4112  [0m | [0m 0.0     [0m | [0m 0.1263  [0m | [0m 1.306   [0m | [0m 1.5     [0m | [0m 1.5     [0m | [0m 1.499   [0m |
6379.402899437648
| [0m 530     [0m | [0m 6.379e+0[0m | [0m 0.0     [0m | [0m 0.2928  [0m | [0m 0.09679 [0m | [0m 0.1458  [0m | [0m 1.152   [0m | [0m 0.979   [0m | [0m 1.5     [0m | [0m 1.314   [0m |
6222.974051830132
| [0m 531     [0m | [0m 6.223e+0[0m | [0m 0.1811  [0m | [0m 0.5049  [0m | [0m 0.04456 [0m |

In [72]:
# Best score found and the weights that produced it.
# NOTE(review): in the recorded run w7 and w8 sit at the 1.5 upper bound and
# w1, w3, w4 at the 0 lower bound, so the optimum may lie outside pbounds —
# consider widening the ranges.
print(optimizer.max)

{'target': 6437.736119126102, 'params': {'w1': 0.0, 'w2': 0.5361283013725007, 'w3': 0.0, 'w4': 0.0, 'w5': 1.3675901396673014, 'w6': 1.1385865634353487, 'w7': 1.5, 'w8': 1.4999999990570498}}


베이지안 최적화로 얻은 weight 값은 실제 예측에 사용한 값과 다를 수 있습니다. 이는 실행할 때마다 최적화 결과가 조금씩 달라지기 때문이며, 성능에는 큰 영향을 미치지 않는 것으로 판단됩니다. 이 점을 참고해 주시기 바랍니다.