<a href="https://colab.research.google.com/github/mizutokage/Kaglle/blob/main/xgb_bayesipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [235]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, KFold

import xgboost as xgb

%matplotlib inline

In [236]:
df = pd.read_csv("train.csv", index_col='PassengerId')

df = df[['Survived', 'Pclass', 'Age',
         'SibSp', 'Parch', 'Fare']]

In [275]:
df.fillna(df['Age'].mean())

Unnamed: 0_level_0,Survived,Pclass,Age,SibSp,Parch,Fare
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,0,3,22.000000,1,0,7.2500
2,1,1,38.000000,1,0,71.2833
3,1,3,26.000000,0,0,7.9250
4,1,1,35.000000,1,0,53.1000
5,0,3,35.000000,0,0,8.0500
...,...,...,...,...,...,...
887,0,2,27.000000,0,0,13.0000
888,1,1,19.000000,0,0,30.0000
889,0,3,29.699118,1,2,23.4500
890,1,1,26.000000,0,0,30.0000


In [276]:
X = df.drop(['Survived'], axis=1)
y = df['Survived']

In [277]:
seed = 88
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=seed)

X_train.head()

Unnamed: 0_level_0,Pclass,Age,SibSp,Parch,Fare
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
791,3,,0,0,7.75
476,1,,0,0,52.0
809,2,39.0,0,0,13.0
330,1,16.0,0,1,57.9792
218,2,42.0,1,0,27.0


In [278]:
from bayes_opt import BayesianOptimization

In [279]:
def xgb_evaluate(min_child_weight, subsample, colsample_bytree, max_depth):
      params = {'metric': 'error',
              'objective':'binary:logistic',
              'n_estimators':50000,
              'random_state':42,
              'boosting_type':'gbdt',
              'learning_rate':0.01,              
              'min_child_weight': int(min_child_weight),
              'max_depth': int(max_depth),
              'colsample_bytree': colsample_bytree,
              'subsample': subsample,
             }

      cls = xgb.XGBClassifier()
      cls.set_params(**params)
      cls.fit(X_train,
              y_train,
              early_stopping_rounds=50,
              eval_set=[(X_test, y_test)],
              eval_metric='error',
              verbose=0)
      
      pred = cls.predict(X_test)
      score = accuracy_score(y_test, pred)
      return score

In [280]:
xgb_bo = BayesianOptimization(xgb_evaluate,
                              {'min_child_weight': (1, 20),
                               'subsample': (.1, 1),
                               'colsample_bytree': (.1, 1),
                               'max_depth': (1, 50)},
                               random_state=10)

In [281]:
xgb_bo.maximize(init_points=15, n_iter=50, acq='ei')

|   iter    |  target   | colsam... | max_depth | min_ch... | subsample |
-------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.7127  [0m | [0m 0.7942  [0m | [0m 2.017   [0m | [0m 13.04   [0m | [0m 0.7739  [0m |
| [95m 2       [0m | [95m 0.7313  [0m | [95m 0.5487  [0m | [95m 12.02   [0m | [95m 4.763   [0m | [95m 0.7845  [0m |
| [0m 3       [0m | [0m 0.7276  [0m | [0m 0.2522  [0m | [0m 5.329   [0m | [0m 14.02   [0m | [0m 0.9581  [0m |
| [0m 4       [0m | [0m 0.7164  [0m | [0m 0.1036  [0m | [0m 26.1    [0m | [0m 16.44   [0m | [0m 0.6513  [0m |
| [0m 5       [0m | [0m 0.6828  [0m | [0m 0.7496  [0m | [0m 15.3    [0m | [0m 18.44   [0m | [0m 0.7431  [0m |
| [0m 6       [0m | [0m 0.7313  [0m | [0m 0.5883  [0m | [0m 7.966   [0m | [0m 8.093   [0m | [0m 0.7067  [0m |
| [0m 7       [0m | [0m 0.7052  [0m | [0m 0.4976  [0m | [0m 22.27   [0m | [0m 12.74   [0m | [0m 0.5618 

In [282]:
optimized_params = xgb_bo.max['params']
optimized_params['max_depth'] = int(optimized_params['max_depth'])
optimized_params

{'colsample_bytree': 0.1812119603244762,
 'max_depth': 14,
 'min_child_weight': 6.224399593955088,
 'subsample': 0.13601072701561923}

In [283]:
fixed_params = {'metric':'error',
                'objective':'binary:logistic',
                'n_estimators':50000,
                'random_state':seed,
                'booster':'gbtree',
                'learning_rate':0.01}

In [284]:
cls = xgb.XGBClassifier()
cls.set_params(**fixed_params, **optimized_params)
cls.fit(X_train,
        y_train,
        early_stopping_rounds=50,
        eval_set=[(X_test, y_test)],
        eval_metric='error',
        verbose=0)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.1812119603244762, gamma=0,
              learning_rate=0.01, max_delta_step=0, max_depth=14,
              metric='error', min_child_weight=6.224399593955088, missing=None,
              n_estimators=50000, n_jobs=1, nthread=None,
              objective='binary:logistic', random_state=88, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, seed=None, silent=None,
              subsample=0.13601072701561923, verbosity=1)

In [285]:
pred = cls.predict(X_test)
baseline = accuracy_score(y_test, pred)
baseline

0.7276119402985075

In [286]:
%%writefile xgb_bayes.py

UsageError: %%writefile is a cell magic, but the cell body is empty.
