In [1]:
import numpy as np
import pandas as pd

### データ読み込み・tr, valに分割

In [2]:
from sklearn.model_selection import StratifiedKFold

In [3]:
train = pd.read_csv('../../input/sample-data/train_preprocessed.csv')
train_x = train.drop(['target'], axis=1)
train_y = train['target']

In [4]:
skf = StratifiedKFold(n_splits=4, random_state=70, shuffle=True)
tr_idx, va_idx = list(skf.split(train_x, train_y))[0]

tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]

### カスタム関数の定義

In [5]:
# カスタム目的関数（この場合はloglossであり、xgboostの'binary:logistic'と等価）
def logregobj(preds, dtrain):
    labels = dtrain.get_label()  # 真の値のラベルを取得
    preds = 1.0 / (1.0 + np.exp(-preds))  # シグモイド関数
    grad = preds - labels  # 勾配
    hess = preds * (1.0 - preds)  # 二階微分値
    return grad, hess


# カスタム評価指標（この場合は誤答率）
def evalerror(preds, dtrain):
    labels = dtrain.get_label()  # 真の値のラベルを取得
    return 'custom-error', float(sum(labels != (preds > 0.0))) / len(labels)

### 学習

In [6]:
import xgboost as xgb

In [7]:
dtrain = xgb.DMatrix(tr_x, label=tr_y)
dvalid = xgb.DMatrix(va_x, label=va_y)

# ハイパーパラメータの設定
params = {'silent': 1, 'random_state': 71}
num_round = 50
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]

# モデルの学習の実行
bst = xgb.train(params, dtrain, num_round, watchlist, obj=logregobj, feval=evalerror)

[0]	train-rmse:0.401744	eval-rmse:0.427589	train-custom-error:0.171089	eval-custom-error:0.192323
[1]	train-rmse:0.703123	eval-rmse:0.727822	train-custom-error:0.119483	eval-custom-error:0.142343
[2]	train-rmse:0.986453	eval-rmse:1.00778	train-custom-error:0.107081	eval-custom-error:0.135546
[3]	train-rmse:1.22792	eval-rmse:1.24148	train-custom-error:0.098413	eval-custom-error:0.127949
[4]	train-rmse:1.4361	eval-rmse:1.44641	train-custom-error:0.095479	eval-custom-error:0.130748
[5]	train-rmse:1.62305	eval-rmse:1.63238	train-custom-error:0.089745	eval-custom-error:0.132747
[6]	train-rmse:1.78985	eval-rmse:1.79458	train-custom-error:0.086545	eval-custom-error:0.131547
[7]	train-rmse:1.92664	eval-rmse:1.93197	train-custom-error:0.081878	eval-custom-error:0.127549
[8]	train-rmse:2.03661	eval-rmse:2.04071	train-custom-error:0.080144	eval-custom-error:0.12555
[9]	train-rmse:2.14658	eval-rmse:2.14911	train-custom-error:0.074277	eval-custom-error:0.12555
[10]	train-rmse:2.2638	eval-rmse:2.266