<a href="https://colab.research.google.com/github/tagasy/kaggle/blob/master/lightgbm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Colab-only cell: `google.colab` is importable only inside a Colab runtime.
# NOTE(review): presumably this is how train.csv / test.csv (read in a later
# cell) get into the runtime — confirm. `uploaded` is not referenced again in
# the visible part of the notebook.
from google.colab import files
uploaded = files.upload()

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from sklearn import metrics
from sklearn.metrics import log_loss
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold

In [0]:
# Load the labelled training table and the test table from CSV files located
# in the current working directory.
train = pd.read_csv(filepath_or_buffer='train.csv')
test = pd.read_csv(filepath_or_buffer='test.csv')

In [0]:
# Labels: an independent copy of the `target` column.
y_train = train['target'].copy()
# Features: every column of the training table except `id` and `target`.
x_train = train.drop(columns=['id', 'target'])

In [0]:
# Evaluation helper
def get_evaluate(y_test, predict, predict_proba=None):
    """Compute ROC AUC, precision and recall for one set of predictions.

    Args:
        y_test: True binary labels.
        predict: Hard class predictions; used for precision and recall.
        predict_proba: Optional continuous scores (e.g. predicted
            probabilities) used to build the ROC curve. When omitted, the
            ROC curve is built from ``predict`` as before, which yields an
            AUC based on a single operating point only.

    Returns:
        Tuple ``(auc, precision, recall)``.
    """
    # AUC measures ranking quality, so prefer continuous scores when the
    # caller provides them; fall back to the hard labels for compatibility.
    roc_scores = predict if predict_proba is None else predict_proba
    fpr, tpr, _ = metrics.roc_curve(y_test, roc_scores)

    auc = metrics.auc(fpr, tpr)
    precision = metrics.precision_score(y_test, predict)
    recall = metrics.recall_score(y_test, predict)

    return auc, precision, recall

In [29]:
# Stratified k-fold cross-validation of a LightGBM binary classifier.
# For every fold a model is trained with early stopping on the held-out part,
# then AUC / precision / recall on that held-out part are recorded.
n_fold = 5
folds = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=42)

params = {
    'objective': 'binary',
    'seed': 71,
    'learning_rate': 0.05,
    'verbose': 1,
    'num_leaves': 20,
    'metrics': 'binary_logloss'

}
auc_list = []
precision_list = []
recall_list = []

for train_index, test_index in folds.split(x_train, y_train):

    X_train, X_test = x_train.iloc[train_index], x_train.iloc[test_index]
    Y_train, Y_test = y_train.iloc[train_index], y_train.iloc[test_index]

    lgb_train = lgb.Dataset(X_train, Y_train)
    lgb_test = lgb.Dataset(X_test, Y_test, reference=lgb_train)

    # Early stopping is requested through the callback API: the
    # `early_stopping_rounds=` keyword of lgb.train() is not accepted by
    # LightGBM >= 4.0, while the callback works on old and new releases.
    model = lgb.train(params,
                      lgb_train,
                      num_boost_round=300,
                      valid_sets=[lgb_train, lgb_test],
                      callbacks=[lgb.early_stopping(stopping_rounds=10)])

    predict_proba = model.predict(X_test, num_iteration=model.best_iteration)
    # Threshold at 0.5 to obtain hard labels for precision / recall.
    predict = np.where(predict_proba < 0.5, 0, 1)

    _, precision, recall = get_evaluate(Y_test, predict)
    # AUC must be computed from the predicted probabilities; using the
    # thresholded 0/1 labels collapses the ROC curve to a single point.
    fpr, tpr, _ = metrics.roc_curve(Y_test, predict_proba)
    auc = metrics.auc(fpr, tpr)
    print('AUC:{}, precision:{}, recall:{}'.format(auc, precision, recall))

    auc_list.append(auc)
    precision_list.append(precision)
    recall_list.append(recall)

print('Kfold平均 AUC:{}, precision:{}, recall:{}'.format(np.mean(auc_list),
                                                         np.mean(precision_list),
                                                         np.mean(recall_list)))

[1]	training's binary_logloss: 0.558525	valid_1's binary_logloss: 0.587589
Training until validation scores don't improve for 10 rounds.
[2]	training's binary_logloss: 0.541299	valid_1's binary_logloss: 0.584889
[3]	training's binary_logloss: 0.526315	valid_1's binary_logloss: 0.582118
[4]	training's binary_logloss: 0.511987	valid_1's binary_logloss: 0.578654
[5]	training's binary_logloss: 0.498404	valid_1's binary_logloss: 0.578911
[6]	training's binary_logloss: 0.482632	valid_1's binary_logloss: 0.580481
[7]	training's binary_logloss: 0.468143	valid_1's binary_logloss: 0.584076
[8]	training's binary_logloss: 0.454539	valid_1's binary_logloss: 0.582652
[9]	training's binary_logloss: 0.441305	valid_1's binary_logloss: 0.581134
[10]	training's binary_logloss: 0.428442	valid_1's binary_logloss: 0.579781
[11]	training's binary_logloss: 0.415333	valid_1's binary_logloss: 0.574127
[12]	training's binary_logloss: 0.404164	valid_1's binary_logloss: 0.571474
[13]	training's binary_logloss: 0.3

  _warn_prf(average, modifier, msg_start, len(result))


[1]	training's binary_logloss: 0.564763	valid_1's binary_logloss: 0.566802
Training until validation scores don't improve for 10 rounds.
[2]	training's binary_logloss: 0.548137	valid_1's binary_logloss: 0.556368
[3]	training's binary_logloss: 0.532008	valid_1's binary_logloss: 0.552539
[4]	training's binary_logloss: 0.517518	valid_1's binary_logloss: 0.547365
[5]	training's binary_logloss: 0.503932	valid_1's binary_logloss: 0.541812
[6]	training's binary_logloss: 0.490195	valid_1's binary_logloss: 0.539364
[7]	training's binary_logloss: 0.477708	valid_1's binary_logloss: 0.536207
[8]	training's binary_logloss: 0.465683	valid_1's binary_logloss: 0.530421
[9]	training's binary_logloss: 0.45425	valid_1's binary_logloss: 0.527041
[10]	training's binary_logloss: 0.444522	valid_1's binary_logloss: 0.523234
[11]	training's binary_logloss: 0.434232	valid_1's binary_logloss: 0.521613
[12]	training's binary_logloss: 0.422761	valid_1's binary_logloss: 0.516792
[13]	training's binary_logloss: 0.41