In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold


train_rd = pd.read_csv('train.csv')
test_rd = pd.read_csv('test.csv')

group_0 = train_rd.loc[train_rd['target']==0]
group_1 = train_rd.loc[train_rd['target']==1]

In [5]:
features = [c for c in train_rd.columns if c not in ['ID_code', 'target']]
target = train_rd['target']

param = {
    'bagging_freq': 5,
    'bagging_fraction': 0.333,
    'boost_from_average':'false',
    'boost': 'gbdt',
    'feature_fraction': 0.05,
    'learning_rate': 0.005,
    'max_depth': -1,  
    'metric':'auc',
    'min_data_in_leaf': 80,
    'min_sum_hessian_in_leaf': 10.0,
    'num_leaves': 10,
    'num_threads': 8,
    'tree_learner': 'serial',
    'objective': 'binary', 
    'verbosity': 1
}


In [6]:
folds = StratifiedKFold(n_splits=15, shuffle=False,random_state=2319)
predictions = np.zeros(len(test_rd))

predictions_list = []

for fold_count, (train_idx, valid_idx) in enumerate(folds.split(train_rd.values, target.values)):
    
    train_data = lgb.Dataset(train_rd.iloc[train_idx][features],label=target.iloc[train_idx])
    valid_data = lgb.Dataset(train_rd.iloc[valid_idx][features],label=target.iloc[valid_idx])
    
    clf = lgb.train(param, train_data, 50000, valid_sets = valid_data, verbose_eval=1000, early_stopping_rounds = 2500)
    
    predictions = clf.predict(test_rd[features], num_iteration=clf.best_iteration)
    
    print('\n============== Prediction iteration END ==============\n')
    
    predictions_list.append(predictions)

Training until validation scores don't improve for 2500 rounds.
[1000]	valid_0's auc: 0.872425
[2000]	valid_0's auc: 0.879734
[3000]	valid_0's auc: 0.884733
[4000]	valid_0's auc: 0.888339
[5000]	valid_0's auc: 0.891376
[6000]	valid_0's auc: 0.893611
[7000]	valid_0's auc: 0.89528
[8000]	valid_0's auc: 0.896994
[9000]	valid_0's auc: 0.898336
[10000]	valid_0's auc: 0.899356
[11000]	valid_0's auc: 0.900234
[12000]	valid_0's auc: 0.900918
[13000]	valid_0's auc: 0.901496
[14000]	valid_0's auc: 0.901705
[15000]	valid_0's auc: 0.902046
[16000]	valid_0's auc: 0.902218
[17000]	valid_0's auc: 0.902409
[18000]	valid_0's auc: 0.902322
[19000]	valid_0's auc: 0.902558
[20000]	valid_0's auc: 0.902563
[21000]	valid_0's auc: 0.902639
[22000]	valid_0's auc: 0.902643
[23000]	valid_0's auc: 0.9027
[24000]	valid_0's auc: 0.902677
[25000]	valid_0's auc: 0.902671
[26000]	valid_0's auc: 0.902674
Early stopping, best iteration is:
[23738]	valid_0's auc: 0.902736


Training until validation scores don't improve 

[2000]	valid_0's auc: 0.884387
[3000]	valid_0's auc: 0.888542
[4000]	valid_0's auc: 0.892345
[5000]	valid_0's auc: 0.894751
[6000]	valid_0's auc: 0.896898
[7000]	valid_0's auc: 0.898186
[8000]	valid_0's auc: 0.899305
[9000]	valid_0's auc: 0.900192
[10000]	valid_0's auc: 0.900852
[11000]	valid_0's auc: 0.901323
[12000]	valid_0's auc: 0.901595
[13000]	valid_0's auc: 0.901926
[14000]	valid_0's auc: 0.90205
[15000]	valid_0's auc: 0.902175
[16000]	valid_0's auc: 0.902289
[17000]	valid_0's auc: 0.902469
[18000]	valid_0's auc: 0.902486
[19000]	valid_0's auc: 0.902458
[20000]	valid_0's auc: 0.902393
Early stopping, best iteration is:
[17832]	valid_0's auc: 0.902588


Training until validation scores don't improve for 2500 rounds.
[1000]	valid_0's auc: 0.86737
[2000]	valid_0's auc: 0.875576
[3000]	valid_0's auc: 0.88106
[4000]	valid_0's auc: 0.884781
[5000]	valid_0's auc: 0.88816
[6000]	valid_0's auc: 0.890417
[7000]	valid_0's auc: 0.892104
[8000]	valid_0's auc: 0.893773
[9000]	valid_0's auc: 0

In [4]:
result = pd.DataFrame({"ID_code":test_rd["ID_code"].values})

result['target'] = predictions_list[13]

result.to_csv("submission.csv", index=False)