Based on [olivier's script](https://www.kaggle.com/ogrellier/xgb-classifier-upsampling-lb-0-283)

In [1]:
MAX_ROUNDS = 370
OPTIMIZE_ROUNDS = False
LEARNING_RATE = 0.07
EARLY_STOPPING_ROUNDS = 50  
# Note: I set EARLY_STOPPING_ROUNDS high so that (when OPTIMIZE_ROUNDS is set)
#       I will get lots of information to make my own judgment.  You should probably
#       reduce EARLY_STOPPING_ROUNDS if you want to do actual early stopping.

I recommend initially setting <code>MAX_ROUNDS</code> fairly high and using <code>OPTIMIZE_ROUNDS</code> to get an idea of the appropriate number of rounds (which, in my judgment, should be close to the maximum value of  <code>best_ntree_limit</code> among all folds, maybe even a bit higher if your model is adequately regularized...or alternatively, you could set <code>verbose=True</code> and look at the details to try to find a number of rounds that works well for all folds).  Then I would turn off <code>OPTIMIZE_ROUNDS</code> and set <code>MAX_ROUNDS</code> to the appropraite number of total rounds.  

The problem with "early stopping" by choosing the best round for each fold is that it overfits to the validation data.    It's therefore liable not to produce the optimal model for predicting test data, and if it's used to produce validation data for stacking/ensembling with other models, it would cause this one to have too much weight in the ensemble.  Another possibility (and the default for XGBoost, it seems) is to use the round where the early stop actually happens (with the lag that verifies lack of improvement) rather than the best round.  That solves the overfitting problem (provided the lag is long enough), but so far it doesn't seem to have helped.  (I got a worse validation score with 20-round early stopping per fold than with a constant number of rounds for all folds, so the early stopping actually seemed to underfit.)


In [2]:
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from numba import jit
import time
import gc



In [3]:
# Compute gini

# from CPMP's kernel https://www.kaggle.com/cpmpml/extremely-fast-gini-computation
@jit
def eval_gini(y_true, y_prob):
    y_true = np.asarray(y_true)
    y_true = y_true[np.argsort(y_prob)]
    ntrue = 0
    gini = 0
    delta = 0
    n = len(y_true)
    for i in range(n-1, -1, -1):
        y_i = y_true[i]
        ntrue += y_i
        gini += y_i * delta
        delta += 1 - y_i
    gini = 1 - 2 * gini / (ntrue * (n - ntrue))
    return gini

In [4]:
# Funcitons from olivier's kernel
# https://www.kaggle.com/ogrellier/xgb-classifier-upsampling-lb-0-283

def gini_xgb(preds, dtrain):
    labels = dtrain.get_label()
    gini_score = -eval_gini(labels, preds)
    return [('gini', gini_score)]


def add_noise(series, noise_level):
    return series * (1 + noise_level * np.random.randn(len(series)))


def target_encode(trn_series=None,    # Revised to encode validation series
                  val_series=None,
                  tst_series=None,
                  target=None,
                  min_samples_leaf=1,
                  smoothing=1,
                  noise_level=0):
    """
    Smoothing is computed like in the following paper by Daniele Micci-Barreca
    https://kaggle2.blob.core.windows.net/forum-message-attachments/225952/7441/high%20cardinality%20categoricals.pdf
    trn_series : training categorical feature as a pd.Series
    tst_series : test categorical feature as a pd.Series
    target : target data as a pd.Series
    min_samples_leaf (int) : minimum samples to take category average into account
    smoothing (int) : smoothing effect to balance categorical average vs prior
    """
    assert len(trn_series) == len(target)
    assert trn_series.name == tst_series.name
    temp = pd.concat([trn_series, target], axis=1)
    # Compute target mean
    averages = temp.groupby(by=trn_series.name)[target.name].agg(["mean", "count"])
    # Compute smoothing
    smoothing = 1 / (1 + np.exp(-(averages["count"] - min_samples_leaf) / smoothing))
    # Apply average function to all target data
    prior = target.mean()
    # The bigger the count the less full_avg is taken into account
    averages[target.name] = prior * (1 - smoothing) + averages["mean"] * smoothing
    averages.drop(["mean", "count"], axis=1, inplace=True)
    # Apply averages to trn and tst series
    ft_trn_series = pd.merge(
        trn_series.to_frame(trn_series.name),
        averages.reset_index().rename(columns={'index': target.name, target.name: 'average'}),
        on=trn_series.name,
        how='left')['average'].rename(trn_series.name + '_mean').fillna(prior)
    # pd.merge does not keep the index so restore it
    ft_trn_series.index = trn_series.index
    ft_val_series = pd.merge(
        val_series.to_frame(val_series.name),
        averages.reset_index().rename(columns={'index': target.name, target.name: 'average'}),
        on=val_series.name,
        how='left')['average'].rename(trn_series.name + '_mean').fillna(prior)
    # pd.merge does not keep the index so restore it
    ft_val_series.index = val_series.index
    ft_tst_series = pd.merge(
        tst_series.to_frame(tst_series.name),
        averages.reset_index().rename(columns={'index': target.name, target.name: 'average'}),
        on=tst_series.name,
        how='left')['average'].rename(trn_series.name + '_mean').fillna(prior)
    # pd.merge does not keep the index so restore it
    ft_tst_series.index = tst_series.index
    return add_noise(ft_trn_series, noise_level), add_noise(ft_val_series, noise_level), add_noise(ft_tst_series, noise_level)


In [5]:
# Read data
train_df = pd.read_csv('../data/train.csv', na_values="-1") # .iloc[0:200,:]
test_df = pd.read_csv('../data/test.csv', na_values="-1")

In [6]:
# from olivier
train_features = [
    "ps_car_13",  #            : 1571.65 / shadow  609.23
	"ps_reg_03",  #            : 1408.42 / shadow  511.15
	"ps_ind_05_cat",  #        : 1387.87 / shadow   84.72
	"ps_ind_03",  #            : 1219.47 / shadow  230.55
	"ps_ind_15",  #            :  922.18 / shadow  242.00
	"ps_reg_02",  #            :  920.65 / shadow  267.50
	"ps_car_14",  #            :  798.48 / shadow  549.58
	"ps_car_12",  #            :  731.93 / shadow  293.62
	"ps_car_01_cat",  #        :  698.07 / shadow  178.72
	"ps_car_07_cat",  #        :  694.53 / shadow   36.35
	"ps_ind_17_bin",  #        :  620.77 / shadow   23.15
	"ps_car_03_cat",  #        :  611.73 / shadow   50.67
	"ps_reg_01",  #            :  598.60 / shadow  178.57
	"ps_car_15",  #            :  593.35 / shadow  226.43
	"ps_ind_01",  #            :  547.32 / shadow  154.58
	"ps_ind_16_bin",  #        :  475.37 / shadow   34.17
	"ps_ind_07_bin",  #        :  435.28 / shadow   28.92
	"ps_car_06_cat",  #        :  398.02 / shadow  212.43
	"ps_car_04_cat",  #        :  376.87 / shadow   76.98
	"ps_ind_06_bin",  #        :  370.97 / shadow   36.13
	"ps_car_09_cat",  #        :  214.12 / shadow   81.38
	"ps_car_02_cat",  #        :  203.03 / shadow   26.67
	"ps_ind_02_cat",  #        :  189.47 / shadow   65.68
	"ps_car_11",  #            :  173.28 / shadow   76.45
	"ps_car_05_cat",  #        :  172.75 / shadow   62.92
	"ps_calc_09",  #           :  169.13 / shadow  129.72
	"ps_calc_05",  #           :  148.83 / shadow  120.68
	"ps_ind_08_bin",  #        :  140.73 / shadow   27.63
	"ps_car_08_cat",  #        :  120.87 / shadow   28.82
	"ps_ind_09_bin",  #        :  113.92 / shadow   27.05
	"ps_ind_04_cat",  #        :  107.27 / shadow   37.43
	"ps_ind_18_bin",  #        :   77.42 / shadow   25.97
	"ps_ind_12_bin",  #        :   39.67 / shadow   15.52
	"ps_ind_14",  #            :   37.37 / shadow   16.65
]
# add combinations
combs = [
    ('ps_reg_01', 'ps_car_02_cat'),  
    ('ps_reg_01', 'ps_car_04_cat'),
]

In [7]:
# Process data
id_test = test_df['id'].values
id_train = train_df['id'].values
y = train_df['target']

start = time.time()
for n_c, (f1, f2) in enumerate(combs):
    name1 = f1 + "_plus_" + f2
    print('current feature %60s %4d in %5.1f'
          % (name1, n_c + 1, (time.time() - start) / 60), end='')
    print('\r' * 75, end='')
    train_df[name1] = train_df[f1].apply(lambda x: str(x)) + "_" + train_df[f2].apply(lambda x: str(x))
    test_df[name1] = test_df[f1].apply(lambda x: str(x)) + "_" + test_df[f2].apply(lambda x: str(x))
    # Label Encode
    lbl = LabelEncoder()
    lbl.fit(list(train_df[name1].values) + list(test_df[name1].values))
    train_df[name1] = lbl.transform(list(train_df[name1].values))
    test_df[name1] = lbl.transform(list(test_df[name1].values))

    train_features.append(name1)
    
X = train_df[train_features]
test_df = test_df[train_features]

f_cats = [f for f in X.columns if "_cat" in f]


current feature                                 ps_reg_01_plus_ps_car_04_cat    2 in   0.0

In [8]:
y_valid_pred = 0*y
y_test_pred = 0

In [24]:
# Set up folds
K = 5
kf = KFold(n_splits = K, random_state = 1, shuffle = True)
np.random.seed(0)

In [25]:
# Set up classifier
model = XGBClassifier(    
                        n_estimators=MAX_ROUNDS,
                        max_depth=4,
                        objective="binary:logistic",
                        learning_rate=LEARNING_RATE, 
                        subsample=.8,
                        min_child_weight=.77,
                        colsample_bytree=.8,
                        scale_pos_weight=1.6,
                        gamma=10,
                        reg_alpha=8,
                        reg_lambda=1.3,
                     )

In [15]:
import xgboost as xgb
xgb_params = {}
xgb_params['objective'] = 'binary:logistic'
xgb_params['eta'] = 0.02
xgb_params['silent'] = True
xgb_params['max_depth'] = 6
xgb_params['subsample'] = 0.9
xgb_params['colsample_bytree'] = 0.9

In [26]:
# Run CV

for i, (train_index, test_index) in enumerate(kf.split(train_df)):
    
    # Create data for this fold
    y_train, y_valid = y.iloc[train_index].copy(), y.iloc[test_index]
    X_train, X_valid = X.iloc[train_index,:].copy(), X.iloc[test_index,:].copy()
    X_test = test_df.copy()
    print( "\nFold ", i)
    
    # Enocode data
    for f in f_cats:
        X_train[f + "_avg"], X_valid[f + "_avg"], X_test[f + "_avg"] = target_encode(
                                                        trn_series=X_train[f],
                                                        val_series=X_valid[f],
                                                        tst_series=X_test[f],
                                                        target=y_train,
                                                        min_samples_leaf=200,
                                                        smoothing=10,
                                                        noise_level=0
                                                        )
    
    print('Data processed')
    
    # Run model for this fold
    if OPTIMIZE_ROUNDS:
        eval_set=[(X_valid,y_valid)]
        fit_model = model.fit( X_train, y_train, 
                               eval_set=eval_set,
                               eval_metric=gini_xgb,
                               early_stopping_rounds=EARLY_STOPPING_ROUNDS,
                               verbose=False
                             )
        print( "  Best N trees = ", model.best_ntree_limit )
        print( "  Best gini = ", model.best_score )
    else:
        fit_model = model.fit( X_train, y_train )
        
    # Generate validation predictions for this fold
    pred = fit_model.predict_proba(X_valid)[:,1]
    print( "  Gini = ", eval_gini(y_valid, pred) )
    y_valid_pred.iloc[test_index] = pred
    
    # Accumulate test set predictions
    y_test_pred += fit_model.predict_proba(X_test)[:,1]
    
    del X_test, X_train, X_valid, y_train
    
y_test_pred /= K  # Average test set predictions

print( "\nGini for full training set:" )
eval_gini(y, y_valid_pred)


Fold  0
Data processed
  Gini =  0.28626059699316664


MemoryError: 

In [12]:
# Save validation predictions for stacking/ensembling
val = pd.DataFrame()
val['id'] = id_train
val['target'] = y_valid_pred.values
val.to_csv('xgb_valid.csv', float_format='%.6f', index=False)

In [14]:
# Create submission file
sub = pd.DataFrame()
sub['id'] = id_test
sub['target'] = y_test_pred
sub.to_csv('../submissions/submission_4.csv', float_format='%.6f', index=False)

Notes:<br>
version 16. Baseline best CV=.2832, LB=.282<br>
version 15. Ntree optimization for baseline<br>
version 21. Verbose version of baseline optimization<br>
version 22. Baseline + per-fold early stopping after 20 rounds<br>
version 23. Back to baseline.<br>
version 24. Some parameter tuning.<br>
version 25. Re-published to make it visible.<br>
version 26. A little more tuning.<br>
version 27: More tuning, get rid of upsampling (using  **<code>scale_pos_weight</code>** instead),<br>
                    &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
                    &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
                    Set <code>OPTIMIZE_ROUNDS</code> and <code>verbose</code> temporarily<br>
version 28: <code>MAX_ROUNDS=300</code> as a compromise<br>
version 29: Substantively identical. (Turn off now-irrelevant <code>verbose</code>.)<br>
version 30: Still substantively identical. Some visual cleanup.<br>
version 35. More tuning. CV went up but LB sorts lower (still .283)<br>
version 36. Identical (except turn off irrelevant <code>verbose</code>). Republished to make it visible.<br>
versions 37-40. More tuning (gamma=10, alpha=8). LB .284 (\*end zone dance\*).<br>

In [10]:
# calculate gini, ref: http://shichaoji.com/tag/train-model-predictive-insurance-auto-claim-gini-scikit-learn/
def gini(y_true, y_pred):
    # check and get number of samples
    assert y_true.shape == y_pred.shape
    n_samples = y_true.shape[0]
    
    # sort rows on prediction column 
    # (from largest to smallest)
    arr = np.array([y_true, y_pred]).transpose()
    true_order = arr[arr[:,0].argsort()][::-1,0]
    pred_order = arr[arr[:,1].argsort()][::-1,0]
    
    # get Lorenz curves
    L_true = np.cumsum(true_order) / np.sum(true_order)
    L_pred = np.cumsum(pred_order) / np.sum(pred_order)
    L_ones = np.linspace(1/n_samples, 1, n_samples)
    
    # get Gini coefficients (area between curves)
    G_true = np.sum(L_ones - L_true)
    G_pred = np.sum(L_ones - L_pred)
    
    # normalize to true Gini coefficient
    return G_pred/G_true

In [11]:
# apply xgboost
# ref: http://shichaoji.com/tag/train-model-predictive-insurance-auto-claim-gini-scikit-learn/
def use_gini(a, b):
    y = b.get_label()
    return 'gini', gini(y, a)

In [13]:
import xgboost as xgb
from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.33, random_state=42)

X_test = test_df.copy()

for f in f_cats:
    X_train[f + "_avg"], X_valid[f + "_avg"], X_test[f + "_avg"] = target_encode(
                                                    trn_series=X_train[f],
                                                    val_series=X_valid[f],
                                                    tst_series=X_test[f],
                                                    target=y_train,
                                                    min_samples_leaf=200,
                                                    smoothing=10,
                                                    noise_level=0
                                                    )

d_train = xgb.DMatrix(X_train, y_train)
d_valid = xgb.DMatrix(X_valid, y_valid)
d_test = xgb.DMatrix(test_df)

watchlist = [(d_train, 'train'), (d_valid, 'valid')]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [16]:
clf = xgb.train(xgb_params, d_train, 1500, watchlist, early_stopping_rounds=100, 
                verbose_eval=50, 
                feval=use_gini, 
                maximize=True)

[0]	train-gini:0.22151	valid-gini:0.215209
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
[50]	train-gini:0.281506	valid-gini:0.253481
[100]	train-gini:0.297697	valid-gini:0.262274
[150]	train-gini:0.315647	valid-gini:0.267729
[200]	train-gini:0.338565	valid-gini:0.27509
[250]	train-gini:0.359316	valid-gini:0.27895
[300]	train-gini:0.378271	valid-gini:0.282049
[350]	train-gini:0.393837	valid-gini:0.283567
[400]	train-gini:0.407237	valid-gini:0.284701
[450]	train-gini:0.41965	valid-gini:0.285431
[500]	train-gini:0.430462	valid-gini:0.285325
[550]	train-gini:0.440554	valid-gini:0.285343
[600]	train-gini:0.449841	valid-gini:0.284989
Stopping. Best iteration:
[516]	train-gini:0.434145	valid-gini:0.285635



In [18]:
d_test = xgb.DMatrix(X_test)
y_pred_xgb_trained = clf.predict(d_test)

In [29]:
import lightgbm as lgb

lgb_params = {}
lgb_params['learning_rate'] = 0.02
lgb_params['n_estimators'] = 2000
lgb_params['max_depth'] = 4
lgb_params['seed'] = 42
lgb_params['metric'] = 'auc'

lgb_params2 = {}
lgb_params2['learning_rate'] = 0.02
lgb_params2['n_estimators'] = 650
lgb_params2['max_bin'] = 10
lgb_params2['subsample'] = 0.8
lgb_params2['subsample_freq'] = 10
lgb_params2['colsample_bytree'] = 0.8   
lgb_params2['min_child_samples'] = 500
lgb_params2['metric'] = 'auc'
lgb_params2['seed'] = 42

In [26]:
train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_valid, label=y_valid)

In [28]:
lgb_clf = lgb.train(lgb_params, train_data, 2500, valid_sets=valid_data, early_stopping_rounds=200)

[1]	valid_0's auc: 0.59773
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's auc: 0.608386
[3]	valid_0's auc: 0.609677
[4]	valid_0's auc: 0.608234
[5]	valid_0's auc: 0.610764
[6]	valid_0's auc: 0.60934
[7]	valid_0's auc: 0.611517
[8]	valid_0's auc: 0.609867
[9]	valid_0's auc: 0.612451
[10]	valid_0's auc: 0.611939
[11]	valid_0's auc: 0.61064
[12]	valid_0's auc: 0.612337
[13]	valid_0's auc: 0.613348
[14]	valid_0's auc: 0.61263
[15]	valid_0's auc: 0.61334
[16]	valid_0's auc: 0.614201
[17]	valid_0's auc: 0.614266
[18]	valid_0's auc: 0.615005
[19]	valid_0's auc: 0.615204
[20]	valid_0's auc: 0.615648
[21]	valid_0's auc: 0.616079
[22]	valid_0's auc: 0.616647
[23]	valid_0's auc: 0.616734
[24]	valid_0's auc: 0.617058
[25]	valid_0's auc: 0.617721
[26]	valid_0's auc: 0.618167
[27]	valid_0's auc: 0.618553
[28]	valid_0's auc: 0.618901
[29]	valid_0's auc: 0.61933
[30]	valid_0's auc: 0.619557
[31]	valid_0's auc: 0.61977
[32]	valid_0's auc: 0.620249
[33]	valid_0's auc: 0.620

[279]	valid_0's auc: 0.638188
[280]	valid_0's auc: 0.638213
[281]	valid_0's auc: 0.638244
[282]	valid_0's auc: 0.638286
[283]	valid_0's auc: 0.638278
[284]	valid_0's auc: 0.638303
[285]	valid_0's auc: 0.638291
[286]	valid_0's auc: 0.638296
[287]	valid_0's auc: 0.638413
[288]	valid_0's auc: 0.638431
[289]	valid_0's auc: 0.638437
[290]	valid_0's auc: 0.638458
[291]	valid_0's auc: 0.638487
[292]	valid_0's auc: 0.638569
[293]	valid_0's auc: 0.638599
[294]	valid_0's auc: 0.638617
[295]	valid_0's auc: 0.638618
[296]	valid_0's auc: 0.638708
[297]	valid_0's auc: 0.638704
[298]	valid_0's auc: 0.638713
[299]	valid_0's auc: 0.638737
[300]	valid_0's auc: 0.63877
[301]	valid_0's auc: 0.638842
[302]	valid_0's auc: 0.638876
[303]	valid_0's auc: 0.638906
[304]	valid_0's auc: 0.638927
[305]	valid_0's auc: 0.638913
[306]	valid_0's auc: 0.638925
[307]	valid_0's auc: 0.638977
[308]	valid_0's auc: 0.639001
[309]	valid_0's auc: 0.639012
[310]	valid_0's auc: 0.63901
[311]	valid_0's auc: 0.638988
[312]	valid_

[559]	valid_0's auc: 0.641597
[560]	valid_0's auc: 0.641598
[561]	valid_0's auc: 0.641587
[562]	valid_0's auc: 0.641613
[563]	valid_0's auc: 0.641617
[564]	valid_0's auc: 0.64161
[565]	valid_0's auc: 0.641621
[566]	valid_0's auc: 0.641618
[567]	valid_0's auc: 0.641613
[568]	valid_0's auc: 0.641603
[569]	valid_0's auc: 0.641618
[570]	valid_0's auc: 0.641614
[571]	valid_0's auc: 0.641636
[572]	valid_0's auc: 0.641651
[573]	valid_0's auc: 0.641666
[574]	valid_0's auc: 0.64167
[575]	valid_0's auc: 0.641669
[576]	valid_0's auc: 0.64166
[577]	valid_0's auc: 0.641667
[578]	valid_0's auc: 0.64168
[579]	valid_0's auc: 0.641687
[580]	valid_0's auc: 0.64169
[581]	valid_0's auc: 0.641707
[582]	valid_0's auc: 0.64169
[583]	valid_0's auc: 0.641691
[584]	valid_0's auc: 0.641704
[585]	valid_0's auc: 0.641682
[586]	valid_0's auc: 0.641673
[587]	valid_0's auc: 0.641686
[588]	valid_0's auc: 0.641698
[589]	valid_0's auc: 0.641663
[590]	valid_0's auc: 0.641664
[591]	valid_0's auc: 0.641688
[592]	valid_0's 

[841]	valid_0's auc: 0.641811
[842]	valid_0's auc: 0.641806
[843]	valid_0's auc: 0.641805
[844]	valid_0's auc: 0.641806
[845]	valid_0's auc: 0.641813
[846]	valid_0's auc: 0.641804
[847]	valid_0's auc: 0.641803
[848]	valid_0's auc: 0.641798
[849]	valid_0's auc: 0.641799
[850]	valid_0's auc: 0.641784
[851]	valid_0's auc: 0.641795
[852]	valid_0's auc: 0.64179
[853]	valid_0's auc: 0.64178
[854]	valid_0's auc: 0.641768
[855]	valid_0's auc: 0.641765
[856]	valid_0's auc: 0.641766
[857]	valid_0's auc: 0.641771
[858]	valid_0's auc: 0.641771
[859]	valid_0's auc: 0.64177
[860]	valid_0's auc: 0.641766
[861]	valid_0's auc: 0.641767
[862]	valid_0's auc: 0.641768
[863]	valid_0's auc: 0.641763
[864]	valid_0's auc: 0.641741
[865]	valid_0's auc: 0.641732
[866]	valid_0's auc: 0.641738
[867]	valid_0's auc: 0.641726
[868]	valid_0's auc: 0.641738
[869]	valid_0's auc: 0.641728
[870]	valid_0's auc: 0.64173
[871]	valid_0's auc: 0.641713
[872]	valid_0's auc: 0.641711
[873]	valid_0's auc: 0.641704
[874]	valid_0'

In [31]:
lgb_clf2 = lgb.train(lgb_params2, train_data, 2500, valid_sets=valid_data, early_stopping_rounds=200)

[1]	valid_0's auc: 0.602898
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's auc: 0.61643
[3]	valid_0's auc: 0.618088
[4]	valid_0's auc: 0.620606
[5]	valid_0's auc: 0.622584
[6]	valid_0's auc: 0.622598
[7]	valid_0's auc: 0.622142
[8]	valid_0's auc: 0.623514
[9]	valid_0's auc: 0.623893
[10]	valid_0's auc: 0.62532
[11]	valid_0's auc: 0.62588
[12]	valid_0's auc: 0.626161
[13]	valid_0's auc: 0.626002
[14]	valid_0's auc: 0.626039
[15]	valid_0's auc: 0.626093
[16]	valid_0's auc: 0.626686
[17]	valid_0's auc: 0.626978
[18]	valid_0's auc: 0.626628
[19]	valid_0's auc: 0.626516
[20]	valid_0's auc: 0.626414
[21]	valid_0's auc: 0.626924
[22]	valid_0's auc: 0.626966
[23]	valid_0's auc: 0.627007
[24]	valid_0's auc: 0.62762
[25]	valid_0's auc: 0.627753
[26]	valid_0's auc: 0.628142
[27]	valid_0's auc: 0.628184
[28]	valid_0's auc: 0.628237
[29]	valid_0's auc: 0.628227
[30]	valid_0's auc: 0.628147
[31]	valid_0's auc: 0.628412
[32]	valid_0's auc: 0.628484
[33]	valid_0's auc: 0.

[280]	valid_0's auc: 0.641055
[281]	valid_0's auc: 0.641038
[282]	valid_0's auc: 0.641065
[283]	valid_0's auc: 0.641123
[284]	valid_0's auc: 0.641132
[285]	valid_0's auc: 0.641101
[286]	valid_0's auc: 0.641134
[287]	valid_0's auc: 0.641116
[288]	valid_0's auc: 0.641151
[289]	valid_0's auc: 0.641191
[290]	valid_0's auc: 0.641172
[291]	valid_0's auc: 0.641198
[292]	valid_0's auc: 0.641217
[293]	valid_0's auc: 0.641249
[294]	valid_0's auc: 0.64123
[295]	valid_0's auc: 0.641264
[296]	valid_0's auc: 0.6413
[297]	valid_0's auc: 0.641342
[298]	valid_0's auc: 0.641328
[299]	valid_0's auc: 0.641342
[300]	valid_0's auc: 0.641347
[301]	valid_0's auc: 0.641389
[302]	valid_0's auc: 0.641404
[303]	valid_0's auc: 0.641416
[304]	valid_0's auc: 0.641432
[305]	valid_0's auc: 0.641458
[306]	valid_0's auc: 0.641454
[307]	valid_0's auc: 0.641513
[308]	valid_0's auc: 0.641519
[309]	valid_0's auc: 0.6415
[310]	valid_0's auc: 0.641566
[311]	valid_0's auc: 0.641554
[312]	valid_0's auc: 0.641568
[313]	valid_0's

[561]	valid_0's auc: 0.642063
[562]	valid_0's auc: 0.64206
[563]	valid_0's auc: 0.642053
[564]	valid_0's auc: 0.642075
[565]	valid_0's auc: 0.642111
[566]	valid_0's auc: 0.642105
[567]	valid_0's auc: 0.642082
[568]	valid_0's auc: 0.642093
[569]	valid_0's auc: 0.642072
[570]	valid_0's auc: 0.642067
[571]	valid_0's auc: 0.642073
[572]	valid_0's auc: 0.64206
[573]	valid_0's auc: 0.642071
[574]	valid_0's auc: 0.642055
[575]	valid_0's auc: 0.642051
[576]	valid_0's auc: 0.642048
[577]	valid_0's auc: 0.642033
[578]	valid_0's auc: 0.642016
[579]	valid_0's auc: 0.641978
[580]	valid_0's auc: 0.641963
[581]	valid_0's auc: 0.641964
[582]	valid_0's auc: 0.641948
[583]	valid_0's auc: 0.641949
[584]	valid_0's auc: 0.641957
[585]	valid_0's auc: 0.641961
[586]	valid_0's auc: 0.641962
[587]	valid_0's auc: 0.641961
[588]	valid_0's auc: 0.641944
[589]	valid_0's auc: 0.641957
[590]	valid_0's auc: 0.641946
[591]	valid_0's auc: 0.641958
[592]	valid_0's auc: 0.641971
[593]	valid_0's auc: 0.642005
[594]	valid_

In [32]:
y_pred_lgb = lgb_clf2.predict(test_df)

In [33]:
# Create submission file
sub = pd.DataFrame()
sub['id'] = id_test
sub['target'] = y_pred_lgb
sub.to_csv('../submissions/submission_5.csv', float_format='%.6f', index=False)

In [22]:
y_pred_avg = (y_pred_xgb_trained + y_test_pred) / 2

In [23]:
sub = pd.DataFrame()
sub['id'] = id_test
sub['target'] = y_pred_avg
sub.to_csv('../submissions/submission_7.csv', float_format='%.6f', index=False)