In [1]:
import warnings
warnings.filterwarnings("ignore")
import joblib
import gc
import multiprocessing as mp
import numpy as np
import pandas as pd
import _pickle as cpickle
import pickle
from datetime import datetime
import time
from pandas import HDFStore
import lightgbm as lgb
from lightgbm.sklearn import LGBMClassifier
import xgboost
from xgboost.sklearn import XGBClassifier
import xgboost as xgb
from operator import itemgetter
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import check_cv, train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, roc_auc_score, log_loss, auc

In [2]:
# Training features: read the pickled frame and drop the order_id column.
train = pd.read_pickle("data/final_train.pkl")
x_train = train.drop(columns=['order_id'])
# The raw training frame is not referenced again, so release it immediately.
del train
gc.collect()

# Test features. The unmodified `test` frame stays bound because the prediction
# cells pass it to `predict` alongside `x_test`.
test = pd.read_pickle("data/final_test.pkl")
x_test = test.drop(columns=["order_id"])

# Training targets handed to `fit` together with `x_train`.
labels = pd.read_pickle("data/final_labels.pkl")

In [3]:
# (rows, columns) of the train and test feature frames, in that order.
tuple(frame.shape for frame in (x_train, x_test))

((8474661, 79), (4833292, 78))

In [4]:
class CustomStackingClassifier:
    """Average of several LightGBM boosters trained on random user-level splits.

    ``fit`` trains ``loop`` boosters. For each booster a fresh random subset of
    users (each user is held out with probability ``valid_size``) becomes the
    validation set used for early stopping; the remaining users form the build
    set. ``predict`` returns the mean prediction of all trained boosters.

    Parameters
    ----------
    estimators : object
        Stored as ``self.clf``; not used by any method of this class.
    random_state : int
        Seed passed to ``np.random.seed`` at the start of ``fit``.
    params : dict
        Parameter dictionary forwarded to ``lgb.train``.
    nround : int
        Maximum number of boosting rounds per booster.
    version : int or str
        Tag embedded in the file name of every dumped booster.
    loop : int, default 3
        Number of boosters trained by one ``fit`` call.
    valid_size : float, default 0.05
        Probability with which each user is assigned to the validation set.
    stratify, use_probas
        Accepted for signature compatibility; currently ignored.
    verbose : int, default 1
        Stored as ``self.verbose``; not read by any method of this class.
    early_stopping : int, default 60
        Early-stopping patience (rounds) forwarded to ``lgb.train``.
    """

    # Columns that LightGBM is told to treat as categorical.
    CATEGORICAL_FEATURES = ('product_id', 'aisle_id', 'department_id')
    # Destination of each dumped booster; formatted with (version, loop index).
    MODEL_PATH_TEMPLATE = "lgb_models/lgb_trained_{}_{}"

    def __init__(self, estimators, random_state, params, nround, 
                 version, loop=3,
                 valid_size=0.05, stratify=True, verbose=1,
                 early_stopping=60, use_probas=True):
        self.clf = estimators
        # Kept only so existing code reading `.mod` keeps working; unused here.
        self.mod = cpickle
        self.loop = loop
        self.params = params
        self.nround = nround
        self.version = version
        self.valid_size = valid_size
        self.verbose = verbose
        self.random_state = random_state
        self.early_stopping = early_stopping
        self.models = []

    def split_build_valid(self, train_user, X_train, y_train):
        """Randomly split users into build/validation sets and wrap them as Datasets.

        Parameters
        ----------
        train_user : pandas.DataFrame
            One row per user with a ``user_id`` column. An ``is_valid`` column
            is added (or overwritten) in place.
        X_train : pandas.DataFrame
            Feature frame containing ``user_id``; that column is dropped before
            the data is handed to LightGBM.
        y_train
            Targets; indexed with the same boolean row mask as ``X_train``.

        Returns
        -------
        tuple
            ``(dbuild, dvalid, watchlist_set, watchlist_name)`` where the last
            two are the lists passed to ``lgb.train`` as validation sets/names.
        """
        # Draw from the global NumPy RNG so that the seed set in `fit` governs
        # the sequence of splits.
        train_user['is_valid'] = np.random.choice(
            [0, 1],
            size=len(train_user),
            p=[1 - self.valid_size, self.valid_size])

        valid_n = train_user['is_valid'].sum()
        build_n = (train_user.shape[0] - valid_n)

        print('build user:{}, valid user:{}'.format(build_n, valid_n))
        valid_user = train_user[train_user['is_valid'] == 1].user_id
        # Row mask: True for every row that belongs to a held-out user.
        is_valid = X_train.user_id.isin(valid_user)

        categorical = list(self.CATEGORICAL_FEATURES)
        dbuild = lgb.Dataset(X_train[~is_valid].drop('user_id', axis=1),
                             y_train[~is_valid],
                             categorical_feature=categorical)
        dvalid = lgb.Dataset(X_train[is_valid].drop('user_id', axis=1),
                             label=y_train[is_valid],
                             categorical_feature=categorical)
        watchlist_set = [dbuild, dvalid]
        watchlist_name = ['build', 'valid']

        print('FINAL SHAPE')
        print('dbuild.shape:{}  dvalid.shape:{}\n'.format(
            dbuild.data.shape,
            dvalid.data.shape))
        return dbuild, dvalid, watchlist_set, watchlist_name

    def fit(self, x, y):
        """Train ``loop`` boosters, each on a fresh random user-level split.

        Every trained booster is dumped with ``joblib`` to
        ``MODEL_PATH_TEMPLATE`` and kept in ``self.models``. Boosters from a
        previous ``fit`` call are discarded, so re-running a fit cell does not
        silently grow the ensemble.

        Returns
        -------
        CustomStackingClassifier
            ``self``, to allow call chaining.
        """
        import os

        np.random.seed(self.random_state)
        self.models = []
        # Create the output directory up front: a missing directory would
        # otherwise only surface after the first (long) training run.
        os.makedirs(os.path.dirname(self.MODEL_PATH_TEMPLATE), exist_ok=True)

        train_user = x[['user_id']].drop_duplicates()

        for i in range(self.loop):
            dbuild, dvalid, watchlist_set, watchlist_name = self.split_build_valid(train_user, x, y)
            gc.collect()

            # Train models
            model = lgb.train(
                self.params,
                dbuild,
                self.nround,
                watchlist_set,
                watchlist_name,
                early_stopping_rounds=self.early_stopping,
                categorical_feature=list(self.CATEGORICAL_FEATURES),
                verbose_eval=5)
            joblib.dump(model, self.MODEL_PATH_TEMPLATE.format(self.version, i))
            self.models.append(model)
            del dbuild, dvalid, watchlist_set, watchlist_name
            gc.collect()
        del train_user
        gc.collect()
        return self

    def predict(self, x, test_data):
        """Return the mean prediction of all trained boosters.

        Parameters
        ----------
        x
            Feature data passed unchanged to each booster's ``predict``.
        test_data : pandas.DataFrame
            Frame providing the ``order_id`` and ``product_id`` columns of the
            result; it is not modified.

        Returns
        -------
        pandas.DataFrame
            Columns ``order_id``, ``product_id`` and ``yhat`` (mean prediction).

        Raises
        ------
        ValueError
            If no booster has been trained yet.
        """
        if not self.models:
            raise ValueError("predict() called before fit(): no trained models available")

        # Work on an explicit copy so adding `yhat` never touches `test_data`.
        sub_test = test_data[['order_id', 'product_id']].copy()
        sub_test['yhat'] = 0.0
        for model in self.models:
            sub_test['yhat'] += model.predict(x)
        # Divide by the number of boosters actually used, not by `self.loop`,
        # so the average stays correct whenever the two differ.
        sub_test['yhat'] /= len(self.models)
        return sub_test

In [5]:
# LightGBM parameters; the same dictionary object is shared by all three classifiers.
params = dict(
    task='train',
    boosting_type='gbdt',
    objective='binary',
    metric={'binary_logloss', 'auc'},
    num_leaves=256,
    min_sum_hessian_in_leaf=20,
    max_depth=12,
    learning_rate=0.05,
    feature_fraction=0.6,
    # Row bagging is currently switched off:
    # bagging_fraction=0.9,
    # bagging_freq=3,
    verbose=1,
)

# One classifier per (version, seed) pair; 10000 is the `nround` cap on
# boosting rounds handed to each classifier.
cscf_1, cscf_2, cscf_3 = (
    CustomStackingClassifier(lgb, seed, params, 10000, version)
    for version, seed in ((1, 71), (2, 72), (3, 73))
)

In [6]:
# Train the version-1 ensemble, score the test set (fit returns the classifier,
# so the calls can be chained) and write the averaged predictions to disk.
stack1 = cscf_1.fit(x_train, labels).predict(x_test, test)
stack1.to_csv(path_or_buf="data/lgb_stack1.csv", index=False)

build user:124543, valid user:6666
FINAL SHAPE
dbuild.shape:(8044708, 78)  dvalid.shape:(429953, 78)

[LightGBM] [Info] Number of positive: 787000, number of negative: 7257708
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28196
[LightGBM] [Info] Number of data points in the train set: 8044708, number of used features: 78
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.097828 -> initscore=-2.221591
[LightGBM] [Info] Start training from score -2.221591
Training until validation scores don't improve for 60 rounds
[5]	build's auc: 0.832155	build's binary_logloss: 0.288275	valid's auc: 0.828539	valid's binary_logloss: 0.288105
[10]	build's auc: 0.833872	build's binary_logloss: 0.27253	valid's auc: 0.829417	valid's binary_logloss: 0.272974
[15]	build's auc: 0.835195	build's binary_logloss: 0.262692	valid's auc: 0.830561	valid's binary_logloss: 0.263553
[20]	build's auc: 0.836732	build's binary_logloss: 0.256103	valid's auc: 0.831398	valid's binary_l

[180]	build's auc: 0.852833	build's binary_logloss: 0.234994	valid's auc: 0.836539	valid's binary_logloss: 0.243113
[185]	build's auc: 0.853011	build's binary_logloss: 0.234881	valid's auc: 0.836586	valid's binary_logloss: 0.243085
[190]	build's auc: 0.853135	build's binary_logloss: 0.234807	valid's auc: 0.836599	valid's binary_logloss: 0.243078
[195]	build's auc: 0.853287	build's binary_logloss: 0.234702	valid's auc: 0.836614	valid's binary_logloss: 0.243066
[200]	build's auc: 0.853366	build's binary_logloss: 0.234651	valid's auc: 0.836602	valid's binary_logloss: 0.24307
[205]	build's auc: 0.853462	build's binary_logloss: 0.234596	valid's auc: 0.836598	valid's binary_logloss: 0.243074
[210]	build's auc: 0.853542	build's binary_logloss: 0.234545	valid's auc: 0.836595	valid's binary_logloss: 0.243073
[215]	build's auc: 0.853656	build's binary_logloss: 0.234477	valid's auc: 0.836599	valid's binary_logloss: 0.243071
[220]	build's auc: 0.853732	build's binary_logloss: 0.234426	valid's auc:

[300]	build's auc: 0.855418	build's binary_logloss: 0.23341	valid's auc: 0.836623	valid's binary_logloss: 0.243036
[305]	build's auc: 0.855534	build's binary_logloss: 0.23334	valid's auc: 0.836638	valid's binary_logloss: 0.24303
[310]	build's auc: 0.855644	build's binary_logloss: 0.233272	valid's auc: 0.836645	valid's binary_logloss: 0.243026
[315]	build's auc: 0.855683	build's binary_logloss: 0.23325	valid's auc: 0.836636	valid's binary_logloss: 0.243029
[320]	build's auc: 0.855773	build's binary_logloss: 0.233198	valid's auc: 0.836639	valid's binary_logloss: 0.243028
[325]	build's auc: 0.855842	build's binary_logloss: 0.23316	valid's auc: 0.836629	valid's binary_logloss: 0.243031
Early stopping, best iteration is:
[265]	build's auc: 0.854775	build's binary_logloss: 0.233776	valid's auc: 0.836659	valid's binary_logloss: 0.243025
build user:124789, valid user:6420
FINAL SHAPE
dbuild.shape:(8055139, 78)  dvalid.shape:(419522, 78)

[LightGBM] [Info] Number of positive: 788611, number of 

[145]	build's auc: 0.851517	build's binary_logloss: 0.235952	valid's auc: 0.836768	valid's binary_logloss: 0.24002
[150]	build's auc: 0.851672	build's binary_logloss: 0.235857	valid's auc: 0.836781	valid's binary_logloss: 0.240012
[155]	build's auc: 0.851816	build's binary_logloss: 0.235777	valid's auc: 0.836778	valid's binary_logloss: 0.240014
[160]	build's auc: 0.852006	build's binary_logloss: 0.235659	valid's auc: 0.836812	valid's binary_logloss: 0.239994
[165]	build's auc: 0.852153	build's binary_logloss: 0.235564	valid's auc: 0.836839	valid's binary_logloss: 0.239975
[170]	build's auc: 0.852344	build's binary_logloss: 0.235442	valid's auc: 0.836859	valid's binary_logloss: 0.23996
[175]	build's auc: 0.852544	build's binary_logloss: 0.2353	valid's auc: 0.836921	valid's binary_logloss: 0.239924
[180]	build's auc: 0.852683	build's binary_logloss: 0.235208	valid's auc: 0.836963	valid's binary_logloss: 0.239902
[185]	build's auc: 0.852856	build's binary_logloss: 0.2351	valid's auc: 0.83

[260]	build's auc: 0.854472	build's binary_logloss: 0.234088	valid's auc: 0.837171	valid's binary_logloss: 0.239777
[265]	build's auc: 0.854637	build's binary_logloss: 0.233985	valid's auc: 0.837216	valid's binary_logloss: 0.239753
[270]	build's auc: 0.854707	build's binary_logloss: 0.233943	valid's auc: 0.837222	valid's binary_logloss: 0.239749
[275]	build's auc: 0.854785	build's binary_logloss: 0.233896	valid's auc: 0.837225	valid's binary_logloss: 0.239746
[280]	build's auc: 0.85487	build's binary_logloss: 0.233849	valid's auc: 0.837223	valid's binary_logloss: 0.239748
[285]	build's auc: 0.854909	build's binary_logloss: 0.233825	valid's auc: 0.837216	valid's binary_logloss: 0.239751
[290]	build's auc: 0.855068	build's binary_logloss: 0.233734	valid's auc: 0.837247	valid's binary_logloss: 0.239733
[295]	build's auc: 0.855214	build's binary_logloss: 0.233654	valid's auc: 0.837245	valid's binary_logloss: 0.239736
[300]	build's auc: 0.855288	build's binary_logloss: 0.233612	valid's auc:

[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.097855 -> initscore=-2.221293
[LightGBM] [Info] Start training from score -2.221293
Training until validation scores don't improve for 60 rounds
[5]	build's auc: 0.832222	build's binary_logloss: 0.28834	valid's auc: 0.826337	valid's binary_logloss: 0.286949
[10]	build's auc: 0.833999	build's binary_logloss: 0.272572	valid's auc: 0.827232	valid's binary_logloss: 0.271941
[15]	build's auc: 0.835391	build's binary_logloss: 0.262715	valid's auc: 0.828495	valid's binary_logloss: 0.262635
[20]	build's auc: 0.836827	build's binary_logloss: 0.256124	valid's auc: 0.829378	valid's binary_logloss: 0.256613
[25]	build's auc: 0.838721	build's binary_logloss: 0.251446	valid's auc: 0.830054	valid's binary_logloss: 0.252596
[30]	build's auc: 0.839358	build's binary_logloss: 0.248424	valid's auc: 0.83062	valid's binary_logloss: 0.249912
[35]	build's auc: 0.840492	build's binary_logloss: 0.246109	valid's auc: 0.831133	valid's binary_logloss: 0.24804
[40]

[195]	build's auc: 0.853338	build's binary_logloss: 0.234737	valid's auc: 0.834827	valid's binary_logloss: 0.242756
[200]	build's auc: 0.853418	build's binary_logloss: 0.234686	valid's auc: 0.83483	valid's binary_logloss: 0.242759
[205]	build's auc: 0.853511	build's binary_logloss: 0.234627	valid's auc: 0.834827	valid's binary_logloss: 0.242758
[210]	build's auc: 0.853583	build's binary_logloss: 0.234579	valid's auc: 0.83484	valid's binary_logloss: 0.242748
[215]	build's auc: 0.853707	build's binary_logloss: 0.234501	valid's auc: 0.834869	valid's binary_logloss: 0.242732
[220]	build's auc: 0.853825	build's binary_logloss: 0.234434	valid's auc: 0.834877	valid's binary_logloss: 0.242729
[225]	build's auc: 0.853958	build's binary_logloss: 0.234351	valid's auc: 0.834899	valid's binary_logloss: 0.24272
[230]	build's auc: 0.854121	build's binary_logloss: 0.234252	valid's auc: 0.834924	valid's binary_logloss: 0.242711
[235]	build's auc: 0.854214	build's binary_logloss: 0.234195	valid's auc: 0

[310]	build's auc: 0.855711	build's binary_logloss: 0.233283	valid's auc: 0.835024	valid's binary_logloss: 0.242655
[315]	build's auc: 0.855757	build's binary_logloss: 0.233256	valid's auc: 0.83501	valid's binary_logloss: 0.242662
[320]	build's auc: 0.855834	build's binary_logloss: 0.233209	valid's auc: 0.835014	valid's binary_logloss: 0.242661
[325]	build's auc: 0.855886	build's binary_logloss: 0.23318	valid's auc: 0.835005	valid's binary_logloss: 0.242666
[330]	build's auc: 0.855976	build's binary_logloss: 0.233126	valid's auc: 0.835007	valid's binary_logloss: 0.242664
[335]	build's auc: 0.856032	build's binary_logloss: 0.233094	valid's auc: 0.834994	valid's binary_logloss: 0.242671
[340]	build's auc: 0.856108	build's binary_logloss: 0.233044	valid's auc: 0.834995	valid's binary_logloss: 0.242671
[345]	build's auc: 0.85618	build's binary_logloss: 0.233003	valid's auc: 0.834994	valid's binary_logloss: 0.242672
[350]	build's auc: 0.856319	build's binary_logloss: 0.232918	valid's auc: 0

[420]	build's auc: 0.857734	build's binary_logloss: 0.232077	valid's auc: 0.835057	valid's binary_logloss: 0.242631
[425]	build's auc: 0.857792	build's binary_logloss: 0.232043	valid's auc: 0.835057	valid's binary_logloss: 0.24263
[430]	build's auc: 0.857835	build's binary_logloss: 0.23202	valid's auc: 0.835041	valid's binary_logloss: 0.242638
[435]	build's auc: 0.857876	build's binary_logloss: 0.231996	valid's auc: 0.835036	valid's binary_logloss: 0.242642
[440]	build's auc: 0.857962	build's binary_logloss: 0.231944	valid's auc: 0.835033	valid's binary_logloss: 0.242647
[445]	build's auc: 0.858052	build's binary_logloss: 0.231892	valid's auc: 0.83503	valid's binary_logloss: 0.24265
[450]	build's auc: 0.858095	build's binary_logloss: 0.231866	valid's auc: 0.835021	valid's binary_logloss: 0.242655
[455]	build's auc: 0.858233	build's binary_logloss: 0.231791	valid's auc: 0.835018	valid's binary_logloss: 0.242657
Early stopping, best iteration is:
[395]	build's auc: 0.857247	build's binar

<__main__.CustomStackingClassifier at 0x7f1e42c5c0b8>

In [7]:
# Train the version-2 ensemble, score the test set (fit returns the classifier,
# so the calls can be chained) and write the averaged predictions to disk.
# NOTE(review): the result is bound to `stack1` even though it holds the second
# stack - confirm no later cell expects the version-1 predictions under that name.
stack1 = cscf_2.fit(x_train, labels).predict(x_test, test)
stack1.to_csv(path_or_buf="data/lgb_stack2.csv", index=False)

build user:124537, valid user:6672
FINAL SHAPE
dbuild.shape:(8045207, 78)  dvalid.shape:(429454, 78)

[LightGBM] [Info] Number of positive: 786258, number of negative: 7258949
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 28101
[LightGBM] [Info] Number of data points in the train set: 8045207, number of used features: 78
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.097730 -> initscore=-2.222705
[LightGBM] [Info] Start training from score -2.222705
Training until validation scores don't improve for 60 rounds
[5]	build's auc: 0.831897	build's binary_logloss: 0.288141	valid's auc: 0.83094	valid's binary_logloss: 0.291103
[10]	build's auc: 0.833682	build's binary_logloss: 0.272414	valid's auc: 0.831621	valid's binary_logloss: 0.275492
[15]	build's auc: 0.835056	build's binary_logloss: 0.262588	valid's auc: 0.832834	valid's binary_logloss: 0.265739
[20]	build's auc: 0.836564	build's binary_logloss: 0.25601	valid's auc: 0.83378	valid's binary_log

[185]	build's auc: 0.85289	build's binary_logloss: 0.234794	valid's auc: 0.839393	valid's binary_logloss: 0.244142
[190]	build's auc: 0.853008	build's binary_logloss: 0.234721	valid's auc: 0.839407	valid's binary_logloss: 0.244135
[195]	build's auc: 0.85314	build's binary_logloss: 0.234628	valid's auc: 0.83943	valid's binary_logloss: 0.24412
[200]	build's auc: 0.853224	build's binary_logloss: 0.234575	valid's auc: 0.839437	valid's binary_logloss: 0.244116
[205]	build's auc: 0.853311	build's binary_logloss: 0.234521	valid's auc: 0.839446	valid's binary_logloss: 0.244109
[210]	build's auc: 0.853383	build's binary_logloss: 0.234475	valid's auc: 0.839462	valid's binary_logloss: 0.244096
[215]	build's auc: 0.853511	build's binary_logloss: 0.234399	valid's auc: 0.839483	valid's binary_logloss: 0.244083
[220]	build's auc: 0.853579	build's binary_logloss: 0.234355	valid's auc: 0.839492	valid's binary_logloss: 0.244078
[225]	build's auc: 0.853696	build's binary_logloss: 0.234283	valid's auc: 0.

[300]	build's auc: 0.855228	build's binary_logloss: 0.233358	valid's auc: 0.839651	valid's binary_logloss: 0.243977
[305]	build's auc: 0.855335	build's binary_logloss: 0.233287	valid's auc: 0.839675	valid's binary_logloss: 0.243963
[310]	build's auc: 0.855444	build's binary_logloss: 0.233226	valid's auc: 0.839685	valid's binary_logloss: 0.24396
[315]	build's auc: 0.855483	build's binary_logloss: 0.233204	valid's auc: 0.839676	valid's binary_logloss: 0.243963
[320]	build's auc: 0.855551	build's binary_logloss: 0.233162	valid's auc: 0.839679	valid's binary_logloss: 0.243962
[325]	build's auc: 0.855608	build's binary_logloss: 0.233131	valid's auc: 0.839679	valid's binary_logloss: 0.243964
[330]	build's auc: 0.855707	build's binary_logloss: 0.233075	valid's auc: 0.839675	valid's binary_logloss: 0.243966
[335]	build's auc: 0.855758	build's binary_logloss: 0.233048	valid's auc: 0.83967	valid's binary_logloss: 0.24397
[340]	build's auc: 0.855838	build's binary_logloss: 0.232994	valid's auc: 0

[410]	build's auc: 0.857289	build's binary_logloss: 0.232147	valid's auc: 0.839729	valid's binary_logloss: 0.243919
[415]	build's auc: 0.857359	build's binary_logloss: 0.232106	valid's auc: 0.839731	valid's binary_logloss: 0.243916
[420]	build's auc: 0.857443	build's binary_logloss: 0.232054	valid's auc: 0.83974	valid's binary_logloss: 0.243912
[425]	build's auc: 0.857509	build's binary_logloss: 0.232015	valid's auc: 0.839746	valid's binary_logloss: 0.24391
[430]	build's auc: 0.857547	build's binary_logloss: 0.231992	valid's auc: 0.839742	valid's binary_logloss: 0.243913
[435]	build's auc: 0.857612	build's binary_logloss: 0.231955	valid's auc: 0.839734	valid's binary_logloss: 0.243918
[440]	build's auc: 0.857705	build's binary_logloss: 0.231897	valid's auc: 0.839739	valid's binary_logloss: 0.243916
[445]	build's auc: 0.857781	build's binary_logloss: 0.231854	valid's auc: 0.839736	valid's binary_logloss: 0.243912
[450]	build's auc: 0.857851	build's binary_logloss: 0.231816	valid's auc: 

[50]	build's auc: 0.844006	build's binary_logloss: 0.241422	valid's auc: 0.833216	valid's binary_logloss: 0.250345
[55]	build's auc: 0.844569	build's binary_logloss: 0.240657	valid's auc: 0.83347	valid's binary_logloss: 0.249831
[60]	build's auc: 0.845385	build's binary_logloss: 0.239939	valid's auc: 0.833714	valid's binary_logloss: 0.249455
[65]	build's auc: 0.846114	build's binary_logloss: 0.239339	valid's auc: 0.833954	valid's binary_logloss: 0.249158
[70]	build's auc: 0.846916	build's binary_logloss: 0.238742	valid's auc: 0.834164	valid's binary_logloss: 0.248907
[75]	build's auc: 0.847329	build's binary_logloss: 0.238368	valid's auc: 0.834414	valid's binary_logloss: 0.248696
[80]	build's auc: 0.847725	build's binary_logloss: 0.238019	valid's auc: 0.83464	valid's binary_logloss: 0.248501
[85]	build's auc: 0.848165	build's binary_logloss: 0.237698	valid's auc: 0.834797	valid's binary_logloss: 0.248371
[90]	build's auc: 0.848498	build's binary_logloss: 0.23744	valid's auc: 0.8349	val

[205]	build's auc: 0.853348	build's binary_logloss: 0.234429	valid's auc: 0.835889	valid's binary_logloss: 0.247656
[210]	build's auc: 0.853444	build's binary_logloss: 0.23437	valid's auc: 0.835897	valid's binary_logloss: 0.247649
[215]	build's auc: 0.853584	build's binary_logloss: 0.23429	valid's auc: 0.835902	valid's binary_logloss: 0.247645
[220]	build's auc: 0.853671	build's binary_logloss: 0.234236	valid's auc: 0.835911	valid's binary_logloss: 0.247641
[225]	build's auc: 0.853822	build's binary_logloss: 0.234144	valid's auc: 0.835909	valid's binary_logloss: 0.247643
[230]	build's auc: 0.853972	build's binary_logloss: 0.234056	valid's auc: 0.835923	valid's binary_logloss: 0.247635
[235]	build's auc: 0.854078	build's binary_logloss: 0.233989	valid's auc: 0.83594	valid's binary_logloss: 0.247624
[240]	build's auc: 0.854153	build's binary_logloss: 0.233944	valid's auc: 0.835938	valid's binary_logloss: 0.247627
[245]	build's auc: 0.854244	build's binary_logloss: 0.23389	valid's auc: 0.

[320]	build's auc: 0.85571	build's binary_logloss: 0.233019	valid's auc: 0.83604	valid's binary_logloss: 0.247562
[325]	build's auc: 0.855768	build's binary_logloss: 0.232988	valid's auc: 0.836033	valid's binary_logloss: 0.247567
[330]	build's auc: 0.855863	build's binary_logloss: 0.232934	valid's auc: 0.836026	valid's binary_logloss: 0.24757
[335]	build's auc: 0.855908	build's binary_logloss: 0.232908	valid's auc: 0.836017	valid's binary_logloss: 0.247574
[340]	build's auc: 0.855996	build's binary_logloss: 0.232855	valid's auc: 0.836025	valid's binary_logloss: 0.24757
[345]	build's auc: 0.856061	build's binary_logloss: 0.232816	valid's auc: 0.836021	valid's binary_logloss: 0.247574
[350]	build's auc: 0.856202	build's binary_logloss: 0.232729	valid's auc: 0.836022	valid's binary_logloss: 0.247573
[355]	build's auc: 0.856368	build's binary_logloss: 0.232621	valid's auc: 0.836045	valid's binary_logloss: 0.247556
[360]	build's auc: 0.856447	build's binary_logloss: 0.232575	valid's auc: 0.

[430]	build's auc: 0.857811	build's binary_logloss: 0.231787	valid's auc: 0.836091	valid's binary_logloss: 0.247529
[435]	build's auc: 0.857853	build's binary_logloss: 0.231763	valid's auc: 0.836087	valid's binary_logloss: 0.247531
[440]	build's auc: 0.857919	build's binary_logloss: 0.231726	valid's auc: 0.83609	valid's binary_logloss: 0.247529
[445]	build's auc: 0.858002	build's binary_logloss: 0.23168	valid's auc: 0.836087	valid's binary_logloss: 0.247529
[450]	build's auc: 0.858081	build's binary_logloss: 0.231639	valid's auc: 0.836074	valid's binary_logloss: 0.247536
[455]	build's auc: 0.858206	build's binary_logloss: 0.231565	valid's auc: 0.83609	valid's binary_logloss: 0.247527
[460]	build's auc: 0.858317	build's binary_logloss: 0.231498	valid's auc: 0.836083	valid's binary_logloss: 0.247532
[465]	build's auc: 0.858397	build's binary_logloss: 0.231446	valid's auc: 0.83608	valid's binary_logloss: 0.247533
Early stopping, best iteration is:
[409]	build's auc: 0.857494	build's binar

[125]	build's auc: 0.850595	build's binary_logloss: 0.23637	valid's auc: 0.840463	valid's binary_logloss: 0.240906
[130]	build's auc: 0.850784	build's binary_logloss: 0.236254	valid's auc: 0.840513	valid's binary_logloss: 0.240875
[135]	build's auc: 0.851008	build's binary_logloss: 0.236114	valid's auc: 0.840548	valid's binary_logloss: 0.240854
[140]	build's auc: 0.851186	build's binary_logloss: 0.23601	valid's auc: 0.840559	valid's binary_logloss: 0.240846
[145]	build's auc: 0.851382	build's binary_logloss: 0.235885	valid's auc: 0.840576	valid's binary_logloss: 0.240837
[150]	build's auc: 0.851536	build's binary_logloss: 0.235791	valid's auc: 0.840577	valid's binary_logloss: 0.240836
[155]	build's auc: 0.851701	build's binary_logloss: 0.235699	valid's auc: 0.840563	valid's binary_logloss: 0.240842
[160]	build's auc: 0.851906	build's binary_logloss: 0.235571	valid's auc: 0.840612	valid's binary_logloss: 0.240815
[165]	build's auc: 0.852059	build's binary_logloss: 0.235475	valid's auc: 

[250]	build's auc: 0.854195	build's binary_logloss: 0.234131	valid's auc: 0.840863	valid's binary_logloss: 0.240631
[255]	build's auc: 0.854328	build's binary_logloss: 0.234052	valid's auc: 0.840871	valid's binary_logloss: 0.240627
[260]	build's auc: 0.854402	build's binary_logloss: 0.234008	valid's auc: 0.840861	valid's binary_logloss: 0.240632
[265]	build's auc: 0.854575	build's binary_logloss: 0.233903	valid's auc: 0.840891	valid's binary_logloss: 0.240614
[270]	build's auc: 0.854656	build's binary_logloss: 0.233858	valid's auc: 0.840886	valid's binary_logloss: 0.240616
[275]	build's auc: 0.854736	build's binary_logloss: 0.233809	valid's auc: 0.840883	valid's binary_logloss: 0.240619
[280]	build's auc: 0.854812	build's binary_logloss: 0.233764	valid's auc: 0.84088	valid's binary_logloss: 0.240618
[285]	build's auc: 0.854867	build's binary_logloss: 0.233733	valid's auc: 0.840875	valid's binary_logloss: 0.24062
[290]	build's auc: 0.855022	build's binary_logloss: 0.233644	valid's auc: 

[360]	build's auc: 0.856331	build's binary_logloss: 0.23288	valid's auc: 0.840913	valid's binary_logloss: 0.240584
[365]	build's auc: 0.856495	build's binary_logloss: 0.232784	valid's auc: 0.840935	valid's binary_logloss: 0.24057
[370]	build's auc: 0.856533	build's binary_logloss: 0.232763	valid's auc: 0.840924	valid's binary_logloss: 0.240575
[375]	build's auc: 0.85663	build's binary_logloss: 0.2327	valid's auc: 0.840916	valid's binary_logloss: 0.240577
[380]	build's auc: 0.856711	build's binary_logloss: 0.232654	valid's auc: 0.840916	valid's binary_logloss: 0.240577
[385]	build's auc: 0.856752	build's binary_logloss: 0.23263	valid's auc: 0.840908	valid's binary_logloss: 0.240579
[390]	build's auc: 0.856905	build's binary_logloss: 0.232544	valid's auc: 0.840911	valid's binary_logloss: 0.240579
[395]	build's auc: 0.856983	build's binary_logloss: 0.232495	valid's auc: 0.840906	valid's binary_logloss: 0.240579
[400]	build's auc: 0.857062	build's binary_logloss: 0.232449	valid's auc: 0.84

[465]	build's auc: 0.858184	build's binary_logloss: 0.231795	valid's auc: 0.840916	valid's binary_logloss: 0.240578
Early stopping, best iteration is:
[409]	build's auc: 0.857312	build's binary_logloss: 0.232297	valid's auc: 0.840944	valid's binary_logloss: 0.240559


<__main__.CustomStackingClassifier at 0x7f1e42c5c0f0>

In [8]:
# Train the version-3 ensemble, score the test set (fit returns the classifier,
# so the calls can be chained) and write the averaged predictions to disk.
# NOTE(review): the result is bound to `stack1` even though it holds the third
# stack - confirm no later cell expects the version-1 predictions under that name.
stack1 = cscf_3.fit(x_train, labels).predict(x_test, test)
stack1.to_csv(path_or_buf="data/lgb_stack3.csv", index=False)

build user:124611, valid user:6598
FINAL SHAPE
dbuild.shape:(8048078, 78)  dvalid.shape:(426583, 78)

[LightGBM] [Info] Number of positive: 787676, number of negative: 7260402
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 28108
[LightGBM] [Info] Number of data points in the train set: 8048078, number of used features: 78
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.097871 -> initscore=-2.221104
[LightGBM] [Info] Start training from score -2.221104
Training until validation scores don't improve for 60 rounds
[5]	build's auc: 0.83199	build's binary_logloss: 0.28841	valid's auc: 0.8311	valid's binary_logloss: 0.286016
[10]	build's auc: 0.833687	build's binary_logloss: 0.272682	valid's auc: 0.831861	valid's binary_logloss: 0.270738
[15]	build's auc: 0.835032	build's binary_logloss: 0.262839	valid's auc: 0.833232	valid's binary_logloss: 0.261145
[20]	build's auc: 0.836549	build's bi

[185]	build's auc: 0.852882	build's binary_logloss: 0.235002	valid's auc: 0.839512	valid's binary_logloss: 0.239991
[190]	build's auc: 0.853006	build's binary_logloss: 0.234929	valid's auc: 0.839527	valid's binary_logloss: 0.23998
[195]	build's auc: 0.853164	build's binary_logloss: 0.23482	valid's auc: 0.839538	valid's binary_logloss: 0.239965
[200]	build's auc: 0.853259	build's binary_logloss: 0.23476	valid's auc: 0.839541	valid's binary_logloss: 0.239961
[205]	build's auc: 0.853352	build's binary_logloss: 0.234712	valid's auc: 0.839563	valid's binary_logloss: 0.239952
[210]	build's auc: 0.853424	build's binary_logloss: 0.234663	valid's auc: 0.839567	valid's binary_logloss: 0.239949
[215]	build's auc: 0.853539	build's binary_logloss: 0.234592	valid's auc: 0.839594	valid's binary_logloss: 0.239933
[220]	build's auc: 0.853634	build's binary_logloss: 0.234536	valid's auc: 0.839598	valid's binary_logloss: 0.23993
[225]	build's auc: 0.853762	build's binary_logloss: 0.234458	valid's auc: 0.

[300]	build's auc: 0.855326	build's binary_logloss: 0.233525	valid's auc: 0.83973	valid's binary_logloss: 0.23985
[305]	build's auc: 0.855447	build's binary_logloss: 0.233452	valid's auc: 0.839732	valid's binary_logloss: 0.239845
[310]	build's auc: 0.855564	build's binary_logloss: 0.23338	valid's auc: 0.839726	valid's binary_logloss: 0.23985
[315]	build's auc: 0.855603	build's binary_logloss: 0.233358	valid's auc: 0.839721	valid's binary_logloss: 0.239853
[320]	build's auc: 0.855695	build's binary_logloss: 0.233306	valid's auc: 0.839722	valid's binary_logloss: 0.239851
[325]	build's auc: 0.855739	build's binary_logloss: 0.233281	valid's auc: 0.839717	valid's binary_logloss: 0.239853
[330]	build's auc: 0.855839	build's binary_logloss: 0.23322	valid's auc: 0.83973	valid's binary_logloss: 0.239845
[335]	build's auc: 0.855894	build's binary_logloss: 0.233192	valid's auc: 0.839714	valid's binary_logloss: 0.239851
[340]	build's auc: 0.855981	build's binary_logloss: 0.233142	valid's auc: 0.83

[410]	build's auc: 0.857443	build's binary_logloss: 0.232263	valid's auc: 0.839757	valid's binary_logloss: 0.239812
[415]	build's auc: 0.857523	build's binary_logloss: 0.232218	valid's auc: 0.83974	valid's binary_logloss: 0.239819
[420]	build's auc: 0.857624	build's binary_logloss: 0.232159	valid's auc: 0.839736	valid's binary_logloss: 0.23982
[425]	build's auc: 0.857689	build's binary_logloss: 0.23212	valid's auc: 0.839741	valid's binary_logloss: 0.239818
[430]	build's auc: 0.857728	build's binary_logloss: 0.232099	valid's auc: 0.839738	valid's binary_logloss: 0.23982
[435]	build's auc: 0.857765	build's binary_logloss: 0.232077	valid's auc: 0.839729	valid's binary_logloss: 0.239825
[440]	build's auc: 0.857851	build's binary_logloss: 0.232024	valid's auc: 0.839726	valid's binary_logloss: 0.239824
[445]	build's auc: 0.857939	build's binary_logloss: 0.231974	valid's auc: 0.839723	valid's binary_logloss: 0.239825
[450]	build's auc: 0.858005	build's binary_logloss: 0.231939	valid's auc: 0.

[125]	build's auc: 0.850754	build's binary_logloss: 0.236212	valid's auc: 0.837351	valid's binary_logloss: 0.243511
[130]	build's auc: 0.850933	build's binary_logloss: 0.236098	valid's auc: 0.837391	valid's binary_logloss: 0.24349
[135]	build's auc: 0.851163	build's binary_logloss: 0.235948	valid's auc: 0.83748	valid's binary_logloss: 0.243442
[140]	build's auc: 0.851345	build's binary_logloss: 0.235844	valid's auc: 0.837498	valid's binary_logloss: 0.243428
[145]	build's auc: 0.851536	build's binary_logloss: 0.235729	valid's auc: 0.83751	valid's binary_logloss: 0.243421
[150]	build's auc: 0.85167	build's binary_logloss: 0.235648	valid's auc: 0.837504	valid's binary_logloss: 0.243422
[155]	build's auc: 0.851851	build's binary_logloss: 0.235548	valid's auc: 0.837491	valid's binary_logloss: 0.243431
[160]	build's auc: 0.852075	build's binary_logloss: 0.235404	valid's auc: 0.837551	valid's binary_logloss: 0.243397
[165]	build's auc: 0.852217	build's binary_logloss: 0.235319	valid's auc: 0.

[245]	build's auc: 0.854261	build's binary_logloss: 0.234054	valid's auc: 0.837787	valid's binary_logloss: 0.243239
[250]	build's auc: 0.854369	build's binary_logloss: 0.233984	valid's auc: 0.837813	valid's binary_logloss: 0.243224
[255]	build's auc: 0.854498	build's binary_logloss: 0.233907	valid's auc: 0.837831	valid's binary_logloss: 0.243213
[260]	build's auc: 0.854581	build's binary_logloss: 0.233858	valid's auc: 0.837825	valid's binary_logloss: 0.243218
[265]	build's auc: 0.854758	build's binary_logloss: 0.233746	valid's auc: 0.837853	valid's binary_logloss: 0.243207
[270]	build's auc: 0.854826	build's binary_logloss: 0.233707	valid's auc: 0.837853	valid's binary_logloss: 0.243208
[275]	build's auc: 0.854905	build's binary_logloss: 0.23366	valid's auc: 0.837827	valid's binary_logloss: 0.24322
[280]	build's auc: 0.854991	build's binary_logloss: 0.233609	valid's auc: 0.837832	valid's binary_logloss: 0.24322
[285]	build's auc: 0.855032	build's binary_logloss: 0.233587	valid's auc: 0

[55]	build's auc: 0.844401	build's binary_logloss: 0.241022	valid's auc: 0.837211	valid's binary_logloss: 0.242337
[60]	build's auc: 0.845194	build's binary_logloss: 0.240319	valid's auc: 0.837494	valid's binary_logloss: 0.241949
[65]	build's auc: 0.845924	build's binary_logloss: 0.23971	valid's auc: 0.837755	valid's binary_logloss: 0.241629
[70]	build's auc: 0.846719	build's binary_logloss: 0.239122	valid's auc: 0.837983	valid's binary_logloss: 0.241373
[75]	build's auc: 0.847142	build's binary_logloss: 0.238738	valid's auc: 0.838256	valid's binary_logloss: 0.241136
[80]	build's auc: 0.847569	build's binary_logloss: 0.238369	valid's auc: 0.838509	valid's binary_logloss: 0.240927
[85]	build's auc: 0.848014	build's binary_logloss: 0.238043	valid's auc: 0.838681	valid's binary_logloss: 0.240791
[90]	build's auc: 0.848346	build's binary_logloss: 0.237789	valid's auc: 0.83882	valid's binary_logloss: 0.240686
[95]	build's auc: 0.848705	build's binary_logloss: 0.237547	valid's auc: 0.838905	

[210]	build's auc: 0.853287	build's binary_logloss: 0.234724	valid's auc: 0.839781	valid's binary_logloss: 0.240052
[215]	build's auc: 0.853405	build's binary_logloss: 0.234653	valid's auc: 0.839801	valid's binary_logloss: 0.240041
[220]	build's auc: 0.853487	build's binary_logloss: 0.234604	valid's auc: 0.839793	valid's binary_logloss: 0.240043
[225]	build's auc: 0.853628	build's binary_logloss: 0.234515	valid's auc: 0.839816	valid's binary_logloss: 0.240029
[230]	build's auc: 0.853784	build's binary_logloss: 0.23442	valid's auc: 0.839837	valid's binary_logloss: 0.240015
[235]	build's auc: 0.853892	build's binary_logloss: 0.234351	valid's auc: 0.839854	valid's binary_logloss: 0.240002
[240]	build's auc: 0.853966	build's binary_logloss: 0.234308	valid's auc: 0.83985	valid's binary_logloss: 0.240003
[245]	build's auc: 0.854039	build's binary_logloss: 0.234262	valid's auc: 0.839851	valid's binary_logloss: 0.239998
[250]	build's auc: 0.854152	build's binary_logloss: 0.23419	valid's auc: 0

[320]	build's auc: 0.855519	build's binary_logloss: 0.23338	valid's auc: 0.839963	valid's binary_logloss: 0.239938
[325]	build's auc: 0.855572	build's binary_logloss: 0.233349	valid's auc: 0.83995	valid's binary_logloss: 0.239945
[330]	build's auc: 0.855672	build's binary_logloss: 0.233292	valid's auc: 0.839944	valid's binary_logloss: 0.239948
[335]	build's auc: 0.855728	build's binary_logloss: 0.233263	valid's auc: 0.839942	valid's binary_logloss: 0.23995
[340]	build's auc: 0.855811	build's binary_logloss: 0.23321	valid's auc: 0.839945	valid's binary_logloss: 0.239949
[345]	build's auc: 0.855882	build's binary_logloss: 0.233167	valid's auc: 0.839943	valid's binary_logloss: 0.23995
[350]	build's auc: 0.856017	build's binary_logloss: 0.233084	valid's auc: 0.83997	valid's binary_logloss: 0.239936
[355]	build's auc: 0.856181	build's binary_logloss: 0.232986	valid's auc: 0.84	valid's binary_logloss: 0.239919
[360]	build's auc: 0.856262	build's binary_logloss: 0.232941	valid's auc: 0.840005

[435]	build's auc: 0.857584	build's binary_logloss: 0.232148	valid's auc: 0.840034	valid's binary_logloss: 0.239901
[440]	build's auc: 0.857658	build's binary_logloss: 0.232104	valid's auc: 0.840035	valid's binary_logloss: 0.239901
[445]	build's auc: 0.857736	build's binary_logloss: 0.232051	valid's auc: 0.840029	valid's binary_logloss: 0.239901
[450]	build's auc: 0.857788	build's binary_logloss: 0.232023	valid's auc: 0.840026	valid's binary_logloss: 0.239904
[455]	build's auc: 0.857898	build's binary_logloss: 0.231959	valid's auc: 0.840039	valid's binary_logloss: 0.2399
[460]	build's auc: 0.858	build's binary_logloss: 0.231899	valid's auc: 0.840028	valid's binary_logloss: 0.239903
[465]	build's auc: 0.858112	build's binary_logloss: 0.231833	valid's auc: 0.840017	valid's binary_logloss: 0.239907
[470]	build's auc: 0.858203	build's binary_logloss: 0.231787	valid's auc: 0.840011	valid's binary_logloss: 0.239907
[475]	build's auc: 0.858275	build's binary_logloss: 0.231744	valid's auc: 0.8

<__main__.CustomStackingClassifier at 0x7f1e42c5c128>