In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv('../data/train.csv')
df_test = pd.read_csv('../data/test.csv')

In [4]:
df.head()

Unnamed: 0,id,bin_0,bin_1,bin_2,bin_3,bin_4,nom_0,nom_1,nom_2,nom_3,...,nom_9,ord_0,ord_1,ord_2,ord_3,ord_4,ord_5,day,month,target
0,0,0.0,0.0,0.0,F,N,Red,Trapezoid,Hamster,Russia,...,02e7c8990,3.0,Contributor,Hot,c,U,Pw,6.0,3.0,0
1,1,1.0,1.0,0.0,F,Y,Red,Star,Axolotl,,...,f37df64af,3.0,Grandmaster,Warm,e,X,pE,7.0,7.0,0
2,2,0.0,1.0,0.0,F,N,Red,,Hamster,Canada,...,,3.0,,Freezing,n,P,eN,5.0,9.0,0
3,3,,0.0,0.0,F,N,Red,Circle,Hamster,Finland,...,f9d456e57,1.0,Novice,Lava Hot,a,C,,3.0,3.0,0
4,4,0.0,,0.0,T,N,Red,Triangle,Hamster,Costa Rica,...,c5361037c,3.0,Grandmaster,Cold,h,C,OZ,5.0,12.0,0


# Preporcess data

In [5]:
from sklearn.model_selection import StratifiedKFold
import category_encoders as ce
import numpy as np
import datetime

mapping_ord1 = {'Unknown': 0, 'Novice': 1, 'Expert': 2, 'Contributor': 3, 'Master': 4, 'Grandmaster': 5}
mapping_ord2 = {'Unknown': 0, 'Freezing': 1, 'Cold': 2, 'Warm': 3, 'Hot': 4, 'Boiling Hot': 5, 'Lava Hot': 6}
mapping_ord3 = dict([(v, i) for i, v in enumerate(sorted(set(df['ord_3'].fillna("0"))))])
mapping_ord4 = dict([(v, i) for i, v in enumerate(sorted(set(df['ord_4'].fillna("0"))))])
mapping_ord5 = dict([(v, i) for i, v in enumerate(sorted(set(df['ord_5'].fillna("0"))))])

def preprocess_data(df):
    df['ord_0'] = df['ord_0'].fillna(0)
    df['ord_1'] =  df['ord_1'].fillna('Unknown').map(mapping_ord1)
    df['ord_2'] =  df['ord_2'].fillna('Unknown').map(mapping_ord2)
    df['ord_3'] =  df['ord_3'].fillna('0').map(mapping_ord3)
    df['ord_4'] =  df['ord_4'].fillna('0').map(mapping_ord4)
    df['ord_5'] =  df['ord_5'].fillna('0').map(mapping_ord5)
    df['bin_3'] = df['bin_3'].fillna('U').map({"T": 1, "F": 0, "U": np.nan})
    df['bin_4'] = df['bin_4'].fillna('U').map({"Y": 1, "N": 0, "U": np.nan})
    return df

def encode(df, df_test, cols):
    cols_enc = list(map(lambda x: x + "_enc", cols))
    for c in cols_enc:
        df[c] = np.nan
        df_test[c] = np.nan
        
    skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
    
    for train_idx, oof_idx in skf.split(df, df["target"]):
        enc = ce.TargetEncoder(cols = cols, smoothing=0.3)
        enc.fit(df.loc[train_idx, cols], df.loc[train_idx, "target"])
        df.loc[oof_idx, cols_enc] = enc.transform(df.loc[oof_idx, cols]).values
    
    enc = ce.TargetEncoder(cols = cols, smoothing=0.3)
    enc.fit(df[cols], df["target"])
    df_test[cols_enc] = enc.transform(df_test[cols])
    return df, df_test, cols_enc
    
df = preprocess_data(df)
df_test = preprocess_data(df_test)

binary = ['bin_0', 'bin_1', 'bin_2', 'bin_3', 'bin_4']
ordinal  = ['ord_0', 'ord_1', 'ord_2', 'ord_3', 'ord_4', 'ord_5']
low_card = ['nom_0', 'nom_1', 'nom_2', 'nom_3', 'nom_4']
high_card = ['nom_5', 'nom_6', 'nom_7', 'nom_8', 'nom_9']
date = ['day', 'month']

df, df_test, cat_enc =  encode(df, df_test, high_card + low_card + date)
# features = high_card + binary + ordinal + cat_enc + low_card + date
features = binary + ordinal + cat_enc

In [6]:
df.head()

Unnamed: 0,id,bin_0,bin_1,bin_2,bin_3,bin_4,nom_0,nom_1,nom_2,nom_3,...,nom_7_enc,nom_8_enc,nom_9_enc,nom_0_enc,nom_1_enc,nom_2_enc,nom_3_enc,nom_4_enc,day_enc,month_enc
0,0,0.0,0.0,0.0,0.0,0.0,Red,Trapezoid,Hamster,Russia,...,0.098296,0.129236,0.177914,0.183158,0.225096,0.167919,0.219491,0.207147,0.200424,0.146475
1,1,1.0,1.0,0.0,0.0,1.0,Red,Star,Axolotl,,...,0.128414,0.191517,0.155502,0.183176,0.130233,0.200552,0.185771,0.179593,0.212892,0.208686
2,2,0.0,1.0,0.0,0.0,0.0,Red,,Hamster,Canada,...,0.266392,0.169176,0.188439,0.183158,0.1819,0.167919,0.159677,0.207147,0.164081,0.213645
3,3,,0.0,0.0,0.0,0.0,Red,Circle,Hamster,Finland,...,0.139367,0.222915,0.271186,0.182968,0.179723,0.168632,0.178692,0.179306,0.163514,0.14627
4,4,0.0,,0.0,1.0,0.0,Red,Triangle,Hamster,Costa Rica,...,0.195893,0.191633,0.261851,0.183176,0.157501,0.167905,0.202839,0.188604,0.163069,0.224929


# Learn model

In [7]:
from sklearn.base import BaseEstimator, TransformerMixin

class ColumnSelector(BaseEstimator, TransformerMixin):
    
    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        assert isinstance(X, pd.DataFrame)

        try:
            return X[self.columns]
        except KeyError:
            cols_error = list(set(self.columns) - set(X.columns))
            raise KeyError("The DataFrame does not include the columns: %s" % cols_error)

In [8]:
from lightgbm import LGBMClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import make_column_transformer
import category_encoders as ce
 
params = {
        'clf__objective': 'binary',
        'clf__boosting_type': 'gbdt', 
        'clf__metric': 'auc',
        'clf__learning_rate': 0.01,
        "clf__bagging_freq": 5,
        "clf__bagging_fraction": 0.8,
        "clf__min_data_in_leaf": 30,
        "clf__min_sum_hessian_in_leaf": 5,
        "clf__use_two_round_loading": False,
        "clf__feature_fraction": 0.8,
        'clf__verbose': 1,
        "clf__lambda_l1": 0.1,
        "clf__n_estimators": 5000,
        "clf__max_depth": 3,
        "clf__reg_alpha": 1,
        "clf__reg_lambda": 1,
        #"enc__a": 0.2
    }

                        
pipeline = Pipeline(steps=[('sel', ColumnSelector(features)), 
                           #('enc', ce.CatBoostEncoder(cols=high_card + low_card + date)),
                           ('clf', LGBMClassifier())]).set_params(**params)

In [9]:
from hyperopt import hp, tpe
from hyperopt.fmin import fmin
from hyperopt.pyll.base import scope
from hyperopt import space_eval
import numpy as np
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold


def make_objective(pipeline, df):
    
    def objective(params):
                
        pipeline.set_params(**params)
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        
        scores = []
        for train_idx, val_idx in tqdm(skf.split(df, df["target"])):
            
            df_train, df_val = df.loc[train_idx, :], df.loc[val_idx, :]
            transformers = Pipeline(pipeline.steps[:-1]).fit(df_train, df_train['target'])
            X_val, y_val = transformers.transform(df_val), df_val['target']
            
            fit_args = {
                "clf__early_stopping_rounds": 1000, 
                "clf__eval_set": (X_val, y_val),
                "clf__verbose": 500,
                "clf__eval_metric": 'auc',
            }
            
            pipeline.fit(df_train, df_train['target'], **fit_args)
            prob = pipeline.predict_proba(df_val)[:, 1]
            scores.append(roc_auc_score(y_val, prob))
            
        score = np.mean(scores)
        print("AUC {:.5f}+-{:.5f}.\n".format(np.mean(scores), np.std(scores)) + 
              "Params: \n" + 
              "\n".join(["{}: {:.3f}".format(k, params[k]) for k in params]))
        return 1 - score
    
    return objective

space = {
        "clf__learning_rate": hp.loguniform("clf__learning_rate", np.log(0.005), np.log(0.5)),
        'clf__lambda_l1': hp.loguniform('clf__lambda_l1', np.log(0.005), np.log(0.5)),
        "clf__num_leaves": hp.choice("clf__num_leaves", np.linspace(8, 128, 50, dtype=int)),
        "clf__feature_fraction": hp.quniform("clf__feature_fraction", 0.5, 1.0, 0.1),
        "clf__bagging_fraction": hp.quniform("clf__bagging_fraction", 0.5, 1.0, 0.1),
        "clf__min_child_weight": hp.uniform('clf__min_child_weight', 0.5, 10),
        'clf__colsample_bytree': hp.uniform('clf__colsample_bytree', 0.3, 1.0),
    }

best = fmin(fn=make_objective(pipeline, df),
            space=space,
            algo=tpe.suggest,
            max_evals=10)

best_params = space_eval(space, best)

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

0it [00:00, ?it/s]
[A


Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.781489                         
[1000]	valid_0's auc: 0.785834                        
[1500]	valid_0's auc: 0.786576                        
[2000]	valid_0's auc: 0.786656                        
[2500]	valid_0's auc: 0.786585                        
[3000]	valid_0's auc: 0.786487                        
Early stopping, best iteration is:                    
[2041]	valid_0's auc: 0.786666
  0%|          | 0/10 [00:34<?, ?trial/s, best loss=?]

1it [00:35, 35.83s/it]
[A


Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.783714                         
[1000]	valid_0's auc: 0.788265                        
[1500]	valid_0's auc: 0.788985                        
[2000]	valid_0's auc: 0.789063                        
[2500]	valid_0's auc: 0.789016                        
Early stopping, best iteration is:                    
[1943]	valid_0's auc: 0.789096
  0%|          | 0/10 [01:09<?, ?trial/s, best loss=?]

2it [01:10, 35.62s/it]
[A


Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.784501                         
[1000]	valid_0's auc: 0.788693                        
[1500]	valid_0's auc: 0.78922                         
[2000]	valid_0's auc: 0.789179                        
[2500]	valid_0's auc: 0.789105                        
Early stopping, best iteration is:                    
[1684]	valid_0's auc: 0.789279
  0%|          | 0/10 [01:42<?, ?trial/s, best loss=?]

3it [01:43, 34.61s/it]
[A


Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.781946                         
[1000]	valid_0's auc: 0.786031                        
[1500]	valid_0's auc: 0.786543                        
[2000]	valid_0's auc: 0.786591                        
[2500]	valid_0's auc: 0.786515                        
[3000]	valid_0's auc: 0.786444                        
Early stopping, best iteration is:                    
[2146]	valid_0's auc: 0.786624
  0%|          | 0/10 [02:18<?, ?trial/s, best loss=?]

4it [02:20, 35.29s/it]
[A


Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.781055                         
[1000]	valid_0's auc: 0.785682                        
[1500]	valid_0's auc: 0.786405                        
[2000]	valid_0's auc: 0.786468                        
[2500]	valid_0's auc: 0.786377                        
Early stopping, best iteration is:                    
[1840]	valid_0's auc: 0.7865
  0%|          | 0/10 [02:52<?, ?trial/s, best loss=?]

5it [02:53, 34.63s/it]
[A
5it [02:53, 34.64s/it]


AUC 0.78763+-0.00127.                                 
Params: 
clf__bagging_fraction: 0.700
clf__colsample_bytree: 0.387
clf__feature_fraction: 0.600
clf__lambda_l1: 0.034
clf__learning_rate: 0.026
clf__min_child_weight: 4.014
clf__num_leaves: 101.000
 10%|█         | 1/10 [02:53<25:58, 173.21s/trial, best loss: 0.21236703645197186]

0it [00:00, ?it/s]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.778392                                                     
[1000]	valid_0's auc: 0.784618                                                    
[1500]	valid_0's auc: 0.786177                                                    
[2000]	valid_0's auc: 0.786529                                                    
[2500]	valid_0's auc: 0.786545                                                    
[3000]	valid_0's auc: 0.786495                                                    
Early stopping, best iteration is:                                                
[2260]	valid_0's auc: 0.786575
 10%|█         | 1/10 [03:30<25:58, 173.21s/trial, best loss: 0.21236703645197186]

1it [00:38, 38.45s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.780253                                                     
[1000]	valid_0's auc: 0.78693                                                     
[1500]	valid_0's auc: 0.788534                                                    
[2000]	valid_0's auc: 0.788918                                                    
[2500]	valid_0's auc: 0.788971                                                    
[3000]	valid_0's auc: 0.788971                                                    
Early stopping, best iteration is:                                                
[2268]	valid_0's auc: 0.788999
 10%|█         | 1/10 [04:09<25:58, 173.21s/trial, best loss: 0.21236703645197186]

2it [01:17, 38.63s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.781064                                                     
[1000]	valid_0's auc: 0.787443                                                    
[1500]	valid_0's auc: 0.788821                                                    
[2000]	valid_0's auc: 0.789104                                                    
[2500]	valid_0's auc: 0.7891                                                      
[3000]	valid_0's auc: 0.789034                                                    
Early stopping, best iteration is:                                                
[2121]	valid_0's auc: 0.789121
 10%|█         | 1/10 [04:49<25:58, 173.21s/trial, best loss: 0.21236703645197186]

3it [01:57, 39.00s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.778513                                                     
[1000]	valid_0's auc: 0.784749                                                    
[1500]	valid_0's auc: 0.786173                                                    
[2000]	valid_0's auc: 0.786445                                                    
[2500]	valid_0's auc: 0.786507                                                    
[3000]	valid_0's auc: 0.786459                                                    
Early stopping, best iteration is:                                                
[2313]	valid_0's auc: 0.786524
 10%|█         | 1/10 [05:29<25:58, 173.21s/trial, best loss: 0.21236703645197186]

4it [02:37, 39.37s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.777585                                                     
[1000]	valid_0's auc: 0.784417                                                    
[1500]	valid_0's auc: 0.786042                                                    
[2000]	valid_0's auc: 0.786389                                                    
[2500]	valid_0's auc: 0.786509                                                    
[3000]	valid_0's auc: 0.786469                                                    
Early stopping, best iteration is:                                                
[2409]	valid_0's auc: 0.786527
 10%|█         | 1/10 [06:09<25:58, 173.21s/trial, best loss: 0.21236703645197186]

5it [03:18, 39.71s/it]
[A
5it [03:18, 39.62s/it]


AUC 0.78755+-0.00123.                                                             
Params: 
clf__bagging_fraction: 0.700
clf__colsample_bytree: 0.309
clf__feature_fraction: 0.800
clf__lambda_l1: 0.239
clf__learning_rate: 0.019
clf__min_child_weight: 3.802
clf__num_leaves: 54.000
 20%|██        | 2/10 [06:11<24:05, 180.69s/trial, best loss: 0.21236703645197186]

0it [00:00, ?it/s]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.786143                                                     
[1000]	valid_0's auc: 0.785465                                                    
[1500]	valid_0's auc: 0.784904                                                    
Early stopping, best iteration is:                                                
[508]	valid_0's auc: 0.786192
 20%|██        | 2/10 [06:32<24:05, 180.69s/trial, best loss: 0.21236703645197186]

1it [00:21, 21.50s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.78846                                                      
[1000]	valid_0's auc: 0.788132                                                    
Early stopping, best iteration is:                                                
[420]	valid_0's auc: 0.788512
 20%|██        | 2/10 [06:51<24:05, 180.69s/trial, best loss: 0.21236703645197186]

2it [00:40, 20.75s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.788668                                                     
[1000]	valid_0's auc: 0.787897                                                    
[1500]	valid_0's auc: 0.787225                                                    
Early stopping, best iteration is:                                                
[504]	valid_0's auc: 0.788682
 20%|██        | 2/10 [07:11<24:05, 180.69s/trial, best loss: 0.21236703645197186]

3it [01:01, 20.69s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.785718                                                     
[1000]	valid_0's auc: 0.785219                                                    
Early stopping, best iteration is:                                                
[467]	valid_0's auc: 0.785756
 20%|██        | 2/10 [07:33<24:05, 180.69s/trial, best loss: 0.21236703645197186]

4it [01:22, 21.00s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.785803                                                     
[1000]	valid_0's auc: 0.785411                                                    
Early stopping, best iteration is:                                                
[497]	valid_0's auc: 0.785812
 20%|██        | 2/10 [07:55<24:05, 180.69s/trial, best loss: 0.21236703645197186]

5it [01:44, 21.32s/it]
[A
5it [01:44, 20.97s/it]


AUC 0.78699+-0.00132.                                                             
Params: 
clf__bagging_fraction: 0.800
clf__colsample_bytree: 0.796
clf__feature_fraction: 1.000
clf__lambda_l1: 0.478
clf__learning_rate: 0.101
clf__min_child_weight: 5.890
clf__num_leaves: 105.000
 30%|███       | 3/10 [07:56<18:25, 157.95s/trial, best loss: 0.21236703645197186]

0it [00:00, ?it/s]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.786366                                                     
[1000]	valid_0's auc: 0.786083                                                    
[1500]	valid_0's auc: 0.785671                                                    
Early stopping, best iteration is:                                                
[602]	valid_0's auc: 0.786402
 30%|███       | 3/10 [08:17<18:25, 157.95s/trial, best loss: 0.21236703645197186]

1it [00:21, 21.43s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.788338                                                     
[1000]	valid_0's auc: 0.788235                                                    
[1500]	valid_0's auc: 0.787875                                                    
Early stopping, best iteration is:                                                
[663]	valid_0's auc: 0.788383
 30%|███       | 3/10 [08:39<18:25, 157.95s/trial, best loss: 0.21236703645197186]

2it [00:43, 21.63s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.788587                                                     
[1000]	valid_0's auc: 0.788238                                                    
Early stopping, best iteration is:                                                
[464]	valid_0's auc: 0.788606
 30%|███       | 3/10 [08:59<18:25, 157.95s/trial, best loss: 0.21236703645197186]

3it [01:03, 21.13s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.785869                                                     
[1000]	valid_0's auc: 0.78549                                                     
Early stopping, best iteration is:                                                
[482]	valid_0's auc: 0.785899
 30%|███       | 3/10 [09:18<18:25, 157.95s/trial, best loss: 0.21236703645197186]

4it [01:23, 20.69s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.785918                                                     
[1000]	valid_0's auc: 0.78582                                                     
[1500]	valid_0's auc: 0.785308                                                    
Early stopping, best iteration is:                                                
[536]	valid_0's auc: 0.78596
 30%|███       | 3/10 [09:40<18:25, 157.95s/trial, best loss: 0.21236703645197186]

5it [01:44, 20.83s/it]
[A
5it [01:44, 20.86s/it]


AUC 0.78705+-0.00119.                                                             
Params: 
clf__bagging_fraction: 1.000
clf__colsample_bytree: 0.557
clf__feature_fraction: 0.900
clf__lambda_l1: 0.028
clf__learning_rate: 0.090
clf__min_child_weight: 3.152
clf__num_leaves: 49.000
 40%|████      | 4/10 [09:40<14:11, 141.86s/trial, best loss: 0.21236703645197186]

0it [00:00, ?it/s]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.761926                                                     
[1000]	valid_0's auc: 0.776656                                                    
[1500]	valid_0's auc: 0.781459                                                    
[2000]	valid_0's auc: 0.783803                                                    
[2500]	valid_0's auc: 0.785106                                                    
[3000]	valid_0's auc: 0.785807                                                    
[3500]	valid_0's auc: 0.786165                                                    
[4000]	valid_0's auc: 0.786334                                                    
[4500]	valid_0's auc: 0.786452                                                    
[5000]	valid_0's auc: 0.786506                                                    
Did not meet early stopping. Best iteration is:                                   
[496

1it [01:23, 83.02s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.763862                                                     
[1000]	valid_0's auc: 0.7786                                                      
[1500]	valid_0's auc: 0.783667                                                    
[2000]	valid_0's auc: 0.786201                                                    
[2500]	valid_0's auc: 0.787464                                                    
[3000]	valid_0's auc: 0.788189                                                    
[3500]	valid_0's auc: 0.788585                                                    
[4000]	valid_0's auc: 0.788802                                                    
[4500]	valid_0's auc: 0.788932                                                    
[5000]	valid_0's auc: 0.788985                                                    
Did not meet early stopping. Best iteration is:                                   
[500

2it [02:49, 84.00s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.764543                                                     
[1000]	valid_0's auc: 0.779606                                                    
[1500]	valid_0's auc: 0.784573                                                    
[2000]	valid_0's auc: 0.786815                                                    
[2500]	valid_0's auc: 0.787976                                                    
[3000]	valid_0's auc: 0.788573                                                    
[3500]	valid_0's auc: 0.788865                                                    
[4000]	valid_0's auc: 0.789024                                                    
[4500]	valid_0's auc: 0.789099                                                    
[5000]	valid_0's auc: 0.789124                                                    
Did not meet early stopping. Best iteration is:                                   
[498

3it [04:17, 85.11s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.763206                                                     
[1000]	valid_0's auc: 0.776957                                                    
[1500]	valid_0's auc: 0.781704                                                    
[2000]	valid_0's auc: 0.783949                                                    
[2500]	valid_0's auc: 0.785215                                                    
[3000]	valid_0's auc: 0.785854                                                    
[3500]	valid_0's auc: 0.786179                                                    
[4000]	valid_0's auc: 0.786326                                                    
[4500]	valid_0's auc: 0.786383                                                    
[5000]	valid_0's auc: 0.786393                                                    
Did not meet early stopping. Best iteration is:                                   
[471

4it [05:45, 86.06s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.761118                                                     
[1000]	valid_0's auc: 0.775939                                                    
[1500]	valid_0's auc: 0.780997                                                    
[2000]	valid_0's auc: 0.783625                                                    
[2500]	valid_0's auc: 0.785013                                                    
[3000]	valid_0's auc: 0.785724                                                    
[3500]	valid_0's auc: 0.786078                                                    
[4000]	valid_0's auc: 0.786295                                                    
[4500]	valid_0's auc: 0.786374                                                    
[5000]	valid_0's auc: 0.786402                                                    
Did not meet early stopping. Best iteration is:                                   
[480

5it [07:11, 86.10s/it]
[A
5it [07:11, 86.29s/it]


AUC 0.78749+-0.00128.                                                             
Params: 
clf__bagging_fraction: 0.800
clf__colsample_bytree: 0.675
clf__feature_fraction: 1.000
clf__lambda_l1: 0.084
clf__learning_rate: 0.009
clf__min_child_weight: 3.053
clf__num_leaves: 125.000
 50%|█████     | 5/10 [16:52<19:03, 228.75s/trial, best loss: 0.21236703645197186]

0it [00:00, ?it/s]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.784856                                                     
[1000]	valid_0's auc: 0.786231                                                    
[1500]	valid_0's auc: 0.786146                                                    
[2000]	valid_0's auc: 0.785941                                                    
Early stopping, best iteration is:                                                
[1135]	valid_0's auc: 0.786274
 50%|█████     | 5/10 [17:29<19:03, 228.75s/trial, best loss: 0.21236703645197186]

1it [00:37, 37.97s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.787166                                                     
[1000]	valid_0's auc: 0.788679                                                    
[1500]	valid_0's auc: 0.788648                                                    
[2000]	valid_0's auc: 0.788499                                                    
Early stopping, best iteration is:                                                
[1413]	valid_0's auc: 0.788706
 50%|█████     | 5/10 [18:09<19:03, 228.75s/trial, best loss: 0.21236703645197186]

2it [01:18, 38.88s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.787811                                                     
[1000]	valid_0's auc: 0.788872                                                    
[1500]	valid_0's auc: 0.788697                                                    
Early stopping, best iteration is:                                                
[960]	valid_0's auc: 0.788879
 50%|█████     | 5/10 [18:43<19:03, 228.75s/trial, best loss: 0.21236703645197186]

3it [01:52, 37.34s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.785046                                                     
[1000]	valid_0's auc: 0.78613                                                     
[1500]	valid_0's auc: 0.786018                                                    
Early stopping, best iteration is:                                                
[931]	valid_0's auc: 0.786149
 50%|█████     | 5/10 [19:16<19:03, 228.75s/trial, best loss: 0.21236703645197186]

4it [02:24, 35.74s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.784622                                                     
[1000]	valid_0's auc: 0.786102                                                    
[1500]	valid_0's auc: 0.786083                                                    
[2000]	valid_0's auc: 0.785913                                                    
Early stopping, best iteration is:                                                
[1244]	valid_0's auc: 0.786139
 50%|█████     | 5/10 [19:52<19:03, 228.75s/trial, best loss: 0.21236703645197186]

5it [03:01, 36.00s/it]
[A
5it [03:01, 36.27s/it]


AUC 0.78723+-0.00128.                                                             
Params: 
clf__bagging_fraction: 0.900
clf__colsample_bytree: 0.960
clf__feature_fraction: 1.000
clf__lambda_l1: 0.427
clf__learning_rate: 0.045
clf__min_child_weight: 4.624
clf__num_leaves: 71.000
 60%|██████    | 6/10 [19:53<14:18, 214.54s/trial, best loss: 0.21236703645197186]

0it [00:00, ?it/s]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.783016                                                     
[1000]	valid_0's auc: 0.781087                                                    
Early stopping, best iteration is:                                                
[153]	valid_0's auc: 0.78446
 60%|██████    | 6/10 [20:11<14:18, 214.54s/trial, best loss: 0.21236703645197186]

1it [00:18, 18.01s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.785191                                                     
[1000]	valid_0's auc: 0.783182                                                    
Early stopping, best iteration is:                                                
[183]	valid_0's auc: 0.786363
 60%|██████    | 6/10 [20:29<14:18, 214.54s/trial, best loss: 0.21236703645197186]

2it [00:36, 18.01s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.784883                                                     
[1000]	valid_0's auc: 0.782498                                                    
Early stopping, best iteration is:                                                
[145]	valid_0's auc: 0.786663
 60%|██████    | 6/10 [20:46<14:18, 214.54s/trial, best loss: 0.21236703645197186]

3it [00:52, 17.65s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.782966                                                     
[1000]	valid_0's auc: 0.779928                                                    
Early stopping, best iteration is:                                                
[123]	valid_0's auc: 0.784199
 60%|██████    | 6/10 [21:03<14:18, 214.54s/trial, best loss: 0.21236703645197186]

4it [01:09, 17.47s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.782399                                                     
[1000]	valid_0's auc: 0.780372                                                    
Early stopping, best iteration is:                                                
[193]	valid_0's auc: 0.783915
 60%|██████    | 6/10 [21:22<14:18, 214.54s/trial, best loss: 0.21236703645197186]

5it [01:29, 18.06s/it]
[A
5it [01:29, 17.86s/it]


AUC 0.78512+-0.00115.                                                             
Params: 
clf__bagging_fraction: 0.800
clf__colsample_bytree: 0.885
clf__feature_fraction: 1.000
clf__lambda_l1: 0.006
clf__learning_rate: 0.310
clf__min_child_weight: 9.962
clf__num_leaves: 108.000
 70%|███████   | 7/10 [21:22<08:50, 176.98s/trial, best loss: 0.21236703645197186]

0it [00:00, ?it/s]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.785945                                                     
[1000]	valid_0's auc: 0.785435                                                    
Early stopping, best iteration is:                                                
[456]	valid_0's auc: 0.78599
 70%|███████   | 7/10 [21:43<08:50, 176.98s/trial, best loss: 0.21236703645197186]

1it [00:21, 21.45s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.788376                                                     
[1000]	valid_0's auc: 0.787954                                                    
Early stopping, best iteration is:                                                
[469]	valid_0's auc: 0.788449
 70%|███████   | 7/10 [22:06<08:50, 176.98s/trial, best loss: 0.21236703645197186]

2it [00:43, 21.75s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.788636                                                     
[1000]	valid_0's auc: 0.787978                                                    
[1500]	valid_0's auc: 0.787195                                                    
Early stopping, best iteration is:                                                
[515]	valid_0's auc: 0.788641
 70%|███████   | 7/10 [22:29<08:50, 176.98s/trial, best loss: 0.21236703645197186]

3it [01:06, 22.15s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.786293                                                     
[1000]	valid_0's auc: 0.785462                                                    
Early stopping, best iteration is:                                                
[413]	valid_0's auc: 0.786355
 70%|███████   | 7/10 [22:51<08:50, 176.98s/trial, best loss: 0.21236703645197186]

4it [01:29, 22.15s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.785791                                                     
[1000]	valid_0's auc: 0.785384                                                    
Early stopping, best iteration is:                                                
[426]	valid_0's auc: 0.785858
 70%|███████   | 7/10 [23:12<08:50, 176.98s/trial, best loss: 0.21236703645197186]

5it [01:50, 21.91s/it]
[A
5it [01:50, 22.10s/it]


AUC 0.78706+-0.00123.                                                             
Params: 
clf__bagging_fraction: 0.800
clf__colsample_bytree: 0.692
clf__feature_fraction: 0.600
clf__lambda_l1: 0.011
clf__learning_rate: 0.110
clf__min_child_weight: 5.505
clf__num_leaves: 10.000
 80%|████████  | 8/10 [23:13<05:14, 157.05s/trial, best loss: 0.21236703645197186]

0it [00:00, ?it/s]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.78248                                                      
[1000]	valid_0's auc: 0.785885                                                    
[1500]	valid_0's auc: 0.786311                                                    
[2000]	valid_0's auc: 0.786294                                                    
[2500]	valid_0's auc: 0.786177                                                    
Early stopping, best iteration is:                                                
[1623]	valid_0's auc: 0.786324
 80%|████████  | 8/10 [23:54<05:14, 157.05s/trial, best loss: 0.21236703645197186]

1it [00:42, 42.80s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.784418                                                     
[1000]	valid_0's auc: 0.788209                                                    
[1500]	valid_0's auc: 0.788674                                                    
[2000]	valid_0's auc: 0.788692                                                    
[2500]	valid_0's auc: 0.788644                                                    
Early stopping, best iteration is:                                                
[1801]	valid_0's auc: 0.788728
 80%|████████  | 8/10 [24:46<05:14, 157.05s/trial, best loss: 0.21236703645197186]

2it [01:34, 45.45s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.785307                                                     
[1000]	valid_0's auc: 0.788491                                                    
[1500]	valid_0's auc: 0.788728                                                    
[2000]	valid_0's auc: 0.788623                                                    
Early stopping, best iteration is:                                                
[1469]	valid_0's auc: 0.788741
 80%|████████  | 8/10 [25:31<05:14, 157.05s/trial, best loss: 0.21236703645197186]

3it [02:19, 45.40s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.782602                                                     
[1000]	valid_0's auc: 0.785808                                                    
[1500]	valid_0's auc: 0.786122                                                    
[2000]	valid_0's auc: 0.786089                                                    
[2500]	valid_0's auc: 0.785981                                                    
Early stopping, best iteration is:                                                
[1686]	valid_0's auc: 0.786134
 80%|████████  | 8/10 [26:21<05:14, 157.05s/trial, best loss: 0.21236703645197186]

4it [03:09, 46.67s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.78206                                                      
[1000]	valid_0's auc: 0.785685                                                    
[1500]	valid_0's auc: 0.786091                                                    
[2000]	valid_0's auc: 0.786091                                                    
[2500]	valid_0's auc: 0.786051                                                    
Early stopping, best iteration is:                                                
[1893]	valid_0's auc: 0.786116
 80%|████████  | 8/10 [27:13<05:14, 157.05s/trial, best loss: 0.21236703645197186]

5it [04:01, 48.41s/it]
[A
5it [04:01, 48.37s/it]


AUC 0.78721+-0.00125.                                                             
Params: 
clf__bagging_fraction: 1.000
clf__colsample_bytree: 0.921
clf__feature_fraction: 0.900
clf__lambda_l1: 0.014
clf__learning_rate: 0.030
clf__min_child_weight: 7.126
clf__num_leaves: 64.000
 90%|█████████ | 9/10 [27:15<03:02, 182.49s/trial, best loss: 0.21236703645197186]

0it [00:00, ?it/s]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.766568                                                     
[1000]	valid_0's auc: 0.77885                                                     
[1500]	valid_0's auc: 0.782753                                                    
[2000]	valid_0's auc: 0.784521                                                    
[2500]	valid_0's auc: 0.785404                                                    
[3000]	valid_0's auc: 0.785876                                                    
[3500]	valid_0's auc: 0.786099                                                    
[4000]	valid_0's auc: 0.786223                                                    
[4500]	valid_0's auc: 0.786261                                                    
[5000]	valid_0's auc: 0.786246                                                    
Did not meet early stopping. Best iteration is:                                   
[460

1it [01:32, 92.73s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.768539                                                     
[1000]	valid_0's auc: 0.780574                                                    
[1500]	valid_0's auc: 0.784899                                                    
[2000]	valid_0's auc: 0.786854                                                    
[2500]	valid_0's auc: 0.787838                                                    
[3000]	valid_0's auc: 0.788339                                                    
[3500]	valid_0's auc: 0.788543                                                    
[4000]	valid_0's auc: 0.788666                                                    
[4500]	valid_0's auc: 0.788713                                                    
[5000]	valid_0's auc: 0.788731                                                    
Did not meet early stopping. Best iteration is:                                   
[499

2it [03:06, 93.01s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.769237                                                     
[1000]	valid_0's auc: 0.781871                                                    
[1500]	valid_0's auc: 0.785801                                                    
[2000]	valid_0's auc: 0.787476                                                    
[2500]	valid_0's auc: 0.788285                                                    
[3000]	valid_0's auc: 0.788657                                                    
[3500]	valid_0's auc: 0.788809                                                    
[4000]	valid_0's auc: 0.788859                                                    
[4500]	valid_0's auc: 0.78884                                                     
Early stopping, best iteration is:                                                
[3994]	valid_0's auc: 0.788861
 90%|█████████ | 9/10 [31:45<03:02, 182.49s/trial, best 

3it [04:32, 91.08s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.767507                                                     
[1000]	valid_0's auc: 0.779066                                                    
[1500]	valid_0's auc: 0.782995                                                    
[2000]	valid_0's auc: 0.784749                                                    
[2500]	valid_0's auc: 0.785567                                                    
[3000]	valid_0's auc: 0.785928                                                    
[3500]	valid_0's auc: 0.786119                                                    
[4000]	valid_0's auc: 0.786173                                                    
[4500]	valid_0's auc: 0.78618                                                     
[5000]	valid_0's auc: 0.786148                                                    
Did not meet early stopping. Best iteration is:                                   
[436

4it [05:44, 85.09s/it]
[A


Training until validation scores don't improve for 1000 rounds                    
[500]	valid_0's auc: 0.765501                                                     
[1000]	valid_0's auc: 0.778072                                                    
[1500]	valid_0's auc: 0.782429                                                    
[2000]	valid_0's auc: 0.784385                                                    
[2500]	valid_0's auc: 0.785352                                                    
[3000]	valid_0's auc: 0.785816                                                    
[3500]	valid_0's auc: 0.786035                                                    
[4000]	valid_0's auc: 0.786086                                                    
[4500]	valid_0's auc: 0.786116                                                    
[5000]	valid_0's auc: 0.786124                                                    
Did not meet early stopping. Best iteration is:                                   
[494

5it [06:58, 82.02s/it]
[A
5it [06:58, 83.79s/it]


AUC 0.78723+-0.00128.                                                             
Params: 
clf__bagging_fraction: 1.000
clf__colsample_bytree: 0.532
clf__feature_fraction: 0.900
clf__lambda_l1: 0.059
clf__learning_rate: 0.011
clf__min_child_weight: 7.889
clf__num_leaves: 79.000
100%|██████████| 10/10 [34:14<00:00, 205.42s/trial, best loss: 0.21236703645197186]


In [10]:
print("Best params: \n" + "\n".join(["{}: {:.3f}".format(k, best_params[k]) for k in best_params]))

Best params: 
clf__bagging_fraction: 0.700
clf__colsample_bytree: 0.387
clf__feature_fraction: 0.600
clf__lambda_l1: 0.034
clf__learning_rate: 0.026
clf__min_child_weight: 4.014
clf__num_leaves: 101.000


In [11]:
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm

folds = 20
skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=42)
pipeline.set_params(**best_params)
df_test['target'] = 0

for train_idx, val_idx in tqdm(skf.split(df, df['target'])):
    df_train = df.loc[train_idx, :]
    df_val = df.loc[val_idx, :]
    X_val, y_val = Pipeline(pipeline.steps[:-1]).fit(df_train, df_train['target'])\
                                                    .transform(df_val), df_val['target']
    fit_args = {
        "clf__early_stopping_rounds": 1000, 
        "clf__eval_set": (X_val, y_val),
        "clf__verbose": 500,
        "clf__eval_metric": 'auc'
    }
        
    pipeline.fit(df_train, df_train['target'], **fit_args)
    prob = pipeline.predict_proba(df_val)[:, 1]
    score = roc_auc_score(y_val, prob)
    
    df_test['target'] += pipeline.predict_proba(df_test)[:, 1]


0it [00:00, ?it/s]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.785194
[1000]	valid_0's auc: 0.788848
[1500]	valid_0's auc: 0.7894
[2000]	valid_0's auc: 0.789405
[2500]	valid_0's auc: 0.789371
Early stopping, best iteration is:
[1835]	valid_0's auc: 0.789476


1it [00:32, 32.88s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.77889
[1000]	valid_0's auc: 0.782994
[1500]	valid_0's auc: 0.78367
[2000]	valid_0's auc: 0.783726
[2500]	valid_0's auc: 0.783602
Early stopping, best iteration is:
[1751]	valid_0's auc: 0.783775


2it [01:06, 33.09s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.78394
[1000]	valid_0's auc: 0.788725
[1500]	valid_0's auc: 0.789631
[2000]	valid_0's auc: 0.789641
[2500]	valid_0's auc: 0.789551
Early stopping, best iteration is:
[1717]	valid_0's auc: 0.789719


3it [01:38, 32.84s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.777847
[1000]	valid_0's auc: 0.782904
[1500]	valid_0's auc: 0.783807
[2000]	valid_0's auc: 0.783842
[2500]	valid_0's auc: 0.783745
Early stopping, best iteration is:
[1853]	valid_0's auc: 0.783906


4it [02:13, 33.53s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.788658
[1000]	valid_0's auc: 0.792444
[1500]	valid_0's auc: 0.792809
[2000]	valid_0's auc: 0.792853
[2500]	valid_0's auc: 0.792822
Early stopping, best iteration is:
[1677]	valid_0's auc: 0.792887


5it [02:47, 33.57s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.781107
[1000]	valid_0's auc: 0.785921
[1500]	valid_0's auc: 0.786659
[2000]	valid_0's auc: 0.786758
[2500]	valid_0's auc: 0.786712
[3000]	valid_0's auc: 0.786703
Early stopping, best iteration is:
[2075]	valid_0's auc: 0.786778


6it [03:25, 34.96s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.783794
[1000]	valid_0's auc: 0.788154
[1500]	valid_0's auc: 0.788799
[2000]	valid_0's auc: 0.788914
[2500]	valid_0's auc: 0.788829
[3000]	valid_0's auc: 0.788767
Early stopping, best iteration is:
[2243]	valid_0's auc: 0.788943


7it [04:03, 35.70s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.78102
[1000]	valid_0's auc: 0.786465
[1500]	valid_0's auc: 0.787692
[2000]	valid_0's auc: 0.787872
[2500]	valid_0's auc: 0.787856
Early stopping, best iteration is:
[1992]	valid_0's auc: 0.787878


8it [04:37, 35.24s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.780475
[1000]	valid_0's auc: 0.784553
[1500]	valid_0's auc: 0.785371
[2000]	valid_0's auc: 0.785608
[2500]	valid_0's auc: 0.785577
[3000]	valid_0's auc: 0.78552
Early stopping, best iteration is:
[2078]	valid_0's auc: 0.785647


9it [05:12, 35.17s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.783502
[1000]	valid_0's auc: 0.787422
[1500]	valid_0's auc: 0.788076
[2000]	valid_0's auc: 0.78816
[2500]	valid_0's auc: 0.788075
Early stopping, best iteration is:
[1696]	valid_0's auc: 0.788216


10it [05:43, 34.08s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.788298
[1000]	valid_0's auc: 0.792683
[1500]	valid_0's auc: 0.793134
[2000]	valid_0's auc: 0.793132
[2500]	valid_0's auc: 0.793037
Early stopping, best iteration is:
[1580]	valid_0's auc: 0.793202


11it [06:13, 32.69s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.786053
[1000]	valid_0's auc: 0.79039
[1500]	valid_0's auc: 0.790767
[2000]	valid_0's auc: 0.790648
[2500]	valid_0's auc: 0.79049
Early stopping, best iteration is:
[1577]	valid_0's auc: 0.790811


12it [06:42, 31.71s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.778166
[1000]	valid_0's auc: 0.782141
[1500]	valid_0's auc: 0.782637
[2000]	valid_0's auc: 0.782508
[2500]	valid_0's auc: 0.782329
Early stopping, best iteration is:
[1554]	valid_0's auc: 0.782675


13it [07:11, 30.95s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.778647
[1000]	valid_0's auc: 0.783359
[1500]	valid_0's auc: 0.784328
[2000]	valid_0's auc: 0.784521
[2500]	valid_0's auc: 0.784476
[3000]	valid_0's auc: 0.784416
Early stopping, best iteration is:
[2145]	valid_0's auc: 0.784527


14it [07:47, 32.41s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.788142
[1000]	valid_0's auc: 0.791678
[1500]	valid_0's auc: 0.792259
[2000]	valid_0's auc: 0.792254
[2500]	valid_0's auc: 0.792138
Early stopping, best iteration is:
[1851]	valid_0's auc: 0.792294


15it [08:20, 32.51s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.782776
[1000]	valid_0's auc: 0.786762
[1500]	valid_0's auc: 0.787092
[2000]	valid_0's auc: 0.787047
Early stopping, best iteration is:
[1470]	valid_0's auc: 0.787114


16it [08:48, 31.20s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.782107
[1000]	valid_0's auc: 0.786685
[1500]	valid_0's auc: 0.787651
[2000]	valid_0's auc: 0.787777
[2500]	valid_0's auc: 0.7878
[3000]	valid_0's auc: 0.787726
Early stopping, best iteration is:
[2389]	valid_0's auc: 0.787842


17it [09:26, 33.31s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.780768
[1000]	valid_0's auc: 0.785591
[1500]	valid_0's auc: 0.786307
[2000]	valid_0's auc: 0.786327
[2500]	valid_0's auc: 0.786239
Early stopping, best iteration is:
[1636]	valid_0's auc: 0.786432


18it [09:56, 32.33s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.778824
[1000]	valid_0's auc: 0.782711
[1500]	valid_0's auc: 0.783298
[2000]	valid_0's auc: 0.783272
[2500]	valid_0's auc: 0.783147
Early stopping, best iteration is:
[1624]	valid_0's auc: 0.783326


19it [10:26, 31.60s/it]

Training until validation scores don't improve for 1000 rounds
[500]	valid_0's auc: 0.783127
[1000]	valid_0's auc: 0.788
[1500]	valid_0's auc: 0.788778
[2000]	valid_0's auc: 0.78878
[2500]	valid_0's auc: 0.788732
Early stopping, best iteration is:
[1677]	valid_0's auc: 0.788879


20it [11:02, 33.11s/it]


In [12]:
# df_test['target'] /= folds
df_test[['id', 'target']].to_csv('../data/submission.csv', index=False)
!head -n 5 ../data/submission.csv

id,target
600000,0.12409889844630224
600001,0.2602588890606694
600002,0.1844708873922944
600003,0.11899949368513063
