## This Notebook trains various LGBM Models for each of the six outcomes.
## Each model is trained twice: once with class weights and once without class weights

In [1]:
import  numpy as np
import pandas as pd
import statsmodels.api as sm

from sklearn.linear_model import Lasso
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeCV

from sklearn.preprocessing import QuantileTransformer, quantile_transform

from sklearn.metrics import median_absolute_error, r2_score, mean_squared_error
# ....

from sklearn.model_selection import train_test_split

from matplotlib import pyplot as plt
# import lightgbm as lgb 

from lightgbm import LGBMRegressor,LGBMClassifier
from sklearn.pipeline import Pipeline, make_pipeline

from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV
from datetime import datetime
from sklearn import preprocessing 
import pickle
import joblib



%matplotlib inline

In [2]:
# Load cleaned data
def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Pickled artefacts, loaded in a fixed order (file names are hard-coded).
background_imputed_tot = _read_pickle('cleaned_data_label_encoding.pickle')
X_train = _read_pickle('X_train_label_encoding.pickle')
X_CV = _read_pickle('X_CV_label_encoding.pickle')
x_test = _read_pickle('x_test_label_encoding.pickle')
x_leaderboard = _read_pickle('x_leaderboard_label_encoding.pickle')
y_train = _read_pickle('y_train_label_encoding.pickle')
y_CV = _read_pickle('y_CV_label_encoding.pickle')

# Raw challenge files; the first CSV column becomes the index in every frame.
background = pd.read_csv(
    'FFChallenge_v5/background.csv',
    sep=',', header=0, index_col=0, low_memory=False,
)
# train.csv contains 2,121 rows (one per child in the training set) and 7 columns.
train = pd.read_csv(
    'FFChallenge_v5/train.csv',
    sep=',', header=0, index_col=0, low_memory=False,
)
########### Holdout dataset for internal testing only
test = pd.read_csv('test.csv', header=0, index_col=0, low_memory=False)
# Rows with any missing value are dropped from the leaderboard frame.
leaderboard = pd.read_csv(
    'leaderboard.csv', header=0, index_col=0, low_memory=False
).dropna()

In [4]:
# Final stats and useful variables associated with each column

# `np.float` / `np.int` were plain aliases of the builtins `float` / `int` and were
# removed in NumPy 1.24; `Series.iteritems()` was removed in pandas 2.0. The builtins
# and `.items()` select exactly the same columns and work on old and new versions.
_numeric_dtypes = [float, int, np.int64]
_column_dtypes = background_imputed_tot.dtypes

numerical_columns = [c for c, v in _column_dtypes.items() if v in _numeric_dtypes]
non_numerical_columns = [c for c, v in _column_dtypes.items() if v not in _numeric_dtypes]

# A column is treated as continuous when it has at least 15 distinct (non-NA) values.
# `DataFrame.nunique()` gives the per-column counts directly, so there is no need to
# transpose the whole frame just to recover the column labels.
_distinct_counts = background_imputed_tot.nunique()
continuous_cols_lst = _distinct_counts[_distinct_counts >= 15].index.to_list()

# NOTE: despite the `_lst` suffix this is a set (kept for backward compatibility).
non_continuous_lst = set(background_imputed_tot.columns.tolist()) - set(continuous_cols_lst)


print('Number of continuous columns %s' % len(continuous_cols_lst))

Number of continuous columns 601


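### The following cell trains three LGBM models (GBDT, DART and GOSS) without class weights. The models trained with class weights are in the cells below this one.
### The hyperparameters of each model are tuned with 20 evaluations of Bayesian optimization (5 random initial points followed by 15 guided iterations). Each evaluation is scored with repeated K-fold cross validation for the regression outcomes and repeated stratified K-fold cross validation for the classification outcomes, using 3 folds and 2 repeats.
### Early stopping on the validation set is used to help minimize overfitting; for DART, the model is instead retrained with the number of iterations that gave the best validation score.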

In [5]:
from datetime import datetime 
from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV
from datetime import datetime
from sklearn.model_selection import RepeatedKFold,RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score
from bayes_opt import BayesianOptimization


# Names of the outcomes to model: one per column of the holdout `test` frame.
variables = test.columns.tolist()

# Submission template; one column per outcome is overwritten with model predictions.
prediction = pd.read_csv(
    'FFChallenge_v5/prediction.csv',
    sep=',',
    header=0,
    index_col=0,
)

# Best hyperparameters found for each outcome, keyed by outcome name.
best_params_lgbm = {}


# Search space shared by the regression and classification tuners.
# Each entry is a (lower, upper) bound handed to BayesianOptimization; the
# integer-valued hyperparameters are truncated with int() before use.
# NOTE(review): LightGBM documents `n_estimators` and `num_iteration` as aliases
# of the same `num_iterations` parameter, so one of these two dimensions is
# presumably redundant. Both are kept because callers read both keys from the
# returned parameters -- confirm which one the installed LightGBM honours
# before pruning the search space.
_GBM_PBOUNDS = {
    'num_leaves': (2, 30),
    'max_depth': (1, 15),
    'learning_rate': (0.01, 0.2),
    'n_estimators': (10, 1000),
    'num_iteration': (50, 200),
    'subsample': (0.5, 1.0),
    'min_child_samples': (10, 200),
    'colsample_bytree': (0.5, 1),
    'reg_alpha': (0.0, 1.0),
    'reg_lambda': (0.0, 1.0),
}


def _optimize_gbm(estimator_cls, cv_cls, cv_seed, scoring, data, targets,
                  boosting_type, n_iter, init_points, print_fold_scores):
    """Tune an LGBM estimator with Bayesian optimization (shared implementation).

    Parameters
    ----------
    estimator_cls : LGBMRegressor or LGBMClassifier class to instantiate.
    cv_cls : cross-validation splitter class, built with 3 splits and 2 repeats.
    cv_seed : random_state for the splitter, so every evaluation sees the same folds.
    scoring : scikit-learn scoring string passed to ``cross_val_score``.
    data, targets : training features and response.
    boosting_type : LightGBM boosting type passed to the estimator.
    n_iter, init_points : number of guided steps / random initial points.
    print_fold_scores : when True, print the per-fold scores of each evaluation.

    Returns
    -------
    dict
        ``optimizer.max``: the best ``'target'`` and its ``'params'``.
    """

    def gbm_crossval(num_leaves, max_depth, learning_rate,
                     n_estimators, num_iteration, subsample, min_child_samples,
                     colsample_bytree, reg_alpha, reg_lambda):
        """Mean cross-validated score for one hyperparameter setting."""
        clf = estimator_cls(
            num_leaves=int(num_leaves),
            max_depth=int(max_depth),
            learning_rate=learning_rate,
            n_estimators=int(n_estimators),
            num_iteration=int(num_iteration),
            boosting_type=boosting_type,
            subsample=subsample,
            min_child_samples=int(min_child_samples),
            colsample_bytree=colsample_bytree,
            reg_alpha=reg_alpha,
            reg_lambda=reg_lambda,
        )
        # A new splitter with a fixed seed is built for every evaluation.
        cv = cv_cls(n_splits=3, n_repeats=2, random_state=cv_seed)
        cval = cross_val_score(clf, data, targets, scoring=scoring, cv=cv, n_jobs=-1)
        if print_fold_scores:
            print(cval)
        # Both scorers are negated losses, so maximizing the mean minimizes the loss.
        return cval.mean()

    optimizer = BayesianOptimization(
        f=gbm_crossval,
        pbounds=dict(_GBM_PBOUNDS),
        random_state=12345,
        verbose=3
    )
    optimizer.maximize(n_iter=n_iter, init_points=init_points)

    print("Final result:", optimizer.max)
    return optimizer.max


# Bayesian optimization function for regression-based models
def optimize_gbm_regressor(data, targets, boosting_type, n_iter=15, init_points=5):
    """Apply Bayesian Optimization to the LGBMRegressor hyperparameters.

    Scores each candidate with repeated 3-fold CV (2 repeats) on negative mean
    squared error and prints the per-fold scores. Returns ``optimizer.max``.
    ``n_iter`` and ``init_points`` default to the previously hard-coded 15 and 5.
    """
    return _optimize_gbm(
        LGBMRegressor, RepeatedKFold, 1, 'neg_mean_squared_error',
        data, targets, boosting_type, n_iter, init_points,
        print_fold_scores=True,
    )


# Bayesian optimization function for classification-based models
def optimize_gbm_classifier(data, targets, boosting_type, n_iter=15, init_points=5):
    """Apply Bayesian Optimization to the LGBMClassifier hyperparameters.

    Scores each candidate with repeated stratified 3-fold CV (2 repeats) on the
    negative Brier score. Returns ``optimizer.max``.
    ``n_iter`` and ``init_points`` default to the previously hard-coded 15 and 5.
    """
    return _optimize_gbm(
        LGBMClassifier, RepeatedStratifiedKFold, 123, 'neg_brier_score',
        data, targets, boosting_type, n_iter, init_points,
        print_fold_scores=False,
    )


from copy import deepcopy  # local import: only needed for the snapshots at the end of this cell

# Outcomes modelled with LGBMClassifier; every other outcome uses LGBMRegressor.
classification_vars = ['eviction','layoff','jobTraining']

boosting_types = ['gbdt','dart','goss'] # List of the three boosting types

# Early-stopping patience on the validation set (values unchanged from the original cell).
REGRESSOR_EARLY_STOPPING_ROUNDS = 5
CLASSIFIER_EARLY_STOPPING_ROUNDS = 10


def _make_lgbm(estimator_cls, best_params, boosting_type, num_iteration=None):
    """Build an LGBM estimator from the tuned hyperparameters.

    ``num_iteration`` overrides the tuned value when given (used for the DART refit).
    NOTE(review): `silent` and the `early_stopping_rounds` fit argument used below
    are reportedly removed in LightGBM 4.x -- confirm against the installed version.
    """
    if num_iteration is None:
        num_iteration = int(best_params['num_iteration'])
    return estimator_cls(
        learning_rate=best_params['learning_rate'],
        max_depth=int(best_params['max_depth']),
        n_estimators=int(best_params['n_estimators']),
        num_leaves=int(best_params['num_leaves']),
        num_iteration=num_iteration,
        boosting_type=boosting_type,
        subsample=best_params['subsample'],
        min_child_samples=int(best_params['min_child_samples']),
        colsample_bytree=best_params['colsample_bytree'],
        reg_alpha=best_params['reg_alpha'],
        reg_lambda=best_params['reg_lambda'],
        silent=True,
    )


def _fit_tuned_lgbm(estimator_cls, best_params, boosting_type,
                    X_fit, y_fit, X_val, y_val, early_stopping_rounds):
    """Fit the tuned model, monitoring l2 on the validation set.

    Early stopping is requested to limit overfitting. For DART the model from
    the best round cannot be recovered afterwards (trees have been dropped), so
    the model is retrained with exactly the number of iterations that gave the
    lowest validation l2.
    https://www.kaggle.com/c/microsoft-malware-prediction/discussion/78253
    """
    model = _make_lgbm(estimator_cls, best_params, boosting_type)
    model.fit(X_fit, y_fit, eval_set=(X_val, y_val), eval_metric='l2',
              early_stopping_rounds=early_stopping_rounds)

    if boosting_type == 'dart':
        validation_l2 = model.evals_result_['valid_0']['l2']
        # +1 because iterations are 1-based while list positions are 0-based.
        dart_best_iteration = validation_l2.index(min(validation_l2)) + 1
        model = _make_lgbm(estimator_cls, best_params, 'dart',
                           num_iteration=dart_best_iteration)
        model.fit(X_fit, y_fit, eval_set=(X_val, y_val), eval_metric='l2')

    return model


def _predict_outcome(model, features, is_classifier):
    """Return predicted values (regression) or positive-class probabilities (classification)."""
    if is_classifier:
        return model.predict_proba(features)[:, 1]
    return model.predict(features)


best_params_each_model = dict()

accuracies_each_model = dict()

timings_each_model = dict()
for boosting_type in boosting_types: # Go through all three of the different boosting types
    print('------------- ',boosting_type, ' --------------')
    print('\n')
    boosting_start = datetime.now()
    # A fresh dict per boosting type: reusing one shared dict made every entry of
    # best_params_each_model point at the same object (the last boosting type's params).
    best_params_lgbm = dict()
    lgbm_accuracies = dict()
    lgbm_timings = dict()
    for variable in variables: # Go through all the variable outcomes
        print(variable)
        start = datetime.now()
        # Let's drop instances where the response is NA
        y_train_no_na = y_train[variable].dropna()
        X_train_no_na = X_train.loc[y_train_no_na.index.values]

        y_CV_no_na = y_CV[variable].dropna()
        X_CV_no_na = X_CV.loc[y_CV_no_na.index.values]

        mask = leaderboard[variable].isna()
        y_leaderboard = leaderboard[variable]

        # If the outcomes are continuous use the regressor. Otherwise use the classifier
        is_classifier = variable in classification_vars
        if is_classifier:
            estimator_cls = LGBMClassifier
            early_stopping_rounds = CLASSIFIER_EARLY_STOPPING_ROUNDS
            # Run bayesian optimization to get optimal hyperparameters
            params = optimize_gbm_classifier(X_train_no_na, y_train_no_na, boosting_type)
        else:
            estimator_cls = LGBMRegressor
            early_stopping_rounds = REGRESSOR_EARLY_STOPPING_ROUNDS
            # Run bayesian optimization to get optimal hyperparameters
            params = optimize_gbm_regressor(X_train_no_na, y_train_no_na, boosting_type)
        best_params = params['params']

        # train a new model using the best hyperparameters
        lgbm = _fit_tuned_lgbm(estimator_cls, best_params, boosting_type,
                               X_train_no_na, y_train_no_na,
                               X_CV_no_na, y_CV_no_na,
                               early_stopping_rounds)

        # Predictions on the training, cv, leaderboard and test set
        y_pred_train = _predict_outcome(lgbm, X_train_no_na, is_classifier)
        y_pred_all = _predict_outcome(lgbm, x_leaderboard, is_classifier)
        cv_preds = _predict_outcome(lgbm, X_CV_no_na, is_classifier)
        # Save predictions of best model for leaderboard submition
        pred_all_backgrounds = _predict_outcome(lgbm, background_imputed_tot, is_classifier)

        best_params_lgbm[variable] = best_params

        #### Saving model to be interpretted later
        joblib.dump(lgbm, 'lgbm_'+ variable + '_' + boosting_type + '.pkl')

        # Can use mean square error as performance metric for both, since for 1 output, the mean square error and brier loss are the same.
        error_types = dict()
        print('---- Train error ----')
        error_types['train'] = mean_squared_error(y_train_no_na, y_pred_train)
        print(error_types['train'])
        print('---- CV error ----')
        error_types['CV'] = mean_squared_error(y_CV_no_na, cv_preds)
        print(error_types['CV'])

        print('---- leaderboard stats ----')
        # NOTE(review): assumes x_leaderboard rows line up one-to-one with `leaderboard` -- confirm.
        print('r2 =', r2_score(y_leaderboard[~mask], y_pred_all[~mask]))

        error_types['leaderboard'] = mean_squared_error(y_leaderboard[~mask], y_pred_all[~mask])
        print('mse =', error_types['leaderboard'])

        lgbm_accuracies[variable] = error_types

        # Save predictions of best model for leaderboard submition
        # NOTE(review): positional assignment; assumes `prediction` rows are in the
        # same order as background_imputed_tot -- confirm.
        prediction[variable] = pred_all_backgrounds

        lgbm_timings[variable] = datetime.now() - start

    best_params_each_model[boosting_type] = best_params_lgbm
    accuracies_each_model[boosting_type] = lgbm_accuracies
    timings_each_model[boosting_type] = lgbm_timings

    prediction.to_csv('lgbm_preds_' + boosting_type + '.csv')
    # Total wall time for this boosting type (per-outcome timings are in lgbm_timings).
    print(datetime.now() - boosting_start)
    print('\n\n\n------------------------------------------')

# Deep copies, so later cells that reuse or mutate the working dicts cannot alter
# these "no sampling" snapshots (a shallow .copy() would share the nested dicts).
best_params_each_model_no_sampling_copy = deepcopy(best_params_each_model)


accuracies_each_model_no_sampling_copy = deepcopy(accuracies_each_model)

-------------  gbdt  --------------


gpa
gpa
|   iter    |  target   | colsam... | learni... | max_depth | min_ch... | n_esti... | num_it... | num_le... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------------------
[-0.3847114  -0.39860344 -0.3823323  -0.36396924 -0.40016859 -0.40008174]
| [0m 1       [0m | [0m-0.3883  [0m | [0m 0.9648  [0m | [0m 0.07011 [0m | [0m 3.575   [0m | [0m 48.87   [0m | [0m 572.0   [0m | [0m 139.3   [0m | [0m 29.01   [0m | [0m 0.6532  [0m | [0m 0.7489  [0m | [0m 0.8268  [0m |
[-0.39797068 -0.39114754 -0.38533161 -0.37535318 -0.39039006 -0.41225703]
| [0m 2       [0m | [0m-0.3921  [0m | [0m 0.8739  [0m | [0m 0.1926  [0m | [0m 1.117   [0m | [0m 30.22   [0m | [0m 305.7   [0m | [0m 148.5   [0m | [0m 24.67   [0m | [0m 0.8722  [0m | [0m 0.9646  [0m | [0m 0.8618  [0m |
[-0.40053479 -0.41253653 -0.37



[1]	valid_0's l2: 0.419105
Training until validation scores don't improve for 5 rounds
[2]	valid_0's l2: 0.415402
[3]	valid_0's l2: 0.408967
[4]	valid_0's l2: 0.404659
[5]	valid_0's l2: 0.402966
[6]	valid_0's l2: 0.401038
[7]	valid_0's l2: 0.398723
[8]	valid_0's l2: 0.39693
[9]	valid_0's l2: 0.394334
[10]	valid_0's l2: 0.392788
[11]	valid_0's l2: 0.391387
[12]	valid_0's l2: 0.392798
[13]	valid_0's l2: 0.391322
[14]	valid_0's l2: 0.391016
[15]	valid_0's l2: 0.390686
[16]	valid_0's l2: 0.390656
[17]	valid_0's l2: 0.388981
[18]	valid_0's l2: 0.388777
[19]	valid_0's l2: 0.387407
[20]	valid_0's l2: 0.38591
[21]	valid_0's l2: 0.386525
[22]	valid_0's l2: 0.386503
[23]	valid_0's l2: 0.384589
[24]	valid_0's l2: 0.382543
[25]	valid_0's l2: 0.383436
[26]	valid_0's l2: 0.382759
[27]	valid_0's l2: 0.382086
[28]	valid_0's l2: 0.382311
[29]	valid_0's l2: 0.38188
[30]	valid_0's l2: 0.381776
[31]	valid_0's l2: 0.38086
[32]	valid_0's l2: 0.378829
[33]	valid_0's l2: 0.378894
[34]	valid_0's l2: 0.379418
[



[1]	valid_0's l2: 0.212476
Training until validation scores don't improve for 5 rounds
[2]	valid_0's l2: 0.211777
[3]	valid_0's l2: 0.211368
[4]	valid_0's l2: 0.211052
[5]	valid_0's l2: 0.210399
[6]	valid_0's l2: 0.209799
[7]	valid_0's l2: 0.209816
[8]	valid_0's l2: 0.209972
[9]	valid_0's l2: 0.209968
[10]	valid_0's l2: 0.210099
[11]	valid_0's l2: 0.209387
[12]	valid_0's l2: 0.209259
[13]	valid_0's l2: 0.208825
[14]	valid_0's l2: 0.208828
[15]	valid_0's l2: 0.208333
[16]	valid_0's l2: 0.208057
[17]	valid_0's l2: 0.207857
[18]	valid_0's l2: 0.207707
[19]	valid_0's l2: 0.20791
[20]	valid_0's l2: 0.207568
[21]	valid_0's l2: 0.2075
[22]	valid_0's l2: 0.206995
[23]	valid_0's l2: 0.207
[24]	valid_0's l2: 0.206607
[25]	valid_0's l2: 0.206815
[26]	valid_0's l2: 0.20697
[27]	valid_0's l2: 0.207017
[28]	valid_0's l2: 0.206559
[29]	valid_0's l2: 0.206209
[30]	valid_0's l2: 0.205807
[31]	valid_0's l2: 0.205823
[32]	valid_0's l2: 0.205571
[33]	valid_0's l2: 0.205346
[34]	valid_0's l2: 0.205418
[35]



[1]	valid_0's l2: 0.0190597
Training until validation scores don't improve for 5 rounds
[2]	valid_0's l2: 0.0187939
[3]	valid_0's l2: 0.0186081
[4]	valid_0's l2: 0.0183899
[5]	valid_0's l2: 0.0183485
[6]	valid_0's l2: 0.0181893
[7]	valid_0's l2: 0.0180134
[8]	valid_0's l2: 0.0178934
[9]	valid_0's l2: 0.0178138
[10]	valid_0's l2: 0.017667
[11]	valid_0's l2: 0.0175781
[12]	valid_0's l2: 0.0174312
[13]	valid_0's l2: 0.0173771
[14]	valid_0's l2: 0.0173545
[15]	valid_0's l2: 0.0172519
[16]	valid_0's l2: 0.0172454
[17]	valid_0's l2: 0.0171779
[18]	valid_0's l2: 0.0171454
[19]	valid_0's l2: 0.0170911
[20]	valid_0's l2: 0.0170512
[21]	valid_0's l2: 0.0170351
[22]	valid_0's l2: 0.016939
[23]	valid_0's l2: 0.0168566
[24]	valid_0's l2: 0.0168647
[25]	valid_0's l2: 0.0168849
[26]	valid_0's l2: 0.0168973
[27]	valid_0's l2: 0.016867
[28]	valid_0's l2: 0.0168643
Early stopping, best iteration is:
[23]	valid_0's l2: 0.0168566
---- Train error ----
0.02119034994777951
---- CV error ----
0.0168565906403



[1]	valid_0's l2: 0.0493167	valid_0's binary_logloss: 0.204192
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.0491804	valid_0's binary_logloss: 0.203051
[3]	valid_0's l2: 0.0490497	valid_0's binary_logloss: 0.201982
[4]	valid_0's l2: 0.0489424	valid_0's binary_logloss: 0.201125
[5]	valid_0's l2: 0.0488177	valid_0's binary_logloss: 0.200141
[6]	valid_0's l2: 0.0487153	valid_0's binary_logloss: 0.199355
[7]	valid_0's l2: 0.0485965	valid_0's binary_logloss: 0.198449
[8]	valid_0's l2: 0.0484988	valid_0's binary_logloss: 0.197727
[9]	valid_0's l2: 0.0483856	valid_0's binary_logloss: 0.19689
[10]	valid_0's l2: 0.0482926	valid_0's binary_logloss: 0.196225
[11]	valid_0's l2: 0.0481849	valid_0's binary_logloss: 0.19545
[12]	valid_0's l2: 0.0480964	valid_0's binary_logloss: 0.194837
[13]	valid_0's l2: 0.0479939	valid_0's binary_logloss: 0.194119
[14]	valid_0's l2: 0.0478957	valid_0's binary_logloss: 0.193438
[15]	valid_0's l2: 0.0478123	valid_0's binary_logloss:



[1]	valid_0's l2: 0.154564	valid_0's binary_logloss: 0.488133
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.154606	valid_0's binary_logloss: 0.488256
[3]	valid_0's l2: 0.154621	valid_0's binary_logloss: 0.488299
[4]	valid_0's l2: 0.154631	valid_0's binary_logloss: 0.488329
[5]	valid_0's l2: 0.154676	valid_0's binary_logloss: 0.488464
[6]	valid_0's l2: 0.154621	valid_0's binary_logloss: 0.4883
[7]	valid_0's l2: 0.15464	valid_0's binary_logloss: 0.488353
[8]	valid_0's l2: 0.154655	valid_0's binary_logloss: 0.488397
[9]	valid_0's l2: 0.154704	valid_0's binary_logloss: 0.488538
[10]	valid_0's l2: 0.154755	valid_0's binary_logloss: 0.488683
[11]	valid_0's l2: 0.154749	valid_0's binary_logloss: 0.488661
Early stopping, best iteration is:
[1]	valid_0's l2: 0.154564	valid_0's binary_logloss: 0.488133
---- Train error ----
0.16775670167059656
---- CV error ----
0.15456409151019343
---- leaderboard stats ----
r2 = -0.0004677795201850632
mse = 0.1741967911304831



[1]	valid_0's l2: 0.174338	valid_0's binary_logloss: 0.533093
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.174207	valid_0's binary_logloss: 0.532732
[3]	valid_0's l2: 0.174085	valid_0's binary_logloss: 0.532397
[4]	valid_0's l2: 0.17397	valid_0's binary_logloss: 0.532085
[5]	valid_0's l2: 0.173863	valid_0's binary_logloss: 0.531794
[6]	valid_0's l2: 0.173764	valid_0's binary_logloss: 0.531524
[7]	valid_0's l2: 0.173532	valid_0's binary_logloss: 0.530895
[8]	valid_0's l2: 0.173439	valid_0's binary_logloss: 0.530645
[9]	valid_0's l2: 0.173212	valid_0's binary_logloss: 0.530018
[10]	valid_0's l2: 0.17299	valid_0's binary_logloss: 0.529425
[11]	valid_0's l2: 0.17295	valid_0's binary_logloss: 0.529313
[12]	valid_0's l2: 0.172866	valid_0's binary_logloss: 0.529087
[13]	valid_0's l2: 0.172651	valid_0's binary_logloss: 0.528496
[14]	valid_0's l2: 0.172438	valid_0's binary_logloss: 0.527938
[15]	valid_0's l2: 0.172403	valid_0's binary_logloss: 0.527839
[16]	v



[1]	valid_0's l2: 0.418101
[2]	valid_0's l2: 0.414116
[3]	valid_0's l2: 0.40995
[4]	valid_0's l2: 0.404993
[5]	valid_0's l2: 0.399755
[6]	valid_0's l2: 0.394798
[7]	valid_0's l2: 0.390779
[8]	valid_0's l2: 0.390341
[9]	valid_0's l2: 0.387415
[10]	valid_0's l2: 0.382706
[11]	valid_0's l2: 0.380839
[12]	valid_0's l2: 0.380473




[13]	valid_0's l2: 0.376991
[14]	valid_0's l2: 0.37408
[15]	valid_0's l2: 0.369769
[16]	valid_0's l2: 0.371793
[17]	valid_0's l2: 0.369702
[18]	valid_0's l2: 0.368341
[19]	valid_0's l2: 0.366568
[20]	valid_0's l2: 0.367192
[21]	valid_0's l2: 0.367891
[22]	valid_0's l2: 0.368268
[23]	valid_0's l2: 0.369464
[24]	valid_0's l2: 0.371125
[25]	valid_0's l2: 0.369767
[26]	valid_0's l2: 0.36766
[27]	valid_0's l2: 0.36707
[28]	valid_0's l2: 0.367263
[29]	valid_0's l2: 0.368954
[30]	valid_0's l2: 0.368406
[31]	valid_0's l2: 0.366749
[32]	valid_0's l2: 0.368569
[33]	valid_0's l2: 0.36717
[34]	valid_0's l2: 0.36793
[35]	valid_0's l2: 0.36817
[36]	valid_0's l2: 0.368407
[37]	valid_0's l2: 0.366194
[38]	valid_0's l2: 0.365776
[39]	valid_0's l2: 0.366423
[40]	valid_0's l2: 0.365907
[41]	valid_0's l2: 0.365831
[42]	valid_0's l2: 0.366135
[43]	valid_0's l2: 0.366203
[44]	valid_0's l2: 0.366587
[45]	valid_0's l2: 0.367377
[46]	valid_0's l2: 0.495288
[47]	valid_0's l2: 0.472316
[48]	valid_0's l2: 0.4614



[1]	valid_0's l2: 0.212731
[2]	valid_0's l2: 0.212485
[3]	valid_0's l2: 0.212565
[4]	valid_0's l2: 0.211471
[5]	valid_0's l2: 0.213369
[6]	valid_0's l2: 0.212103
[7]	valid_0's l2: 0.212785
[8]	valid_0's l2: 0.212637
[9]	valid_0's l2: 0.212846
[10]	valid_0's l2: 0.213935
[11]	valid_0's l2: 0.213454
[12]	valid_0's l2: 0.21273
[13]	valid_0's l2: 0.212427
[14]	valid_0's l2: 0.211444
[15]	valid_0's l2: 0.211957
[16]	valid_0's l2: 0.212131
[17]	valid_0's l2: 0.212077
[18]	valid_0's l2: 0.21124




[19]	valid_0's l2: 0.211059
[20]	valid_0's l2: 0.212463
[21]	valid_0's l2: 0.210828
[22]	valid_0's l2: 0.211118
[23]	valid_0's l2: 0.210512
[24]	valid_0's l2: 0.211044
[25]	valid_0's l2: 0.210672
[26]	valid_0's l2: 0.210274
[27]	valid_0's l2: 0.209855
[28]	valid_0's l2: 0.209694
[29]	valid_0's l2: 0.21023
[30]	valid_0's l2: 0.210277
[31]	valid_0's l2: 0.210185
[32]	valid_0's l2: 0.210823
[33]	valid_0's l2: 0.211344
[34]	valid_0's l2: 0.210445
[35]	valid_0's l2: 0.210267
[36]	valid_0's l2: 0.210307
[37]	valid_0's l2: 0.211138
[38]	valid_0's l2: 0.210605
[39]	valid_0's l2: 0.211026
[40]	valid_0's l2: 0.21081
[41]	valid_0's l2: 0.210646
[42]	valid_0's l2: 0.210696
[43]	valid_0's l2: 0.210445
[44]	valid_0's l2: 0.209909
[45]	valid_0's l2: 0.211059
[46]	valid_0's l2: 0.211059
[47]	valid_0's l2: 0.211059
[48]	valid_0's l2: 0.211059
[49]	valid_0's l2: 0.211059
[50]	valid_0's l2: 0.211059
[51]	valid_0's l2: 0.211059
[52]	valid_0's l2: 0.211059
[53]	valid_0's l2: 0.211059
[54]	valid_0's l2: 0.



[1]	valid_0's l2: 0.0187974
[2]	valid_0's l2: 0.0183888
[3]	valid_0's l2: 0.0180847
[4]	valid_0's l2: 0.0178497
[5]	valid_0's l2: 0.0177725
[6]	valid_0's l2: 0.0176624
[7]	valid_0's l2: 0.01757
[8]	valid_0's l2: 0.0175752
[9]	valid_0's l2: 0.0174151
[10]	valid_0's l2: 0.0174211
[11]	valid_0's l2: 0.0172617
[12]	valid_0's l2: 0.0173021
[13]	valid_0's l2: 0.0173438
[14]	valid_0's l2: 0.0173615
[15]	valid_0's l2: 0.0173333
[16]	valid_0's l2: 0.0172803
[17]	valid_0's l2: 0.0171224
[18]	valid_0's l2: 0.017091
[19]	valid_0's l2: 0.0170655
[20]	valid_0's l2: 0.017111
[21]	valid_0's l2: 0.0170848
[22]	valid_0's l2: 0.0169654
[23]	valid_0's l2: 0.0169904
[24]	valid_0's l2: 0.0170093
[25]	valid_0's l2: 0.017071
[26]	valid_0's l2: 0.0170972
[27]	valid_0's l2: 0.0169984
[28]	valid_0's l2: 0.0169865
[29]	valid_0's l2: 0.0169842
[30]	valid_0's l2: 0.0170375
[31]	valid_0's l2: 0.0170494
[32]	valid_0's l2: 0.0170501
[33]	valid_0's l2: 0.0171234
[34]	valid_0's l2: 0.0171229
[35]	valid_0's l2: 0.0171406




[36]	valid_0's l2: 0.0170885
[37]	valid_0's l2: 0.0171188
[38]	valid_0's l2: 0.0170653
[39]	valid_0's l2: 0.0169865
[40]	valid_0's l2: 0.0169816
[41]	valid_0's l2: 0.0169865
[42]	valid_0's l2: 0.0170122
[43]	valid_0's l2: 0.0169962
[44]	valid_0's l2: 0.0170591
[45]	valid_0's l2: 0.0170522
[46]	valid_0's l2: 0.0166397
[47]	valid_0's l2: 0.0167274
[48]	valid_0's l2: 0.0167395
[49]	valid_0's l2: 0.0167137
[50]	valid_0's l2: 0.016709
[51]	valid_0's l2: 0.0168287
[52]	valid_0's l2: 0.0169417
[53]	valid_0's l2: 0.0169049
[1]	valid_0's l2: 0.0187974
[2]	valid_0's l2: 0.0183888
[3]	valid_0's l2: 0.0180847
[4]	valid_0's l2: 0.0178497
[5]	valid_0's l2: 0.0177725
[6]	valid_0's l2: 0.0176624
[7]	valid_0's l2: 0.01757
[8]	valid_0's l2: 0.0175752
[9]	valid_0's l2: 0.0174151
[10]	valid_0's l2: 0.0174211
[11]	valid_0's l2: 0.0172617
[12]	valid_0's l2: 0.0173021
[13]	valid_0's l2: 0.0173438
[14]	valid_0's l2: 0.0173615
[15]	valid_0's l2: 0.0173333
[16]	valid_0's l2: 0.0172803
[17]	valid_0's l2: 0.0171



[1]	valid_0's l2: 0.0491849	valid_0's binary_logloss: 0.203108
[2]	valid_0's l2: 0.0488819	valid_0's binary_logloss: 0.200707
[3]	valid_0's l2: 0.0485988	valid_0's binary_logloss: 0.198578
[4]	valid_0's l2: 0.0484497	valid_0's binary_logloss: 0.197472
[5]	valid_0's l2: 0.048362	valid_0's binary_logloss: 0.196815
[6]	valid_0's l2: 0.0482491	valid_0's binary_logloss: 0.196016
[7]	valid_0's l2: 0.0480867	valid_0's binary_logloss: 0.194835
[8]	valid_0's l2: 0.0481365	valid_0's binary_logloss: 0.19517
[9]	valid_0's l2: 0.0479156	valid_0's binary_logloss: 0.193741
[10]	valid_0's l2: 0.0477881	valid_0's binary_logloss: 0.192961
[11]	valid_0's l2: 0.0477249	valid_0's binary_logloss: 0.192397
[12]	valid_0's l2: 0.0477506	valid_0's binary_logloss: 0.192614




[13]	valid_0's l2: 0.0475904	valid_0's binary_logloss: 0.191453
[14]	valid_0's l2: 0.0474277	valid_0's binary_logloss: 0.190271
[15]	valid_0's l2: 0.0473965	valid_0's binary_logloss: 0.190083
[16]	valid_0's l2: 0.0473229	valid_0's binary_logloss: 0.189518
[17]	valid_0's l2: 0.0471718	valid_0's binary_logloss: 0.188711
[18]	valid_0's l2: 0.0471425	valid_0's binary_logloss: 0.188705
[19]	valid_0's l2: 0.0470669	valid_0's binary_logloss: 0.188192
[20]	valid_0's l2: 0.046912	valid_0's binary_logloss: 0.187451
[21]	valid_0's l2: 0.0469199	valid_0's binary_logloss: 0.187411
[22]	valid_0's l2: 0.0468551	valid_0's binary_logloss: 0.187383
[23]	valid_0's l2: 0.0466519	valid_0's binary_logloss: 0.186215
[24]	valid_0's l2: 0.0467119	valid_0's binary_logloss: 0.186171
[25]	valid_0's l2: 0.046658	valid_0's binary_logloss: 0.185901
[26]	valid_0's l2: 0.0465954	valid_0's binary_logloss: 0.185884
[27]	valid_0's l2: 0.0463924	valid_0's binary_logloss: 0.184464
[28]	valid_0's l2: 0.0464279	valid_0's bi



[1]	valid_0's l2: 0.0491849	valid_0's binary_logloss: 0.203108
[2]	valid_0's l2: 0.0488819	valid_0's binary_logloss: 0.200707
[3]	valid_0's l2: 0.0485988	valid_0's binary_logloss: 0.198578
[4]	valid_0's l2: 0.0484497	valid_0's binary_logloss: 0.197472
[5]	valid_0's l2: 0.048362	valid_0's binary_logloss: 0.196815
[6]	valid_0's l2: 0.0482491	valid_0's binary_logloss: 0.196016
[7]	valid_0's l2: 0.0480867	valid_0's binary_logloss: 0.194835
[8]	valid_0's l2: 0.0481365	valid_0's binary_logloss: 0.19517
[9]	valid_0's l2: 0.0479156	valid_0's binary_logloss: 0.193741
[10]	valid_0's l2: 0.0477881	valid_0's binary_logloss: 0.192961
[11]	valid_0's l2: 0.0477249	valid_0's binary_logloss: 0.192397
[12]	valid_0's l2: 0.0477506	valid_0's binary_logloss: 0.192614
[13]	valid_0's l2: 0.0475904	valid_0's binary_logloss: 0.191453
[14]	valid_0's l2: 0.0474277	valid_0's binary_logloss: 0.190271
[15]	valid_0's l2: 0.0473965	valid_0's binary_logloss: 0.190083
[16]	valid_0's l2: 0.0473229	valid_0's binary_loglo



[1]	valid_0's l2: 0.155358	valid_0's binary_logloss: 0.490398
[2]	valid_0's l2: 0.155787	valid_0's binary_logloss: 0.491546
[3]	valid_0's l2: 0.156287	valid_0's binary_logloss: 0.492886
[4]	valid_0's l2: 0.155987	valid_0's binary_logloss: 0.492101
[5]	valid_0's l2: 0.1559	valid_0's binary_logloss: 0.491853
[6]	valid_0's l2: 0.156442	valid_0's binary_logloss: 0.493505
[7]	valid_0's l2: 0.15674	valid_0's binary_logloss: 0.494296
[8]	valid_0's l2: 0.156085	valid_0's binary_logloss: 0.492301
[9]	valid_0's l2: 0.156203	valid_0's binary_logloss: 0.492429
[10]	valid_0's l2: 0.156737	valid_0's binary_logloss: 0.493876
[11]	valid_0's l2: 0.156765	valid_0's binary_logloss: 0.494077
[12]	valid_0's l2: 0.156693	valid_0's binary_logloss: 0.493907
[13]	valid_0's l2: 0.156413	valid_0's binary_logloss: 0.493036
[14]	valid_0's l2: 0.155523	valid_0's binary_logloss: 0.490154
[15]	valid_0's l2: 0.155406	valid_0's binary_logloss: 0.489803
[16]	valid_0's l2: 0.155512	valid_0's binary_logloss: 0.490432
[17]




[26]	valid_0's l2: 0.156554	valid_0's binary_logloss: 0.492989
[27]	valid_0's l2: 0.156049	valid_0's binary_logloss: 0.491547
[28]	valid_0's l2: 0.15598	valid_0's binary_logloss: 0.491453
[29]	valid_0's l2: 0.155807	valid_0's binary_logloss: 0.490844
[30]	valid_0's l2: 0.155921	valid_0's binary_logloss: 0.491002
[31]	valid_0's l2: 0.155894	valid_0's binary_logloss: 0.490843
[32]	valid_0's l2: 0.155828	valid_0's binary_logloss: 0.490793
[33]	valid_0's l2: 0.156032	valid_0's binary_logloss: 0.491134
[34]	valid_0's l2: 0.155878	valid_0's binary_logloss: 0.490779
[35]	valid_0's l2: 0.155766	valid_0's binary_logloss: 0.490475
[36]	valid_0's l2: 0.155866	valid_0's binary_logloss: 0.490771
[37]	valid_0's l2: 0.156169	valid_0's binary_logloss: 0.491828
[38]	valid_0's l2: 0.156446	valid_0's binary_logloss: 0.49252
[39]	valid_0's l2: 0.155737	valid_0's binary_logloss: 0.490321
[40]	valid_0's l2: 0.155775	valid_0's binary_logloss: 0.49061
[41]	valid_0's l2: 0.155643	valid_0's binary_logloss: 0.4



[1]	valid_0's l2: 0.155358	valid_0's binary_logloss: 0.490398
[2]	valid_0's l2: 0.155787	valid_0's binary_logloss: 0.491546
[3]	valid_0's l2: 0.156287	valid_0's binary_logloss: 0.492886
[4]	valid_0's l2: 0.155987	valid_0's binary_logloss: 0.492101
[5]	valid_0's l2: 0.1559	valid_0's binary_logloss: 0.491853
[6]	valid_0's l2: 0.156442	valid_0's binary_logloss: 0.493505
[7]	valid_0's l2: 0.15674	valid_0's binary_logloss: 0.494296
[8]	valid_0's l2: 0.156085	valid_0's binary_logloss: 0.492301
[9]	valid_0's l2: 0.156203	valid_0's binary_logloss: 0.492429
[10]	valid_0's l2: 0.156737	valid_0's binary_logloss: 0.493876
[11]	valid_0's l2: 0.156765	valid_0's binary_logloss: 0.494077
[12]	valid_0's l2: 0.156693	valid_0's binary_logloss: 0.493907
[13]	valid_0's l2: 0.156413	valid_0's binary_logloss: 0.493036
[14]	valid_0's l2: 0.155523	valid_0's binary_logloss: 0.490154
[15]	valid_0's l2: 0.155406	valid_0's binary_logloss: 0.489803
[16]	valid_0's l2: 0.155512	valid_0's binary_logloss: 0.490432
[17]



[1]	valid_0's l2: 0.173877	valid_0's binary_logloss: 0.531817
[2]	valid_0's l2: 0.173114	valid_0's binary_logloss: 0.529704
[3]	valid_0's l2: 0.172474	valid_0's binary_logloss: 0.527879
[4]	valid_0's l2: 0.170899	valid_0's binary_logloss: 0.523513
[5]	valid_0's l2: 0.170104	valid_0's binary_logloss: 0.521236
[6]	valid_0's l2: 0.170074	valid_0's binary_logloss: 0.521093
[7]	valid_0's l2: 0.169412	valid_0's binary_logloss: 0.51915
[8]	valid_0's l2: 0.169543	valid_0's binary_logloss: 0.519538
[9]	valid_0's l2: 0.168659	valid_0's binary_logloss: 0.517078
[10]	valid_0's l2: 0.168474	valid_0's binary_logloss: 0.516376
[11]	valid_0's l2: 0.167805	valid_0's binary_logloss: 0.514487
[12]	valid_0's l2: 0.168049	valid_0's binary_logloss: 0.515171
[13]	valid_0's l2: 0.167408	valid_0's binary_logloss: 0.513402
[14]	valid_0's l2: 0.16688	valid_0's binary_logloss: 0.511694
[15]	valid_0's l2: 0.167128	valid_0's binary_logloss: 0.512342
[16]	valid_0's l2: 0.167425	valid_0's binary_logloss: 0.513105
[17




[21]	valid_0's l2: 0.166013	valid_0's binary_logloss: 0.508923
[22]	valid_0's l2: 0.165844	valid_0's binary_logloss: 0.508501
[23]	valid_0's l2: 0.165379	valid_0's binary_logloss: 0.507209
[24]	valid_0's l2: 0.165097	valid_0's binary_logloss: 0.506417
[25]	valid_0's l2: 0.164477	valid_0's binary_logloss: 0.504406
[26]	valid_0's l2: 0.164484	valid_0's binary_logloss: 0.504459
[27]	valid_0's l2: 0.164239	valid_0's binary_logloss: 0.503586
[28]	valid_0's l2: 0.164139	valid_0's binary_logloss: 0.503463
[29]	valid_0's l2: 0.163478	valid_0's binary_logloss: 0.501912
[30]	valid_0's l2: 0.163382	valid_0's binary_logloss: 0.501567
[31]	valid_0's l2: 0.163095	valid_0's binary_logloss: 0.500856
[32]	valid_0's l2: 0.162888	valid_0's binary_logloss: 0.500089
[33]	valid_0's l2: 0.162977	valid_0's binary_logloss: 0.500418
[34]	valid_0's l2: 0.162914	valid_0's binary_logloss: 0.500221
[35]	valid_0's l2: 0.163037	valid_0's binary_logloss: 0.500608
[36]	valid_0's l2: 0.163219	valid_0's binary_logloss: 



[1]	valid_0's l2: 0.173877	valid_0's binary_logloss: 0.531817
[2]	valid_0's l2: 0.173114	valid_0's binary_logloss: 0.529704
[3]	valid_0's l2: 0.172474	valid_0's binary_logloss: 0.527879
[4]	valid_0's l2: 0.170899	valid_0's binary_logloss: 0.523513
[5]	valid_0's l2: 0.170104	valid_0's binary_logloss: 0.521236
[6]	valid_0's l2: 0.170074	valid_0's binary_logloss: 0.521093
[7]	valid_0's l2: 0.169412	valid_0's binary_logloss: 0.51915
[8]	valid_0's l2: 0.169543	valid_0's binary_logloss: 0.519538
[9]	valid_0's l2: 0.168659	valid_0's binary_logloss: 0.517078
[10]	valid_0's l2: 0.168474	valid_0's binary_logloss: 0.516376
[11]	valid_0's l2: 0.167805	valid_0's binary_logloss: 0.514487
[12]	valid_0's l2: 0.168049	valid_0's binary_logloss: 0.515171
[13]	valid_0's l2: 0.167408	valid_0's binary_logloss: 0.513402
[14]	valid_0's l2: 0.16688	valid_0's binary_logloss: 0.511694
[15]	valid_0's l2: 0.167128	valid_0's binary_logloss: 0.512342
[16]	valid_0's l2: 0.167425	valid_0's binary_logloss: 0.513105
[17



[1]	valid_0's l2: 0.420708
Training until validation scores don't improve for 5 rounds
[2]	valid_0's l2: 0.418022
[3]	valid_0's l2: 0.416569
[4]	valid_0's l2: 0.414601
[5]	valid_0's l2: 0.412847
[6]	valid_0's l2: 0.411363
[7]	valid_0's l2: 0.409183
[8]	valid_0's l2: 0.406925
[9]	valid_0's l2: 0.405582
[10]	valid_0's l2: 0.404682
[11]	valid_0's l2: 0.401082
[12]	valid_0's l2: 0.39888
[13]	valid_0's l2: 0.39748
[14]	valid_0's l2: 0.396215
[15]	valid_0's l2: 0.394785
[16]	valid_0's l2: 0.393334
[17]	valid_0's l2: 0.392512
[18]	valid_0's l2: 0.391882
[19]	valid_0's l2: 0.39091
[20]	valid_0's l2: 0.389618
[21]	valid_0's l2: 0.388983
[22]	valid_0's l2: 0.389326
[23]	valid_0's l2: 0.387991
[24]	valid_0's l2: 0.38726
[25]	valid_0's l2: 0.385938
[26]	valid_0's l2: 0.385838
[27]	valid_0's l2: 0.384542
[28]	valid_0's l2: 0.384301
[29]	valid_0's l2: 0.383884
[30]	valid_0's l2: 0.383822
[31]	valid_0's l2: 0.384153
[32]	valid_0's l2: 0.384463
[33]	valid_0's l2: 0.385447
[34]	valid_0's l2: 0.38622
[3



[1]	valid_0's l2: 0.213202
Training until validation scores don't improve for 5 rounds
[2]	valid_0's l2: 0.213091
[3]	valid_0's l2: 0.212989
[4]	valid_0's l2: 0.213006
[5]	valid_0's l2: 0.212915
[6]	valid_0's l2: 0.212832
[7]	valid_0's l2: 0.212758
[8]	valid_0's l2: 0.212692
[9]	valid_0's l2: 0.212479
[10]	valid_0's l2: 0.212422
[11]	valid_0's l2: 0.212338
[12]	valid_0's l2: 0.212289
[13]	valid_0's l2: 0.212314
[14]	valid_0's l2: 0.212132
[15]	valid_0's l2: 0.212056
[16]	valid_0's l2: 0.21201
[17]	valid_0's l2: 0.212085
[18]	valid_0's l2: 0.212044
[19]	valid_0's l2: 0.211871
[20]	valid_0's l2: 0.211904
[21]	valid_0's l2: 0.211722
[22]	valid_0's l2: 0.211493
[23]	valid_0's l2: 0.21133
[24]	valid_0's l2: 0.211266
[25]	valid_0's l2: 0.211148
[26]	valid_0's l2: 0.211115
[27]	valid_0's l2: 0.211152
[28]	valid_0's l2: 0.211122
[29]	valid_0's l2: 0.210907
[30]	valid_0's l2: 0.210797
[31]	valid_0's l2: 0.210649
[32]	valid_0's l2: 0.210481
[33]	valid_0's l2: 0.210441
[34]	valid_0's l2: 0.210487



[1]	valid_0's l2: 0.0191825
Training until validation scores don't improve for 5 rounds
[2]	valid_0's l2: 0.0190195
[3]	valid_0's l2: 0.018875
[4]	valid_0's l2: 0.0187414
[5]	valid_0's l2: 0.0186459
[6]	valid_0's l2: 0.0184939
[7]	valid_0's l2: 0.0183661
[8]	valid_0's l2: 0.0181336
[9]	valid_0's l2: 0.0180607
[10]	valid_0's l2: 0.0179597
[11]	valid_0's l2: 0.0178304
[12]	valid_0's l2: 0.0176984
[13]	valid_0's l2: 0.0175906
[14]	valid_0's l2: 0.0174932
[15]	valid_0's l2: 0.0173947
[16]	valid_0's l2: 0.0173241
[17]	valid_0's l2: 0.0173051
[18]	valid_0's l2: 0.0172452
[19]	valid_0's l2: 0.0173045
[20]	valid_0's l2: 0.0172567
[21]	valid_0's l2: 0.0172703
[22]	valid_0's l2: 0.0172861
[23]	valid_0's l2: 0.0173412
Early stopping, best iteration is:
[18]	valid_0's l2: 0.0172452
---- Train error ----
0.019373252957904197
---- CV error ----
0.017245222259273905
---- leaderboard stats ----
r2 = 0.08609132185915336
mse = 0.026143124475680617
eviction
|   iter    |  target   | colsam... | learni...



[1]	valid_0's l2: 0.048698	valid_0's binary_logloss: 0.199072
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.0482359	valid_0's binary_logloss: 0.195694
[3]	valid_0's l2: 0.0477263	valid_0's binary_logloss: 0.192095
[4]	valid_0's l2: 0.0474943	valid_0's binary_logloss: 0.190785
[5]	valid_0's l2: 0.0476496	valid_0's binary_logloss: 0.19193
[6]	valid_0's l2: 0.0472762	valid_0's binary_logloss: 0.189278
[7]	valid_0's l2: 0.0470248	valid_0's binary_logloss: 0.1874
[8]	valid_0's l2: 0.046945	valid_0's binary_logloss: 0.187246
[9]	valid_0's l2: 0.0470603	valid_0's binary_logloss: 0.188183
[10]	valid_0's l2: 0.0470186	valid_0's binary_logloss: 0.187988
[11]	valid_0's l2: 0.0470139	valid_0's binary_logloss: 0.187959
[12]	valid_0's l2: 0.0469724	valid_0's binary_logloss: 0.187344
[13]	valid_0's l2: 0.0470165	valid_0's binary_logloss: 0.187813
[14]	valid_0's l2: 0.0470072	valid_0's binary_logloss: 0.18796
[15]	valid_0's l2: 0.0470692	valid_0's binary_logloss: 0.1



[1]	valid_0's l2: 0.154706	valid_0's binary_logloss: 0.488553
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.155438	valid_0's binary_logloss: 0.490573
[3]	valid_0's l2: 0.154705	valid_0's binary_logloss: 0.488596
[4]	valid_0's l2: 0.154686	valid_0's binary_logloss: 0.488574
[5]	valid_0's l2: 0.154445	valid_0's binary_logloss: 0.487727
[6]	valid_0's l2: 0.154445	valid_0's binary_logloss: 0.487727
[7]	valid_0's l2: 0.154445	valid_0's binary_logloss: 0.487727
[8]	valid_0's l2: 0.154445	valid_0's binary_logloss: 0.487727
[9]	valid_0's l2: 0.154445	valid_0's binary_logloss: 0.487727
[10]	valid_0's l2: 0.154445	valid_0's binary_logloss: 0.487727
[11]	valid_0's l2: 0.154445	valid_0's binary_logloss: 0.487727
[12]	valid_0's l2: 0.154445	valid_0's binary_logloss: 0.487727
[13]	valid_0's l2: 0.154445	valid_0's binary_logloss: 0.487727
[14]	valid_0's l2: 0.154445	valid_0's binary_logloss: 0.487727
[15]	valid_0's l2: 0.154445	valid_0's binary_logloss: 0.487727
Ear



[1]	valid_0's l2: 0.174001	valid_0's binary_logloss: 0.53216
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.17377	valid_0's binary_logloss: 0.531526
[3]	valid_0's l2: 0.173266	valid_0's binary_logloss: 0.530127
[4]	valid_0's l2: 0.172887	valid_0's binary_logloss: 0.529076
[5]	valid_0's l2: 0.172761	valid_0's binary_logloss: 0.528722
[6]	valid_0's l2: 0.172388	valid_0's binary_logloss: 0.527694
[7]	valid_0's l2: 0.171874	valid_0's binary_logloss: 0.526273
[8]	valid_0's l2: 0.171486	valid_0's binary_logloss: 0.525184
[9]	valid_0's l2: 0.171311	valid_0's binary_logloss: 0.524687
[10]	valid_0's l2: 0.171071	valid_0's binary_logloss: 0.524022
[11]	valid_0's l2: 0.170913	valid_0's binary_logloss: 0.523585
[12]	valid_0's l2: 0.170475	valid_0's binary_logloss: 0.522405
[13]	valid_0's l2: 0.170343	valid_0's binary_logloss: 0.522049
[14]	valid_0's l2: 0.170247	valid_0's binary_logloss: 0.521744
[15]	valid_0's l2: 0.170196	valid_0's binary_logloss: 0.521611
[16]	

In [6]:
# Persist the tuned hyper-parameters and the error metrics of the
# non-weighted ("no sampling") models so they can be reloaded later.
import pickle

# Tuned hyper-parameters.
with open('best_params_tress_no_sampling.pickle', 'wb') as handle:
    pickle.dump(best_params_each_model_no_sampling_copy, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Error metrics go to their OWN file. Previously this dump re-opened
# 'best_params_tress_no_sampling.pickle' in 'wb' mode, which truncated the
# file written just above and silently discarded the hyper-parameters.
# NOTE(review): any code that loaded the accuracies from the old file name
# must be pointed at this one.
with open('accuracies_tress_no_sampling.pickle', 'wb') as handle:
    pickle.dump(accuracies_each_model_no_sampling_copy, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [7]:
# Build one "<outcome> <boosting type> <leaderboard error>" string per fitted
# model; the error is whatever was stored under the 'leaderboard' key.
lst_leader_board_accs = []
for model_name, per_outcome in accuracies_each_model.items():
    for var_name, errors in per_outcome.items():
        entry = ' '.join([var_name, model_name, str(errors['leaderboard'])])
        lst_leader_board_accs.append(entry)

# Sorting the strings groups the boosting types of each outcome together;
# as the last expression of the cell the sorted list is displayed.
sorted(lst_leader_board_accs)

['eviction dart 0.05207330412177481',
 'eviction gbdt 0.050633443020071826',
 'eviction goss 0.05192825850648769',
 'gpa dart 0.38124499706416043',
 'gpa gbdt 0.37764382969532606',
 'gpa goss 0.3710648078690564',
 'grit dart 0.2169031608664567',
 'grit gbdt 0.21711522212374287',
 'grit goss 0.21659011055012237',
 'jobTraining dart 0.20428263080049058',
 'jobTraining gbdt 0.19940966236005536',
 'jobTraining goss 0.2032936824781467',
 'layoff dart 0.1744041463446064',
 'layoff gbdt 0.17419679113048317',
 'layoff goss 0.17438181236965813',
 'materialHardship dart 0.0255595316615577',
 'materialHardship gbdt 0.02609444653819857',
 'materialHardship goss 0.026143124475680617']

### Running the exact same code as for the non-weighted models, except that balanced class weights are used to increase the weight of the minority class. The models are calibrated after training, since class weighting biases the predicted probabilities towards the minority class. This code only trains models for the categorical outcomes, since continuous outcomes do not need to be balanced.

In [9]:
from datetime import datetime 
from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV
from datetime import datetime
from sklearn.model_selection import RepeatedKFold,RepeatedStratifiedKFold
from sklearn.calibration import CalibratedClassifierCV


# Prediction table read from the challenge files; the training loop below
# overwrites one outcome column at a time and writes the table back to CSV.
prediction = pd.read_csv(
    'FFChallenge_v5/prediction.csv',
    sep=',',
    header=0,
    index_col=0,
)

# Column names of `test` (defined earlier in the notebook); the training
# loop below iterates over variables[3:].
variables = test.columns.to_list()



def optimize_gbm_regressor(data, targets, boosting_type):
    """Tune an ``LGBMRegressor`` with Bayesian optimisation.

    Each candidate configuration is scored with repeated 3-fold
    cross-validation (2 repeats) on negative mean squared error, so the
    optimiser's maximum is the candidate with the lowest cross-validated MSE.

    Parameters
    ----------
    data : features handed to ``cross_val_score``.
    targets : response values handed to ``cross_val_score``.
    boosting_type : forwarded to ``LGBMRegressor(boosting_type=...)``.

    Returns
    -------
    ``optimizer.max`` -- callers read the winning hyper-parameters from its
    ``'params'`` entry and cast the count-like ones with ``int()``.
    """

    def gbm_crossval(num_leaves, max_depth, learning_rate, n_estimators,
                     num_iteration, subsample, min_child_samples,
                     colsample_bytree, reg_alpha, reg_lambda):
        """Objective: mean cross-validated negative MSE of one candidate."""
        # Count-like parameters are truncated to integers before use.
        # NOTE(review): `num_iteration` looks like a LightGBM alias of
        # `n_estimators`; if so only one of the two is effective -- confirm
        # against the LightGBM parameter documentation.
        regressor = LGBMRegressor(
            boosting_type=boosting_type,
            num_leaves=int(num_leaves),
            max_depth=int(max_depth),
            min_child_samples=int(min_child_samples),
            n_estimators=int(n_estimators),
            num_iteration=int(num_iteration),
            learning_rate=learning_rate,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            reg_alpha=reg_alpha,
            reg_lambda=reg_lambda,
        )
        splitter = RepeatedKFold(n_splits=3, n_repeats=2, random_state=1)
        fold_scores = cross_val_score(
            regressor, data, targets,
            scoring='neg_mean_squared_error', cv=splitter, n_jobs=-1,
        )
        print(fold_scores)
        return fold_scores.mean()

    # (lower, upper) bounds explored for every hyper-parameter.
    search_space = {
        'num_leaves': (2, 30),
        'max_depth': (1, 15),
        'learning_rate': (0.01, 0.2),
        'n_estimators': (10, 1000),
        'num_iteration': (50, 200),
        'subsample': (0.5, 1.0),
        'min_child_samples': (10, 200),
        'colsample_bytree': (0.5, 1),
        'reg_alpha': (0.0, 1.0),
        'reg_lambda': (0.0, 1.0),
    }

    optimizer = BayesianOptimization(
        f=gbm_crossval,
        pbounds=search_space,
        random_state=12345,
        verbose=3,
    )
    # 5 initial points followed by 15 optimisation iterations.
    optimizer.maximize(n_iter=15, init_points=5)

    best = optimizer.max
    print("Final result:", best)
    return best



def optimize_gbm_classifier(data, targets, boosting_type):
    """Tune a class-weighted ``LGBMClassifier`` with Bayesian optimisation.

    Each candidate configuration is built with ``class_weight='balanced'``
    and scored with repeated stratified 3-fold cross-validation (2 repeats)
    on the negative Brier score, so the optimiser's maximum is the candidate
    with the lowest cross-validated Brier score.

    Parameters
    ----------
    data : features handed to ``cross_val_score``.
    targets : class labels handed to ``cross_val_score``.
    boosting_type : forwarded to ``LGBMClassifier(boosting_type=...)``.

    Returns
    -------
    ``optimizer.max`` -- callers read the winning hyper-parameters from its
    ``'params'`` entry and cast the count-like ones with ``int()``.
    """

    def gbm_crossval(num_leaves, max_depth, learning_rate, n_estimators,
                     num_iteration, subsample, min_child_samples,
                     colsample_bytree, reg_alpha, reg_lambda):
        """Objective: mean cross-validated negative Brier score of one candidate."""
        # Count-like parameters are truncated to integers before use.
        # NOTE(review): `num_iteration` looks like a LightGBM alias of
        # `n_estimators`; if so only one of the two is effective -- confirm
        # against the LightGBM parameter documentation.
        classifier = LGBMClassifier(
            boosting_type=boosting_type,
            class_weight='balanced',
            num_leaves=int(num_leaves),
            max_depth=int(max_depth),
            min_child_samples=int(min_child_samples),
            n_estimators=int(n_estimators),
            num_iteration=int(num_iteration),
            learning_rate=learning_rate,
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            reg_alpha=reg_alpha,
            reg_lambda=reg_lambda,
        )
        splitter = RepeatedStratifiedKFold(n_splits=3, n_repeats=2, random_state=123)
        fold_scores = cross_val_score(
            classifier, data, targets,
            scoring='neg_brier_score', cv=splitter, n_jobs=-1,
        )
        return fold_scores.mean()

    # (lower, upper) bounds explored for every hyper-parameter.
    search_space = {
        'num_leaves': (2, 30),
        'max_depth': (1, 15),
        'learning_rate': (0.01, 0.2),
        'n_estimators': (10, 1000),
        'num_iteration': (50, 200),
        'subsample': (0.5, 1.0),
        'min_child_samples': (10, 200),
        'colsample_bytree': (0.5, 1),
        'reg_alpha': (0.0, 1.0),
        'reg_lambda': (0.0, 1.0),
    }

    optimizer = BayesianOptimization(
        f=gbm_crossval,
        pbounds=search_space,
        random_state=12345,
    )
    # 5 initial points followed by 15 optimisation iterations.
    optimizer.maximize(n_iter=15, init_points=5)

    best = optimizer.max
    print("Final result:", best)
    return best


# Outcomes modelled as binary classification; any other outcome is fitted
# with a regressor.
classification_vars = ['eviction','layoff','jobTraining']

boosting_types = ['gbdt','dart','goss']


def _build_lgbm(estimator_cls, best_params, boosting_type, num_iteration=None, **extra_params):
    """Instantiate a LightGBM estimator from a Bayesian-optimisation result.

    `best_params` is the 'params' dict returned by the optimiser; count-like
    entries are cast with int() before being passed on. `num_iteration`
    overrides the tuned value (used for the dart refit). `extra_params` are
    passed straight to the estimator (e.g. class_weight='balanced').
    """
    if num_iteration is None:
        num_iteration = int(best_params['num_iteration'])
    return estimator_cls(
        learning_rate=best_params['learning_rate'],
        max_depth=int(best_params['max_depth']),
        n_estimators=int(best_params['n_estimators']),
        num_leaves=int(best_params['num_leaves']),
        num_iteration=num_iteration,
        boosting_type=boosting_type,
        subsample=best_params['subsample'],
        min_child_samples=int(best_params['min_child_samples']),
        colsample_bytree=best_params['colsample_bytree'],
        reg_alpha=best_params['reg_alpha'],
        reg_lambda=best_params['reg_lambda'],
        silent=True,
        **extra_params,
    )


def _dart_best_iteration(evals_result, metric='l2'):
    """Return the 1-based boosting round with the lowest validation `metric`.

    The recorded history is a 0-based list, hence the `+ 1`; without it the
    refit would train one round too few (or zero rounds when the first round
    is the best one).
    """
    history = evals_result['valid_0'][metric]
    return history.index(min(history)) + 1


best_params_each_model = dict()
accuracies_each_model = dict()

for boosting_type in boosting_types:
    print('------------- ',boosting_type, ' --------------')
    print('\n')

    # Time the whole boosting type: the duration is printed once per
    # boosting type, after all outcomes have been fitted.
    start = datetime.now()

    # Fresh containers per boosting type. `best_params_lgbm` used to be
    # neither created nor reset in this cell, so it depended on a leftover
    # variable from an earlier cell, and the same dict object ended up stored
    # under every boosting type.
    best_params_lgbm = dict()
    lgbm_accuracies = dict()

    for variable in variables[3:]: # Run for the three categorical outcomes
        print(variable)

        # Drop instances where the response is NA
        y_train_no_na = y_train[variable].dropna()
        X_train_no_na = X_train.loc[y_train_no_na.index.values]

        y_CV_no_na = y_CV[variable].dropna()
        X_CV_no_na = X_CV.loc[y_CV_no_na.index.values]

        # Leaderboard rows without a label are excluded from the leaderboard metrics
        mask = leaderboard[variable].isna()
        y_leaderboard = leaderboard[variable]

        if variable not in classification_vars:
            params = optimize_gbm_regressor(X_train_no_na, y_train_no_na, boosting_type)
            best_params = params['params']

            lgbm = _build_lgbm(LGBMRegressor, best_params, boosting_type)

            # Early stopping helps prevent overfitting the training set: training
            # stops once the validation score has not improved for 5 rounds.
            lgbm.fit(X_train_no_na, y_train_no_na, eval_set=(X_CV_no_na, y_CV_no_na), eval_metric='l2',
                     early_stopping_rounds=5)

            # With dart, the model that produced the best validation score no
            # longer exists afterwards (trees have been dropped since), so the
            # only option is to retrain with the appropriate number of iterations.
            # https://www.kaggle.com/c/microsoft-malware-prediction/discussion/78253
            if boosting_type == 'dart':
                dart_best_iteration = _dart_best_iteration(lgbm.evals_result_)
                lgbm = _build_lgbm(LGBMRegressor, best_params, 'dart',
                                   num_iteration=dart_best_iteration)
                lgbm.fit(X_train_no_na, y_train_no_na, eval_set=(X_CV_no_na, y_CV_no_na), eval_metric='l2')

            y_pred_train = lgbm.predict(X_train_no_na)
            y_pred_all = lgbm.predict(x_leaderboard)
            cv_preds = lgbm.predict(X_CV_no_na)
            # Predictions for every background row, used for the leaderboard submission
            pred_all_backgrounds = lgbm.predict(background_imputed_tot)

        else:
            params = optimize_gbm_classifier(X_train_no_na, y_train_no_na, boosting_type)
            best_params = params['params']

            lgbm = _build_lgbm(LGBMClassifier, best_params, boosting_type,
                               class_weight='balanced')
            lgbm.fit(X_train_no_na, y_train_no_na, eval_set=(X_CV_no_na, y_CV_no_na), eval_metric='l2',
                     early_stopping_rounds=10)

            # Same dart refit as in the regression branch.
            if boosting_type == 'dart':
                dart_best_iteration = _dart_best_iteration(lgbm.evals_result_)
                lgbm = _build_lgbm(LGBMClassifier, best_params, 'dart',
                                   num_iteration=dart_best_iteration,
                                   class_weight='balanced')
                lgbm.fit(X_train_no_na, y_train_no_na, eval_set=(X_CV_no_na, y_CV_no_na), eval_metric='l2')

            # Calibrate the already-fitted model with a sigmoid calibration function.
            # NOTE(review): the calibrator is fitted on the same CV split that the
            # 'CV' error below is computed on, so that error is presumably
            # optimistic -- confirm whether a separate split is wanted.
            calibrated = CalibratedClassifierCV(lgbm, cv='prefit', method='sigmoid')
            calibrated.fit(X_CV_no_na, y_CV_no_na)

            # Column 1 of predict_proba is kept as the prediction.
            y_pred_train = calibrated.predict_proba(X_train_no_na)[:,1]
            y_pred_all = calibrated.predict_proba(x_leaderboard)[:,1]
            cv_preds = calibrated.predict_proba(X_CV_no_na)[:,1]
            # Predictions for every background row, used for the leaderboard submission
            pred_all_backgrounds = calibrated.predict_proba(background_imputed_tot)[:,1]

        best_params_lgbm[variable] = best_params

        # Mean squared error is used as the performance metric for both model
        # types: for a single binary outcome it equals the Brier loss.
        error_types = dict()
        print('---- Train error ----')
        error_types['train'] = mean_squared_error(y_train_no_na, y_pred_train)
        print(error_types['train'])
        print('---- CV error ----')
        error_types['CV'] = mean_squared_error(y_CV_no_na, cv_preds)
        print(error_types['CV'])

        print('---- leaderboard stats ----')
        print(r2_score(y_leaderboard[~mask], y_pred_all[~mask]))

        error_types['leaderboard'] = mean_squared_error(y_leaderboard[~mask], y_pred_all[~mask])
        print(error_types['leaderboard'])

        lgbm_accuracies[variable] = error_types

        # Store this outcome's predictions in the prediction table
        prediction[variable] = pred_all_backgrounds

    best_params_each_model[boosting_type] = best_params_lgbm
    accuracies_each_model[boosting_type] = lgbm_accuracies

    # NOTE(review): only the columns in variables[3:] are overwritten above. If an
    # earlier run in this notebook wrote to the same 'lgbm_preds_<type>.csv'
    # names, those files are replaced here -- confirm that is intended.
    prediction.to_csv('lgbm_preds_' + boosting_type + '.csv')
    print(datetime.now()-start)
    print('\n\n\n------------------------------------------')
best_params_each_model_weighted_copy = best_params_each_model.copy()

-------------  gbdt  --------------


eviction
|   iter    |  target   | colsam... | learni... | max_depth | min_ch... | n_esti... | num_it... | num_le... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m-0.05957 [0m | [0m 0.9648  [0m | [0m 0.07011 [0m | [0m 3.575   [0m | [0m 48.87   [0m | [0m 572.0   [0m | [0m 139.3   [0m | [0m 29.01   [0m | [0m 0.6532  [0m | [0m 0.7489  [0m | [0m 0.8268  [0m |
| [0m 2       [0m | [0m-0.0814  [0m | [0m 0.8739  [0m | [0m 0.1926  [0m | [0m 1.117   [0m | [0m 30.22   [0m | [0m 305.7   [0m | [0m 148.5   [0m | [0m 24.67   [0m | [0m 0.8722  [0m | [0m 0.9646  [0m | [0m 0.8618  [0m |
| [95m 3       [0m | [95m-0.05879 [0m | [95m 0.8212  [0m | [95m 0.1463  [0m | [95m 7.546   [0m | [95m 71.86   [0m | [95m 445.2   [0m | [95m 159.5   [0m | [95m 2



[1]	valid_0's l2: 0.196833	valid_0's binary_logloss: 0.585557
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.159369	valid_0's binary_logloss: 0.505307
[3]	valid_0's l2: 0.132265	valid_0's binary_logloss: 0.443251
[4]	valid_0's l2: 0.110814	valid_0's binary_logloss: 0.391198
[5]	valid_0's l2: 0.0977501	valid_0's binary_logloss: 0.355265
[6]	valid_0's l2: 0.0860269	valid_0's binary_logloss: 0.321635
[7]	valid_0's l2: 0.0801892	valid_0's binary_logloss: 0.301981
[8]	valid_0's l2: 0.0743954	valid_0's binary_logloss: 0.282345
[9]	valid_0's l2: 0.068289	valid_0's binary_logloss: 0.263569
[10]	valid_0's l2: 0.0635337	valid_0's binary_logloss: 0.248025
[11]	valid_0's l2: 0.0612069	valid_0's binary_logloss: 0.238546
[12]	valid_0's l2: 0.0597573	valid_0's binary_logloss: 0.231027
[13]	valid_0's l2: 0.0584132	valid_0's binary_logloss: 0.223961
[14]	valid_0's l2: 0.056505	valid_0's binary_logloss: 0.216696
[15]	valid_0's l2: 0.0543107	valid_0's binary_logloss: 0.2



[1]	valid_0's l2: 0.246323	valid_0's binary_logloss: 0.685778
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.242177	valid_0's binary_logloss: 0.677442
[3]	valid_0's l2: 0.23848	valid_0's binary_logloss: 0.669953
[4]	valid_0's l2: 0.238591	valid_0's binary_logloss: 0.670141
[5]	valid_0's l2: 0.235823	valid_0's binary_logloss: 0.664422
[6]	valid_0's l2: 0.23212	valid_0's binary_logloss: 0.656786
[7]	valid_0's l2: 0.231657	valid_0's binary_logloss: 0.655802
[8]	valid_0's l2: 0.229908	valid_0's binary_logloss: 0.652061
[9]	valid_0's l2: 0.229266	valid_0's binary_logloss: 0.65065
[10]	valid_0's l2: 0.227135	valid_0's binary_logloss: 0.646194
[11]	valid_0's l2: 0.225432	valid_0's binary_logloss: 0.642769
[12]	valid_0's l2: 0.221741	valid_0's binary_logloss: 0.634908
[13]	valid_0's l2: 0.216964	valid_0's binary_logloss: 0.624762
[14]	valid_0's l2: 0.214077	valid_0's binary_logloss: 0.618468
[15]	valid_0's l2: 0.214203	valid_0's binary_logloss: 0.618388
[16]	v



[1]	valid_0's l2: 0.24608	valid_0's binary_logloss: 0.685303
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.2415	valid_0's binary_logloss: 0.67612
[3]	valid_0's l2: 0.23965	valid_0's binary_logloss: 0.672396
[4]	valid_0's l2: 0.236991	valid_0's binary_logloss: 0.667035
[5]	valid_0's l2: 0.234162	valid_0's binary_logloss: 0.661325
[6]	valid_0's l2: 0.231502	valid_0's binary_logloss: 0.655944
[7]	valid_0's l2: 0.228652	valid_0's binary_logloss: 0.650149
[8]	valid_0's l2: 0.227125	valid_0's binary_logloss: 0.646998
[9]	valid_0's l2: 0.223975	valid_0's binary_logloss: 0.640572
[10]	valid_0's l2: 0.22203	valid_0's binary_logloss: 0.636511
[11]	valid_0's l2: 0.221048	valid_0's binary_logloss: 0.634426
[12]	valid_0's l2: 0.218793	valid_0's binary_logloss: 0.629716
[13]	valid_0's l2: 0.217476	valid_0's binary_logloss: 0.626946
[14]	valid_0's l2: 0.216228	valid_0's binary_logloss: 0.624284
[15]	valid_0's l2: 0.215124	valid_0's binary_logloss: 0.621884
[16]	vali



[1]	valid_0's l2: 0.210235	valid_0's binary_logloss: 0.612659
[2]	valid_0's l2: 0.181005	valid_0's binary_logloss: 0.550685
[3]	valid_0's l2: 0.166278	valid_0's binary_logloss: 0.516942
[4]	valid_0's l2: 0.148456	valid_0's binary_logloss: 0.475951
[5]	valid_0's l2: 0.136093	valid_0's binary_logloss: 0.444722
[6]	valid_0's l2: 0.125422	valid_0's binary_logloss: 0.417113
[7]	valid_0's l2: 0.114998	valid_0's binary_logloss: 0.389389
[8]	valid_0's l2: 0.117498	valid_0's binary_logloss: 0.397342
[9]	valid_0's l2: 0.107256	valid_0's binary_logloss: 0.370104
[10]	valid_0's l2: 0.0979871	valid_0's binary_logloss: 0.344938
[11]	valid_0's l2: 0.0922764	valid_0's binary_logloss: 0.327595
[12]	valid_0's l2: 0.0928769	valid_0's binary_logloss: 0.331491




[13]	valid_0's l2: 0.0866294	valid_0's binary_logloss: 0.313357
[14]	valid_0's l2: 0.0826975	valid_0's binary_logloss: 0.300983
[15]	valid_0's l2: 0.0805447	valid_0's binary_logloss: 0.293602
[16]	valid_0's l2: 0.0778621	valid_0's binary_logloss: 0.284266
[17]	valid_0's l2: 0.0755736	valid_0's binary_logloss: 0.274659
[18]	valid_0's l2: 0.0732193	valid_0's binary_logloss: 0.26754
[19]	valid_0's l2: 0.0701364	valid_0's binary_logloss: 0.259314
[20]	valid_0's l2: 0.0680118	valid_0's binary_logloss: 0.25209
[21]	valid_0's l2: 0.068207	valid_0's binary_logloss: 0.253578
[22]	valid_0's l2: 0.0669554	valid_0's binary_logloss: 0.249244
[23]	valid_0's l2: 0.0649087	valid_0's binary_logloss: 0.242848
[24]	valid_0's l2: 0.0625783	valid_0's binary_logloss: 0.235639
[25]	valid_0's l2: 0.0618186	valid_0's binary_logloss: 0.231521
[26]	valid_0's l2: 0.060137	valid_0's binary_logloss: 0.227057
[27]	valid_0's l2: 0.0593525	valid_0's binary_logloss: 0.223351
[28]	valid_0's l2: 0.0593492	valid_0's binar



[1]	valid_0's l2: 0.210235	valid_0's binary_logloss: 0.612659
[2]	valid_0's l2: 0.181005	valid_0's binary_logloss: 0.550685
[3]	valid_0's l2: 0.166278	valid_0's binary_logloss: 0.516942
[4]	valid_0's l2: 0.148456	valid_0's binary_logloss: 0.475951
[5]	valid_0's l2: 0.136093	valid_0's binary_logloss: 0.444722
[6]	valid_0's l2: 0.125422	valid_0's binary_logloss: 0.417113
[7]	valid_0's l2: 0.114998	valid_0's binary_logloss: 0.389389
[8]	valid_0's l2: 0.117498	valid_0's binary_logloss: 0.397342
[9]	valid_0's l2: 0.107256	valid_0's binary_logloss: 0.370104
[10]	valid_0's l2: 0.0979871	valid_0's binary_logloss: 0.344938
[11]	valid_0's l2: 0.0922764	valid_0's binary_logloss: 0.327595
[12]	valid_0's l2: 0.0928769	valid_0's binary_logloss: 0.331491
[13]	valid_0's l2: 0.0866294	valid_0's binary_logloss: 0.313357
[14]	valid_0's l2: 0.0826975	valid_0's binary_logloss: 0.300983
[15]	valid_0's l2: 0.0805447	valid_0's binary_logloss: 0.293602
[16]	valid_0's l2: 0.0778621	valid_0's binary_logloss: 0.2



[1]	valid_0's l2: 0.239215	valid_0's binary_logloss: 0.671271
[2]	valid_0's l2: 0.229989	valid_0's binary_logloss: 0.652191
[3]	valid_0's l2: 0.227566	valid_0's binary_logloss: 0.646553
[4]	valid_0's l2: 0.227117	valid_0's binary_logloss: 0.64475
[5]	valid_0's l2: 0.219246	valid_0's binary_logloss: 0.628301
[6]	valid_0's l2: 0.212807	valid_0's binary_logloss: 0.614406
[7]	valid_0's l2: 0.207795	valid_0's binary_logloss: 0.604323
[8]	valid_0's l2: 0.210239	valid_0's binary_logloss: 0.60974
[9]	valid_0's l2: 0.208312	valid_0's binary_logloss: 0.605209




[10]	valid_0's l2: 0.200512	valid_0's binary_logloss: 0.588141
[11]	valid_0's l2: 0.197684	valid_0's binary_logloss: 0.580827
[12]	valid_0's l2: 0.196725	valid_0's binary_logloss: 0.579384
[13]	valid_0's l2: 0.191566	valid_0's binary_logloss: 0.568209
[14]	valid_0's l2: 0.189067	valid_0's binary_logloss: 0.561858
[15]	valid_0's l2: 0.18596	valid_0's binary_logloss: 0.55384
[16]	valid_0's l2: 0.187686	valid_0's binary_logloss: 0.556836
[17]	valid_0's l2: 0.1871	valid_0's binary_logloss: 0.554754
[18]	valid_0's l2: 0.183839	valid_0's binary_logloss: 0.548162
[19]	valid_0's l2: 0.181795	valid_0's binary_logloss: 0.543169
[20]	valid_0's l2: 0.180738	valid_0's binary_logloss: 0.54171
[21]	valid_0's l2: 0.180215	valid_0's binary_logloss: 0.540635
[22]	valid_0's l2: 0.18061	valid_0's binary_logloss: 0.541834
[23]	valid_0's l2: 0.18177	valid_0's binary_logloss: 0.545067
[24]	valid_0's l2: 0.182703	valid_0's binary_logloss: 0.54572
[25]	valid_0's l2: 0.181199	valid_0's binary_logloss: 0.543306



[1]	valid_0's l2: 0.239215	valid_0's binary_logloss: 0.671271
[2]	valid_0's l2: 0.229989	valid_0's binary_logloss: 0.652191
[3]	valid_0's l2: 0.227566	valid_0's binary_logloss: 0.646553
[4]	valid_0's l2: 0.227117	valid_0's binary_logloss: 0.64475
[5]	valid_0's l2: 0.219246	valid_0's binary_logloss: 0.628301
[6]	valid_0's l2: 0.212807	valid_0's binary_logloss: 0.614406
[7]	valid_0's l2: 0.207795	valid_0's binary_logloss: 0.604323
[8]	valid_0's l2: 0.210239	valid_0's binary_logloss: 0.60974
[9]	valid_0's l2: 0.208312	valid_0's binary_logloss: 0.605209
[10]	valid_0's l2: 0.200512	valid_0's binary_logloss: 0.588141
[11]	valid_0's l2: 0.197684	valid_0's binary_logloss: 0.580827
[12]	valid_0's l2: 0.196725	valid_0's binary_logloss: 0.579384
[13]	valid_0's l2: 0.191566	valid_0's binary_logloss: 0.568209
[14]	valid_0's l2: 0.189067	valid_0's binary_logloss: 0.561858
[15]	valid_0's l2: 0.18596	valid_0's binary_logloss: 0.55384
[16]	valid_0's l2: 0.187686	valid_0's binary_logloss: 0.556836
[17]	



[1]	valid_0's l2: 0.242196	valid_0's binary_logloss: 0.67746
[2]	valid_0's l2: 0.230647	valid_0's binary_logloss: 0.654032
[3]	valid_0's l2: 0.226672	valid_0's binary_logloss: 0.645641
[4]	valid_0's l2: 0.22246	valid_0's binary_logloss: 0.636656
[5]	valid_0's l2: 0.218525	valid_0's binary_logloss: 0.628344
[6]	valid_0's l2: 0.213842	valid_0's binary_logloss: 0.618447




[7]	valid_0's l2: 0.210662	valid_0's binary_logloss: 0.611071
[8]	valid_0's l2: 0.212075	valid_0's binary_logloss: 0.614402
[9]	valid_0's l2: 0.207554	valid_0's binary_logloss: 0.604769
[10]	valid_0's l2: 0.20383	valid_0's binary_logloss: 0.596043
[11]	valid_0's l2: 0.199534	valid_0's binary_logloss: 0.586442
[12]	valid_0's l2: 0.199917	valid_0's binary_logloss: 0.587303
[13]	valid_0's l2: 0.197079	valid_0's binary_logloss: 0.580255
[14]	valid_0's l2: 0.196729	valid_0's binary_logloss: 0.579208
[15]	valid_0's l2: 0.193741	valid_0's binary_logloss: 0.572818
[16]	valid_0's l2: 0.190902	valid_0's binary_logloss: 0.566285
[17]	valid_0's l2: 0.189549	valid_0's binary_logloss: 0.562375
[18]	valid_0's l2: 0.188065	valid_0's binary_logloss: 0.558755
[19]	valid_0's l2: 0.187298	valid_0's binary_logloss: 0.556983
[20]	valid_0's l2: 0.18803	valid_0's binary_logloss: 0.558454
[21]	valid_0's l2: 0.187225	valid_0's binary_logloss: 0.556562
[22]	valid_0's l2: 0.184666	valid_0's binary_logloss: 0.550



[1]	valid_0's l2: 0.242196	valid_0's binary_logloss: 0.67746
[2]	valid_0's l2: 0.230647	valid_0's binary_logloss: 0.654032
[3]	valid_0's l2: 0.226672	valid_0's binary_logloss: 0.645641
[4]	valid_0's l2: 0.22246	valid_0's binary_logloss: 0.636656
[5]	valid_0's l2: 0.218525	valid_0's binary_logloss: 0.628344
[6]	valid_0's l2: 0.213842	valid_0's binary_logloss: 0.618447
[7]	valid_0's l2: 0.210662	valid_0's binary_logloss: 0.611071
[8]	valid_0's l2: 0.212075	valid_0's binary_logloss: 0.614402
[9]	valid_0's l2: 0.207554	valid_0's binary_logloss: 0.604769
[10]	valid_0's l2: 0.20383	valid_0's binary_logloss: 0.596043
[11]	valid_0's l2: 0.199534	valid_0's binary_logloss: 0.586442
[12]	valid_0's l2: 0.199917	valid_0's binary_logloss: 0.587303
[13]	valid_0's l2: 0.197079	valid_0's binary_logloss: 0.580255
[14]	valid_0's l2: 0.196729	valid_0's binary_logloss: 0.579208
[15]	valid_0's l2: 0.193741	valid_0's binary_logloss: 0.572818
[16]	valid_0's l2: 0.190902	valid_0's binary_logloss: 0.566285
[17]



[1]	valid_0's l2: 0.238991	valid_0's binary_logloss: 0.671065
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.227768	valid_0's binary_logloss: 0.648318
[3]	valid_0's l2: 0.218474	valid_0's binary_logloss: 0.629025
[4]	valid_0's l2: 0.209854	valid_0's binary_logloss: 0.610995
[5]	valid_0's l2: 0.205295	valid_0's binary_logloss: 0.601279
[6]	valid_0's l2: 0.200619	valid_0's binary_logloss: 0.591214
[7]	valid_0's l2: 0.194947	valid_0's binary_logloss: 0.579028
[8]	valid_0's l2: 0.186722	valid_0's binary_logloss: 0.561079
[9]	valid_0's l2: 0.179853	valid_0's binary_logloss: 0.54572
[10]	valid_0's l2: 0.176105	valid_0's binary_logloss: 0.536974
[11]	valid_0's l2: 0.172306	valid_0's binary_logloss: 0.528337
[12]	valid_0's l2: 0.171163	valid_0's binary_logloss: 0.525414
[13]	valid_0's l2: 0.167764	valid_0's binary_logloss: 0.517788
[14]	valid_0's l2: 0.166181	valid_0's binary_logloss: 0.513851
[15]	valid_0's l2: 0.165007	valid_0's binary_logloss: 0.510892
[16]



[1]	valid_0's l2: 0.247972	valid_0's binary_logloss: 0.689062
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.240837	valid_0's binary_logloss: 0.674676
[3]	valid_0's l2: 0.2399	valid_0's binary_logloss: 0.672653
[4]	valid_0's l2: 0.236051	valid_0's binary_logloss: 0.664889
[5]	valid_0's l2: 0.231613	valid_0's binary_logloss: 0.655764
[6]	valid_0's l2: 0.227722	valid_0's binary_logloss: 0.647446
[7]	valid_0's l2: 0.223672	valid_0's binary_logloss: 0.639138
[8]	valid_0's l2: 0.222018	valid_0's binary_logloss: 0.635341
[9]	valid_0's l2: 0.221701	valid_0's binary_logloss: 0.634902
[10]	valid_0's l2: 0.227426	valid_0's binary_logloss: 0.646666
[11]	valid_0's l2: 0.234005	valid_0's binary_logloss: 0.660216
[12]	valid_0's l2: 0.237167	valid_0's binary_logloss: 0.666965
[13]	valid_0's l2: 0.241224	valid_0's binary_logloss: 0.675662
[14]	valid_0's l2: 0.246094	valid_0's binary_logloss: 0.68642
[15]	valid_0's l2: 0.250488	valid_0's binary_logloss: 0.696419
[16]	v



[1]	valid_0's l2: 0.245308	valid_0's binary_logloss: 0.683734
Training until validation scores don't improve for 10 rounds
[2]	valid_0's l2: 0.238096	valid_0's binary_logloss: 0.669196
[3]	valid_0's l2: 0.231505	valid_0's binary_logloss: 0.655743
[4]	valid_0's l2: 0.226697	valid_0's binary_logloss: 0.645817
[5]	valid_0's l2: 0.223452	valid_0's binary_logloss: 0.638997
[6]	valid_0's l2: 0.219753	valid_0's binary_logloss: 0.631204
[7]	valid_0's l2: 0.215409	valid_0's binary_logloss: 0.622165
[8]	valid_0's l2: 0.213482	valid_0's binary_logloss: 0.618025
[9]	valid_0's l2: 0.211757	valid_0's binary_logloss: 0.61416
[10]	valid_0's l2: 0.21688	valid_0's binary_logloss: 0.624879
[11]	valid_0's l2: 0.218807	valid_0's binary_logloss: 0.628717
[12]	valid_0's l2: 0.22171	valid_0's binary_logloss: 0.634949
[13]	valid_0's l2: 0.223636	valid_0's binary_logloss: 0.638757
[14]	valid_0's l2: 0.22591	valid_0's binary_logloss: 0.64348
[15]	valid_0's l2: 0.223654	valid_0's binary_logloss: 0.638227
[16]	val

In [10]:
import pickle

# Write the best parameters and the accuracies to disk, one pickle file each.
# Both output files carry the "weighted" tag in their names.
for out_path, payload in (
    ('best_params_trees_weighted.pickle', best_params_each_model_weighted_copy),
    ('best_accs_trees_weighted.pickle', accuracies_each_model),
):
    with open(out_path, 'wb') as handle:
        pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)

# NOTE(review): the two names below were left here by the author; presumably
# they are the no-sampling counterparts of the objects saved above -- confirm.
# best_params_each_model_no_sampling_copy
# accuracies_each_model_no_sampling_copy

In [11]:

# Flatten the nested accuracy results into "<variable> <model> <leaderboard score>"
# strings; the variable name comes first so that sorting groups the models
# of each variable next to one another.
lst_leader_board_accs = []
for model_name, per_variable in accuracies_each_model.items():
    for var_name, scores in per_variable.items():
        lst_leader_board_accs.append(
            ' '.join((var_name, model_name, str(scores['leaderboard'])))
        )

# Bare last expression: the sorted list is displayed as the cell's output.
sorted(lst_leader_board_accs)

['eviction dart 0.05315720683920307',
 'eviction gbdt 0.05256860701916669',
 'eviction goss 0.052061264545674536',
 'jobTraining dart 0.2118030106777266',
 'jobTraining gbdt 0.21187664440564227',
 'jobTraining goss 0.20413801623096692',
 'layoff dart 0.17664021831573956',
 'layoff gbdt 0.17505472493917246',
 'layoff goss 0.17617282798869383']