In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from xgboost import XGBClassifier
from itertools import product
from collections import defaultdict

  from pandas import MultiIndex, Int64Index


In [3]:
from sklearn.model_selection import train_test_split

In [1]:
def prep(series):
    """Build the model feature frame from a raw competition DataFrame.

    Despite its name, ``series`` is used as a DataFrame: it must provide the
    string column ``f_27`` and the numeric columns ``f_00``, ``f_01``,
    ``f_02``, ``f_05``, ``f_21``, ``f_22``, ``f_26`` and ``f_28``.

    Features appended (in this order, after the existing columns):
      * ``<c>_count`` for each c in 'ABTCE' - occurrences of that letter in ``f_27``
      * ``pos0`` .. ``pos9`` - the letter at each of the first ten positions of
        ``f_27`` encoded as ``ord(letter) - ord('A')``
      * ``unique`` - number of distinct characters in ``f_27``
      * ``i_02_21``, ``i_05_22``, ``i_00_01_26`` - ternary (-1/0/+1) indicators
        for whether the corresponding column sum is above the upper threshold
        or below the lower threshold
    ``f_28`` is divided by 100 in place of the original values, and ``f_27``
    itself is dropped from the result.

    The input frame is NOT modified: all work happens on a copy, so the
    function can safely be re-run on the same raw frame.

    Returns:
        A new DataFrame with the engineered columns and without ``f_27``.
    """
    # Work on a copy so the caller's frame is left untouched (idempotent re-runs).
    frame = series.copy()
    codes = frame['f_27']

    for c in 'ABTCE':
        frame[c + '_count'] = codes.str.count(c)
    for i in range(10):
        frame['pos' + str(i)] = codes.str.get(i).apply(ord) - ord('A')
    frame['unique'] = codes.apply(lambda s: len(set(s)))

    # Ternary indicators: +1 above the upper cut, -1 below the lower cut, else 0.
    sum_02_21 = frame.f_21 + frame.f_02
    frame['i_02_21'] = (sum_02_21 > 5.2).astype(int) - (sum_02_21 < -5.3).astype(int)
    sum_05_22 = frame.f_22 + frame.f_05
    frame['i_05_22'] = (sum_05_22 > 5.1).astype(int) - (sum_05_22 < -5.4).astype(int)
    sum_00_01_26 = frame.f_00 + frame.f_01 + frame.f_26
    frame['i_00_01_26'] = (sum_00_01_26 > 5.0).astype(int) - (sum_00_01_26 < -5.0).astype(int)

    frame['f_28'] = frame['f_28'] / 100
    return frame.drop(columns=['f_27'])

In [4]:
# Read both competition CSVs and push them through the same `prep` feature pipeline.
# Note: `valid_series` is built from 'test.csv' (it has no `target` column downstream),
# i.e. it is the set to predict on, not a validation split of the training data.
train_series = prep(pd.read_csv('train.csv'))
valid_series = prep(pd.read_csv('test.csv'))

In [5]:
# Plain NumPy views of the prepared frames: `id` is excluded from the features and
# `target` is split off as the label vector.
X = train_series.drop(columns=['id','target']).values
y = train_series['target'].values
# Features of the test file; column order matches X because both frames come from `prep`.
X_valid = valid_series.drop(columns=['id']).values

In [7]:
# Reserve 10% of the labelled rows as a holdout that the fold models never train on;
# the seed is fixed so the split is reproducible.
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, test_size=0.1, random_state=123)

In [8]:
from sklearn.model_selection import KFold
# Five-fold splitter; `train_xgboost` reads this module-level `kf` directly.
# NOTE(review): no `shuffle`/`random_state` is passed, so folds follow row order —
# presumably acceptable because X_train comes out of `train_test_split`; confirm.
kf = KFold(n_splits=5)

In [9]:
# XGBoost settings in grid-style form: every value is a one-element list and
# `train_xgboost` only ever uses element [0] of each.
# 'early_stopping_rounds' is routed to `fit()`; every other key goes to the
# XGBClassifier constructor.
params = {'gamma': [0],
          'max_depth': [6],
          'min_child_weight': [0.0],
          'n_estimators': [10000],
          'early_stopping_rounds': [50],
          'learning_rate': [0.3]}

In [10]:
def train_xgboost(params, X, y, obj="binary:logistic", cv=None):
    """Train one XGBClassifier per cross-validation fold.

    For every fold the model is first fitted with the fold's held-out part as
    the evaluation set (metric: AUC). If that fit exposes ``best_ntree_limit``
    the model is fitted a second time on the same fold with ``n_estimators``
    set to that value, and the refitted model is the one kept.

    Args:
        params: Mapping of parameter name -> sequence; only element ``[0]`` of
            each value is used. ``'early_stopping_rounds'`` is passed to
            ``fit()``; every other key is passed to the ``XGBClassifier``
            constructor. ``'n_estimators'`` is optional. The mapping itself is
            not modified.
        X: Feature array, indexable by the integer index arrays the splitter yields.
        y: Label array aligned with ``X``.
        obj: Value for the classifier's ``objective`` argument.
        cv: Object with a ``split(X, y)`` method yielding ``(train_idx, test_idx)``
            pairs. Defaults to the module-level ``kf`` splitter, which preserves
            the behaviour of existing callers.

    Returns:
        List with one fitted classifier per fold, in split order.

    NOTE(review): this relies on ``best_ntree_limit`` and on passing
    ``early_stopping_rounds`` / ``eval_metric`` to ``fit()``, which newer
    XGBoost releases deprecate or remove - confirm the pinned version.
    """
    splitter = kf if cv is None else cv

    # Separate constructor arguments from fit-time arguments.
    booster_kwargs, fit_kwargs = {}, {}
    for key, val in params.items():
        target = fit_kwargs if key == 'early_stopping_rounds' else booster_kwargs
        target[key] = val[0]
    booster_kwargs['random_state'] = 123
    fit_kwargs['eval_metric'] = 'auc'

    clfs = []
    for train_idx, test_idx in splitter.split(X, y):
        print(booster_kwargs)

        X_fold, y_fold = X[train_idx], y[train_idx]
        # The fold's held-out rows drive the per-round AUC log and early stopping.
        fold_fit_kwargs = {**fit_kwargs, 'eval_set': [(X[test_idx], y[test_idx])]}

        clf = XGBClassifier(**booster_kwargs, objective=obj, use_label_encoder=False)
        clf.fit(X_fold, y_fold, **fold_fit_kwargs, verbose=True)

        if hasattr(clf, 'best_ntree_limit'):
            # Refit with exactly the best number of trees. A fresh kwargs dict is
            # used so the shared settings never need to be patched and restored.
            refit_kwargs = {**booster_kwargs, 'n_estimators': clf.best_ntree_limit}
            print(f'retrain with {refit_kwargs["n_estimators"]}')
            clf = XGBClassifier(**refit_kwargs, objective=obj, use_label_encoder=False)
            clf.fit(X_fold, y_fold, **fold_fit_kwargs, verbose=True)
        clfs.append(clf)

    return clfs

In [11]:
# Fit one model per fold of `kf` on the 90% training split; each fit logs per-round AUC.
clfs = train_xgboost(params, X_train, y_train)

{'gamma': 0, 'max_depth': 6, 'min_child_weight': 0.0, 'n_estimators': 10000, 'learning_rate': 0.3, 'random_state': 123}
[0]	validation_0-auc:0.66146
[1]	validation_0-auc:0.68804
[2]	validation_0-auc:0.85083
[3]	validation_0-auc:0.87242
[4]	validation_0-auc:0.88506
[5]	validation_0-auc:0.89404
[6]	validation_0-auc:0.92168
[7]	validation_0-auc:0.92684
[8]	validation_0-auc:0.93382
[9]	validation_0-auc:0.93699
[10]	validation_0-auc:0.94330
[11]	validation_0-auc:0.94393
[12]	validation_0-auc:0.95061
[13]	validation_0-auc:0.95149
[14]	validation_0-auc:0.95544
[15]	validation_0-auc:0.95870
[16]	validation_0-auc:0.95930
[17]	validation_0-auc:0.96035
[18]	validation_0-auc:0.96357
[19]	validation_0-auc:0.96558
[20]	validation_0-auc:0.96657
[21]	validation_0-auc:0.96688
[22]	validation_0-auc:0.96820
[23]	validation_0-auc:0.96885
[24]	validation_0-auc:0.97072
[25]	validation_0-auc:0.97090
[26]	validation_0-auc:0.97171
[27]	validation_0-auc:0.97296
[28]	validation_0-auc:0.97334
[29]	validation_0-au

[264]	validation_0-auc:0.99585
[265]	validation_0-auc:0.99585
[266]	validation_0-auc:0.99587
[267]	validation_0-auc:0.99587
[268]	validation_0-auc:0.99589
[269]	validation_0-auc:0.99590
[270]	validation_0-auc:0.99591
[271]	validation_0-auc:0.99591
[272]	validation_0-auc:0.99592
[273]	validation_0-auc:0.99593
[274]	validation_0-auc:0.99593
[275]	validation_0-auc:0.99595
[276]	validation_0-auc:0.99595
[277]	validation_0-auc:0.99596
[278]	validation_0-auc:0.99602
[279]	validation_0-auc:0.99601
[280]	validation_0-auc:0.99602
[281]	validation_0-auc:0.99605
[282]	validation_0-auc:0.99607
[283]	validation_0-auc:0.99611
[284]	validation_0-auc:0.99614
[285]	validation_0-auc:0.99614
[286]	validation_0-auc:0.99615
[287]	validation_0-auc:0.99618
[288]	validation_0-auc:0.99621
[289]	validation_0-auc:0.99622
[290]	validation_0-auc:0.99624
[291]	validation_0-auc:0.99626
[292]	validation_0-auc:0.99627
[293]	validation_0-auc:0.99626
[294]	validation_0-auc:0.99626
[295]	validation_0-auc:0.99627
[296]	va

[529]	validation_0-auc:0.99695
[530]	validation_0-auc:0.99695
[531]	validation_0-auc:0.99694
[532]	validation_0-auc:0.99694
[533]	validation_0-auc:0.99695
[534]	validation_0-auc:0.99696
[535]	validation_0-auc:0.99697
[536]	validation_0-auc:0.99697
[537]	validation_0-auc:0.99697
[538]	validation_0-auc:0.99698
[539]	validation_0-auc:0.99698
[540]	validation_0-auc:0.99698
[541]	validation_0-auc:0.99698
[542]	validation_0-auc:0.99698
[543]	validation_0-auc:0.99698
[544]	validation_0-auc:0.99699
[545]	validation_0-auc:0.99699
[546]	validation_0-auc:0.99700
[547]	validation_0-auc:0.99700
[548]	validation_0-auc:0.99700
[549]	validation_0-auc:0.99700
[550]	validation_0-auc:0.99700
[551]	validation_0-auc:0.99699
[552]	validation_0-auc:0.99699
[553]	validation_0-auc:0.99699
[554]	validation_0-auc:0.99699
[555]	validation_0-auc:0.99699
[556]	validation_0-auc:0.99699
[557]	validation_0-auc:0.99699
[558]	validation_0-auc:0.99699
[559]	validation_0-auc:0.99699
[560]	validation_0-auc:0.99699
[561]	va

[21]	validation_0-auc:0.96688
[22]	validation_0-auc:0.96820
[23]	validation_0-auc:0.96885
[24]	validation_0-auc:0.97072
[25]	validation_0-auc:0.97090
[26]	validation_0-auc:0.97171
[27]	validation_0-auc:0.97296
[28]	validation_0-auc:0.97334
[29]	validation_0-auc:0.97382
[30]	validation_0-auc:0.97504
[31]	validation_0-auc:0.97539
[32]	validation_0-auc:0.97651
[33]	validation_0-auc:0.97670
[34]	validation_0-auc:0.97711
[35]	validation_0-auc:0.97745
[36]	validation_0-auc:0.97851
[37]	validation_0-auc:0.97865
[38]	validation_0-auc:0.97911
[39]	validation_0-auc:0.97939
[40]	validation_0-auc:0.97949
[41]	validation_0-auc:0.97975
[42]	validation_0-auc:0.98012
[43]	validation_0-auc:0.98045
[44]	validation_0-auc:0.98059
[45]	validation_0-auc:0.98115
[46]	validation_0-auc:0.98202
[47]	validation_0-auc:0.98210
[48]	validation_0-auc:0.98232
[49]	validation_0-auc:0.98277
[50]	validation_0-auc:0.98285
[51]	validation_0-auc:0.98318
[52]	validation_0-auc:0.98330
[53]	validation_0-auc:0.98343
[54]	valid

[288]	validation_0-auc:0.99621
[289]	validation_0-auc:0.99622
[290]	validation_0-auc:0.99624
[291]	validation_0-auc:0.99626
[292]	validation_0-auc:0.99627
[293]	validation_0-auc:0.99626
[294]	validation_0-auc:0.99626
[295]	validation_0-auc:0.99627
[296]	validation_0-auc:0.99628
[297]	validation_0-auc:0.99630
[298]	validation_0-auc:0.99632
[299]	validation_0-auc:0.99634
[300]	validation_0-auc:0.99634
[301]	validation_0-auc:0.99634
[302]	validation_0-auc:0.99634
[303]	validation_0-auc:0.99634
[304]	validation_0-auc:0.99634
[305]	validation_0-auc:0.99634
[306]	validation_0-auc:0.99634
[307]	validation_0-auc:0.99634
[308]	validation_0-auc:0.99634
[309]	validation_0-auc:0.99634
[310]	validation_0-auc:0.99635
[311]	validation_0-auc:0.99635
[312]	validation_0-auc:0.99635
[313]	validation_0-auc:0.99635
[314]	validation_0-auc:0.99635
[315]	validation_0-auc:0.99635
[316]	validation_0-auc:0.99635
[317]	validation_0-auc:0.99635
[318]	validation_0-auc:0.99636
[319]	validation_0-auc:0.99638
[320]	va

[553]	validation_0-auc:0.99699
[554]	validation_0-auc:0.99699
[555]	validation_0-auc:0.99699
[556]	validation_0-auc:0.99699
[557]	validation_0-auc:0.99699
[558]	validation_0-auc:0.99699
[559]	validation_0-auc:0.99699
[560]	validation_0-auc:0.99699
[561]	validation_0-auc:0.99699
[562]	validation_0-auc:0.99699
[563]	validation_0-auc:0.99699
[564]	validation_0-auc:0.99699
[565]	validation_0-auc:0.99699
[566]	validation_0-auc:0.99699
[567]	validation_0-auc:0.99699
[568]	validation_0-auc:0.99699
[569]	validation_0-auc:0.99699
[570]	validation_0-auc:0.99699
[571]	validation_0-auc:0.99699
[572]	validation_0-auc:0.99699
[573]	validation_0-auc:0.99699
[574]	validation_0-auc:0.99699
[575]	validation_0-auc:0.99699
[576]	validation_0-auc:0.99698
[577]	validation_0-auc:0.99699
[578]	validation_0-auc:0.99699
[579]	validation_0-auc:0.99699
[580]	validation_0-auc:0.99699
[581]	validation_0-auc:0.99698
[582]	validation_0-auc:0.99698
[583]	validation_0-auc:0.99698
[584]	validation_0-auc:0.99699
[585]	va

[93]	validation_0-auc:0.98811
[94]	validation_0-auc:0.98811
[95]	validation_0-auc:0.98813
[96]	validation_0-auc:0.98823
[97]	validation_0-auc:0.98835
[98]	validation_0-auc:0.98842
[99]	validation_0-auc:0.98851
[100]	validation_0-auc:0.98852
[101]	validation_0-auc:0.98854
[102]	validation_0-auc:0.98853
[103]	validation_0-auc:0.98866
[104]	validation_0-auc:0.98912
[105]	validation_0-auc:0.98913
[106]	validation_0-auc:0.98941
[107]	validation_0-auc:0.98943
[108]	validation_0-auc:0.98942
[109]	validation_0-auc:0.98942
[110]	validation_0-auc:0.98944
[111]	validation_0-auc:0.98950
[112]	validation_0-auc:0.98955
[113]	validation_0-auc:0.98970
[114]	validation_0-auc:0.98974
[115]	validation_0-auc:0.98972
[116]	validation_0-auc:0.98973
[117]	validation_0-auc:0.98985
[118]	validation_0-auc:0.98985
[119]	validation_0-auc:0.98987
[120]	validation_0-auc:0.98991
[121]	validation_0-auc:0.99000
[122]	validation_0-auc:0.99003
[123]	validation_0-auc:0.99005
[124]	validation_0-auc:0.99009
[125]	validatio

[358]	validation_0-auc:0.99502
[359]	validation_0-auc:0.99502
[360]	validation_0-auc:0.99502
[361]	validation_0-auc:0.99502
[362]	validation_0-auc:0.99502
[363]	validation_0-auc:0.99503
[364]	validation_0-auc:0.99504
[365]	validation_0-auc:0.99504
[366]	validation_0-auc:0.99504
[367]	validation_0-auc:0.99504
[368]	validation_0-auc:0.99503
[369]	validation_0-auc:0.99504
[370]	validation_0-auc:0.99506
[371]	validation_0-auc:0.99510
[372]	validation_0-auc:0.99511
[373]	validation_0-auc:0.99513
[374]	validation_0-auc:0.99516
[375]	validation_0-auc:0.99517
[376]	validation_0-auc:0.99517
[377]	validation_0-auc:0.99517
[378]	validation_0-auc:0.99516
[379]	validation_0-auc:0.99516
[380]	validation_0-auc:0.99517
[381]	validation_0-auc:0.99516
[382]	validation_0-auc:0.99517
[383]	validation_0-auc:0.99516
[384]	validation_0-auc:0.99516
[385]	validation_0-auc:0.99516
[386]	validation_0-auc:0.99516
[387]	validation_0-auc:0.99516
[388]	validation_0-auc:0.99516
[389]	validation_0-auc:0.99516
[390]	va

[623]	validation_0-auc:0.99547
[624]	validation_0-auc:0.99546
[625]	validation_0-auc:0.99545
[626]	validation_0-auc:0.99546
[627]	validation_0-auc:0.99545
[628]	validation_0-auc:0.99546
[629]	validation_0-auc:0.99546
[630]	validation_0-auc:0.99546
[631]	validation_0-auc:0.99546
[632]	validation_0-auc:0.99546
[633]	validation_0-auc:0.99546
[634]	validation_0-auc:0.99546
[635]	validation_0-auc:0.99546
[636]	validation_0-auc:0.99546
[637]	validation_0-auc:0.99546
[638]	validation_0-auc:0.99546
[639]	validation_0-auc:0.99546
[640]	validation_0-auc:0.99546
[641]	validation_0-auc:0.99546
[642]	validation_0-auc:0.99546
[643]	validation_0-auc:0.99547
[644]	validation_0-auc:0.99548
[645]	validation_0-auc:0.99549
[646]	validation_0-auc:0.99549
[647]	validation_0-auc:0.99549
[648]	validation_0-auc:0.99548
[649]	validation_0-auc:0.99548
[650]	validation_0-auc:0.99547
[651]	validation_0-auc:0.99548
[652]	validation_0-auc:0.99548
[653]	validation_0-auc:0.99548
[654]	validation_0-auc:0.99547
[655]	va

[164]	validation_0-auc:0.99169
[165]	validation_0-auc:0.99180
[166]	validation_0-auc:0.99188
[167]	validation_0-auc:0.99189
[168]	validation_0-auc:0.99189
[169]	validation_0-auc:0.99189
[170]	validation_0-auc:0.99191
[171]	validation_0-auc:0.99198
[172]	validation_0-auc:0.99199
[173]	validation_0-auc:0.99208
[174]	validation_0-auc:0.99218
[175]	validation_0-auc:0.99232
[176]	validation_0-auc:0.99231
[177]	validation_0-auc:0.99232
[178]	validation_0-auc:0.99230
[179]	validation_0-auc:0.99232
[180]	validation_0-auc:0.99234
[181]	validation_0-auc:0.99236
[182]	validation_0-auc:0.99246
[183]	validation_0-auc:0.99250
[184]	validation_0-auc:0.99256
[185]	validation_0-auc:0.99264
[186]	validation_0-auc:0.99264
[187]	validation_0-auc:0.99270
[188]	validation_0-auc:0.99272
[189]	validation_0-auc:0.99284
[190]	validation_0-auc:0.99296
[191]	validation_0-auc:0.99296
[192]	validation_0-auc:0.99296
[193]	validation_0-auc:0.99299
[194]	validation_0-auc:0.99301
[195]	validation_0-auc:0.99301
[196]	va

[429]	validation_0-auc:0.99521
[430]	validation_0-auc:0.99521
[431]	validation_0-auc:0.99521
[432]	validation_0-auc:0.99521
[433]	validation_0-auc:0.99521
[434]	validation_0-auc:0.99520
[435]	validation_0-auc:0.99520
[436]	validation_0-auc:0.99521
[437]	validation_0-auc:0.99520
[438]	validation_0-auc:0.99521
[439]	validation_0-auc:0.99521
[440]	validation_0-auc:0.99521
[441]	validation_0-auc:0.99522
[442]	validation_0-auc:0.99522
[443]	validation_0-auc:0.99522
[444]	validation_0-auc:0.99520
[445]	validation_0-auc:0.99520
[446]	validation_0-auc:0.99520
[447]	validation_0-auc:0.99520
[448]	validation_0-auc:0.99520
[449]	validation_0-auc:0.99520
[450]	validation_0-auc:0.99520
[451]	validation_0-auc:0.99520
[452]	validation_0-auc:0.99521
[453]	validation_0-auc:0.99521
[454]	validation_0-auc:0.99521
[455]	validation_0-auc:0.99521
[456]	validation_0-auc:0.99521
[457]	validation_0-auc:0.99521
[458]	validation_0-auc:0.99522
[459]	validation_0-auc:0.99523
[460]	validation_0-auc:0.99524
[461]	va

[14]	validation_0-auc:0.95606
[15]	validation_0-auc:0.95705
[16]	validation_0-auc:0.96208
[17]	validation_0-auc:0.96283
[18]	validation_0-auc:0.96335
[19]	validation_0-auc:0.96494
[20]	validation_0-auc:0.96528
[21]	validation_0-auc:0.96700
[22]	validation_0-auc:0.96770
[23]	validation_0-auc:0.97034
[24]	validation_0-auc:0.97096
[25]	validation_0-auc:0.97230
[26]	validation_0-auc:0.97359
[27]	validation_0-auc:0.97389
[28]	validation_0-auc:0.97529
[29]	validation_0-auc:0.97590
[30]	validation_0-auc:0.97603
[31]	validation_0-auc:0.97695
[32]	validation_0-auc:0.97760
[33]	validation_0-auc:0.97772
[34]	validation_0-auc:0.97827
[35]	validation_0-auc:0.97875
[36]	validation_0-auc:0.97904
[37]	validation_0-auc:0.98007
[38]	validation_0-auc:0.98024
[39]	validation_0-auc:0.98031
[40]	validation_0-auc:0.98053
[41]	validation_0-auc:0.98106
[42]	validation_0-auc:0.98115
[43]	validation_0-auc:0.98128
[44]	validation_0-auc:0.98144
[45]	validation_0-auc:0.98164
[46]	validation_0-auc:0.98236
[47]	valid

[282]	validation_0-auc:0.99531
[283]	validation_0-auc:0.99531
[284]	validation_0-auc:0.99531
[285]	validation_0-auc:0.99531
[286]	validation_0-auc:0.99531
[287]	validation_0-auc:0.99531
[288]	validation_0-auc:0.99532
[289]	validation_0-auc:0.99532
[290]	validation_0-auc:0.99533
[291]	validation_0-auc:0.99533
[292]	validation_0-auc:0.99533
[293]	validation_0-auc:0.99533
[294]	validation_0-auc:0.99533
[295]	validation_0-auc:0.99533
[296]	validation_0-auc:0.99533
[297]	validation_0-auc:0.99533
[298]	validation_0-auc:0.99534
[299]	validation_0-auc:0.99534
[300]	validation_0-auc:0.99535
[301]	validation_0-auc:0.99536
[302]	validation_0-auc:0.99538
[303]	validation_0-auc:0.99539
[304]	validation_0-auc:0.99540
[305]	validation_0-auc:0.99543
[306]	validation_0-auc:0.99543
[307]	validation_0-auc:0.99545
[308]	validation_0-auc:0.99550
[309]	validation_0-auc:0.99553
[310]	validation_0-auc:0.99555
[311]	validation_0-auc:0.99555
[312]	validation_0-auc:0.99555
[313]	validation_0-auc:0.99558
[314]	va

[547]	validation_0-auc:0.99611
[548]	validation_0-auc:0.99611
[549]	validation_0-auc:0.99610
[550]	validation_0-auc:0.99610
[551]	validation_0-auc:0.99610
[552]	validation_0-auc:0.99611
[553]	validation_0-auc:0.99611
[554]	validation_0-auc:0.99610
[555]	validation_0-auc:0.99610
[556]	validation_0-auc:0.99610
[557]	validation_0-auc:0.99611
[558]	validation_0-auc:0.99611
[559]	validation_0-auc:0.99611
[560]	validation_0-auc:0.99611
[561]	validation_0-auc:0.99611
[562]	validation_0-auc:0.99611
[563]	validation_0-auc:0.99611
[564]	validation_0-auc:0.99611
[565]	validation_0-auc:0.99611
[566]	validation_0-auc:0.99611
[567]	validation_0-auc:0.99611
[568]	validation_0-auc:0.99611
[569]	validation_0-auc:0.99612
[570]	validation_0-auc:0.99612
[571]	validation_0-auc:0.99612
[572]	validation_0-auc:0.99611
[573]	validation_0-auc:0.99611
[574]	validation_0-auc:0.99611
[575]	validation_0-auc:0.99611
[576]	validation_0-auc:0.99611
[577]	validation_0-auc:0.99611
[578]	validation_0-auc:0.99612
[579]	va

[101]	validation_0-auc:0.99028
[102]	validation_0-auc:0.99035
[103]	validation_0-auc:0.99041
[104]	validation_0-auc:0.99043
[105]	validation_0-auc:0.99044
[106]	validation_0-auc:0.99047
[107]	validation_0-auc:0.99049
[108]	validation_0-auc:0.99059
[109]	validation_0-auc:0.99072
[110]	validation_0-auc:0.99087
[111]	validation_0-auc:0.99088
[112]	validation_0-auc:0.99089
[113]	validation_0-auc:0.99091
[114]	validation_0-auc:0.99091
[115]	validation_0-auc:0.99105
[116]	validation_0-auc:0.99119
[117]	validation_0-auc:0.99141
[118]	validation_0-auc:0.99151
[119]	validation_0-auc:0.99154
[120]	validation_0-auc:0.99154
[121]	validation_0-auc:0.99155
[122]	validation_0-auc:0.99162
[123]	validation_0-auc:0.99173
[124]	validation_0-auc:0.99173
[125]	validation_0-auc:0.99176
[126]	validation_0-auc:0.99181
[127]	validation_0-auc:0.99184
[128]	validation_0-auc:0.99193
[129]	validation_0-auc:0.99194
[130]	validation_0-auc:0.99195
[131]	validation_0-auc:0.99196
[132]	validation_0-auc:0.99200
[133]	va

[366]	validation_0-auc:0.99586
[367]	validation_0-auc:0.99587
[368]	validation_0-auc:0.99586
[369]	validation_0-auc:0.99587
[370]	validation_0-auc:0.99587
[371]	validation_0-auc:0.99587
[372]	validation_0-auc:0.99587
[373]	validation_0-auc:0.99587
[374]	validation_0-auc:0.99587
[375]	validation_0-auc:0.99586
[376]	validation_0-auc:0.99586
[377]	validation_0-auc:0.99586
[378]	validation_0-auc:0.99587
[379]	validation_0-auc:0.99587
[380]	validation_0-auc:0.99587
[381]	validation_0-auc:0.99587
[382]	validation_0-auc:0.99587
[383]	validation_0-auc:0.99587
[384]	validation_0-auc:0.99587
[385]	validation_0-auc:0.99587
[386]	validation_0-auc:0.99587
[387]	validation_0-auc:0.99587
[388]	validation_0-auc:0.99587
[389]	validation_0-auc:0.99587
[390]	validation_0-auc:0.99587
[391]	validation_0-auc:0.99587
[392]	validation_0-auc:0.99587
[393]	validation_0-auc:0.99587
[394]	validation_0-auc:0.99587
[395]	validation_0-auc:0.99587
[396]	validation_0-auc:0.99588
[397]	validation_0-auc:0.99588
[398]	va

[631]	validation_0-auc:0.99621
[632]	validation_0-auc:0.99621
[633]	validation_0-auc:0.99621
[634]	validation_0-auc:0.99621
[635]	validation_0-auc:0.99621
[636]	validation_0-auc:0.99621
[637]	validation_0-auc:0.99621
[638]	validation_0-auc:0.99621
[639]	validation_0-auc:0.99621
[640]	validation_0-auc:0.99621
[641]	validation_0-auc:0.99621
[642]	validation_0-auc:0.99621
[643]	validation_0-auc:0.99622
[644]	validation_0-auc:0.99622
[645]	validation_0-auc:0.99622
[646]	validation_0-auc:0.99622
[647]	validation_0-auc:0.99622
[648]	validation_0-auc:0.99622
[649]	validation_0-auc:0.99623
[650]	validation_0-auc:0.99623
[651]	validation_0-auc:0.99624
[652]	validation_0-auc:0.99624
[653]	validation_0-auc:0.99624
[654]	validation_0-auc:0.99624
[655]	validation_0-auc:0.99624
[656]	validation_0-auc:0.99624
[657]	validation_0-auc:0.99624
[658]	validation_0-auc:0.99624
[659]	validation_0-auc:0.99625
[660]	validation_0-auc:0.99625
[661]	validation_0-auc:0.99625
[662]	validation_0-auc:0.99625
[663]	va

[231]	validation_0-auc:0.99479
[232]	validation_0-auc:0.99483
[233]	validation_0-auc:0.99483
[234]	validation_0-auc:0.99483
[235]	validation_0-auc:0.99483
[236]	validation_0-auc:0.99484
[237]	validation_0-auc:0.99485
[238]	validation_0-auc:0.99486
[239]	validation_0-auc:0.99489
[240]	validation_0-auc:0.99490
[241]	validation_0-auc:0.99495
[242]	validation_0-auc:0.99498
[243]	validation_0-auc:0.99506
[244]	validation_0-auc:0.99510
[245]	validation_0-auc:0.99510
[246]	validation_0-auc:0.99511
[247]	validation_0-auc:0.99512
[248]	validation_0-auc:0.99512
[249]	validation_0-auc:0.99513
[250]	validation_0-auc:0.99513
[251]	validation_0-auc:0.99513
[252]	validation_0-auc:0.99513
[253]	validation_0-auc:0.99514
[254]	validation_0-auc:0.99513
[255]	validation_0-auc:0.99515
[256]	validation_0-auc:0.99513
[257]	validation_0-auc:0.99513
[258]	validation_0-auc:0.99514
[259]	validation_0-auc:0.99514
[260]	validation_0-auc:0.99514
[261]	validation_0-auc:0.99514
[262]	validation_0-auc:0.99514
[263]	va

[496]	validation_0-auc:0.99637
[497]	validation_0-auc:0.99638
[498]	validation_0-auc:0.99639
[499]	validation_0-auc:0.99640
[500]	validation_0-auc:0.99640
[501]	validation_0-auc:0.99640
[502]	validation_0-auc:0.99640
[503]	validation_0-auc:0.99640
[504]	validation_0-auc:0.99640
[505]	validation_0-auc:0.99640
[506]	validation_0-auc:0.99640
[507]	validation_0-auc:0.99640
[508]	validation_0-auc:0.99643
[509]	validation_0-auc:0.99643
[510]	validation_0-auc:0.99644
[511]	validation_0-auc:0.99643
[512]	validation_0-auc:0.99643
[513]	validation_0-auc:0.99642
[514]	validation_0-auc:0.99642
[515]	validation_0-auc:0.99643
[516]	validation_0-auc:0.99643
[517]	validation_0-auc:0.99642
[518]	validation_0-auc:0.99643
[519]	validation_0-auc:0.99643
[520]	validation_0-auc:0.99643
[521]	validation_0-auc:0.99642
[522]	validation_0-auc:0.99643
[523]	validation_0-auc:0.99641
[524]	validation_0-auc:0.99641
[525]	validation_0-auc:0.99641
[526]	validation_0-auc:0.99641
[527]	validation_0-auc:0.99640
[528]	va

[17]	validation_0-auc:0.96233
[18]	validation_0-auc:0.96305
[19]	validation_0-auc:0.96400
[20]	validation_0-auc:0.96442
[21]	validation_0-auc:0.96516
[22]	validation_0-auc:0.96840
[23]	validation_0-auc:0.96855
[24]	validation_0-auc:0.96996
[25]	validation_0-auc:0.97210
[26]	validation_0-auc:0.97259
[27]	validation_0-auc:0.97273
[28]	validation_0-auc:0.97323
[29]	validation_0-auc:0.97422
[30]	validation_0-auc:0.97539
[31]	validation_0-auc:0.97550
[32]	validation_0-auc:0.97642
[33]	validation_0-auc:0.97660
[34]	validation_0-auc:0.97715
[35]	validation_0-auc:0.97720
[36]	validation_0-auc:0.97788
[37]	validation_0-auc:0.97874
[38]	validation_0-auc:0.97922
[39]	validation_0-auc:0.97959
[40]	validation_0-auc:0.97966
[41]	validation_0-auc:0.97969
[42]	validation_0-auc:0.98040
[43]	validation_0-auc:0.98154
[44]	validation_0-auc:0.98160
[45]	validation_0-auc:0.98177
[46]	validation_0-auc:0.98181
[47]	validation_0-auc:0.98235
[48]	validation_0-auc:0.98241
[49]	validation_0-auc:0.98260
[50]	valid

[284]	validation_0-auc:0.99552
[285]	validation_0-auc:0.99553
[286]	validation_0-auc:0.99554
[287]	validation_0-auc:0.99554
[288]	validation_0-auc:0.99554
[289]	validation_0-auc:0.99554
[290]	validation_0-auc:0.99555
[291]	validation_0-auc:0.99555
[292]	validation_0-auc:0.99555
[293]	validation_0-auc:0.99555
[294]	validation_0-auc:0.99555
[295]	validation_0-auc:0.99555
[296]	validation_0-auc:0.99555
[297]	validation_0-auc:0.99556
[298]	validation_0-auc:0.99556
[299]	validation_0-auc:0.99556
[300]	validation_0-auc:0.99557
[301]	validation_0-auc:0.99560
[302]	validation_0-auc:0.99560
[303]	validation_0-auc:0.99559
[304]	validation_0-auc:0.99561
[305]	validation_0-auc:0.99561
[306]	validation_0-auc:0.99562
[307]	validation_0-auc:0.99563
[308]	validation_0-auc:0.99564
[309]	validation_0-auc:0.99564
[310]	validation_0-auc:0.99564
[311]	validation_0-auc:0.99564
[312]	validation_0-auc:0.99561
[313]	validation_0-auc:0.99563
[314]	validation_0-auc:0.99565
[315]	validation_0-auc:0.99564
[316]	va

[549]	validation_0-auc:0.99643
[550]	validation_0-auc:0.99643
[551]	validation_0-auc:0.99644
[552]	validation_0-auc:0.99644
[553]	validation_0-auc:0.99644
[554]	validation_0-auc:0.99644
[555]	validation_0-auc:0.99644
[556]	validation_0-auc:0.99644
[557]	validation_0-auc:0.99644
[558]	validation_0-auc:0.99644
[559]	validation_0-auc:0.99646
[560]	validation_0-auc:0.99647
[561]	validation_0-auc:0.99647
[562]	validation_0-auc:0.99647
[563]	validation_0-auc:0.99647
[564]	validation_0-auc:0.99648
[565]	validation_0-auc:0.99649
[566]	validation_0-auc:0.99650
[567]	validation_0-auc:0.99650
[568]	validation_0-auc:0.99649
[569]	validation_0-auc:0.99649
[570]	validation_0-auc:0.99650
[571]	validation_0-auc:0.99652
[572]	validation_0-auc:0.99653
[573]	validation_0-auc:0.99653
[574]	validation_0-auc:0.99654
[575]	validation_0-auc:0.99654
[576]	validation_0-auc:0.99654
[577]	validation_0-auc:0.99653
[578]	validation_0-auc:0.99653
[579]	validation_0-auc:0.99654
[580]	validation_0-auc:0.99654
[581]	va

[118]	validation_0-auc:0.99140
[119]	validation_0-auc:0.99141
[120]	validation_0-auc:0.99143
[121]	validation_0-auc:0.99144
[122]	validation_0-auc:0.99151
[123]	validation_0-auc:0.99151
[124]	validation_0-auc:0.99153
[125]	validation_0-auc:0.99165
[126]	validation_0-auc:0.99180
[127]	validation_0-auc:0.99191
[128]	validation_0-auc:0.99197
[129]	validation_0-auc:0.99198
[130]	validation_0-auc:0.99199
[131]	validation_0-auc:0.99200
[132]	validation_0-auc:0.99201
[133]	validation_0-auc:0.99204
[134]	validation_0-auc:0.99204
[135]	validation_0-auc:0.99207
[136]	validation_0-auc:0.99209
[137]	validation_0-auc:0.99214
[138]	validation_0-auc:0.99215
[139]	validation_0-auc:0.99215
[140]	validation_0-auc:0.99224
[141]	validation_0-auc:0.99224
[142]	validation_0-auc:0.99227
[143]	validation_0-auc:0.99228
[144]	validation_0-auc:0.99230
[145]	validation_0-auc:0.99247
[146]	validation_0-auc:0.99248
[147]	validation_0-auc:0.99263
[148]	validation_0-auc:0.99269
[149]	validation_0-auc:0.99270
[150]	va

[383]	validation_0-auc:0.99569
[384]	validation_0-auc:0.99569
[385]	validation_0-auc:0.99569
[386]	validation_0-auc:0.99569
[387]	validation_0-auc:0.99571
[388]	validation_0-auc:0.99571
[389]	validation_0-auc:0.99573
[390]	validation_0-auc:0.99573
[391]	validation_0-auc:0.99573
[392]	validation_0-auc:0.99574
[393]	validation_0-auc:0.99573
[394]	validation_0-auc:0.99574
[395]	validation_0-auc:0.99573
[396]	validation_0-auc:0.99574
[397]	validation_0-auc:0.99574
[398]	validation_0-auc:0.99574
[399]	validation_0-auc:0.99574
[400]	validation_0-auc:0.99574
[401]	validation_0-auc:0.99574
[402]	validation_0-auc:0.99574
[403]	validation_0-auc:0.99574
[404]	validation_0-auc:0.99574
[405]	validation_0-auc:0.99575
[406]	validation_0-auc:0.99576
[407]	validation_0-auc:0.99576
[408]	validation_0-auc:0.99575
[409]	validation_0-auc:0.99576
[410]	validation_0-auc:0.99576
[411]	validation_0-auc:0.99577
[412]	validation_0-auc:0.99577
[413]	validation_0-auc:0.99577
[414]	validation_0-auc:0.99577
[415]	va

[648]	validation_0-auc:0.99610
[649]	validation_0-auc:0.99610
[650]	validation_0-auc:0.99611
[651]	validation_0-auc:0.99611
[652]	validation_0-auc:0.99611
[653]	validation_0-auc:0.99611
[654]	validation_0-auc:0.99612
[655]	validation_0-auc:0.99612
[656]	validation_0-auc:0.99613
[657]	validation_0-auc:0.99613
[658]	validation_0-auc:0.99613
[659]	validation_0-auc:0.99613
[660]	validation_0-auc:0.99613
[661]	validation_0-auc:0.99613
[662]	validation_0-auc:0.99613
[663]	validation_0-auc:0.99613
[664]	validation_0-auc:0.99613
[665]	validation_0-auc:0.99613
[666]	validation_0-auc:0.99613
[667]	validation_0-auc:0.99614
[668]	validation_0-auc:0.99614
[669]	validation_0-auc:0.99614
[670]	validation_0-auc:0.99614
[671]	validation_0-auc:0.99614
[672]	validation_0-auc:0.99614
[673]	validation_0-auc:0.99614
[674]	validation_0-auc:0.99614
[675]	validation_0-auc:0.99614
[676]	validation_0-auc:0.99615
[677]	validation_0-auc:0.99615
[678]	validation_0-auc:0.99614
[679]	validation_0-auc:0.99614
[680]	va

[42]	validation_0-auc:0.98125
[43]	validation_0-auc:0.98141
[44]	validation_0-auc:0.98154
[45]	validation_0-auc:0.98179
[46]	validation_0-auc:0.98215
[47]	validation_0-auc:0.98222
[48]	validation_0-auc:0.98227
[49]	validation_0-auc:0.98269
[50]	validation_0-auc:0.98286
[51]	validation_0-auc:0.98291
[52]	validation_0-auc:0.98406
[53]	validation_0-auc:0.98411
[54]	validation_0-auc:0.98425
[55]	validation_0-auc:0.98479
[56]	validation_0-auc:0.98525
[57]	validation_0-auc:0.98534
[58]	validation_0-auc:0.98545
[59]	validation_0-auc:0.98551
[60]	validation_0-auc:0.98563
[61]	validation_0-auc:0.98571
[62]	validation_0-auc:0.98575
[63]	validation_0-auc:0.98581
[64]	validation_0-auc:0.98635
[65]	validation_0-auc:0.98639
[66]	validation_0-auc:0.98664
[67]	validation_0-auc:0.98683
[68]	validation_0-auc:0.98686
[69]	validation_0-auc:0.98703
[70]	validation_0-auc:0.98705
[71]	validation_0-auc:0.98729
[72]	validation_0-auc:0.98771
[73]	validation_0-auc:0.98797
[74]	validation_0-auc:0.98800
[75]	valid

[309]	validation_0-auc:0.99504
[310]	validation_0-auc:0.99504
[311]	validation_0-auc:0.99504
[312]	validation_0-auc:0.99505
[313]	validation_0-auc:0.99505
[314]	validation_0-auc:0.99507
[315]	validation_0-auc:0.99507
[316]	validation_0-auc:0.99507
[317]	validation_0-auc:0.99508
[318]	validation_0-auc:0.99508
[319]	validation_0-auc:0.99508
[320]	validation_0-auc:0.99514
[321]	validation_0-auc:0.99522
[322]	validation_0-auc:0.99527
[323]	validation_0-auc:0.99530
[324]	validation_0-auc:0.99535
[325]	validation_0-auc:0.99534
[326]	validation_0-auc:0.99534
[327]	validation_0-auc:0.99535
[328]	validation_0-auc:0.99537
[329]	validation_0-auc:0.99538
[330]	validation_0-auc:0.99538
[331]	validation_0-auc:0.99538
[332]	validation_0-auc:0.99538
[333]	validation_0-auc:0.99538
[334]	validation_0-auc:0.99543
[335]	validation_0-auc:0.99546
[336]	validation_0-auc:0.99550
[337]	validation_0-auc:0.99550
[338]	validation_0-auc:0.99553
[339]	validation_0-auc:0.99554
[340]	validation_0-auc:0.99554
[341]	va

[574]	validation_0-auc:0.99607
[575]	validation_0-auc:0.99607
[576]	validation_0-auc:0.99606
[577]	validation_0-auc:0.99606
[578]	validation_0-auc:0.99606
[579]	validation_0-auc:0.99606
[580]	validation_0-auc:0.99606
[581]	validation_0-auc:0.99607
[582]	validation_0-auc:0.99607
[583]	validation_0-auc:0.99606
[584]	validation_0-auc:0.99606
[585]	validation_0-auc:0.99606
[586]	validation_0-auc:0.99607
[587]	validation_0-auc:0.99607
[588]	validation_0-auc:0.99607
[589]	validation_0-auc:0.99607
[590]	validation_0-auc:0.99607
[591]	validation_0-auc:0.99608
[592]	validation_0-auc:0.99608
[593]	validation_0-auc:0.99608
[594]	validation_0-auc:0.99607
[595]	validation_0-auc:0.99607
[596]	validation_0-auc:0.99608
[597]	validation_0-auc:0.99608
[598]	validation_0-auc:0.99608
[599]	validation_0-auc:0.99608
[600]	validation_0-auc:0.99608
[601]	validation_0-auc:0.99608
[602]	validation_0-auc:0.99608
[603]	validation_0-auc:0.99608
[604]	validation_0-auc:0.99608
[605]	validation_0-auc:0.99608
[606]	va

[XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
               gamma=0, gpu_id=-1, importance_type=None,
               interaction_constraints='', learning_rate=0.3, max_delta_step=0,
               max_depth=6, min_child_weight=0.0, missing=nan,
               monotone_constraints='()', n_estimators=724, n_jobs=4,
               num_parallel_tree=1, predictor='auto', random_state=123,
               reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
               tree_method='exact', use_label_encoder=False,
               validate_parameters=1, verbosity=None),
 XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
               gamma=0, gpu_id=-1, importance_type=None,
               interaction_constraints='', learning_rate=0.3, max_delta_step=0,
               max_depth=6, 

In [14]:
# `clfs` is already bound by the training cell (`clfs = train_xgboost(...)`).
# Rebinding it from IPython's `_` (the last displayed output) only works when that
# output happens to be the model list; on Restart & Run All `_` holds something else
# and the fold models would be overwritten. Validate instead of rebinding.
assert isinstance(clfs, list) and len(clfs) > 0, "run the training cell first"

In [15]:
# Show the fitted fold models (the repr includes the refitted `n_estimators` per fold).
clfs

[XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
               gamma=0, gpu_id=-1, importance_type=None,
               interaction_constraints='', learning_rate=0.3, max_delta_step=0,
               max_depth=6, min_child_weight=0.0, missing=nan,
               monotone_constraints='()', n_estimators=724, n_jobs=4,
               num_parallel_tree=1, predictor='auto', random_state=123,
               reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
               tree_method='exact', use_label_encoder=False,
               validate_parameters=1, verbosity=None),
 XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
               colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
               gamma=0, gpu_id=-1, importance_type=None,
               interaction_constraints='', learning_rate=0.3, max_delta_step=0,
               max_depth=6, 

In [18]:
# Sanity check on the test feature matrix: (rows, feature columns).
X_valid.shape

(700000, 49)

In [20]:
# Score the holdout split with every fold model and persist each model to disk.
# One probability column per fold, named by the fold index as a string ('0', '1', ...);
# later cells select the columns by those names.
pd_holdout = pd.DataFrame()
pd_holdout['target'] = y_holdout
for isplit, clf in enumerate(clfs):
    # '.json', not '.jason': XGBoost chooses the serialisation format from the file
    # extension, so the typo meant the models were not written as JSON.
    clf.save_model(f'xgboost_model_{isplit}.json')
    # Column 1 of predict_proba is the probability of the positive class.
    pd_holdout[f'{isplit}'] = clf.predict_proba(X_holdout)[:, 1]

In [78]:
# Inspect the per-fold holdout probabilities next to the true target.
pd_holdout

Unnamed: 0,target,0,1,2,3,4
0,1,9.981301e-01,9.994736e-01,9.998108e-01,9.992404e-01,9.991647e-01
1,0,2.463605e-13,5.056467e-12,2.153519e-11,3.594727e-12,3.941856e-13
2,1,9.997821e-01,9.983550e-01,9.999689e-01,9.997231e-01,9.998863e-01
3,1,9.405545e-01,7.908471e-01,7.948405e-01,8.422685e-01,8.670253e-01
4,1,9.961311e-01,9.985809e-01,9.978264e-01,9.945808e-01,9.991590e-01
...,...,...,...,...,...,...
89995,0,2.091083e-02,1.243336e-01,5.927117e-02,3.214750e-02,7.120175e-02
89996,1,9.992112e-01,9.963323e-01,9.987859e-01,9.983611e-01,9.986625e-01
89997,0,1.163625e-05,7.199454e-07,6.471937e-06,5.103946e-06,1.222858e-06
89998,1,5.346077e-03,5.313155e-03,8.368831e-03,1.185667e-02,1.582110e-02


In [79]:
# Simple ensemble: unweighted mean of the five per-fold probabilities.
# NOTE(review): this adds a column to `pd_holdout` in place, so any cell run afterwards
# that uses `pd_holdout.drop(columns=['target'])` as features also picks up
# `xgboost_prob` (see the logistic-regression cells; their execution counts are lower).
pd_holdout['xgboost_prob'] = pd_holdout[['0','1','2','3','4']].mean(axis=1)

In [80]:
# Inspect the holdout frame again, now including the averaged `xgboost_prob` column.
pd_holdout

Unnamed: 0,target,0,1,2,3,4,xgboost_prob
0,1,9.981301e-01,9.994736e-01,9.998108e-01,9.992404e-01,9.991647e-01,9.991639e-01
1,0,2.463605e-13,5.056467e-12,2.153519e-11,3.594727e-12,3.941856e-13,6.165386e-12
2,1,9.997821e-01,9.983550e-01,9.999689e-01,9.997231e-01,9.998863e-01,9.995431e-01
3,1,9.405545e-01,7.908471e-01,7.948405e-01,8.422685e-01,8.670253e-01,8.471072e-01
4,1,9.961311e-01,9.985809e-01,9.978264e-01,9.945808e-01,9.991590e-01,9.972556e-01
...,...,...,...,...,...,...,...
89995,0,2.091083e-02,1.243336e-01,5.927117e-02,3.214750e-02,7.120175e-02,6.157296e-02
89996,1,9.992112e-01,9.963323e-01,9.987859e-01,9.983611e-01,9.986625e-01,9.982706e-01
89997,0,1.163625e-05,7.199454e-07,6.471937e-06,5.103946e-06,1.222858e-06,5.030987e-06
89998,1,5.346077e-03,5.313155e-03,8.368831e-03,1.185667e-02,1.582110e-02,9.341167e-03


In [81]:
# Persist target, per-fold probabilities and their mean for the holdout rows
# (row index is not written).
pd_holdout.to_csv('xgboost_holdout.csv',index=False)

In [24]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

In [59]:
# Fit a logistic-regression blender on the holdout frame: every column except
# 'target' is used as a feature. The grid holds a single candidate (C=100), so
# GridSearchCV only cross-validates that one setting before refitting.
# NOTE(review): this cell's execution count (59) precedes the cell that adds
# 'xgboost_prob' (79), and the recorded coef_ has 5 entries. On a top-to-bottom
# run the averaged column would be included as a sixth feature here — confirm
# which feature set is intended.
params = dict(C=[1e+2])
gclf = GridSearchCV(estimator=LogisticRegression(), param_grid=params)
gclf.fit(
    pd_holdout.drop(columns=['target']).values,
    pd_holdout['target'].values,
)

GridSearchCV(estimator=LogisticRegression(), param_grid={'C': [100.0]})

In [60]:
# Best hyper-parameters chosen by the grid search (the grid holds a single C value).
gclf.best_params_

{'C': 100.0}

In [61]:
# Coefficients of the refit logistic-regression blender, one per input feature column.
gclf.best_estimator_.coef_

array([[2.20486323, 2.04525043, 1.94556462, 1.67723271, 1.64406094]])

In [62]:
from sklearn.metrics import roc_auc_score

In [63]:
# ROC AUC of the blender's positive-class probability against the holdout labels.
# NOTE(review): these are the same rows gclf was fit on, so this is an
# in-sample score rather than an out-of-sample estimate.
roc_auc_score(
    y_true=pd_holdout['target'].values,
    y_score=gclf.predict_proba(pd_holdout.drop(columns=['target']).values)[:, 1],
)

0.996471229686484

In [74]:
# Score the test-set features with each of the five fold models and keep the
# positive-class probability (column 1 of predict_proba) under the fold's
# number as a string column name.
pd_valid = pd.DataFrame()
for isplit in range(5):
    clf = clfs[isplit]
    pd_valid[str(isplit)] = clf.predict_proba(X_valid)[:, 1]

In [75]:
# Inspect the per-fold predicted probabilities for the test set.
pd_valid

Unnamed: 0,0,1,2,3,4
0,9.999982e-01,9.999952e-01,9.999976e-01,9.999998e-01,9.999982e-01
1,9.998547e-01,9.996252e-01,9.998382e-01,9.998249e-01,9.998053e-01
2,6.889388e-13,4.669355e-12,8.870184e-11,1.000568e-12,3.132794e-12
3,2.101903e-04,6.217847e-04,4.240502e-04,3.268347e-04,1.051252e-04
4,9.997417e-01,9.990722e-01,9.989582e-01,9.986223e-01,9.997450e-01
...,...,...,...,...,...
699995,7.955589e-01,6.853474e-01,5.630970e-01,6.886402e-01,4.319434e-01
699996,9.997144e-01,9.991259e-01,9.994416e-01,9.992867e-01,9.994414e-01
699997,1.375818e-01,3.400037e-01,1.655561e-02,1.130546e-02,1.321191e-01
699998,1.708009e-05,5.609909e-05,1.817060e-04,1.775464e-04,3.804470e-06


In [76]:
# Final prediction = unweighted mean of the five fold probabilities per row.
pd_valid['target'] = np.mean(pd_valid[list('01234')].values, axis=1)
# Column assignment aligns on index labels; presumably both frames carry the
# default 0..n-1 index so ids line up row-for-row — verify if either is re-indexed.
pd_valid['id'] = valid_series['id']
pd_valid

Unnamed: 0,0,1,2,3,4,target,id
0,9.999982e-01,9.999952e-01,9.999976e-01,9.999998e-01,9.999982e-01,9.999978e-01,900000
1,9.998547e-01,9.996252e-01,9.998382e-01,9.998249e-01,9.998053e-01,9.997897e-01,900001
2,6.889388e-13,4.669355e-12,8.870184e-11,1.000568e-12,3.132794e-12,1.963870e-11,900002
3,2.101903e-04,6.217847e-04,4.240502e-04,3.268347e-04,1.051252e-04,3.375970e-04,900003
4,9.997417e-01,9.990722e-01,9.989582e-01,9.986223e-01,9.997450e-01,9.992279e-01,900004
...,...,...,...,...,...,...,...
699995,7.955589e-01,6.853474e-01,5.630970e-01,6.886402e-01,4.319434e-01,6.329173e-01,1599995
699996,9.997144e-01,9.991259e-01,9.994416e-01,9.992867e-01,9.994414e-01,9.994020e-01,1599996
699997,1.375818e-01,3.400037e-01,1.655561e-02,1.130546e-02,1.321191e-01,1.275131e-01,1599997
699998,1.708009e-05,5.609909e-05,1.817060e-04,1.775464e-04,3.804470e-06,8.724722e-05,1599998


In [77]:
# Write the submission file with just the id and blended target, without the row index.
pd_valid.to_csv('xgboost_submit_blend.csv', columns=['id', 'target'], index=False)