In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from logging import getLogger, StreamHandler, DEBUG, Formatter, FileHandler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, ParameterGrid
from sklearn.metrics import log_loss, roc_auc_score, roc_curve, auc
import xgboost as xgb

In [2]:
TRAIN_DATA = 'D:/ml/kaggle/Porto Seguro’s Safe Driver Prediction/train.csv'
TEST_DATA = 'D:/ml/kaggle/Porto Seguro’s Safe Driver Prediction/test.csv'

In [3]:
logger = getLogger(__name__)

In [4]:
def read_csv(path):
    logger.debug('enter')
    df = pd.read_csv(path, engine='python')
    logger.debug('exit')
    return df

In [5]:
def load_train_data():
    logger.debug('enter')
    df = read_csv(TRAIN_DATA)
    logger.debug('exit')
    return df

In [6]:
def load_test_data():
    logger.debug('enter')
    df = read_csv(TEST_DATA)
    logger.debug('exit')
    return df

In [7]:
def gini(y, pred):
    fpr, tpr, thr = roc_curve(y, pred, pos_label=1)
    g = 2 * auc(fpr, tpr) - 1
    return g

In [8]:
def gini_xgb(pred, y):
    y = y.get_label()
    return 'gini', -gini(y, pred)

In [9]:
print(load_train_data().head())
print(load_test_data().head())

   id  target  ps_ind_01  ps_ind_02_cat  ps_ind_03  ps_ind_04_cat  \
0   7       0          2              2          5              1   
1   9       0          1              1          7              0   
2  13       0          5              4          9              1   
3  16       0          0              1          2              0   
4  17       0          0              2          0              1   

   ps_ind_05_cat  ps_ind_06_bin  ps_ind_07_bin  ps_ind_08_bin       ...        \
0              0              0              1              0       ...         
1              0              0              0              1       ...         
2              0              0              0              1       ...         
3              0              1              0              0       ...         
4              0              1              0              0       ...         

   ps_calc_11  ps_calc_12  ps_calc_13  ps_calc_14  ps_calc_15_bin  \
0           9           1    

In [10]:
DIR = 'result_tmp/'
SAMPLE_SUBMIT_FILE = 'D:/ml/kaggle/Porto Seguro’s Safe Driver Prediction/sample_submission.csv'

In [11]:
log_fmt = Formatter('%(asctime)s %(name)s %(lineno)d [%(levelname)s][%(funcName)s] %(message)s')
handler = StreamHandler()
handler.setLevel('INFO')
handler.setFormatter(log_fmt)
logger.addHandler(handler)

handler = FileHandler(DIR + 'train.ipynb.log', 'a')
handler.setLevel(DEBUG)
handler.setFormatter(log_fmt)
logger.setLevel(DEBUG)

In [51]:
logger.info('start')

df = load_train_data()

cols_binary = [
    #'ps_ind_06_bin', 'ps_ind_07_bin', 'ps_ind_08_bin', 'ps_ind_09_bin', 'ps_ind_11_bin', 
    #'ps_ind_12_bin', 'ps_ind_13_bin', 'ps_ind_16_bin', 'ps_ind_17_bin', 'ps_ind_18_bin',
    'ps_calc_15_bin', 'ps_calc_16_bin', 'ps_calc_17_bin', 'ps_calc_18_bin',
    'ps_calc_19_bin', 'ps_calc_20_bin',
    ]
cols_category = [
    'ps_ind_01', 'ps_ind_14', 'ps_ind_15', 'ps_reg_01', 'ps_reg_02', 'ps_car_15',
    'ps_ind_03', 'ps_car_11',
    'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat',
    'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat', 'ps_car_04_cat', 'ps_car_05_cat', 
    'ps_car_06_cat', 'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat', 'ps_car_10_cat', 
    'ps_car_11_cat',
    ]
cols_numeric = [
    #'ps_ind_01', 'ps_ind_03', 'ps_ind_14', 'ps_ind_15',
    #'ps_reg_01', 'ps_reg_02', 'ps_reg_03',
    #'ps_car_11', 'ps_car_12', 'ps_car_13', 'ps_car_14', 'ps_car_15',
    'ps_calc_01', 'ps_calc_02', 'ps_calc_03', 'ps_calc_04', 'ps_calc_05', 'ps_calc_06', 
    'ps_calc_07', 'ps_calc_08', 'ps_calc_09', 'ps_calc_10', 'ps_calc_11', 'ps_calc_12', 
    'ps_calc_13', 'ps_calc_14',
    ]
cols_id = [
    'id',
]
cols_target = [
    'target'
]

drop_cols = cols_binary
drop_cols.extend(cols_numeric)
drop_cols.extend(cols_target)
drop_cols.extend(cols_id)
print(drop_cols)


x_train = df.drop(drop_cols, axis = 1)
y_train = df['target'].values

drop_cols.pop()
print(drop_cols)

use_cols = df.drop(drop_cols, axis = 1).columns.values
#use_cols = x_train.columns.values

logger.debug('train columns: {}'.format(use_cols.shape, use_cols))

logger.info('data preparation end {}'.format(x_train.shape))

2019-02-06 12:58:19,895 __main__ 1 [INFO][<module>] start


['ps_calc_15_bin', 'ps_calc_16_bin', 'ps_calc_17_bin', 'ps_calc_18_bin', 'ps_calc_19_bin', 'ps_calc_20_bin', 'ps_calc_01', 'ps_calc_02', 'ps_calc_03', 'ps_calc_04', 'ps_calc_05', 'ps_calc_06', 'ps_calc_07', 'ps_calc_08', 'ps_calc_09', 'ps_calc_10', 'ps_calc_11', 'ps_calc_12', 'ps_calc_13', 'ps_calc_14', 'target', 'id']
['ps_calc_15_bin', 'ps_calc_16_bin', 'ps_calc_17_bin', 'ps_calc_18_bin', 'ps_calc_19_bin', 'ps_calc_20_bin', 'ps_calc_01', 'ps_calc_02', 'ps_calc_03', 'ps_calc_04', 'ps_calc_05', 'ps_calc_06', 'ps_calc_07', 'ps_calc_08', 'ps_calc_09', 'ps_calc_10', 'ps_calc_11', 'ps_calc_12', 'ps_calc_13', 'ps_calc_14', 'target']


2019-02-06 12:58:45,400 __main__ 52 [INFO][<module>] data preparation end (595212, 37)


In [52]:
print(x_train.head())
print(y_train)

   ps_ind_01  ps_ind_02_cat  ps_ind_03  ps_ind_04_cat  ps_ind_05_cat  \
0          2              2          5              1              0   
1          1              1          7              0              0   
2          5              4          9              1              0   
3          0              1          2              0              0   
4          0              2          0              1              0   

   ps_ind_06_bin  ps_ind_07_bin  ps_ind_08_bin  ps_ind_09_bin  ps_ind_10_bin  \
0              0              1              0              0              0   
1              0              0              1              0              0   
2              0              0              1              0              0   
3              1              0              0              0              0   
4              1              0              0              0              0   

     ...      ps_car_07_cat  ps_car_08_cat  ps_car_09_cat  ps_car_10_cat  \
0    ...  

In [53]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

all_params = {'max_depth': [5],
              'learning_rate':[0.1],
              'min_child_weight':[5],
              'n_estimators':[1000],
              'colsample_bytree': [0.9],
              'colsample_bylevel':[0.9],
              'reg_alpha':[0.1],
              'max_delta_step':[0.1],
              'n_jobs':[-1],
              'seed':[0]}

min_score = 100
min_params = None

for params in tqdm(list(ParameterGrid(all_params))):
    logger.info('params: {}'.format(params))
    
    list_gini_score = []
    list_logloss_score = []
    list_best_iterations = []

    for train_idx, valid_idx in cv.split(x_train, y_train):
        trn_x = x_train.iloc[train_idx, :]
        val_x = x_train.iloc[valid_idx, :]
    
        trn_y = y_train[train_idx]
        val_y = y_train[valid_idx]
    
        clf = xgb.sklearn.XGBClassifier(**params)
        clf.fit(trn_x, trn_y, eval_set=[(val_x, val_y)], early_stopping_rounds=100, eval_metric=gini_xgb)
    
        pred = clf.predict_proba(val_x, ntree_limit=clf.best_iteration)[:, 1]
        sc_logloss = log_loss(val_y, pred)
        sc_gini = - gini(val_y, pred)
    
        list_logloss_score.append(sc_logloss)
        list_gini_score.append(sc_gini)
        list_best_iterations.append(clf.best_iteration)
        logger.debug('logloss: {}, gini: {}'.format(sc_logloss, sc_gini))
        #break
    params['n_estimators'] = int(np.mean(list_best_iterations))
    sc_logloss = np.mean(list_logloss_score)
    sc_gini = np.mean(list_gini_score)
    logger.info('logloss: {}, gini: {}'.format(sc_logloss, sc_gini))
    if min_score > sc_gini:
        min_score = sc_gini
        min_params = params
    logger.info('logloss: {}, gini: {}'.format(sc_logloss, sc_gini))
    logger.info('current min score: {}, params: {}'.format(min_score, min_params))
        
logger.info('minimum params: {}'.format(min_params))
logger.info('minimum gini: {}'.format(min_score))


clf = xgb.sklearn.XGBClassifier(**min_params)
clf.fit(x_train, y_train)

logger.info('train end')

  0%|                                                    | 0/1 [00:00<?, ?it/s]2019-02-06 12:58:45,426 __main__ 18 [INFO][<module>] params: {'colsample_bylevel': 0.9, 'n_estimators': 1000, 'max_delta_step': 0.1, 'colsample_bytree': 0.9, 'n_jobs': -1, 'seed': 0, 'learning_rate': 0.1, 'max_depth': 5, 'reg_alpha': 0.1, 'min_child_weight': 5}


[0]	validation_0-error:0.036449	validation_0-gini:-0
Multiple eval metrics have been passed: 'validation_0-gini' will be used for early stopping.

Will train until validation_0-gini hasn't improved in 100 rounds.
[1]	validation_0-error:0.036449	validation_0-gini:-0
[2]	validation_0-error:0.036449	validation_0-gini:-0
[3]	validation_0-error:0.036449	validation_0-gini:-0
[4]	validation_0-error:0.036449	validation_0-gini:-0
[5]	validation_0-error:0.036449	validation_0-gini:-0
[6]	validation_0-error:0.036449	validation_0-gini:-0
[7]	validation_0-error:0.036449	validation_0-gini:-0
[8]	validation_0-error:0.036449	validation_0-gini:-0
[9]	validation_0-error:0.036449	validation_0-gini:-0
[10]	validation_0-error:0.036449	validation_0-gini:-0
[11]	validation_0-error:0.036449	validation_0-gini:-0
[12]	validation_0-error:0.036449	validation_0-gini:-0
[13]	validation_0-error:0.036449	validation_0-gini:-0
[14]	validation_0-error:0.036449	validation_0-gini:-0
[15]	validation_0-error:0.036449	validat

[137]	validation_0-error:0.036449	validation_0-gini:-0.001774
[138]	validation_0-error:0.036449	validation_0-gini:-0.001774
[139]	validation_0-error:0.036449	validation_0-gini:-0.001774
[140]	validation_0-error:0.036449	validation_0-gini:-0.001774
[141]	validation_0-error:0.036449	validation_0-gini:-0.001774
[142]	validation_0-error:0.036457	validation_0-gini:-0.001774
[143]	validation_0-error:0.036449	validation_0-gini:-0.001774
[144]	validation_0-error:0.036457	validation_0-gini:-0.001757
[145]	validation_0-error:0.036457	validation_0-gini:-0.001757
[146]	validation_0-error:0.036457	validation_0-gini:-0.001757
[147]	validation_0-error:0.036457	validation_0-gini:-0.001757
[148]	validation_0-error:0.036457	validation_0-gini:-0.001757
[149]	validation_0-error:0.036457	validation_0-gini:-0.001757
[150]	validation_0-error:0.036457	validation_0-gini:-0.001757
[151]	validation_0-error:0.036457	validation_0-gini:-0.001757
[152]	validation_0-error:0.036457	validation_0-gini:-0.001757
[153]	va

[270]	validation_0-error:0.036466	validation_0-gini:-0.04642
[271]	validation_0-error:0.036466	validation_0-gini:-0.047407
[272]	validation_0-error:0.036466	validation_0-gini:-0.061512
[273]	validation_0-error:0.036466	validation_0-gini:-0.066224
[274]	validation_0-error:0.036466	validation_0-gini:-0.068883
[275]	validation_0-error:0.036466	validation_0-gini:-0.070427
[276]	validation_0-error:0.036466	validation_0-gini:-0.071578
[277]	validation_0-error:0.036466	validation_0-gini:-0.071951
[278]	validation_0-error:0.036466	validation_0-gini:-0.075425
[279]	validation_0-error:0.036466	validation_0-gini:-0.075138
[280]	validation_0-error:0.036466	validation_0-gini:-0.07517
[281]	validation_0-error:0.036466	validation_0-gini:-0.101898
[282]	validation_0-error:0.036466	validation_0-gini:-0.109191
[283]	validation_0-error:0.036466	validation_0-gini:-0.116223
[284]	validation_0-error:0.036466	validation_0-gini:-0.116025
[285]	validation_0-error:0.036466	validation_0-gini:-0.128042
[286]	vali

[403]	validation_0-error:0.036441	validation_0-gini:-0.278425
[404]	validation_0-error:0.036441	validation_0-gini:-0.27863
[405]	validation_0-error:0.036441	validation_0-gini:-0.278559
[406]	validation_0-error:0.036441	validation_0-gini:-0.278395
[407]	validation_0-error:0.036441	validation_0-gini:-0.278367
[408]	validation_0-error:0.036441	validation_0-gini:-0.278311
[409]	validation_0-error:0.036441	validation_0-gini:-0.2785
[410]	validation_0-error:0.036441	validation_0-gini:-0.278631
[411]	validation_0-error:0.036441	validation_0-gini:-0.278758
[412]	validation_0-error:0.036441	validation_0-gini:-0.2788
[413]	validation_0-error:0.036441	validation_0-gini:-0.278953
[414]	validation_0-error:0.036441	validation_0-gini:-0.278959
[415]	validation_0-error:0.036441	validation_0-gini:-0.279252
[416]	validation_0-error:0.036441	validation_0-gini:-0.279492
[417]	validation_0-error:0.036441	validation_0-gini:-0.279652
[418]	validation_0-error:0.036441	validation_0-gini:-0.279801
[419]	validat

[536]	validation_0-error:0.036432	validation_0-gini:-0.285818
[537]	validation_0-error:0.036432	validation_0-gini:-0.285784
[538]	validation_0-error:0.036432	validation_0-gini:-0.285791
[539]	validation_0-error:0.036432	validation_0-gini:-0.285804
[540]	validation_0-error:0.036432	validation_0-gini:-0.285883
[541]	validation_0-error:0.036432	validation_0-gini:-0.285888
[542]	validation_0-error:0.036432	validation_0-gini:-0.285865
[543]	validation_0-error:0.036432	validation_0-gini:-0.285863
[544]	validation_0-error:0.036432	validation_0-gini:-0.285865
[545]	validation_0-error:0.036432	validation_0-gini:-0.285839
[546]	validation_0-error:0.036432	validation_0-gini:-0.285858
[547]	validation_0-error:0.036432	validation_0-gini:-0.285859
[548]	validation_0-error:0.036432	validation_0-gini:-0.285832
[549]	validation_0-error:0.036432	validation_0-gini:-0.285853
[550]	validation_0-error:0.036432	validation_0-gini:-0.28587
[551]	validation_0-error:0.036432	validation_0-gini:-0.285914
[552]	val

[669]	validation_0-error:0.036432	validation_0-gini:-0.28697
[670]	validation_0-error:0.036432	validation_0-gini:-0.286962
[671]	validation_0-error:0.036432	validation_0-gini:-0.286949
[672]	validation_0-error:0.036432	validation_0-gini:-0.286894
[673]	validation_0-error:0.036432	validation_0-gini:-0.28691
[674]	validation_0-error:0.036432	validation_0-gini:-0.286906
[675]	validation_0-error:0.036432	validation_0-gini:-0.286903
[676]	validation_0-error:0.036432	validation_0-gini:-0.286908
[677]	validation_0-error:0.036432	validation_0-gini:-0.286882
[678]	validation_0-error:0.036432	validation_0-gini:-0.286874
[679]	validation_0-error:0.036432	validation_0-gini:-0.286853
[680]	validation_0-error:0.036432	validation_0-gini:-0.286853
[681]	validation_0-error:0.036432	validation_0-gini:-0.286853
[682]	validation_0-error:0.036432	validation_0-gini:-0.286836
[683]	validation_0-error:0.036432	validation_0-gini:-0.286822
[684]	validation_0-error:0.036432	validation_0-gini:-0.286816
[685]	vali

[89]	validation_0-error:0.036449	validation_0-gini:-0.001748
[90]	validation_0-error:0.036449	validation_0-gini:-0.001748
[91]	validation_0-error:0.036449	validation_0-gini:-0.001748
[92]	validation_0-error:0.036449	validation_0-gini:-0.001748
[93]	validation_0-error:0.036449	validation_0-gini:-0.001748
[94]	validation_0-error:0.036466	validation_0-gini:-0.001748
[95]	validation_0-error:0.036466	validation_0-gini:-0.001748
[96]	validation_0-error:0.036466	validation_0-gini:-0.001748
[97]	validation_0-error:0.036466	validation_0-gini:-0.001748
[98]	validation_0-error:0.036466	validation_0-gini:-0.001748
[99]	validation_0-error:0.036466	validation_0-gini:-0.001748
[100]	validation_0-error:0.036466	validation_0-gini:-0.001748
[101]	validation_0-error:0.036466	validation_0-gini:-0.001748
[102]	validation_0-error:0.036466	validation_0-gini:-0.001748
[103]	validation_0-error:0.036466	validation_0-gini:-0.001748
[104]	validation_0-error:0.036474	validation_0-gini:-0.001748
[105]	validation_0-

[222]	validation_0-error:0.036457	validation_0-gini:-0.003191
[223]	validation_0-error:0.036457	validation_0-gini:-0.003191
[224]	validation_0-error:0.036457	validation_0-gini:-0.003191
[225]	validation_0-error:0.036457	validation_0-gini:-0.003191
[226]	validation_0-error:0.036457	validation_0-gini:-0.003191
[227]	validation_0-error:0.036457	validation_0-gini:-0.003191
[228]	validation_0-error:0.036457	validation_0-gini:-0.003174
[229]	validation_0-error:0.036457	validation_0-gini:-0.003174
[230]	validation_0-error:0.036457	validation_0-gini:-0.003174
[231]	validation_0-error:0.036457	validation_0-gini:-0.003174
[232]	validation_0-error:0.036457	validation_0-gini:-0.003174
[233]	validation_0-error:0.036457	validation_0-gini:-0.003174
[234]	validation_0-error:0.036457	validation_0-gini:-0.003165
[235]	validation_0-error:0.036457	validation_0-gini:-0.003165
[236]	validation_0-error:0.036457	validation_0-gini:-0.003165
[237]	validation_0-error:0.036457	validation_0-gini:-0.003165
[238]	va

[355]	validation_0-error:0.036474	validation_0-gini:-0.262019
[356]	validation_0-error:0.036474	validation_0-gini:-0.263025
[357]	validation_0-error:0.036474	validation_0-gini:-0.262594
[358]	validation_0-error:0.036474	validation_0-gini:-0.263391
[359]	validation_0-error:0.036474	validation_0-gini:-0.26398
[360]	validation_0-error:0.036474	validation_0-gini:-0.264483
[361]	validation_0-error:0.036474	validation_0-gini:-0.265114
[362]	validation_0-error:0.036474	validation_0-gini:-0.265154
[363]	validation_0-error:0.036474	validation_0-gini:-0.265553
[364]	validation_0-error:0.036474	validation_0-gini:-0.265792
[365]	validation_0-error:0.036474	validation_0-gini:-0.266197
[366]	validation_0-error:0.036474	validation_0-gini:-0.266227
[367]	validation_0-error:0.036474	validation_0-gini:-0.266062
[368]	validation_0-error:0.036466	validation_0-gini:-0.266144
[369]	validation_0-error:0.036466	validation_0-gini:-0.266298
[370]	validation_0-error:0.036466	validation_0-gini:-0.267073
[371]	val

[488]	validation_0-error:0.036474	validation_0-gini:-0.27879
[489]	validation_0-error:0.036474	validation_0-gini:-0.278783
[490]	validation_0-error:0.036474	validation_0-gini:-0.278777
[491]	validation_0-error:0.036474	validation_0-gini:-0.278844
[492]	validation_0-error:0.036474	validation_0-gini:-0.279034
[493]	validation_0-error:0.036474	validation_0-gini:-0.279094
[494]	validation_0-error:0.036474	validation_0-gini:-0.279152
[495]	validation_0-error:0.036474	validation_0-gini:-0.279169
[496]	validation_0-error:0.036474	validation_0-gini:-0.279222
[497]	validation_0-error:0.036474	validation_0-gini:-0.279263
[498]	validation_0-error:0.036474	validation_0-gini:-0.279301
[499]	validation_0-error:0.036474	validation_0-gini:-0.27926
[500]	validation_0-error:0.036474	validation_0-gini:-0.279291
[501]	validation_0-error:0.036474	validation_0-gini:-0.279287
[502]	validation_0-error:0.036474	validation_0-gini:-0.279305
[503]	validation_0-error:0.036474	validation_0-gini:-0.27933
[504]	valid

[621]	validation_0-error:0.036474	validation_0-gini:-0.280689
[622]	validation_0-error:0.036474	validation_0-gini:-0.280699
[623]	validation_0-error:0.036474	validation_0-gini:-0.280731
[624]	validation_0-error:0.036474	validation_0-gini:-0.280728
[625]	validation_0-error:0.036474	validation_0-gini:-0.280726
[626]	validation_0-error:0.036474	validation_0-gini:-0.280717
[627]	validation_0-error:0.036474	validation_0-gini:-0.280667
[628]	validation_0-error:0.036474	validation_0-gini:-0.280654
[629]	validation_0-error:0.036474	validation_0-gini:-0.280746
[630]	validation_0-error:0.036474	validation_0-gini:-0.280999
[631]	validation_0-error:0.036474	validation_0-gini:-0.280895
[632]	validation_0-error:0.036474	validation_0-gini:-0.280933
[633]	validation_0-error:0.036483	validation_0-gini:-0.281045
[634]	validation_0-error:0.036474	validation_0-gini:-0.281093
[635]	validation_0-error:0.036474	validation_0-gini:-0.281005
[636]	validation_0-error:0.036474	validation_0-gini:-0.281002
[637]	va

[754]	validation_0-error:0.036483	validation_0-gini:-0.281167
[755]	validation_0-error:0.036483	validation_0-gini:-0.281147
[756]	validation_0-error:0.036483	validation_0-gini:-0.281175
[757]	validation_0-error:0.036483	validation_0-gini:-0.281196
[758]	validation_0-error:0.036483	validation_0-gini:-0.281152
[759]	validation_0-error:0.036483	validation_0-gini:-0.281152
[760]	validation_0-error:0.036483	validation_0-gini:-0.281107
[761]	validation_0-error:0.036483	validation_0-gini:-0.281101
[762]	validation_0-error:0.036483	validation_0-gini:-0.281075
[763]	validation_0-error:0.036483	validation_0-gini:-0.281082
[764]	validation_0-error:0.036483	validation_0-gini:-0.281066
[765]	validation_0-error:0.036483	validation_0-gini:-0.281064
[766]	validation_0-error:0.036483	validation_0-gini:-0.281061
[767]	validation_0-error:0.036483	validation_0-gini:-0.281049
[768]	validation_0-error:0.036483	validation_0-gini:-0.28103
[769]	validation_0-error:0.036483	validation_0-gini:-0.281029
[770]	val

[67]	validation_0-error:0.036441	validation_0-gini:-0.000817
[68]	validation_0-error:0.036441	validation_0-gini:-0.000817
[69]	validation_0-error:0.036441	validation_0-gini:-0.000817
[70]	validation_0-error:0.036441	validation_0-gini:-0.000817
[71]	validation_0-error:0.036441	validation_0-gini:-0.000817
[72]	validation_0-error:0.036441	validation_0-gini:-0.000817
[73]	validation_0-error:0.036441	validation_0-gini:-0.000817
[74]	validation_0-error:0.036441	validation_0-gini:-0.000817
[75]	validation_0-error:0.036441	validation_0-gini:-0.000817
[76]	validation_0-error:0.036441	validation_0-gini:-0.000817
[77]	validation_0-error:0.036441	validation_0-gini:-0.000817
[78]	validation_0-error:0.036441	validation_0-gini:-0.000817
[79]	validation_0-error:0.036441	validation_0-gini:-0.000817
[80]	validation_0-error:0.036441	validation_0-gini:-0.000817
[81]	validation_0-error:0.036441	validation_0-gini:-0.000817
[82]	validation_0-error:0.036441	validation_0-gini:-0.000817
[83]	validation_0-error:

[74]	validation_0-error:0.036449	validation_0-gini:-0.00063
[75]	validation_0-error:0.036449	validation_0-gini:-0.00063
[76]	validation_0-error:0.036449	validation_0-gini:-0.00063
[77]	validation_0-error:0.036449	validation_0-gini:-0.00063
[78]	validation_0-error:0.036449	validation_0-gini:-0.00063
[79]	validation_0-error:0.036449	validation_0-gini:-0.00063
[80]	validation_0-error:0.036458	validation_0-gini:-0.00063
[81]	validation_0-error:0.036458	validation_0-gini:-0.00063
[82]	validation_0-error:0.036458	validation_0-gini:-0.00063
[83]	validation_0-error:0.036458	validation_0-gini:-0.00063
[84]	validation_0-error:0.036458	validation_0-gini:-0.00063
[85]	validation_0-error:0.036466	validation_0-gini:-0.00063
[86]	validation_0-error:0.036466	validation_0-gini:-0.00063
[87]	validation_0-error:0.036466	validation_0-gini:-0.00063
[88]	validation_0-error:0.036466	validation_0-gini:-0.00063
[89]	validation_0-error:0.036466	validation_0-gini:-0.00063
[90]	validation_0-error:0.036466	validat

[79]	validation_0-error:0.036441	validation_0-gini:-0.001553
[80]	validation_0-error:0.036441	validation_0-gini:-0.001553
[81]	validation_0-error:0.036441	validation_0-gini:-0.001553
[82]	validation_0-error:0.036441	validation_0-gini:-0.001553
[83]	validation_0-error:0.036441	validation_0-gini:-0.001553
[84]	validation_0-error:0.036441	validation_0-gini:-0.001553
[85]	validation_0-error:0.036441	validation_0-gini:-0.001553
[86]	validation_0-error:0.036441	validation_0-gini:-0.001766
[87]	validation_0-error:0.036441	validation_0-gini:-0.001766
[88]	validation_0-error:0.036441	validation_0-gini:-0.001766
[89]	validation_0-error:0.036441	validation_0-gini:-0.001766
[90]	validation_0-error:0.036441	validation_0-gini:-0.001766
[91]	validation_0-error:0.036441	validation_0-gini:-0.001766
[92]	validation_0-error:0.036441	validation_0-gini:-0.001766
[93]	validation_0-error:0.036441	validation_0-gini:-0.001766
[94]	validation_0-error:0.036441	validation_0-gini:-0.001766
[95]	validation_0-error:

[213]	validation_0-error:0.03645	validation_0-gini:-0.002783
[214]	validation_0-error:0.03645	validation_0-gini:-0.002775
[215]	validation_0-error:0.03645	validation_0-gini:-0.002766
[216]	validation_0-error:0.03645	validation_0-gini:-0.002766
[217]	validation_0-error:0.03645	validation_0-gini:-0.002766
[218]	validation_0-error:0.03645	validation_0-gini:-0.002766
[219]	validation_0-error:0.03645	validation_0-gini:-0.002748
[220]	validation_0-error:0.03645	validation_0-gini:-0.002748
[221]	validation_0-error:0.03645	validation_0-gini:-0.002748
[222]	validation_0-error:0.03645	validation_0-gini:-0.002748
[223]	validation_0-error:0.03645	validation_0-gini:-0.002748
[224]	validation_0-error:0.03645	validation_0-gini:-0.002748
[225]	validation_0-error:0.03645	validation_0-gini:-0.002748
[226]	validation_0-error:0.03645	validation_0-gini:-0.002748
[227]	validation_0-error:0.03645	validation_0-gini:-0.002748
[228]	validation_0-error:0.03645	validation_0-gini:-0.002748
[229]	validation_0-error

[348]	validation_0-error:0.03645	validation_0-gini:-0.253828
[349]	validation_0-error:0.03645	validation_0-gini:-0.255207
[350]	validation_0-error:0.03645	validation_0-gini:-0.255717
[351]	validation_0-error:0.03645	validation_0-gini:-0.255803
[352]	validation_0-error:0.03645	validation_0-gini:-0.256244
[353]	validation_0-error:0.03645	validation_0-gini:-0.256777
[354]	validation_0-error:0.03645	validation_0-gini:-0.257665
[355]	validation_0-error:0.03645	validation_0-gini:-0.258084
[356]	validation_0-error:0.03645	validation_0-gini:-0.259075
[357]	validation_0-error:0.03645	validation_0-gini:-0.259915
[358]	validation_0-error:0.03645	validation_0-gini:-0.260471
[359]	validation_0-error:0.03645	validation_0-gini:-0.260885
[360]	validation_0-error:0.03645	validation_0-gini:-0.261176
[361]	validation_0-error:0.03645	validation_0-gini:-0.261533
[362]	validation_0-error:0.03645	validation_0-gini:-0.26173
[363]	validation_0-error:0.03645	validation_0-gini:-0.261776
[364]	validation_0-error:

[483]	validation_0-error:0.036433	validation_0-gini:-0.277613
[484]	validation_0-error:0.036441	validation_0-gini:-0.277698
[485]	validation_0-error:0.036433	validation_0-gini:-0.277606
[486]	validation_0-error:0.036433	validation_0-gini:-0.277525
[487]	validation_0-error:0.036433	validation_0-gini:-0.277404
[488]	validation_0-error:0.036433	validation_0-gini:-0.277576
[489]	validation_0-error:0.036433	validation_0-gini:-0.277559
[490]	validation_0-error:0.036433	validation_0-gini:-0.277579
[491]	validation_0-error:0.036433	validation_0-gini:-0.277545
[492]	validation_0-error:0.036433	validation_0-gini:-0.277643
[493]	validation_0-error:0.036433	validation_0-gini:-0.277563
[494]	validation_0-error:0.036433	validation_0-gini:-0.277557
[495]	validation_0-error:0.036441	validation_0-gini:-0.277763
[496]	validation_0-error:0.036441	validation_0-gini:-0.27775
[497]	validation_0-error:0.036441	validation_0-gini:-0.277746
[498]	validation_0-error:0.036441	validation_0-gini:-0.277733
[499]	val

[616]	validation_0-error:0.036433	validation_0-gini:-0.278053
[617]	validation_0-error:0.036433	validation_0-gini:-0.278045
[618]	validation_0-error:0.036433	validation_0-gini:-0.278044
[619]	validation_0-error:0.036433	validation_0-gini:-0.278059
[620]	validation_0-error:0.036433	validation_0-gini:-0.278082
[621]	validation_0-error:0.036433	validation_0-gini:-0.278077
[622]	validation_0-error:0.036433	validation_0-gini:-0.278061
[623]	validation_0-error:0.036433	validation_0-gini:-0.278024
[624]	validation_0-error:0.036433	validation_0-gini:-0.278025
[625]	validation_0-error:0.036433	validation_0-gini:-0.278047
[626]	validation_0-error:0.036433	validation_0-gini:-0.278031
[627]	validation_0-error:0.036433	validation_0-gini:-0.27809
[628]	validation_0-error:0.036433	validation_0-gini:-0.278066
[629]	validation_0-error:0.036433	validation_0-gini:-0.278072
[630]	validation_0-error:0.036433	validation_0-gini:-0.278079
[631]	validation_0-error:0.036433	validation_0-gini:-0.278038
[632]	val

2019-02-06 13:09:59,918 __main__ 46 [INFO][<module>] logloss: 0.3195515538980959, gini: -0.1695858791571368
2019-02-06 13:09:59,919 __main__ 50 [INFO][<module>] logloss: 0.3195515538980959, gini: -0.1695858791571368
2019-02-06 13:09:59,920 __main__ 51 [INFO][<module>] current min score: -0.1695858791571368, params: {'colsample_bylevel': 0.9, 'n_estimators': 392, 'max_delta_step': 0.1, 'colsample_bytree': 0.9, 'n_jobs': -1, 'seed': 0, 'learning_rate': 0.1, 'max_depth': 5, 'reg_alpha': 0.1, 'min_child_weight': 5}
100%|███████████████████████████████████████████| 1/1 [11:14<00:00, 674.49s/it]
2019-02-06 13:09:59,925 __main__ 53 [INFO][<module>] minimum params: {'colsample_bylevel': 0.9, 'n_estimators': 392, 'max_delta_step': 0.1, 'colsample_bytree': 0.9, 'n_jobs': -1, 'seed': 0, 'learning_rate': 0.1, 'max_depth': 5, 'reg_alpha': 0.1, 'min_child_weight': 5}
2019-02-06 13:09:59,926 __main__ 54 [INFO][<module>] minimum gini: -0.1695858791571368
2019-02-06 13:11:36,965 __main__ 60 [INFO][<mod

In [54]:
df = load_test_data()

In [55]:
x_test = df[use_cols].sort_values('id')

logger.info('test data load end {}'.format(x_test.shape))

2019-02-06 13:12:15,005 __main__ 3 [INFO][<module>] test data load end (892816, 38)


In [56]:
pred_test = clf.predict_proba(x_test)[:,1]

ValueError: feature_names mismatch: ['ps_ind_01', 'ps_ind_02_cat', 'ps_ind_03', 'ps_ind_04_cat', 'ps_ind_05_cat', 'ps_ind_06_bin', 'ps_ind_07_bin', 'ps_ind_08_bin', 'ps_ind_09_bin', 'ps_ind_10_bin', 'ps_ind_11_bin', 'ps_ind_12_bin', 'ps_ind_13_bin', 'ps_ind_14', 'ps_ind_15', 'ps_ind_16_bin', 'ps_ind_17_bin', 'ps_ind_18_bin', 'ps_reg_01', 'ps_reg_02', 'ps_reg_03', 'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat', 'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat', 'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat', 'ps_car_10_cat', 'ps_car_11_cat', 'ps_car_11', 'ps_car_12', 'ps_car_13', 'ps_car_14', 'ps_car_15'] ['id', 'ps_ind_01', 'ps_ind_02_cat', 'ps_ind_03', 'ps_ind_04_cat', 'ps_ind_05_cat', 'ps_ind_06_bin', 'ps_ind_07_bin', 'ps_ind_08_bin', 'ps_ind_09_bin', 'ps_ind_10_bin', 'ps_ind_11_bin', 'ps_ind_12_bin', 'ps_ind_13_bin', 'ps_ind_14', 'ps_ind_15', 'ps_ind_16_bin', 'ps_ind_17_bin', 'ps_ind_18_bin', 'ps_reg_01', 'ps_reg_02', 'ps_reg_03', 'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat', 'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat', 'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat', 'ps_car_10_cat', 'ps_car_11_cat', 'ps_car_11', 'ps_car_12', 'ps_car_13', 'ps_car_14', 'ps_car_15']
training data did not have the following fields: id

In [None]:
df_submit = read_csv(SAMPLE_SUBMIT_FILE).sort_values('id')
df_submit['target'] = pred_test
df_submit.to_csv(DIR + 'submit.csv', index=False)