In [1]:
import numpy as np
import util
import pickle
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import matplotlib.pyplot as plt
import scipy.stats



In [2]:
# Reproducibility: a single seeded generator drives the train/test split.
seed = 229
state = np.random.RandomState(seed)

# Fraction of the rows held out as the test set.
test_size = 0.20

# Read the training file through the project helper; it hands back four values
# (presumably column names, row ids, feature matrix and labels -- verify in util).
trainfile = 'data/train.csv'
header, ids, X, Y = util.fetch_data(trainfile)

X_train, X_test, y_train, y_test = train_test_split(
    X, Y,
    test_size=test_size,
    random_state=state,
)

# Training: Randomized Search
Use the first 10% of the training split to run a randomized search over XGBoost hyperparameters, then evaluate the best estimator found.

In [None]:
# Resume from the best model saved by an earlier run, if one exists.
# `best_xgb` stays None when no saved model can be read.
best_xgb = None

try:
    # The model file is written in binary mode, so it must be read in binary
    # mode too; `with` guarantees the handle is closed.
    # SECURITY: pickle.load can execute arbitrary code -- only load model
    # files that this project produced itself.
    with open('models/xgb_randomized.pickle', 'rb') as xgb_in:
        best_xgb = pickle.load(xgb_in)
except IOError:
    # No readable saved model yet: start from scratch.
    pass

In [None]:
# Run the hyperparameter search on the first tenth of the training split.
# Floor division keeps the row count an int so it is a valid slice bound
# (true division would produce a float on Python 3).
m_search = X_train.shape[0] // 10
X_paramsearch = X_train[:m_search, :]
Y_paramsearch = y_train[:m_search]

# Sampling distributions for the randomized search.
# scipy.stats.uniform(loc, scale) draws from [loc, loc + scale].
param_distros = {
    'learning_rate': scipy.stats.uniform(loc=0.1, scale=0.1),      # [0.1, 0.2]
    'min_child_weight': scipy.stats.uniform(loc=0.1, scale=0.9),   # [0.1, 1.0]
    # NOTE(review): binom(n=10, p=0.6) yields integers 0..10, so it can
    # (rarely) draw max_depth=0 -- confirm that value is acceptable to XGBoost.
    'max_depth': scipy.stats.binom(n=10, p=0.6),
    #'gamma': scipy.stats.uniform(loc=0, scale=1),
    'subsample': scipy.stats.uniform(loc=0.5, scale=0.5),          # [0.5, 1.0]
    'colsample_bytree': scipy.stats.uniform(loc=0.5, scale=0.5),   # [0.5, 1.0]
}

# NOTE(review): no random_state is passed, so each run samples different
# candidates -- presumably intentional so repeated runs can improve on the
# saved model; confirm.
xgb = XGBClassifier(silent=0)
cv_random = RandomizedSearchCV(xgb, n_iter=500, param_distributions=param_distros,
                               scoring=util.gini_proba_scorer, verbose=2, n_jobs=-1)
cv_random.fit(X_paramsearch, Y_paramsearch)

# Score of the previously saved model, or -inf when there is none so that any
# search result replaces it.
# NOTE(review): this scores the old model directly on X_paramsearch, while
# cv_random.best_score_ is a mean cross-validated score. If the old model was
# fit on these rows the comparison is biased in its favour -- confirm intent.
best_xgb_score = (-float('inf') if best_xgb is None
                  else util.gini_proba_scorer(best_xgb, X_paramsearch, Y_paramsearch))

# Update best estimator
if cv_random.best_score_ > best_xgb_score:
    best_xgb = cv_random.best_estimator_

# Persist whichever model won; `with` closes the file even if dump() raises.
with open('models/xgb_randomized.pickle', 'wb') as xgb_out:
    pickle.dump(best_xgb, xgb_out)

Fitting 3 folds for each of 500 candidates, totalling 1500 fits
[CV] subsample=0.889730870102, learning_rate=0.173855720703, colsample_bytree=0.54851249975, max_depth=5, min_child_weight=0.322788105024 
[CV] subsample=0.889730870102, learning_rate=0.173855720703, colsample_bytree=0.54851249975, max_depth=5, min_child_weight=0.322788105024 
[CV] subsample=0.889730870102, learning_rate=0.173855720703, colsample_bytree=0.54851249975, max_depth=5, min_child_weight=0.322788105024 
[CV] subsample=0.982999550166, learning_rate=0.137186577101, colsample_bytree=0.735231720695, max_depth=5, min_child_weight=0.418744731918 
[CV]  subsample=0.889730870102, learning_rate=0.173855720703, colsample_bytree=0.54851249975, max_depth=5, min_child_weight=0.322788105024, total=  12.5s
[CV] subsample=0.982999550166, learning_rate=0.137186577101, colsample_bytree=0.735231720695, max_depth=5, min_child_weight=0.418744731918 
[CV]  subsample=0.889730870102, learning_rate=0.173855720703, colsample_bytree=0.5485

[CV]  subsample=0.561122243029, learning_rate=0.143046835273, colsample_bytree=0.52923777809, max_depth=6, min_child_weight=0.973872991351, total=  11.3s
[CV] subsample=0.533261059725, learning_rate=0.17718667901, colsample_bytree=0.630179376688, max_depth=3, min_child_weight=0.875647373597 
[CV]  subsample=0.777015072759, learning_rate=0.191522506061, colsample_bytree=0.658817213399, max_depth=7, min_child_weight=0.724202968472, total=  14.5s
[CV] subsample=0.533261059725, learning_rate=0.17718667901, colsample_bytree=0.630179376688, max_depth=3, min_child_weight=0.875647373597 
[CV]  subsample=0.561122243029, learning_rate=0.143046835273, colsample_bytree=0.52923777809, max_depth=6, min_child_weight=0.973872991351, total=   9.7s
[CV] subsample=0.533261059725, learning_rate=0.17718667901, colsample_bytree=0.630179376688, max_depth=3, min_child_weight=0.875647373597 
[CV]  subsample=0.561122243029, learning_rate=0.143046835273, colsample_bytree=0.52923777809, max_depth=6, min_child_wei

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  2.3min


[CV] subsample=0.809532949307, learning_rate=0.13793884758, colsample_bytree=0.5909635491, max_depth=6, min_child_weight=0.828339309912 
[CV]  subsample=0.801945281925, learning_rate=0.154408375081, colsample_bytree=0.564876245659, max_depth=7, min_child_weight=0.317725550604, total=  13.3s
[CV] subsample=0.809532949307, learning_rate=0.13793884758, colsample_bytree=0.5909635491, max_depth=6, min_child_weight=0.828339309912 
[CV]  subsample=0.801945281925, learning_rate=0.154408375081, colsample_bytree=0.564876245659, max_depth=7, min_child_weight=0.317725550604, total=  12.7s
[CV] subsample=0.809532949307, learning_rate=0.13793884758, colsample_bytree=0.5909635491, max_depth=6, min_child_weight=0.828339309912 
[CV]  subsample=0.801945281925, learning_rate=0.154408375081, colsample_bytree=0.564876245659, max_depth=7, min_child_weight=0.317725550604, total=  12.9s
[CV] subsample=0.956398038061, learning_rate=0.160424324996, colsample_bytree=0.597792021882, max_depth=8, min_child_weight=

[CV] subsample=0.986374147801, learning_rate=0.138610669489, colsample_bytree=0.720476399962, max_depth=7, min_child_weight=0.596487831092 
[CV]  subsample=0.82926808507, learning_rate=0.116508635242, colsample_bytree=0.687990741379, max_depth=2, min_child_weight=0.916433283003, total=   4.6s
[CV] subsample=0.986374147801, learning_rate=0.138610669489, colsample_bytree=0.720476399962, max_depth=7, min_child_weight=0.596487831092 
[CV]  subsample=0.664026972365, learning_rate=0.195744262214, colsample_bytree=0.653144811908, max_depth=8, min_child_weight=0.563616119745, total=  17.7s
[CV] subsample=0.640801766241, learning_rate=0.140289752909, colsample_bytree=0.602312306636, max_depth=3, min_child_weight=0.982119376533 
[CV]  subsample=0.640801766241, learning_rate=0.140289752909, colsample_bytree=0.602312306636, max_depth=3, min_child_weight=0.982119376533, total=   5.9s
[CV] subsample=0.640801766241, learning_rate=0.140289752909, colsample_bytree=0.602312306636, max_depth=3, min_child

[CV] subsample=0.679845810832, learning_rate=0.154326019624, colsample_bytree=0.689102190004, max_depth=5, min_child_weight=0.982427942053 
[CV]  subsample=0.712528724392, learning_rate=0.185407640793, colsample_bytree=0.792774596976, max_depth=4, min_child_weight=0.867925888153, total=   9.2s
[CV] subsample=0.838780464982, learning_rate=0.113079881248, colsample_bytree=0.966509169639, max_depth=5, min_child_weight=0.790786805381 
[CV]  subsample=0.679845810832, learning_rate=0.154326019624, colsample_bytree=0.689102190004, max_depth=5, min_child_weight=0.982427942053, total=  10.0s
[CV] subsample=0.838780464982, learning_rate=0.113079881248, colsample_bytree=0.966509169639, max_depth=5, min_child_weight=0.790786805381 
[CV]  subsample=0.679845810832, learning_rate=0.154326019624, colsample_bytree=0.689102190004, max_depth=5, min_child_weight=0.982427942053, total=  11.4s
[CV] subsample=0.838780464982, learning_rate=0.113079881248, colsample_bytree=0.966509169639, max_depth=5, min_chil

[CV] subsample=0.837835048755, learning_rate=0.122572133757, colsample_bytree=0.705239868182, max_depth=5, min_child_weight=0.389013903525 
[CV]  subsample=0.674005481638, learning_rate=0.156419684677, colsample_bytree=0.886360558524, max_depth=8, min_child_weight=0.474253032078, total=  19.8s
[CV] subsample=0.837835048755, learning_rate=0.122572133757, colsample_bytree=0.705239868182, max_depth=5, min_child_weight=0.389013903525 
[CV]  subsample=0.642446584853, learning_rate=0.128790512765, colsample_bytree=0.951406375269, max_depth=3, min_child_weight=0.201064128751, total=   8.6s
[CV] subsample=0.837835048755, learning_rate=0.122572133757, colsample_bytree=0.705239868182, max_depth=5, min_child_weight=0.389013903525 
[CV]  subsample=0.642446584853, learning_rate=0.128790512765, colsample_bytree=0.951406375269, max_depth=3, min_child_weight=0.201064128751, total=   8.4s
[CV] subsample=0.921087991869, learning_rate=0.104619219356, colsample_bytree=0.953528944288, max_depth=5, min_chil

[CV] subsample=0.70346977932, learning_rate=0.113690295187, colsample_bytree=0.827861236154, max_depth=6, min_child_weight=0.889394827625 
[CV]  subsample=0.906118712523, learning_rate=0.163327978557, colsample_bytree=0.921813158736, max_depth=5, min_child_weight=0.682443203531, total=  12.5s
[CV]  subsample=0.906118712523, learning_rate=0.163327978557, colsample_bytree=0.921813158736, max_depth=5, min_child_weight=0.682443203531, total=  12.2s
[CV] subsample=0.70346977932, learning_rate=0.113690295187, colsample_bytree=0.827861236154, max_depth=6, min_child_weight=0.889394827625 
[CV] subsample=0.678596649316, learning_rate=0.155326601316, colsample_bytree=0.802146628293, max_depth=7, min_child_weight=0.609632923774 
[CV]  subsample=0.70346977932, learning_rate=0.113690295187, colsample_bytree=0.827861236154, max_depth=6, min_child_weight=0.889394827625, total=  13.8s
[CV] subsample=0.678596649316, learning_rate=0.155326601316, colsample_bytree=0.802146628293, max_depth=7, min_child_w

[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:  9.3min


[CV]  subsample=0.678040090946, learning_rate=0.156102966497, colsample_bytree=0.874973031054, max_depth=5, min_child_weight=0.557258853067, total=  11.3s
[CV] subsample=0.922584509597, learning_rate=0.177201673878, colsample_bytree=0.645972842052, max_depth=7, min_child_weight=0.775338822964 
[CV]  subsample=0.678040090946, learning_rate=0.156102966497, colsample_bytree=0.874973031054, max_depth=5, min_child_weight=0.557258853067, total=  10.9s
[CV] subsample=0.617467392502, learning_rate=0.173732056798, colsample_bytree=0.531852864139, max_depth=4, min_child_weight=0.529951084511 
[CV]  subsample=0.922584509597, learning_rate=0.177201673878, colsample_bytree=0.645972842052, max_depth=7, min_child_weight=0.775338822964, total=  11.3s
[CV] subsample=0.617467392502, learning_rate=0.173732056798, colsample_bytree=0.531852864139, max_depth=4, min_child_weight=0.529951084511 
[CV]  subsample=0.922584509597, learning_rate=0.177201673878, colsample_bytree=0.645972842052, max_depth=7, min_chi

[CV]  subsample=0.602044804011, learning_rate=0.107704380385, colsample_bytree=0.741999662908, max_depth=5, min_child_weight=0.219388578863, total=  10.3s
[CV] subsample=0.606307956298, learning_rate=0.11963776228, colsample_bytree=0.532697652385, max_depth=7, min_child_weight=0.849208031747 
[CV]  subsample=0.711825992564, learning_rate=0.105949159125, colsample_bytree=0.815538386919, max_depth=8, min_child_weight=0.207072128068, total=  17.2s
[CV] subsample=0.606307956298, learning_rate=0.11963776228, colsample_bytree=0.532697652385, max_depth=7, min_child_weight=0.849208031747 
[CV]  subsample=0.606307956298, learning_rate=0.11963776228, colsample_bytree=0.532697652385, max_depth=7, min_child_weight=0.849208031747, total=  11.9s
[CV] subsample=0.606307956298, learning_rate=0.11963776228, colsample_bytree=0.532697652385, max_depth=7, min_child_weight=0.849208031747 
[CV]  subsample=0.711825992564, learning_rate=0.105949159125, colsample_bytree=0.815538386919, max_depth=8, min_child_w

[CV]  subsample=0.768361548557, learning_rate=0.152806784273, colsample_bytree=0.841886791805, max_depth=5, min_child_weight=0.741035956203, total=  14.7s
[CV] subsample=0.995298608702, learning_rate=0.116367200414, colsample_bytree=0.749497318359, max_depth=7, min_child_weight=0.337422094379 
[CV]  subsample=0.768361548557, learning_rate=0.152806784273, colsample_bytree=0.841886791805, max_depth=5, min_child_weight=0.741035956203, total=  12.4s
[CV] subsample=0.995298608702, learning_rate=0.116367200414, colsample_bytree=0.749497318359, max_depth=7, min_child_weight=0.337422094379 
[CV]  subsample=0.768361548557, learning_rate=0.152806784273, colsample_bytree=0.841886791805, max_depth=5, min_child_weight=0.741035956203, total=  12.4s
[CV] subsample=0.517963109656, learning_rate=0.193995399475, colsample_bytree=0.575911788142, max_depth=5, min_child_weight=0.137969394778 
[CV]  subsample=0.995298608702, learning_rate=0.116367200414, colsample_bytree=0.749497318359, max_depth=7, min_chi

[CV]  subsample=0.798018583074, learning_rate=0.103819956492, colsample_bytree=0.987697751988, max_depth=4, min_child_weight=0.962260757894, total=  10.1s
[CV] subsample=0.63778406659, learning_rate=0.140588318571, colsample_bytree=0.948018059626, max_depth=5, min_child_weight=0.843116187809 
[CV]  subsample=0.798018583074, learning_rate=0.103819956492, colsample_bytree=0.987697751988, max_depth=4, min_child_weight=0.962260757894, total=  10.1s
[CV] subsample=0.763840094645, learning_rate=0.104068665754, colsample_bytree=0.898572507966, max_depth=7, min_child_weight=0.166559775427 
[CV]  subsample=0.63778406659, learning_rate=0.140588318571, colsample_bytree=0.948018059626, max_depth=5, min_child_weight=0.843116187809, total=  12.5s
[CV] subsample=0.763840094645, learning_rate=0.104068665754, colsample_bytree=0.898572507966, max_depth=7, min_child_weight=0.166559775427 
[CV]  subsample=0.63778406659, learning_rate=0.140588318571, colsample_bytree=0.948018059626, max_depth=5, min_child_

[CV]  subsample=0.876253694029, learning_rate=0.127027579596, colsample_bytree=0.604495919764, max_depth=9, min_child_weight=0.414208439034, total=  16.4s
[CV] subsample=0.648635095042, learning_rate=0.141683030923, colsample_bytree=0.960316491539, max_depth=7, min_child_weight=0.334068812783 
[CV]  subsample=0.876253694029, learning_rate=0.127027579596, colsample_bytree=0.604495919764, max_depth=9, min_child_weight=0.414208439034, total=  15.5s
[CV] subsample=0.648635095042, learning_rate=0.141683030923, colsample_bytree=0.960316491539, max_depth=7, min_child_weight=0.334068812783 
[CV]  subsample=0.791298734167, learning_rate=0.168112480913, colsample_bytree=0.594965871172, max_depth=6, min_child_weight=0.494217232405, total=   9.8s
[CV] subsample=0.648635095042, learning_rate=0.141683030923, colsample_bytree=0.960316491539, max_depth=7, min_child_weight=0.334068812783 
[CV]  subsample=0.791298734167, learning_rate=0.168112480913, colsample_bytree=0.594965871172, max_depth=6, min_chi

[CV]  subsample=0.649829857562, learning_rate=0.191568213943, colsample_bytree=0.571730999956, max_depth=6, min_child_weight=0.130597182361, total=  12.1s
[CV]  subsample=0.649829857562, learning_rate=0.191568213943, colsample_bytree=0.571730999956, max_depth=6, min_child_weight=0.130597182361, total=  12.6s
[CV] subsample=0.800889046667, learning_rate=0.186441514166, colsample_bytree=0.77031786597, max_depth=8, min_child_weight=0.818739958044 
[CV] subsample=0.800889046667, learning_rate=0.186441514166, colsample_bytree=0.77031786597, max_depth=8, min_child_weight=0.818739958044 
[CV]  subsample=0.649829857562, learning_rate=0.191568213943, colsample_bytree=0.571730999956, max_depth=6, min_child_weight=0.130597182361, total=  11.7s
[CV] subsample=0.566969602169, learning_rate=0.153950830152, colsample_bytree=0.975216406768, max_depth=5, min_child_weight=0.525275938391 
[CV]  subsample=0.800889046667, learning_rate=0.186441514166, colsample_bytree=0.77031786597, max_depth=8, min_child_

[CV] subsample=0.530897638182, learning_rate=0.102021462609, colsample_bytree=0.663650881449, max_depth=8, min_child_weight=0.488842139566 
[CV] subsample=0.530897638182, learning_rate=0.102021462609, colsample_bytree=0.663650881449, max_depth=8, min_child_weight=0.488842139566 
[CV]  subsample=0.915011101887, learning_rate=0.194969997064, colsample_bytree=0.762102957558, max_depth=5, min_child_weight=0.230349811255, total=  10.9s
[CV] subsample=0.544063532728, learning_rate=0.1597919708, colsample_bytree=0.997065011942, max_depth=8, min_child_weight=0.618582013956 
[CV]  subsample=0.530897638182, learning_rate=0.102021462609, colsample_bytree=0.663650881449, max_depth=8, min_child_weight=0.488842139566, total=  16.8s
[CV] subsample=0.544063532728, learning_rate=0.1597919708, colsample_bytree=0.997065011942, max_depth=8, min_child_weight=0.618582013956 
[CV]  subsample=0.530897638182, learning_rate=0.102021462609, colsample_bytree=0.663650881449, max_depth=8, min_child_weight=0.4888421

[CV]  subsample=0.808672648737, learning_rate=0.149484666742, colsample_bytree=0.940867482694, max_depth=5, min_child_weight=0.823136729877, total=  11.5s
[CV] subsample=0.727932770357, learning_rate=0.159427177684, colsample_bytree=0.96582788393, max_depth=6, min_child_weight=0.516087591178 
[CV]  subsample=0.862192302078, learning_rate=0.155181424232, colsample_bytree=0.690329111641, max_depth=5, min_child_weight=0.205555042061, total=   9.2s
[CV] subsample=0.727932770357, learning_rate=0.159427177684, colsample_bytree=0.96582788393, max_depth=6, min_child_weight=0.516087591178 
[CV]  subsample=0.862192302078, learning_rate=0.155181424232, colsample_bytree=0.690329111641, max_depth=5, min_child_weight=0.205555042061, total=   9.1s
[CV] subsample=0.727932770357, learning_rate=0.159427177684, colsample_bytree=0.96582788393, max_depth=6, min_child_weight=0.516087591178 
[CV]  subsample=0.862192302078, learning_rate=0.155181424232, colsample_bytree=0.690329111641, max_depth=5, min_child_

[Parallel(n_jobs=-1)]: Done 357 tasks      | elapsed: 20.7min


[CV]  subsample=0.849741898975, learning_rate=0.109620482227, colsample_bytree=0.733879594676, max_depth=8, min_child_weight=0.547159953893, total=  15.5s
[CV] subsample=0.789252763784, learning_rate=0.100909205248, colsample_bytree=0.590200078894, max_depth=5, min_child_weight=0.236566982286 
[CV]  subsample=0.849741898975, learning_rate=0.109620482227, colsample_bytree=0.733879594676, max_depth=8, min_child_weight=0.547159953893, total=  15.7s
[CV] subsample=0.789252763784, learning_rate=0.100909205248, colsample_bytree=0.590200078894, max_depth=5, min_child_weight=0.236566982286 
[CV]  subsample=0.789252763784, learning_rate=0.100909205248, colsample_bytree=0.590200078894, max_depth=5, min_child_weight=0.236566982286, total=   8.9s
[CV] subsample=0.798160233375, learning_rate=0.129670895578, colsample_bytree=0.8349888242, max_depth=6, min_child_weight=0.129904704165 
[CV]  subsample=0.789252763784, learning_rate=0.100909205248, colsample_bytree=0.590200078894, max_depth=5, min_child

[CV]  subsample=0.501991923689, learning_rate=0.197878745452, colsample_bytree=0.653895832794, max_depth=6, min_child_weight=0.749470917091, total=  15.1s
[CV] subsample=0.501991923689, learning_rate=0.197878745452, colsample_bytree=0.653895832794, max_depth=6, min_child_weight=0.749470917091 
[CV]  subsample=0.501991923689, learning_rate=0.197878745452, colsample_bytree=0.653895832794, max_depth=6, min_child_weight=0.749470917091, total=  14.5s
[CV]  subsample=0.607798896201, learning_rate=0.144387687179, colsample_bytree=0.793280358765, max_depth=7, min_child_weight=0.262527503336, total=  20.6s
[CV] subsample=0.765972705866, learning_rate=0.149408896004, colsample_bytree=0.528355231956, max_depth=7, min_child_weight=0.951260510166 
[CV] subsample=0.765972705866, learning_rate=0.149408896004, colsample_bytree=0.528355231956, max_depth=7, min_child_weight=0.951260510166 
[CV]  subsample=0.607798896201, learning_rate=0.144387687179, colsample_bytree=0.793280358765, max_depth=7, min_chi

[CV]  subsample=0.682912707698, learning_rate=0.155585186279, colsample_bytree=0.662900650837, max_depth=5, min_child_weight=0.880268801537, total=  21.6s
[CV] subsample=0.502417009194, learning_rate=0.198983987856, colsample_bytree=0.988129867003, max_depth=8, min_child_weight=0.609479074056 
[CV]  subsample=0.751859197545, learning_rate=0.176559494439, colsample_bytree=0.725619714622, max_depth=8, min_child_weight=0.817087911835, total=  23.7s
[CV] subsample=0.502417009194, learning_rate=0.198983987856, colsample_bytree=0.988129867003, max_depth=8, min_child_weight=0.609479074056 
[CV]  subsample=0.751859197545, learning_rate=0.176559494439, colsample_bytree=0.725619714622, max_depth=8, min_child_weight=0.817087911835, total=  22.4s
[CV] subsample=0.502417009194, learning_rate=0.198983987856, colsample_bytree=0.988129867003, max_depth=8, min_child_weight=0.609479074056 
[CV]  subsample=0.751859197545, learning_rate=0.176559494439, colsample_bytree=0.725619714622, max_depth=8, min_chi

[CV]  subsample=0.718586739072, learning_rate=0.11402720954, colsample_bytree=0.687470265522, max_depth=8, min_child_weight=0.457225928845, total=  18.0s
[CV] subsample=0.531292684401, learning_rate=0.188725129989, colsample_bytree=0.823628098061, max_depth=7, min_child_weight=0.769505536297 
[CV]  subsample=0.718586739072, learning_rate=0.11402720954, colsample_bytree=0.687470265522, max_depth=8, min_child_weight=0.457225928845, total=  19.0s
[CV] subsample=0.531292684401, learning_rate=0.188725129989, colsample_bytree=0.823628098061, max_depth=7, min_child_weight=0.769505536297 
[CV]  subsample=0.718586739072, learning_rate=0.11402720954, colsample_bytree=0.687470265522, max_depth=8, min_child_weight=0.457225928845, total=  27.3s
[CV] subsample=0.784822013761, learning_rate=0.146748678074, colsample_bytree=0.992192238797, max_depth=6, min_child_weight=0.776215927784 
[CV]  subsample=0.531292684401, learning_rate=0.188725129989, colsample_bytree=0.823628098061, max_depth=7, min_child_

[CV]  subsample=0.660609816328, learning_rate=0.181176125435, colsample_bytree=0.906715687067, max_depth=8, min_child_weight=0.976469389057, total=  23.8s
[CV] subsample=0.899096358975, learning_rate=0.126292296427, colsample_bytree=0.60174898544, max_depth=4, min_child_weight=0.373567219086 
[CV]  subsample=0.899096358975, learning_rate=0.126292296427, colsample_bytree=0.60174898544, max_depth=4, min_child_weight=0.373567219086, total=   9.1s
[CV] subsample=0.901427379761, learning_rate=0.114329431061, colsample_bytree=0.577928517896, max_depth=7, min_child_weight=0.694035282314 
[CV]  subsample=0.991676342556, learning_rate=0.198478048285, colsample_bytree=0.955815712721, max_depth=5, min_child_weight=0.170739590005, total=  14.6s
[CV] subsample=0.901427379761, learning_rate=0.114329431061, colsample_bytree=0.577928517896, max_depth=7, min_child_weight=0.694035282314 
[CV]  subsample=0.899096358975, learning_rate=0.126292296427, colsample_bytree=0.60174898544, max_depth=4, min_child_

[CV]  subsample=0.81023706927, learning_rate=0.116907113645, colsample_bytree=0.65969010322, max_depth=8, min_child_weight=0.254642537521, total=  16.1s
[CV] subsample=0.567608148907, learning_rate=0.161656179318, colsample_bytree=0.546000192267, max_depth=6, min_child_weight=0.408174444236 
[CV]  subsample=0.81023706927, learning_rate=0.116907113645, colsample_bytree=0.65969010322, max_depth=8, min_child_weight=0.254642537521, total=  16.0s
[CV] subsample=0.567608148907, learning_rate=0.161656179318, colsample_bytree=0.546000192267, max_depth=6, min_child_weight=0.408174444236 
[CV]  subsample=0.644647192413, learning_rate=0.188123212323, colsample_bytree=0.989804219865, max_depth=4, min_child_weight=0.978437028167, total=  12.8s
[CV] subsample=0.567608148907, learning_rate=0.161656179318, colsample_bytree=0.546000192267, max_depth=6, min_child_weight=0.408174444236 
[CV]  subsample=0.644647192413, learning_rate=0.188123212323, colsample_bytree=0.989804219865, max_depth=4, min_child_w

[CV]  subsample=0.555968178778, learning_rate=0.188046486492, colsample_bytree=0.791268078253, max_depth=7, min_child_weight=0.44482732439, total=  16.6s
[CV] subsample=0.551003795509, learning_rate=0.125676129392, colsample_bytree=0.846606584217, max_depth=8, min_child_weight=0.379508420307 
[CV]  subsample=0.555968178778, learning_rate=0.188046486492, colsample_bytree=0.791268078253, max_depth=7, min_child_weight=0.44482732439, total=  16.8s
[CV] subsample=0.551003795509, learning_rate=0.125676129392, colsample_bytree=0.846606584217, max_depth=8, min_child_weight=0.379508420307 
[CV]  subsample=0.555968178778, learning_rate=0.188046486492, colsample_bytree=0.791268078253, max_depth=7, min_child_weight=0.44482732439, total=  16.5s
[CV] subsample=0.918062111533, learning_rate=0.139454098969, colsample_bytree=0.599245323161, max_depth=5, min_child_weight=0.471394099356 
[CV]  subsample=0.551003795509, learning_rate=0.125676129392, colsample_bytree=0.846606584217, max_depth=8, min_child_

[CV]  subsample=0.70815909798, learning_rate=0.102447797496, colsample_bytree=0.807132291559, max_depth=5, min_child_weight=0.427720354343, total=  11.2s
[CV] subsample=0.580053930356, learning_rate=0.140350199917, colsample_bytree=0.73808574558, max_depth=5, min_child_weight=0.855479765469 
[CV]  subsample=0.580053930356, learning_rate=0.140350199917, colsample_bytree=0.73808574558, max_depth=5, min_child_weight=0.855479765469, total=  10.7s
[CV] subsample=0.987177441134, learning_rate=0.135212527568, colsample_bytree=0.927540374023, max_depth=7, min_child_weight=0.552248768709 
[CV]  subsample=0.580053930356, learning_rate=0.140350199917, colsample_bytree=0.73808574558, max_depth=5, min_child_weight=0.855479765469, total=  10.8s
[CV] subsample=0.987177441134, learning_rate=0.135212527568, colsample_bytree=0.927540374023, max_depth=7, min_child_weight=0.552248768709 
[CV]  subsample=0.70815909798, learning_rate=0.102447797496, colsample_bytree=0.807132291559, max_depth=5, min_child_we

[CV]  subsample=0.791150232264, learning_rate=0.119514706051, colsample_bytree=0.74308763289, max_depth=8, min_child_weight=0.252467311841, total=  16.5s
[CV] subsample=0.722005294341, learning_rate=0.135627359353, colsample_bytree=0.796547411362, max_depth=4, min_child_weight=0.453575572201 
[CV]  subsample=0.791150232264, learning_rate=0.119514706051, colsample_bytree=0.74308763289, max_depth=8, min_child_weight=0.252467311841, total=  16.6s
[CV] subsample=0.722005294341, learning_rate=0.135627359353, colsample_bytree=0.796547411362, max_depth=4, min_child_weight=0.453575572201 
[CV]  subsample=0.579300659597, learning_rate=0.145798323679, colsample_bytree=0.800743695275, max_depth=5, min_child_weight=0.455373844167, total=  11.4s
[CV] subsample=0.722005294341, learning_rate=0.135627359353, colsample_bytree=0.796547411362, max_depth=4, min_child_weight=0.453575572201 
[CV]  subsample=0.579300659597, learning_rate=0.145798323679, colsample_bytree=0.800743695275, max_depth=5, min_child

[CV]  subsample=0.661522340424, learning_rate=0.154572039841, colsample_bytree=0.980279322504, max_depth=5, min_child_weight=0.144872897776, total=  14.5s
[CV] subsample=0.959273123296, learning_rate=0.123090587389, colsample_bytree=0.678380020004, max_depth=7, min_child_weight=0.445245952792 
[CV]  subsample=0.661522340424, learning_rate=0.154572039841, colsample_bytree=0.980279322504, max_depth=5, min_child_weight=0.144872897776, total=  14.8s
[CV] subsample=0.959273123296, learning_rate=0.123090587389, colsample_bytree=0.678380020004, max_depth=7, min_child_weight=0.445245952792 
[CV]  subsample=0.959273123296, learning_rate=0.123090587389, colsample_bytree=0.678380020004, max_depth=7, min_child_weight=0.445245952792, total=  13.8s
[CV] subsample=0.853223797753, learning_rate=0.183622275118, colsample_bytree=0.825572295947, max_depth=7, min_child_weight=0.684183674004 
[CV]  subsample=0.661522340424, learning_rate=0.154572039841, colsample_bytree=0.980279322504, max_depth=5, min_chi

In [None]:
# Build learning-curve data for the selected model on the train/test split.
# The helper returns three sequences (presumably training-set sizes plus the
# train and test Gini scores at each size -- verify against util).
trainsizes, traingini, testgini = util.learning_curves(
    best_xgb, X_train, X_test, y_train, y_test
)

In [None]:
# Print the raw learning-curve scores: train first, then test.
for gini_curve in (traingini, testgini):
    print(gini_curve)

In [None]:
# Learning-curve figure: train and test Gini plotted against training-set
# size, then written to disk as EPS.
fig, ax = plt.subplots()
ax.plot(trainsizes, traingini, label='train gini')
ax.plot(trainsizes, testgini, label='test gini')
ax.set_xlabel('Training set size')
ax.set_ylabel('Normalized Gini coefficient')
ax.set_title('XGBoost')
ax.legend()
fig.savefig('figures/learning_curves/xgboost_randomized.eps')

In [None]:
# Display any open matplotlib figures.
plt.show()

In [None]:
# Show the selected model. The original cell referenced `xgb_model`, which is
# never defined in this notebook and raises NameError on a fresh kernel.
best_xgb

In [None]:
# Class-probability predictions of the selected model on the training split.
# Uses `best_xgb` because `xgb_model` is never defined in this notebook.
best_xgb.predict_proba(X_train)