In [31]:
from sklearn.datasets import make_classification
seed=342
X, y = make_classification(n_samples=1000, n_features=20, n_informative=8, n_redundant=3, n_repeated=2, random_state=seed)

In [32]:
from sklearn.cross_validation import StratifiedKFold
cv=StratifiedKFold(y,n_folds=10,shuffle=True,random_state=seed)

In [33]:
import numpy as np
params_grid={
    'n_estimators':[5,10,25,50],
    'max_depth':[1,2,3],
    'learning_rate':np.linspace(1e-16,1,3)
}

In [34]:
from xgboost.sklearn import XGBClassifier
params_fixed={
    'objective':'binary:logistic',
    'silent':1
}

In [35]:
from sklearn.grid_search import GridSearchCV,RandomizedSearchCV
bst_grid=GridSearchCV(XGBClassifier(**params_fixed,seed=seed),param_grid=params_grid,cv=cv,scoring='accuracy')

In [36]:
bst_grid.fit(X,y)

GridSearchCV(cv=sklearn.cross_validation.StratifiedKFold(labels=[1 0 ... 0 1], n_folds=10, shuffle=True, random_state=342),
       error_score='raise',
       estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=342, silent=1,
       subsample=1),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'n_estimators': [5, 10, 25, 50], 'max_depth': [1, 2, 3], 'learning_rate': array([1.e-16, 5.e-01, 1.e+00])},
       pre_dispatch='2*n_jobs', refit=True, scoring='accuracy', verbose=0)

In [37]:
bst_grid.grid_scores_

[mean: 0.50400, std: 0.00200, params: {'learning_rate': 1e-16, 'max_depth': 1, 'n_estimators': 5},
 mean: 0.50400, std: 0.00200, params: {'learning_rate': 1e-16, 'max_depth': 1, 'n_estimators': 10},
 mean: 0.50400, std: 0.00200, params: {'learning_rate': 1e-16, 'max_depth': 1, 'n_estimators': 25},
 mean: 0.50400, std: 0.00200, params: {'learning_rate': 1e-16, 'max_depth': 1, 'n_estimators': 50},
 mean: 0.50400, std: 0.00200, params: {'learning_rate': 1e-16, 'max_depth': 2, 'n_estimators': 5},
 mean: 0.50400, std: 0.00200, params: {'learning_rate': 1e-16, 'max_depth': 2, 'n_estimators': 10},
 mean: 0.50400, std: 0.00200, params: {'learning_rate': 1e-16, 'max_depth': 2, 'n_estimators': 25},
 mean: 0.50400, std: 0.00200, params: {'learning_rate': 1e-16, 'max_depth': 2, 'n_estimators': 50},
 mean: 0.50400, std: 0.00200, params: {'learning_rate': 1e-16, 'max_depth': 3, 'n_estimators': 5},
 mean: 0.50400, std: 0.00200, params: {'learning_rate': 1e-16, 'max_depth': 3, 'n_estimators': 10},
 me

In [38]:
bst_grid.best_score_,bst_grid.best_params_

(0.872, {'learning_rate': 0.5, 'max_depth': 3, 'n_estimators': 50})

In [39]:
#using randomized search

from scipy.stats import randint, uniform
params_dist_grid={
    'max_depth':[1,2,3,4],
    'gamma':[0,0.5,1],
    'n_estimators':randint(1,1001),
    'learning_rate':uniform(),
    'subsample':uniform(),
    'colsample_bytree':uniform()
}

In [43]:
rs_grid=RandomizedSearchCV(XGBClassifier(**params_fixed,seed=seed),
                           param_distributions=params_dist_grid,random_state=seed,
                           n_iter=10,cv=cv,scoring='accuracy')

In [44]:
rs_grid.fit(X,y)

RandomizedSearchCV(cv=sklearn.cross_validation.StratifiedKFold(labels=[1 0 ... 0 1], n_folds=10, shuffle=True, random_state=342),
          error_score='raise',
          estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=342, silent=1,
       subsample=1),
          fit_params={}, iid=True, n_iter=10, n_jobs=1,
          param_distributions={'max_depth': [1, 2, 3, 4], 'gamma': [0, 0.5, 1], 'n_estimators': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f499b4f2278>, 'learning_rate': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f499b4f2da0>, 'subsample': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f499b4f2860>, 'colsample_bytree': <scipy.stats._di

In [45]:
rs_grid.grid_scores_

[mean: 0.86500, std: 0.03379, params: {'colsample_bytree': 0.5278159882398795, 'gamma': 0, 'learning_rate': 0.26653471854090915, 'max_depth': 3, 'n_estimators': 634, 'subsample': 0.5217456290851137},
 mean: 0.80600, std: 0.02538, params: {'colsample_bytree': 0.6006045682293562, 'gamma': 1, 'learning_rate': 0.17437351476260976, 'max_depth': 1, 'n_estimators': 605, 'subsample': 0.1089928063068154},
 mean: 0.86600, std: 0.03216, params: {'colsample_bytree': 0.6525771618348499, 'gamma': 0, 'learning_rate': 0.14504787754035398, 'max_depth': 4, 'n_estimators': 606, 'subsample': 0.2568924710760617},
 mean: 0.80000, std: 0.03050, params: {'colsample_bytree': 0.925553938897666, 'gamma': 1, 'learning_rate': 0.5607144850720772, 'max_depth': 1, 'n_estimators': 135, 'subsample': 0.5310234799648094},
 mean: 0.79400, std: 0.03434, params: {'colsample_bytree': 0.4270399689639486, 'gamma': 1, 'learning_rate': 0.7844610802917251, 'max_depth': 2, 'n_estimators': 119, 'subsample': 0.2155284802082965},
 me

In [46]:
rs_grid.best_estimator_

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.6525771618348499, gamma=0,
       learning_rate=0.14504787754035398, max_delta_step=0, max_depth=4,
       min_child_weight=1, missing=None, n_estimators=606, n_jobs=1,
       nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=342, silent=1,
       subsample=0.2568924710760617)

In [47]:
rs_grid.best_score_,rs_grid.best_params_

(0.866,
 {'colsample_bytree': 0.6525771618348499,
  'gamma': 0,
  'learning_rate': 0.14504787754035398,
  'max_depth': 4,
  'n_estimators': 606,
  'subsample': 0.2568924710760617})