In [23]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import make_scorer
from evaluate import get_score
from xgboost import XGBClassifier as XGB
from lightgbm import LGBMClassifier as LGBM
from pprint import pprint

In [24]:
# Read data/clean_train.csv and separate the 'AdoptionSpeed' label column
# from the remaining columns, which are used as model features.
train = pd.read_csv('data/clean_train.csv')
train_y = train['AdoptionSpeed']
train_x = train.drop('AdoptionSpeed', axis = 1)

In [46]:
# Candidate values for the XGBoost grid search: each key is an estimator
# parameter name and each list holds the values to try for it. Keys with a
# single value are held fixed rather than tuned.
XGB_GRID = dict(
    max_depth = [6, 7],
    n_jobs = [-1],
    n_estimators = [200],
    reg_alpha = [0],
    reg_lambda = [0, 0.001, 0.003, 0.1],
)
# Base XGBoost classifier built with library defaults; it is passed to
# GridSearchCV together with XGB_GRID as the parameter grid.
xgb = XGB()

In [49]:
# Candidate values for the LightGBM grid search: each key is an estimator
# parameter name and each list holds the values to try for it. Keys with a
# single value are held fixed rather than tuned.
# NOTE(review): in the results table further down, num_leaves 60 and 100 give
# identical scores at max_depth 5, so some combinations look redundant --
# consider pruning the grid after confirming.
LGBM_GRID = dict(
    num_leaves = [20, 60, 100],
    max_depth = [5, 6, 7],
    n_estimators = [200],
    subsample_for_bin = [2000],
    min_child_samples = [25, 35, 45],
    reg_alpha = [0],
    reg_lambda = [0, 0.01, 0.03, 0.1],
)
# Base LightGBM classifier built with library defaults; it is passed to
# GridSearchCV together with LGBM_GRID as the parameter grid.
lgbm = LGBM()

In [48]:
# Wrap the project's `get_score` metric (imported from `evaluate`) so that
# GridSearchCV can use it; larger values are treated as better.
scorer = make_scorer(get_score, greater_is_better = True)

# Five random splits, each holding out 30% of the rows for validation.
# The seed is fixed so that every search using `ss` is evaluated on the same
# splits and the result tables can be reproduced after a kernel restart;
# without it each call to `ss.split` draws a fresh random partition.
CV_SEED = 42
ss = ShuffleSplit(n_splits = 5, test_size = 0.3, random_state = CV_SEED)

In [14]:
# Exhaustive search over XGB_GRID, scoring every candidate with `scorer` on
# the splits produced by `ss`.
# return_train_score is requested explicitly: the results table relies on the
# *_train_score columns, which current scikit-learn omits by default.
# NOTE(review): XGB_GRID also sets the estimator's n_jobs to -1 while the
# search itself runs with n_jobs = -1; this may oversubscribe CPU cores --
# confirm and keep the parallelism at one level if so.
grid_search = GridSearchCV(
    estimator = xgb,
    param_grid = XGB_GRID,
    scoring = scorer,
    cv = ss,
    n_jobs = -1,
    return_train_score = True,
)
grid_search.fit(train_x.values, train_y.values)

# One row per parameter combination; show the best-ranked candidates first.
cv_df = pd.DataFrame(grid_search.cv_results_)
cv_df.sort_values(by = 'rank_test_score')



Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_n_jobs,param_reg_alpha,param_reg_lambda,params,split0_test_score,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,6.812458,0.102364,0.224831,0.013961,5,-1,0,0.0,"{'max_depth': 5, 'n_jobs': -1, 'reg_alpha': 0,...",0.346765,...,0.353378,0.008682,12,0.484415,0.475842,0.471835,0.457933,0.486312,0.475267,0.010181
1,6.841955,0.071919,0.231715,0.024886,5,-1,0,0.001,"{'max_depth': 5, 'n_jobs': -1, 'reg_alpha': 0,...",0.35309,...,0.353685,0.010021,9,0.481882,0.473885,0.471283,0.462248,0.48846,0.475552,0.008996
2,6.952263,0.103218,0.240376,0.018413,5,-1,0,0.003,"{'max_depth': 5, 'n_jobs': -1, 'reg_alpha': 0,...",0.350658,...,0.353413,0.009004,11,0.479622,0.472924,0.471249,0.46081,0.485341,0.473989,0.008282
3,7.034726,0.13094,0.211239,0.011607,5,-1,0,0.1,"{'max_depth': 5, 'n_jobs': -1, 'reg_alpha': 0,...",0.347171,...,0.354332,0.009302,6,0.482498,0.47464,0.467009,0.4592,0.485723,0.473814,0.009775
4,8.99072,0.094852,0.26808,0.073979,6,-1,0,0.0,"{'max_depth': 6, 'n_jobs': -1, 'reg_alpha': 0,...",0.350414,...,0.356059,0.007127,2,0.55761,0.548535,0.544047,0.536883,0.554875,0.54839,0.007461
5,9.033196,0.179563,0.241547,0.01495,6,-1,0,0.001,"{'max_depth': 6, 'n_jobs': -1, 'reg_alpha': 0,...",0.346441,...,0.35505,0.006893,5,0.559425,0.547265,0.539566,0.533458,0.547703,0.545483,0.008742
6,9.054611,0.123707,0.24416,0.009712,6,-1,0,0.003,"{'max_depth': 6, 'n_jobs': -1, 'reg_alpha': 0,...",0.34936,...,0.355166,0.004724,4,0.551756,0.553889,0.542496,0.538253,0.548604,0.547,0.005821
7,9.101112,0.113295,0.240998,0.005216,6,-1,0,0.1,"{'max_depth': 6, 'n_jobs': -1, 'reg_alpha': 0,...",0.346765,...,0.354072,0.007305,8,0.556001,0.549805,0.544185,0.53291,0.543858,0.545352,0.007637
8,11.428845,0.205042,0.299131,0.01176,7,-1,0,0.0,"{'max_depth': 7, 'n_jobs': -1, 'reg_alpha': 0,...",0.351144,...,0.355728,0.007138,3,0.6407,0.632691,0.634286,0.615559,0.628497,0.630346,0.00837
9,11.344843,0.213858,0.285381,0.011017,7,-1,0,0.001,"{'max_depth': 7, 'n_jobs': -1, 'reg_alpha': 0,...",0.356334,...,0.358755,0.007766,1,0.636831,0.627405,0.629357,0.611449,0.627041,0.626417,0.008277


In [50]:
# Exhaustive search over LGBM_GRID, scoring every candidate with `scorer` on
# the splits produced by `ss`.
# return_train_score is requested explicitly: the results table relies on the
# *_train_score columns, which current scikit-learn omits by default.
# Note that this rebinds `grid_search` and `cv_df`, replacing the results of
# any earlier search stored under those names.
grid_search = GridSearchCV(
    estimator = lgbm,
    param_grid = LGBM_GRID,
    scoring = scorer,
    cv = ss,
    n_jobs = -1,
    return_train_score = True,
)
grid_search.fit(train_x.values, train_y.values)

# One row per parameter combination; show the best-ranked candidates first.
cv_df = pd.DataFrame(grid_search.cv_results_)
cv_df.sort_values(by = 'rank_test_score')



Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_min_child_samples,param_n_estimators,param_num_leaves,param_reg_alpha,param_reg_lambda,...,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
2,2.586810,0.090390,1.377802,0.616812,5,25,200,20,0,0.03,...,0.354134,0.018863,1,0.467628,0.494343,0.486502,0.501380,0.492749,0.488521,0.011469
25,3.361885,0.093860,0.773443,0.177707,5,45,200,20,0,0.01,...,0.353708,0.013695,2,0.462259,0.477810,0.478445,0.466374,0.476651,0.472308,0.006678
29,3.346321,0.340385,1.070059,0.289267,5,45,200,60,0,0.01,...,0.353080,0.016172,3,0.472206,0.484272,0.478719,0.478683,0.488802,0.480536,0.005629
33,2.975133,0.244356,1.064945,0.628296,5,45,200,100,0,0.01,...,0.353080,0.016172,3,0.472206,0.484272,0.478719,0.478683,0.488802,0.480536,0.005629
30,3.011384,0.372976,0.725788,0.221689,5,45,200,60,0,0.03,...,0.352855,0.009380,5,0.469349,0.488981,0.475119,0.480774,0.484820,0.479809,0.006951
34,2.743007,0.130837,1.577508,0.415883,5,45,200,100,0,0.03,...,0.352855,0.009380,5,0.469349,0.488981,0.475119,0.480774,0.484820,0.479809,0.006951
0,2.350501,0.031151,1.951162,0.620504,5,25,200,20,0,0,...,0.352786,0.014391,7,0.473548,0.487881,0.489862,0.491814,0.494328,0.487487,0.007289
38,3.335792,0.492478,1.264366,0.558518,6,25,200,20,0,0.03,...,0.352612,0.015734,8,0.500568,0.513901,0.506938,0.508786,0.513481,0.508735,0.004882
36,3.284814,0.775595,1.425345,0.739899,6,25,200,20,0,0,...,0.352408,0.016414,9,0.497986,0.513523,0.515441,0.515883,0.522715,0.513110,0.008176
14,3.107888,0.156893,0.747811,0.188127,5,35,200,20,0,0.03,...,0.351961,0.014709,10,0.471931,0.489290,0.478650,0.483586,0.488699,0.482431,0.006515
