In [1]:
import joblib
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

# Load the training split from CSV files. The paths are relative, so both
# files are looked up in the kernel's current working directory.
# NOTE(review): presumably train_labels.csv holds one label per row of
# train_features.csv, in the same order — confirm against whatever produced
# these files.
tr_features = pd.read_csv('train_features.csv')
tr_labels = pd.read_csv('train_labels.csv')

  from pandas import MultiIndex, Int64Index


In [2]:
def print_results(results):
    """Print a text summary of a fitted hyper-parameter search.

    The first line reports ``results.best_params_`` and is followed by a
    blank line. After that, one line is printed per entry of
    ``results.cv_results_``: the mean test score rounded to 3 decimals, twice
    the test-score standard deviation rounded to 3 decimals, and the
    parameter combination that produced them.

    Args:
        results: An object exposing ``best_params_`` and a ``cv_results_``
            mapping with the keys ``'mean_test_score'``, ``'std_test_score'``
            and ``'params'`` (in this notebook, a fitted ``GridSearchCV``).

    Returns:
        None. The summary is written to standard output.
    """
    header = 'BEST PARAMS: {}\n'.format(results.best_params_)
    print(header)

    cv_table = results.cv_results_
    avg_scores = cv_table['mean_test_score']
    score_stds = cv_table['std_test_score']
    candidates = cv_table['params']

    for avg_score, score_std, candidate in zip(avg_scores, score_stds, candidates):
        # The spread is reported as two standard deviations.
        spread = round(score_std * 2, 3)
        line = '{} (+/-{}) for {}'.format(round(avg_score, 3), spread, candidate)
        print(line)

In [3]:
# Exhaustive hyper-parameter search for the XGBoost classifier.
xgb_model = xgb.XGBClassifier()
parameters = {
    'n_estimators': [5, 50, 250, 500],
    # Single-value entry: every candidate uses the same objective.
    'objective': ['binary:logistic'],
    # XGBoost documents learning_rate (eta) as a shrinkage factor in [0, 1].
    # The previous grid also tried 10 and 100, which lie outside that range
    # and only spent fits on settings that cannot be competitive.
    'learning_rate': [0.01, 0.1, 1],
}

# 5-fold cross-validation over every combination in `parameters`
# (4 x 1 x 3 = 12 candidates, i.e. 60 fits plus the final refit).
cv = GridSearchCV(xgb_model, parameters, cv=5)
# `.values.ravel()` flattens the labels frame into a 1-D array for fit().
cv.fit(tr_features, tr_labels.values.ravel())

print_results(cv)







BEST PARAMS: {'learning_rate': 0.01, 'n_estimators': 250, 'objective': 'binary:logistic'}

0.561 (+/-0.016) for {'learning_rate': 0.01, 'n_estimators': 5, 'objective': 'binary:logistic'}
0.567 (+/-0.012) for {'learning_rate': 0.01, 'n_estimators': 50, 'objective': 'binary:logistic'}
0.567 (+/-0.008) for {'learning_rate': 0.01, 'n_estimators': 250, 'objective': 'binary:logistic'}
0.565 (+/-0.012) for {'learning_rate': 0.01, 'n_estimators': 500, 'objective': 'binary:logistic'}
0.567 (+/-0.012) for {'learning_rate': 0.1, 'n_estimators': 5, 'objective': 'binary:logistic'}
0.566 (+/-0.012) for {'learning_rate': 0.1, 'n_estimators': 50, 'objective': 'binary:logistic'}
0.549 (+/-0.013) for {'learning_rate': 0.1, 'n_estimators': 250, 'objective': 'binary:logistic'}
0.539 (+/-0.013) for {'learning_rate': 0.1, 'n_estimators': 500, 'objective': 'binary:logistic'}
0.552 (+/-0.015) for {'learning_rate': 1, 'n_estimators': 5, 'objective': 'binary:logistic'}
0.528 (+/-0.025) for {'learning_rate': 1, 

In [4]:
# Bare expression so the notebook renders the repr of the estimator that the
# grid search selected (all of its hyper-parameters, including defaults).
cv.best_estimator_

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.01, max_delta_step=0,
              max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=250, n_jobs=4,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [5]:
# Persist the selected estimator to 'XGB_model.pkl' (relative path, so it is
# written to the kernel's current working directory). joblib.dump returns the
# list of file names it wrote, which is what this cell displays.
# NOTE: the file is a pickle — only load it back from a trusted location.
joblib.dump(cv.best_estimator_, 'XGB_model.pkl')

['XGB_model.pkl']