In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import roc_auc_score
from category_encoders.target_encoder import TargetEncoder
from xgboost import XGBRegressor

In [None]:
import matplotlib.pyplot as plt
from IPython.display import FileLink
import seaborn as sns
import pickle

In [None]:
# Show the working directory, its contents, and a recursive listing of ../input/.
!pwd
!ls -l
!ls -lR ../input/

In [None]:
# Disabled alternative: read train.h5 / test.h5 instead of the *_enc.h5 files
# that the next cell loads.
#train_df = pd.read_hdf('../input/mlcourse-ai-fall-2019-xgboost/train.h5')
#test_df = pd.read_hdf('../input/mlcourse-ai-fall-2019-xgboost/test.h5')

In [None]:
# Read the train features, test features and target from their HDF5 files.
train_df, test_df, y = (
    pd.read_hdf(h5_path)
    for h5_path in (
        '../input/mlcourse-ai-fall-2019-xgboost/train_enc.h5',
        '../input/mlcourse-ai-fall-2019-xgboost/test_enc.h5',
        '../input/mlcourse-ai-fall-2019-xgboost/y.h5',
    )
)

**Train XGBoost**

In [None]:
# measure performance (GPU)
%%time
XGBRegressor(tree_method='gpu_hist').fit(train_df[:10000], y[:10000])

In [None]:
# measure performance (CPU)
%%time
XGBRegressor().fit(train_df[:10000], y[:10000])

Use a different **random_state** here.

In [None]:
# Hold out 30% of the rows for validation, stratifying the split on the target.
X_train, X_valid, y_train, y_valid = train_test_split(
    train_df,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)
X_train.shape

In [None]:
%%time
params = {'max_depth': range(1, 11), 'n_estimators': range(1, 101, 10)}
grid = GridSearchCV(XGBRegressor(random_state=17, tree_method='gpu_hist'), params, cv=3, scoring='roc_auc', verbose=True)
grid.fit(X_train, y_train)
print(grid.best_score_)
print(roc_auc_score(y_valid, grid.best_estimator_.predict(X_valid)))
plt.figure(figsize=(16,4))
plt.plot([str(x) for x in grid.cv_results_['params']], grid.cv_results_['mean_test_score'])
plt.xticks(rotation=90)
plt.title('ROC AUC / train params')
plt.show()