In [1]:
from pandas import read_csv
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
import matplotlib

In [2]:
# Choose the 'Agg' backend before pyplot is imported. The figure built later in
# this notebook is written to disk with savefig rather than shown on screen.
matplotlib.use('Agg')
from matplotlib import pyplot
import numpy

In [3]:
# Read the training data from 'train.csv' (path is relative to the working
# directory) and keep its raw values as an array, which the next cell slices
# by column position.
data = read_csv('train.csv')
dataset = data.values

In [4]:
# Feature matrix = columns 0..93, target = the column at index 94.
# NOTE(review): column 0 is used as a feature. If it is a row identifier it can
# leak the target: the encoded labels displayed further down appear grouped by
# class in file order, and the reported log loss is very close to 0. Confirm
# against the CSV header and drop the column if that is the case.
X, y = dataset[:, :94], dataset[:, 94]

# Map the raw target values to integer class ids.
label_encoded_y = LabelEncoder().fit_transform(y)


In [12]:
# Candidate values for the two tuned hyper-parameters; each list currently
# holds a single value, so the search evaluates exactly one combination.
max_depth = [2]
n_estimators = [50]

# Estimator handed to the grid search, constructed with no arguments.
model = XGBClassifier()

print(max_depth)

[2]


In [13]:
# Search space: every combination of the max_depth and n_estimators candidates.
param_grid = {
    'max_depth': max_depth,
    'n_estimators': n_estimators,
}

# 10-fold stratified CV, shuffled with a fixed seed so the folds are repeatable.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)

# Candidates are ranked by negated log loss, so scores are <= 0 and values
# closer to 0 are better.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring="neg_log_loss",
    n_jobs=-1,
    cv=kfold,
    verbose=1,
)

In [14]:
# Run the cross-validated search on the features and encoded labels.
# This is the slow step: the recorded run needed about 4.4 minutes for 10 fits.
grid_result = grid_search.fit(X, label_encoded_y)

Fitting 10 folds for each of 1 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:  4.4min finished


In [15]:
# Sanity check: display the integer-encoded target that was passed to the search.
label_encoded_y

array([0, 0, 0, ..., 8, 8, 8])

In [16]:
# Report the winning candidate. The score is the mean negated log loss across
# the CV folds, so it is negative and closer to 0 is better.
print("Best: %f using %s" % (
    grid_result.best_score_,
    grid_result.best_params_,
))


Best: -0.012115 using {'max_depth': 2, 'n_estimators': 50}


In [17]:
# Pull the per-candidate mean score, score standard deviation and parameter
# dict out of the search results, in that order.
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)


In [18]:
# One line per evaluated candidate: mean score, (standard deviation), parameters.
for mean, stdev, param in zip(means, stds, params):
    print(
        "%f (%f) with: %r" % (mean, stdev, param)
    )

-0.012115 (0.000634) with: {'max_depth': 2, 'n_estimators': 50}


In [19]:
# Arrange the flat list of mean scores as a grid with one row per max_depth
# value and one column per n_estimators value.
# NOTE(review): assumes the results list candidates grouped by max_depth with
# n_estimators varying fastest - confirm against the search's candidate order.
scores = numpy.reshape(
    numpy.array(means),
    (len(max_depth), len(n_estimators)),
)

In [20]:
# Draw one curve per max_depth value: score (y) against n_estimators (x).
for i, value in enumerate(max_depth):
    pyplot.plot(
        n_estimators,
        scores[i],
        label='depth: ' + str(value),
    )

In [21]:
# Label and save pyplot's current figure. This relies on the curves drawn by
# the preceding plotting loop still being on that figure.
pyplot.legend()
# Axis label spelling fixed (was 'n_estmators').
pyplot.xlabel('n_estimators')
# The plotted values are mean scores from scoring="neg_log_loss", i.e. negated
# log loss (negative, closer to 0 is better), so the axis label says so.
pyplot.ylabel('Negative Log Loss')
# Written relative to the working directory.
pyplot.savefig('n_estimators_vs_max_depth.png')

In [22]:
# Display the score grid that was plotted (rows: max_depth, columns: n_estimators).
scores

array([[-0.01211467]])