Skip to content

Commit

Permalink
[python] add flag of displaying train loss for lgb.cv() (#2089)
Browse files Browse the repository at this point in the history
* [python] displaying train loss during training with lgb.cv

* modifying only display running type when disp_train_loss==True

* Add test for display train loss

* del .idea files

* Rename disp_train_loss to show_train_loss and revise comment.

* Change arg name show_train_loss -> eval_train_metric, and add a test item.

* Modifying comment of eval_train_metric.
  • Loading branch information
matsuken92 authored and henry0312 committed Apr 16, 2019
1 parent 8ffd8d8 commit ca85b67
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 7 deletions.
25 changes: 18 additions & 7 deletions python-package/lightgbm/engine.py
Expand Up @@ -267,7 +267,8 @@ def handler_function(*args, **kwargs):
return handler_function


def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True, shuffle=True):
def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True,
shuffle=True, eval_train_metric=False):
"""Make a n-fold list of Booster from random indices."""
full_data = full_data.construct()
num_data = full_data.num_data()
Expand Down Expand Up @@ -317,19 +318,25 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
else:
tparam = params
cvbooster = Booster(tparam, train_set)
if eval_train_metric:
cvbooster.add_valid(train_set, 'train')
cvbooster.add_valid(valid_set, 'valid')
ret.append(cvbooster)
return ret


def _agg_cv_result(raw_results):
def _agg_cv_result(raw_results, eval_train_metric=False):
"""Aggregate cross-validation results."""
cvmap = collections.defaultdict(list)
metric_type = {}
for one_result in raw_results:
for one_line in one_result:
metric_type[one_line[1]] = one_line[3]
cvmap[one_line[1]].append(one_line[2])
if eval_train_metric:
key = "{} {}".format(one_line[0], one_line[1])
else:
key = one_line[1]
metric_type[key] = one_line[3]
cvmap[key].append(one_line[2])
return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]


Expand All @@ -339,7 +346,7 @@ def cv(params, train_set, num_boost_round=100,
feature_name='auto', categorical_feature='auto',
early_stopping_rounds=None, fpreproc=None,
verbose_eval=None, show_stdv=True, seed=0,
callbacks=None):
callbacks=None, eval_train_metric=False):
"""Perform the cross-validation with given paramaters.
Parameters
Expand Down Expand Up @@ -412,6 +419,9 @@ def cv(params, train_set, num_boost_round=100,
callbacks : list of callables or None, optional (default=None)
List of callback functions that are applied at each iteration.
See Callbacks in Python API for more information.
eval_train_metric : bool, optional (default=False)
Whether to display the train metric in progress.
The score of the metric is calculated again after each training step, so there is some impact on performance.
Returns
-------
Expand Down Expand Up @@ -459,7 +469,8 @@ def cv(params, train_set, num_boost_round=100,
results = collections.defaultdict(list)
cvfolds = _make_n_folds(train_set, folds=folds, nfold=nfold,
params=params, seed=seed, fpreproc=fpreproc,
stratified=stratified, shuffle=shuffle)
stratified=stratified, shuffle=shuffle,
eval_train_metric=eval_train_metric)

# setup callbacks
if callbacks is None:
Expand Down Expand Up @@ -489,7 +500,7 @@ def cv(params, train_set, num_boost_round=100,
end_iteration=num_boost_round,
evaluation_result_list=None))
cvfolds.update(fobj=fobj)
res = _agg_cv_result(cvfolds.eval_valid(feval))
res = _agg_cv_result(cvfolds.eval_valid(feval), eval_train_metric)
for _, key, mean, _, std in res:
results[key + '-mean'].append(mean)
results[key + '-stdv'].append(std)
Expand Down
10 changes: 10 additions & 0 deletions tests/python_package_test/test_engine.py
Expand Up @@ -465,6 +465,16 @@ def test_cv(self):
callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])
self.assertIn('l1-mean', cv_res)
self.assertEqual(len(cv_res['l1-mean']), 10)
# enable display training loss
cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
nfold=3, stratified=False, shuffle=False,
metrics='l1', verbose_eval=False, eval_train_metric=True)
self.assertIn('train l1-mean', cv_res)
self.assertIn('valid l1-mean', cv_res)
self.assertNotIn('train l2-mean', cv_res)
self.assertNotIn('valid l2-mean', cv_res)
self.assertEqual(len(cv_res['train l1-mean']), 10)
self.assertEqual(len(cv_res['valid l1-mean']), 10)
# self defined folds
tss = TimeSeriesSplit(3)
folds = tss.split(X_train)
Expand Down

0 comments on commit ca85b67

Please sign in to comment.