[python] Bug fix for first_metric_only on earlystopping. #2209

Merged

Commits (72)
0c9c77c
Bug fix for first_metric_only if the first metric is train metric.
matsuken92 Jun 1, 2019
130fe38
Update bug fix for feval issue.
matsuken92 Jun 1, 2019
7ab1a59
Disable feval for first_metric_only.
matsuken92 Jun 1, 2019
ba8a5aa
Additional test items.
matsuken92 Jun 1, 2019
25850fa
Fix wrong assertEqual settings & formating.
matsuken92 Jun 1, 2019
fddf8da
Change dataset of test.
matsuken92 Jun 1, 2019
6b71ebc
Fix random seed for test.
matsuken92 Jun 1, 2019
979f4df
Modiry assumed test result due to different sklearn verion between CI…
matsuken92 Jun 1, 2019
5e68ae9
Remove f-string
matsuken92 Jun 1, 2019
0f196e2
Applying variable assumed test result for test.
matsuken92 Jun 1, 2019
c0d61fa
Fix flake8 error.
matsuken92 Jun 1, 2019
0e91956
Modifying in accordance with review comments.
matsuken92 Jun 1, 2019
6f30b81
Modifying for pylint.
matsuken92 Jun 1, 2019
3770cc1
simplified tests
StrikerRUS Jun 2, 2019
c4e0af5
Deleting error criteria `if eval_metric is None`.
matsuken92 Jun 3, 2019
2f4e2b0
Delete test items of classification.
matsuken92 Jun 3, 2019
9387197
Simplifying if condition.
matsuken92 Jun 7, 2019
5aeb2bd
Applying first_metric_only for sklearn wrapper.
matsuken92 Jun 10, 2019
79ba017
Merge branch 'master' into bugfix/first_metric_only_train_metric
matsuken92 Jun 10, 2019
c40408c
Modifying test_sklearn for comforming to python 2.x
matsuken92 Jun 10, 2019
6a70b0c
Merge branch 'bugfix/first_metric_only_train_metric' of https://githu…
matsuken92 Jun 10, 2019
fe7d586
Fix flake8 error.
matsuken92 Jun 10, 2019
71c1bc2
Additional fix for sklearn and add tests.
matsuken92 Jun 11, 2019
3e956ea
Bug fix and add test cases.
matsuken92 Jun 17, 2019
0338bc7
some refactor
StrikerRUS Jun 18, 2019
75d7c57
fixed lint
StrikerRUS Jun 18, 2019
4645126
fixed lint
StrikerRUS Jun 18, 2019
60233bb
Fix duplicated metrics scores to pass the test.
matsuken92 Jun 29, 2019
f3f1e83
Fix the case first_metric_only not in params.
matsuken92 Jun 29, 2019
e054f97
Converting metrics aliases.
matsuken92 Jul 2, 2019
4e62ef7
Add comment.
matsuken92 Jul 3, 2019
3b154b6
Modify comment for pylint.
matsuken92 Jul 3, 2019
6dc7e85
Modify comment for pydocstyle.
matsuken92 Jul 3, 2019
1dc5397
Using split test set for two eval_set.
matsuken92 Jul 6, 2019
ebc97b3
added test case for metric aliases and length checks
StrikerRUS Jul 6, 2019
f7f0dfe
minor style fixes
StrikerRUS Jul 6, 2019
4221b8a
fixed rmse name and alias position
StrikerRUS Jul 7, 2019
5470265
Fix the case metric=[]
matsuken92 Jul 10, 2019
e292b39
Fix using env.model._train_data_name
matsuken92 Jul 10, 2019
ffa95b8
Fix wrong test condition.
matsuken92 Jul 10, 2019
3403f7b
Move initial process to _init() func.
matsuken92 Jul 10, 2019
43ea2df
Merge remote-tracking branch 'upstream/master' into bugfix/first_metr…
matsuken92 Jul 27, 2019
b509afa
Modify test setting for test_sklearn & training data matching on call…
matsuken92 Jul 27, 2019
c4e4b33
Support composite name metrics.
matsuken92 Jul 27, 2019
2f1578c
Remove metric check process & reduce redundant test cases.
matsuken92 Jul 27, 2019
45fc5eb
Revised according to the matters pointed out on a review.
matsuken92 Aug 29, 2019
f270258
increased code readability
StrikerRUS Aug 30, 2019
df03f4c
Fix the issue of order of validation set.
matsuken92 Sep 1, 2019
37770b0
Merge branch 'bugfix/first_metric_only_train_metric' of https://githu…
matsuken92 Sep 1, 2019
0a73a67
Changing to OrderdDict from default dict for score result.
matsuken92 Sep 1, 2019
6209c4a
added missed check in cv function for first_metric_only and feval co-…
StrikerRUS Sep 1, 2019
ea0312a
keep order only for metrics but not for datasets in best_score
StrikerRUS Sep 1, 2019
c881171
move OrderedDict initialization to init phase
StrikerRUS Sep 1, 2019
386fe1c
fixed minor printing issues
StrikerRUS Sep 1, 2019
8fe0469
move first metric detection to init phase and split can be performed …
StrikerRUS Sep 1, 2019
13737ac
split only once during callback
StrikerRUS Sep 1, 2019
5128b34
removed excess code
StrikerRUS Sep 1, 2019
ca4fd0c
fixed typo in variable name and squashed ifs
StrikerRUS Sep 1, 2019
cb9e327
use setdefault
StrikerRUS Sep 1, 2019
97004e0
hotfix
StrikerRUS Sep 1, 2019
19319c3
fixed failing test
StrikerRUS Sep 1, 2019
58a800c
refined tests
StrikerRUS Sep 2, 2019
f5a2b74
refined sklearn test
StrikerRUS Sep 3, 2019
b7a03e7
Making "feval" effective on early stopping.
matsuken92 Sep 9, 2019
47b7a23
Merge branch 'master' into bugfix/first_metric_only_train_metric
matsuken92 Sep 9, 2019
5c99e7e
fixed conflicts
StrikerRUS Sep 10, 2019
15a5fc2
allow feval and first_metric_only for cv
StrikerRUS Sep 10, 2019
d20b338
removed unused code
StrikerRUS Sep 10, 2019
c3fbf6b
added tests for feval
StrikerRUS Sep 10, 2019
cbaadbe
fixed printing
StrikerRUS Sep 10, 2019
a2d6449
add note about whitespaces in feval name
StrikerRUS Sep 10, 2019
88050da
Modifying final iteration process in case valid set is training data.
matsuken92 Sep 11, 2019
Files changed
12 changes: 6 additions & 6 deletions python-package/lightgbm/basic.py
@@ -1628,7 +1628,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None,
         self.handle = None
         self.network = False
         self.__need_reload_eval_info = True
-        self.__train_data_name = "training"
+        self._train_data_name = "training"
         self.__attr = {}
         self.__set_objective_to_none = False
         self.best_iteration = -1
@@ -1817,7 +1817,7 @@ def set_train_data_name(self, name):
         self : Booster
             Booster with set training Dataset name.
         """
-        self.__train_data_name = name
+        self._train_data_name = name
         return self

     def add_valid(self, data, name):
@@ -2044,7 +2044,7 @@ def eval(self, data, name, feval=None):
                 eval_data : Dataset
                     The evaluation dataset.
                 eval_name : string
-                    The name of evaluation function.
+                    The name of evaluation function (without whitespaces).
                 eval_result : float
                     The eval result.
                 is_higher_better : bool
@@ -2090,7 +2090,7 @@ def eval_train(self, feval=None):
                 train_data : Dataset
                     The training dataset.
                 eval_name : string
-                    The name of evaluation function.
+                    The name of evaluation function (without whitespaces).
                 eval_result : float
                     The eval result.
                 is_higher_better : bool
@@ -2104,7 +2104,7 @@ def eval_train(self, feval=None):
         result : list
             List with evaluation results.
         """
-        return self.__inner_eval(self.__train_data_name, 0, feval)
+        return self.__inner_eval(self._train_data_name, 0, feval)

     def eval_valid(self, feval=None):
         """Evaluate for validation data.
@@ -2121,7 +2121,7 @@ def eval_valid(self, feval=None):
                 valid_data : Dataset
                     The validation dataset.
                 eval_name : string
-                    The name of evaluation function.
+                    The name of evaluation function (without whitespaces).
                 eval_result : float
                     The eval result.
                 is_higher_better : bool
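The only behavioural change in basic.py is the rename of the name-mangled __train_data_name to _train_data_name, which is what lets the early-stopping callback below compare each evaluation entry's dataset name with the training data name through env.model._train_data_name. A minimal sketch of that attribute, assuming a post-merge LightGBM build; the data and the 'my_train' name are made up:

    import numpy as np
    import lightgbm as lgb

    X, y = np.random.rand(100, 5), np.random.rand(100)
    booster = lgb.Booster(params={'objective': 'regression', 'verbose': -1},
                          train_set=lgb.Dataset(X, y))
    booster.set_train_data_name('my_train')  # stored in the renamed _train_data_name attribute
    print(booster._train_data_name)          # -> 'my_train'
    # The old double-underscore attribute was mangled to _Booster__train_data_name,
    # so a callback could not reach it as env.model.__train_data_name.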
41 changes: 29 additions & 12 deletions python-package/lightgbm/callback.py
@@ -89,12 +89,13 @@ def record_evaluation(eval_result):
         The callback that records the evaluation history into the passed dictionary.
     """
     if not isinstance(eval_result, dict):
-        raise TypeError('Eval_result should be a dictionary')
+        raise TypeError('eval_result should be a dictionary')
     eval_result.clear()

     def _init(env):
-        for data_name, _, _, _ in env.evaluation_result_list:
-            eval_result.setdefault(data_name, collections.defaultdict(list))
+        for data_name, eval_name, _, _ in env.evaluation_result_list:
+            eval_result.setdefault(data_name, collections.OrderedDict())
+            eval_result[data_name].setdefault(eval_name, [])

     def _callback(env):
         if not eval_result:
@@ -132,7 +133,7 @@ def _callback(env):
             if key in ['num_class', 'num_classes',
                        'boosting', 'boost', 'boosting_type',
                        'metric', 'metrics', 'metric_types']:
-                raise RuntimeError("cannot reset {} during training".format(repr(key)))
+                raise RuntimeError("Cannot reset {} during training".format(repr(key)))
             if isinstance(value, list):
                 if len(value) != env.end_iteration - env.begin_iteration:
                     raise ValueError("Length of list {} has to equal to 'num_boost_round'."
@@ -182,6 +183,7 @@ def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
     best_score_list = []
     cmp_op = []
     enabled = [True]
+    first_metric = ['']

     def _init(env):
         enabled[0] = not any((boost_alias in env.params
@@ -196,9 +198,11 @@ def _init(env):
                              'at least one dataset and eval metric is required for evaluation')

         if verbose:
-            msg = "Training until validation scores don't improve for {} rounds."
+            msg = "Training until validation scores don't improve for {} rounds"
             print(msg.format(stopping_rounds))

+        # split is needed for "<dataset type> <metric>" case (e.g. "train l1")
+        first_metric[0] = env.evaluation_result_list[0][1].split(" ")[-1]
         for eval_ret in env.evaluation_result_list:
             best_iter.append(0)
             best_score_list.append(None)
@@ -209,6 +213,15 @@ def _init(env):
                 best_score.append(float('inf'))
                 cmp_op.append(lt)

+    def _final_iteration_check(env, eval_name_splitted, i):
+        if env.iteration == env.end_iteration - 1:
+            if verbose:
+                print('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % (
+                    best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
+                if first_metric_only:
+                    print("Evaluated only: {}".format(eval_name_splitted[-1]))
+            raise EarlyStopException(best_iter[i], best_score_list[i])
+
     def _callback(env):
         if not cmp_op:
             _init(env)
@@ -220,17 +233,21 @@ def _callback(env):
                 best_score[i] = score
                 best_iter[i] = env.iteration
                 best_score_list[i] = env.evaluation_result_list
+            # split is needed for "<dataset type> <metric>" case (e.g. "train l1")
+            eval_name_splitted = env.evaluation_result_list[i][1].split(" ")
+            if first_metric_only and first_metric[0] != eval_name_splitted[-1]:
+                continue  # use only the first metric for early stopping
+            if ((env.evaluation_result_list[i][0] == "cv_agg" and eval_name_splitted[0] == "train"
+                 or env.evaluation_result_list[i][0] == env.model._train_data_name)):
+                _final_iteration_check(env, eval_name_splitted, i)
+                continue  # train data for lgb.cv or sklearn wrapper (underlying lgb.train)
             elif env.iteration - best_iter[i] >= stopping_rounds:
                 if verbose:
                     print('Early stopping, best iteration is:\n[%d]\t%s' % (
                         best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
+                    if first_metric_only:
+                        print("Evaluated only: {}".format(eval_name_splitted[-1]))
                 raise EarlyStopException(best_iter[i], best_score_list[i])
-            if env.iteration == env.end_iteration - 1:
-                if verbose:
-                    print('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % (
-                        best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
-                raise EarlyStopException(best_iter[i], best_score_list[i])
-            if first_metric_only:  # the only first metric is used for early stopping
-                break
+            _final_iteration_check(env, eval_name_splitted, i)
     _callback.order = 30
     return _callback
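Taken together, the callback now records the first reported metric during _init and, inside _callback, skips non-first metrics when first_metric_only=True as well as any entry evaluated on the training data (the cv_agg "train" case or env.model._train_data_name). A usage sketch against the Python API, assuming a post-merge build; the data, parameter values, and dataset names are illustrative:

    import numpy as np
    import lightgbm as lgb

    X, y = np.random.rand(500, 10), np.random.rand(500)
    train = lgb.Dataset(X[:400], y[:400])
    valid = lgb.Dataset(X[400:], y[400:], reference=train)

    params = {'objective': 'regression', 'metric': ['l1', 'l2'], 'verbose': -1}
    booster = lgb.train(
        params, train,
        num_boost_round=200,
        valid_sets=[train, valid],            # training entries no longer drive stopping
        valid_names=['training', 'valid'],
        callbacks=[lgb.early_stopping(stopping_rounds=10, first_metric_only=True)])
    # Only the first reported metric on 'valid' decides the stopping point;
    # best_score keeps metrics in evaluation order thanks to the OrderedDict change.
    print(booster.best_iteration, dict(booster.best_score['valid']))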
9 changes: 5 additions & 4 deletions python-package/lightgbm/engine.py
@@ -65,7 +65,7 @@ def train(params, train_set, num_boost_round=100,
             train_data : Dataset
                 The training dataset.
             eval_name : string
-                The name of evaluation function.
+                The name of evaluation function (without whitespaces).
             eval_result : float
                 The eval result.
             is_higher_better : bool
@@ -266,7 +266,7 @@ def train(params, train_set, num_boost_round=100,
             booster.best_iteration = earlyStopException.best_iteration + 1
             evaluation_result_list = earlyStopException.best_score
             break
-    booster.best_score = collections.defaultdict(dict)
+    booster.best_score = collections.defaultdict(collections.OrderedDict)
     for dataset_name, eval_name, score, _ in evaluation_result_list:
         booster.best_score[dataset_name][eval_name] = score
     if not keep_training_booster:
@@ -356,7 +356,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi

 def _agg_cv_result(raw_results, eval_train_metric=False):
     """Aggregate cross-validation results."""
-    cvmap = collections.defaultdict(list)
+    cvmap = collections.OrderedDict()
     metric_type = {}
     for one_result in raw_results:
         for one_line in one_result:
@@ -365,6 +365,7 @@ def _agg_cv_result(raw_results, eval_train_metric=False):
             else:
                 key = one_line[1]
             metric_type[key] = one_line[3]
+            cvmap.setdefault(key, [])
             cvmap[key].append(one_line[2])
     return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]

@@ -429,7 +430,7 @@ def cv(params, train_set, num_boost_round=100,
             train_data : Dataset
                 The training dataset.
             eval_name : string
-                The name of evaluation function.
+                The name of evaluation function (without whitespaces).
             eval_result : float
                 The eval result.
             is_higher_better : bool
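Switching _agg_cv_result from defaultdict(list) to an OrderedDict with setdefault keeps metrics in the order the folds reported them, so the "first" metric aggregated by lgb.cv is genuinely the first one evaluated. A toy sketch of that aggregation idea, simplified from the diffed function; the fold results below are made up:

    import collections
    import numpy as np

    def agg_cv_result(raw_results):
        """Aggregate per-fold (dataset, metric, score, is_higher_better) tuples, keeping metric order."""
        cvmap = collections.OrderedDict()
        metric_type = {}
        for one_result in raw_results:
            for _, eval_name, score, is_higher_better in one_result:
                metric_type[eval_name] = is_higher_better
                cvmap.setdefault(eval_name, [])
                cvmap[eval_name].append(score)
        return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]

    fold_results = [
        [('valid', 'l2', 0.25, False), ('valid', 'l1', 0.40, False)],
        [('valid', 'l2', 0.30, False), ('valid', 'l1', 0.50, False)],
    ]
    print(agg_cv_result(fold_results))
    # l2 is aggregated first (mean ~0.275, std ~0.025), then l1 (mean ~0.45, std ~0.05),
    # matching the per-fold evaluation order instead of an arbitrary dict order.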
9 changes: 5 additions & 4 deletions python-package/lightgbm/sklearn.py
@@ -121,7 +121,7 @@ def __init__(self, func):
                 group : array-like
                     Group/query data, used for ranking task.
                 eval_name : string
-                    The name of evaluation function.
+                    The name of evaluation function (without whitespaces).
                 eval_result : float
                     The eval result.
                 is_higher_better : bool
@@ -147,7 +147,7 @@ def __call__(self, preds, dataset):
         Returns
         -------
         eval_name : string
-            The name of evaluation function.
+            The name of evaluation function (without whitespaces).
         eval_result : float
             The eval result.
         is_higher_better : bool
@@ -464,7 +464,7 @@ def fit(self, X, y,
                 group : array-like
                     Group/query data, used for ranking task.
                 eval_name : string
-                    The name of evaluation function.
+                    The name of evaluation function (without whitespaces).
                 eval_result : float
                     The eval result.
                 is_higher_better : bool
@@ -524,7 +524,8 @@ def fit(self, X, y,
         # concatenate metric from params (or default if not provided in params) and eval_metric
         original_metric = [original_metric] if isinstance(original_metric, (string_type, type(None))) else original_metric
         eval_metric = [eval_metric] if isinstance(eval_metric, (string_type, type(None))) else eval_metric
-        params['metric'] = set(original_metric + eval_metric)
+        params['metric'] = [e for e in eval_metric if e not in original_metric] + original_metric
+        params['metric'] = [metric for metric in params['metric'] if metric is not None]

         if not isinstance(X, (DataFrame, DataTable)):
             _X, _y = _LGBMCheckXY(X, y, accept_sparse=True, force_all_finite=False, ensure_min_samples=2)
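The last hunk replaces the unordered set(original_metric + eval_metric) with an order-preserving merge, so the notion of a first metric survives into params['metric'] instead of depending on set iteration order. A standalone sketch of that rule with illustrative values:

    # original_metric comes from params (or the objective's default); eval_metric is the fit() argument.
    original_metric = ['l2']
    eval_metric = ['l1', 'l2', None]  # None is what a missing eval_metric normalizes to

    merged = [e for e in eval_metric if e not in original_metric] + original_metric
    merged = [m for m in merged if m is not None]
    print(merged)  # ['l1', 'l2'] -- duplicates and None dropped, order deterministic

With set(), the iteration order could differ between runs, so the metric treated as "first" by the early-stopping callback could silently change as well.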