fix nan in eval results (#1973)
* always save the score of the first round in early stopping

fix #1971

* avoid using std::log on non-positive numbers

* remove unnecessary changes

* add tests

* Update test_sklearn.py

* enhanced tests
guolinke committed Jan 30, 2019
1 parent fe77eac commit feeaf38
Showing 5 changed files with 52 additions and 5 deletions.
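A note on the underlying failure (editor's summary, not part of the commit message): in IEEE-754 arithmetic, the log of zero is -inf and the log of a negative number is NaN, and every ordered comparison involving NaN is false. So a single NaN metric value meant the early-stopping comparator never fired and no best score was ever recorded. A minimal Python illustration of both facts:

import numpy as np

with np.errstate(divide='ignore', invalid='ignore'):
    print(np.log(0.0))   # -inf: still orders correctly against finite scores
    print(np.log(-1.0))  # nan: poisons every subsequent comparison

nan = float('nan')
print(nan < nan, nan > nan, nan == nan)  # False False False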
9 changes: 9 additions & 0 deletions include/LightGBM/utils/common.h
@@ -866,6 +866,15 @@ static int Sign(T x) {
   return (x > T(0)) - (x < T(0));
 }
 
+template <typename T>
+static T SafeLog(T x) {
+  if (x > 0) {
+    return std::log(x);
+  } else {
+    return -INFINITY;
+  }
+}
+
 } // namespace Common
 
 } // namespace LightGBM
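For readers skimming past the C++, here is a rough Python equivalent of the new Common::SafeLog helper (my sketch, not LightGBM code): non-positive inputs map to -inf instead of NaN or a raised error, so min/max-style comparisons downstream stay well defined.

import math

def safe_log(x):
    # Mirrors Common::SafeLog: log(x) for positive x, -inf otherwise.
    return math.log(x) if x > 0 else float('-inf')

print(safe_log(math.e))  # 1.0
print(safe_log(0.0))     # -inf (math.log(0.0) would raise ValueError)
print(safe_log(-3.0))    # -inf (std::log would return NaN here)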
2 changes: 1 addition & 1 deletion python-package/lightgbm/callback.py
@@ -213,7 +213,7 @@ def _callback(env):
             return
         for i in range_(len(env.evaluation_result_list)):
             score = env.evaluation_result_list[i][2]
-            if cmp_op[i](score, best_score[i]):
+            if best_score_list[i] is None or cmp_op[i](score, best_score[i]):
                 best_score[i] = score
                 best_iter[i] = env.iteration
                 best_score_list[i] = env.evaluation_result_list
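The one-line guard above is the heart of the early-stopping fix: cmp_op[i] is a plain less-than or greater-than comparison, and if the very first score is NaN, that comparison stays false on every round, so best_score_list[i] was never filled in. Accepting the first round unconditionally restores the invariant that a best score always exists. A stripped-down sketch of the failure mode (names follow the callback; operator.lt stands in for cmp_op):

import operator

scores = [float('nan')] * 5  # every round's eval result came back NaN
cmp_op = operator.lt         # "lower is better", as for l2

best_score = float('nan')
best_score_list = None
for score in scores:
    if best_score_list is None or cmp_op(score, best_score):  # the fix
        best_score = score
        best_score_list = [score]

print(best_score_list)  # [nan] with the guard; None without it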
20 changes: 17 additions & 3 deletions src/metric/regression_metric.hpp
@@ -45,6 +45,9 @@ class RegressionMetric: public Metric {
         sum_weights_ += weights_[i];
       }
     }
+    for (data_size_t i = 0; i < num_data_; ++i) {
+      PointWiseLossCalculator::CheckLabel(label_[i]);
+    }
   }
 
   std::vector<double> Eval(const double* score, const ObjectiveFunction* objective) const override {
@@ -90,6 +93,10 @@
   inline static double AverageLoss(double sum_loss, double sum_weights) {
     return sum_loss / sum_weights;
   }
+
+  inline static void CheckLabel(label_t) {
+  }
+
  private:
   /*! \brief Number of data */
   data_size_t num_data_;
@@ -251,13 +258,17 @@ class GammaMetric : public RegressionMetric<GammaMetric> {
     const double psi = 1.0;
     const double theta = -1.0 / score;
     const double a = psi;
-    const double b = -std::log(-theta);
-    const double c = 1. / psi * std::log(label / psi) - std::log(label) - 0;  // 0 = std::lgamma(1.0 / psi) = std::lgamma(1.0);
+    const double b = -Common::SafeLog(-theta);
+    const double c = 1. / psi * Common::SafeLog(label / psi) - Common::SafeLog(label) - 0;  // 0 = std::lgamma(1.0 / psi) = std::lgamma(1.0);
     return -((label * theta - b) / a + c);
   }
   inline static const char* Name() {
     return "gamma";
   }
+
+  inline static void CheckLabel(label_t label) {
+    CHECK(label > 0);
+  }
 };


@@ -269,14 +280,17 @@ class GammaDevianceMetric : public RegressionMetric<GammaDevianceMetric> {
   inline static double LossOnPoint(label_t label, double score, const Config&) {
     const double epsilon = 1.0e-9;
     const double tmp = label / (score + epsilon);
-    return tmp - std::log(tmp) - 1;
+    return tmp - Common::SafeLog(tmp) - 1;
   }
   inline static const char* Name() {
     return "gamma-deviance";
   }
   inline static double AverageLoss(double sum_loss, double) {
     return sum_loss * 2;
   }
+  inline static void CheckLabel(label_t label) {
+    CHECK(label > 0);
+  }
 };
 
 class TweedieMetric : public RegressionMetric<TweedieMetric> {
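Two things changed in this file: every log in the gamma metrics now goes through Common::SafeLog, and a CheckLabel hook (a no-op in the base class, CHECK(label > 0) in the gamma metrics) rejects invalid labels at Init time. To see what SafeLog buys in the deviance, here is GammaDevianceMetric::LossOnPoint transcribed into Python (my transcription, not LightGBM code): a zero label gives tmp = 0, and the loss becomes 0 - (-inf) - 1 = +inf, a value early stopping can still compare, instead of NaN.

import math

def safe_log(x):
    return math.log(x) if x > 0 else float('-inf')

def gamma_deviance_point(label, score, epsilon=1.0e-9):
    # Transcribed from GammaDevianceMetric::LossOnPoint above.
    tmp = label / (score + epsilon)
    return tmp - safe_log(tmp) - 1

print(gamma_deviance_point(2.0, 1.0))  # ~0.307, an ordinary finite loss
print(gamma_deviance_point(0.0, 1.0))  # inf, not nan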
2 changes: 1 addition & 1 deletion src/objective/regression_objective.hpp
@@ -472,7 +472,7 @@ class RegressionPoissonLoss: public RegressionL2loss {
   }
 
   double BoostFromScore(int) const override {
-    return std::log(RegressionL2loss::BoostFromScore(0));
+    return Common::SafeLog(RegressionL2loss::BoostFromScore(0));
   }
 
   bool IsConstantHessian() const override {
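RegressionL2loss::BoostFromScore(0) returns the (weighted) average label, and the Poisson objective starts boosting from its log. If that average is zero, or NaN because an all-zero weight vector makes it 0/0, SafeLog returns -inf, since NaN > 0 is also false. A small sketch of that arithmetic (helper names are mine, for illustration only):

import numpy as np

def safe_log(x):
    return np.log(x) if x > 0 else float('-inf')

def poisson_boost_from_score(labels, weights):
    # Weighted mean label, as RegressionL2loss::BoostFromScore computes it.
    with np.errstate(invalid='ignore'):
        mean = np.sum(labels * weights) / np.sum(weights)
    return safe_log(mean)

labels = np.zeros(4)
print(poisson_boost_from_score(labels, np.ones(4)))   # -inf (mean is 0)
print(poisson_boost_from_score(labels, np.zeros(4)))  # -inf (mean is 0/0 = nan)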
24 changes: 24 additions & 0 deletions tests/python_package_test/test_sklearn.py
@@ -550,3 +550,27 @@ def custom_metric(y_true, y_pred):
                                  **params).fit(eval_metric='multi_logloss', **params_fit)
         self.assertEqual(len(gbm.evals_result_['training']), 1)
         self.assertIn('binary_logloss', gbm.evals_result_['training'])
+
+    def test_inf_handle(self):
+        nrows = 1000
+        ncols = 10
+        X = np.random.randn(nrows, ncols)
+        y = np.random.randn(nrows) + np.full(nrows, 1e30)
+        weight = np.full(nrows, 1e10)
+        params = {'n_estimators': 20, 'verbose': -1}
+        params_fit = {'X': X, 'y': y, 'sample_weight': weight, 'eval_set': (X, y),
+                      'verbose': False, 'early_stopping_rounds': 5}
+        gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
+        np.testing.assert_array_equal(gbm.evals_result_['training']['l2'], np.inf)
+
+    def test_nan_handle(self):
+        nrows = 1000
+        ncols = 10
+        X = np.random.randn(nrows, ncols)
+        y = np.random.randn(nrows) + np.full(nrows, 1e30)
+        weight = np.zeros(nrows)
+        params = {'n_estimators': 20, 'verbose': -1}
+        params_fit = {'X': X, 'y': y, 'sample_weight': weight, 'eval_set': (X, y),
+                      'verbose': False, 'early_stopping_rounds': 5}
+        gbm = lgb.LGBMRegressor(**params).fit(**params_fit)
+        np.testing.assert_array_equal(gbm.evals_result_['training']['l2'], np.nan)
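A note on the expected values (my reading of the arithmetic, not text from the commit): in test_nan_handle the weights are all zero, so the metric's AverageLoss is sum_loss / sum_weights = 0/0, which is NaN; in test_inf_handle the extreme labels and weights evidently drive the weighted l2 loss to overflow to inf. The point of both tests is that such scores now flow through evaluation and the fixed early-stopping callback instead of crashing or never recording a best iteration. The NaN case in one line:

import numpy as np

with np.errstate(invalid='ignore'):
    print(np.float64(0.0) / np.float64(0.0))  # nan, exactly what the test asserts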
