Skip to content

Commit

Permalink
sigmoid_ in grad and hess for rank objective (#2322)
Browse files Browse the repository at this point in the history
* Lambdas and hessians need to factor sigmoid_ into the computation. Additionally, the sigmoid function has an arbitrary factor of 2 in the exponent; this is not just non-standard — it also means the gradients are not computed correctly.

* Update unit test

* Also remove a heuristic that normalizes the gradient by the difference in scores.

* Also fix unit test after removing the heuristic
  • Loading branch information
sbruch authored and guolinke committed Aug 17, 2019
1 parent c421f89 commit aee92f6
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 17 deletions.
19 changes: 4 additions & 15 deletions src/objective/rank_objective.hpp
Expand Up @@ -104,13 +104,6 @@ class LambdarankNDCG: public ObjectiveFunction {
}
std::stable_sort(sorted_idx.begin(), sorted_idx.end(),
[score](data_size_t a, data_size_t b) { return score[a] > score[b]; });
// get best and worst score
const double best_score = score[sorted_idx[0]];
data_size_t worst_idx = cnt - 1;
if (worst_idx > 0 && score[sorted_idx[worst_idx]] == kMinScore) {
worst_idx -= 1;
}
const double wrost_score = score[sorted_idx[worst_idx]];
// start accmulate lambdas by pairs
for (data_size_t i = 0; i < cnt; ++i) {
const data_size_t high = sorted_idx[i];
Expand Down Expand Up @@ -141,16 +134,12 @@ class LambdarankNDCG: public ObjectiveFunction {
const double paired_discount = fabs(high_discount - low_discount);
// get delta NDCG
double delta_pair_NDCG = dcg_gap * paired_discount * inverse_max_dcg;
// regular the delta_pair_NDCG by score distance
if (high_label != low_label && best_score != wrost_score) {
delta_pair_NDCG /= (0.01f + fabs(delta_score));
}
// calculate lambda for this pair
double p_lambda = GetSigmoid(delta_score);
double p_hessian = p_lambda * (2.0f - p_lambda);
double p_hessian = p_lambda * (1.0f - p_lambda);
// update
p_lambda *= -delta_pair_NDCG;
p_hessian *= 2 * delta_pair_NDCG;
p_lambda *= -sigmoid_ * delta_pair_NDCG;
p_hessian *= sigmoid_ * sigmoid_ * delta_pair_NDCG;
high_sum_lambda += p_lambda;
high_sum_hessian += p_hessian;
lambdas[low] -= static_cast<score_t>(p_lambda);
Expand Down Expand Up @@ -193,7 +182,7 @@ class LambdarankNDCG: public ObjectiveFunction {
// cache
for (size_t i = 0; i < _sigmoid_bins; ++i) {
const double score = i / sigmoid_table_idx_factor_ + min_sigmoid_input_;
sigmoid_table_[i] = 2.0f / (1.0f + std::exp(2.0f * score * sigmoid_));
sigmoid_table_[i] = 1.0f / (1.0f + std::exp(score * sigmoid_));
}
}

Expand Down
4 changes: 2 additions & 2 deletions tests/python_package_test/test_sklearn.py
Expand Up @@ -82,8 +82,8 @@ def test_lambdarank(self):
eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=5, verbose=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda x: 0.95 ** x * 0.1)])
self.assertLessEqual(gbm.best_iteration_, 12)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.65)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.65)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6173)
self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6479)

def test_regression_with_custom_objective(self):
def objective_ls(y_true, y_pred):
Expand Down

0 comments on commit aee92f6

Please sign in to comment.