Implement greedy descent policy and relevant test for it.

ResolvableFunctionType API changes
mlpack · Aug 13, 2017 · 8439bf3 · 8439bf3
1 parent 42dba55
commit 8439bf3
Show file tree

Hide file tree

Showing 7 changed files with 75 additions and 27 deletions.
diff --git a/src/mlpack/core/optimizers/parallel_sgd/sparse_test_function.hpp b/src/mlpack/core/optimizers/parallel_sgd/sparse_test_function.hpp
@@ -74,10 +74,9 @@ class SparseTestFunction
   //! Evaluate the gradient of a feature function.
   void FeatureGradient(const arma::mat& coordinates,
                        const size_t j,
-                       arma::sp_mat& gradient) const
+                       double& gradient) const
   {
-    gradient = arma::sp_mat(coordinates.n_rows, 1);
-    gradient[j] = 2 * coordinates[j] + bi[j];
+    gradient = 2 * coordinates[j] + bi[j];
   }
 
  private:

diff --git a/src/mlpack/core/optimizers/scd/descent_policies/greedy_descent.hpp b/src/mlpack/core/optimizers/scd/descent_policies/greedy_descent.hpp
@@ -20,7 +20,18 @@ namespace optimization {
 /**
  * Greedy descent policy for Stochastic Co-ordinate Descent (SCD). This
  * descent scheme picks the co-ordinate for the descent with the maximum
- * guaranteed descent.
+ * guaranteed descent, according to the Gauss-Southwell rule. This is a
+ * deterministic approach and is generally more expensive to calculate.
+ *
+ * For more information, refer to the following.
+ * @misc{1506.00552,
+ *   Author = {Julie Nutini and Mark Schmidt and Issam H.
+ *             Laradji and Michael Friedlander and Hoyt Koepke},
+ *   Title = {Coordinate Descent Converges Faster with the Gauss-Southwell Rule
+ *            Than Random Selection},
+ *   Year = {2015},
+ *   Eprint = {arXiv:1506.00552}
+ * }
  */
 class GreedyDescent
 {
@@ -36,17 +47,27 @@ class GreedyDescent
    * @param function The function to be optimized.
    * @return The index of the coordinate to be descended.
    */
-
-  // TODO: Find a way to implement this.
   template <typename ResolvableFunctionType>
-  size_t DescentFeature(const size_t numEpoch,
+  size_t DescentFeature(const size_t /* numEpoch */,
                         const arma::mat& iterate,
                         const ResolvableFunctionType& function)
   {
+    size_t bestFeature = 0;
+    double bestDescent = 0;
     for (size_t i = 0; i < function.NumFeatures(); ++i)
     {
-      double featureGrad = function.FeatureGradient(iterate, i);
+      double fGrad;
+
+      function.FeatureGradient(iterate, i, fGrad);
+
+      if (fGrad > bestDescent)
+      {
+        bestFeature = i;
+        bestDescent = fGrad;
+      }
     }
+
+    return bestFeature;
   }
 };
 

diff --git a/src/mlpack/core/optimizers/scd/scd.hpp b/src/mlpack/core/optimizers/scd/scd.hpp
@@ -36,7 +36,7 @@ namespace optimization {
  *  double Evaluate(const arma::mat& coordinates);
  *  void FeatureGradient(const arma::mat& coordinates,
  *                       const size_t j,
- *                       arma::sp_mat& gradient);
+ *                       double& gradient);
  *
  *  NumFeatures() should return the number of features in the decision variable.
  *  Evaluate gives the value of the loss function at the current decision

diff --git a/src/mlpack/core/optimizers/scd/scd_impl.hpp b/src/mlpack/core/optimizers/scd/scd_impl.hpp
@@ -41,7 +41,7 @@ double SCD<DescentPolicyType>::Optimize(ResolvableFunctionType& function,
   double overallObjective = 0;
   double lastObjective = DBL_MAX;
 
-  arma::sp_mat gradient;
+  double gradient;
 
   // Start iterating.
   for (size_t i = 1; i != maxIterations; ++i)
@@ -53,7 +53,7 @@ double SCD<DescentPolicyType>::Optimize(ResolvableFunctionType& function,
     function.FeatureGradient(iterate, featureIdx, gradient);
 
     // Update the decision variable with the partial gradient.
-    iterate -= stepSize * gradient;
+    iterate[featureIdx] -= stepSize * gradient;
 
     // Check for convergence.
     if (i % updateInterval == 0)

diff --git a/src/mlpack/methods/logistic_regression/logistic_regression_function.hpp b/src/mlpack/methods/logistic_regression/logistic_regression_function.hpp
@@ -116,20 +116,20 @@ class LogisticRegressionFunction
    * @param parameters Vector of logistic regression parameters.
    * @param j Index of the feature with respect to which the gradient is to
    *    be computed.
-   * @param gradient Vector to output gradient into.
+   * @param gradient Double to output gradient into.
    */
   void FeatureGradient(const arma::mat& parameters,
                        const size_t j,
-                       arma::sp_mat& gradient) const;
+                       double& gradient) const;
 
   //! Return the initial point for the optimization.
   const arma::mat& GetInitialPoint() const { return initialPoint; }
 
   //! Return the number of separable functions (the number of predictor points).
   size_t NumFunctions() const { return predictors.n_cols; }
 
-  //! Return the number of features.
-  size_t NumFeatures() const { return predictors.n_rows; }
+  //! Return the number of features (add 1 for the intercept term).
+  size_t NumFeatures() const { return predictors.n_rows + 1; }
 
  private:
   //! The initial point, from which to start the optimization.

diff --git a/src/mlpack/methods/logistic_regression/logistic_regression_function_impl.hpp b/src/mlpack/methods/logistic_regression/logistic_regression_function_impl.hpp
@@ -179,22 +179,21 @@ template <typename MatType>
 void LogisticRegressionFunction<MatType>::FeatureGradient(
     const arma::mat& parameters,
     const size_t j,
-    arma::sp_mat& gradient) const
+    double& gradient) const
 {
-  // Regularization term.
-  double regularization;
-  regularization = lambda * parameters(j + 1, 0);
-
   const arma::rowvec sigmoids = (1 / (1 + arma::exp(-parameters(0, 0)
       - parameters.col(0).subvec(1, parameters.n_elem - 1).t() * predictors)));
 
-  gradient.set_size(parameters.n_elem);
   arma::mat diffs = responses - sigmoids;
-
-  gradient[0] = -arma::accu(diffs);
-
-  double grad = arma::dot(-predictors.row(j), diffs);
-  gradient(j + 1, 0) = grad + regularization;
+  if (j == 0)
+  {
+    gradient = -arma::accu(diffs);
+  }
+  else
+  {
+    double regularization = lambda * parameters(j, 0);
+    gradient = arma::dot(-predictors.row(j - 1), diffs) + regularization;
+  }
 }
 
 } // namespace regression

diff --git a/src/mlpack/tests/scd_test.cpp b/src/mlpack/tests/scd_test.cpp
@@ -11,6 +11,7 @@
  */
 #include <mlpack/core.hpp>
 #include <mlpack/core/optimizers/scd/scd.hpp>
+#include <mlpack/core/optimizers/scd/descent_policies/greedy_descent.hpp>
 #include <mlpack/core/optimizers/parallel_sgd/sparse_test_function.hpp>
 #include <mlpack/methods/logistic_regression/logistic_regression_function.hpp>
 
@@ -26,21 +27,29 @@ using namespace mlpack::regression;
 
 BOOST_AUTO_TEST_SUITE(SCDTest);
 
+/**
+ * Test the correctness of the SCD implementation by using a dataset with a
+ * precalculated minimum.
+ */
 BOOST_AUTO_TEST_CASE(PreCalcSCDTest)
 {
   arma::mat predictors("0 0 0.4; 0 0 0.6; 0 0.3 0; 0.2 0 0; 0.2 -0.5 0;");
   arma::Row<size_t> responses("1  1  0;");
 
   LogisticRegressionFunction<arma::mat> f(predictors, responses, 0.0001);
 
-  SCD<> s(0.01, 50000, 1e-5);
+  SCD<> s(0.01, 60000, 1e-5);
   arma::mat iterate = f.InitialPoint();
 
   double objective = s.Optimize(f, iterate);
 
   BOOST_REQUIRE_LE(objective, 0.055);
 }
 
+/**
+ * Test the correctness of the SCD implementation by using the sparse test
+ * function, with disjoint features which optimize to a precalculated minimum.
+ */
 BOOST_AUTO_TEST_CASE(DisjointFeatureTest)
 {
   // The test function for parallel SGD should work with SCD, as the gradients
@@ -62,4 +71,24 @@ BOOST_AUTO_TEST_CASE(DisjointFeatureTest)
   BOOST_REQUIRE_CLOSE(iterate[3], 4, 0.02);
 }
 
+/**
+ * Test the greedy descent policy.
+ */
+BOOST_AUTO_TEST_CASE(GreedyDescentTest)
+{
+  // In the sparse test function, the given point has the maximum gradient at
+  // the feature with index 2.
+  arma::mat point("1; 2; 3; 4;");
+
+  SparseTestFunction f;
+
+  GreedyDescent descentPolicy;
+
+  BOOST_REQUIRE_EQUAL(descentPolicy.DescentFeature(0, point, f), 2);
+
+  point[1] = 10;
+
+  BOOST_REQUIRE_EQUAL(descentPolicy.DescentFeature(0, point, f), 1);
+}
+
 BOOST_AUTO_TEST_SUITE_END();