Skip to content

Commit

Permalink
Implement greedy descent policy and relevant test for it.
Browse files Browse the repository at this point in the history
ResolvableFunctionType API changes
  • Loading branch information
shikharbhardwaj committed Aug 13, 2017
1 parent 42dba55 commit 8439bf3
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 27 deletions.
Expand Up @@ -74,10 +74,9 @@ class SparseTestFunction
/**
 * Evaluate the partial derivative of the objective with respect to a single
 * feature.
 *
 * @param coordinates Current decision variable; only element j is read.
 * @param j Index of the feature to differentiate with respect to.
 * @param gradient Scalar that receives the partial derivative,
 *     2 * coordinates[j] + bi[j].
 */
void FeatureGradient(const arma::mat& coordinates,
                     const size_t j,
                     double& gradient) const
{
  // Only the j-th coordinate and the j-th entry of bi enter this derivative.
  gradient = 2 * coordinates[j] + bi[j];
}

private:
Expand Down
31 changes: 26 additions & 5 deletions src/mlpack/core/optimizers/scd/descent_policies/greedy_descent.hpp
Expand Up @@ -20,7 +20,18 @@ namespace optimization {
/**
* Greedy descent policy for Stochastic Co-ordinate Descent(SCD). This
* descent scheme picks the co-ordinate for the descent with the maximum
* guaranteed descent.
* guaranteed descent, according to the Gauss-Southwell rule. This is a
* deterministic approach and is generally more expensive to calculate.
*
* For more information, refer to the following.
* @misc{1506.00552,
* Author = {Julie Nutini and Mark Schmidt and Issam H.
* Laradji and Michael Friedlander and Hoyt Koepke},
* Title = {Coordinate Descent Converges Faster with the Gauss-Southwell Rule
* Than Random Selection},
* Year = {2015},
* Eprint = {arXiv:1506.00552}
* }
*/
class GreedyDescent
{
Expand All @@ -36,17 +47,27 @@ class GreedyDescent
* @param function The function to be optimized.
* @return The index of the coordinate to be descended.
*/

// TODO: Find a way to implement this.
template <typename ResolvableFunctionType>
size_t DescentFeature(const size_t numEpoch,
size_t DescentFeature(const size_t /* numEpoch */,
const arma::mat& iterate,
const ResolvableFunctionType& function)
{
size_t bestFeature = 0;
double bestDescent = 0;
for (size_t i = 0; i < function.NumFeatures(); ++i)
{
double featureGrad = function.FeatureGradient(iterate, i);
double fGrad;

function.FeatureGradient(iterate, i, fGrad);

if (fGrad > bestDescent)
{
bestFeature = i;
bestDescent = fGrad;
}
}

return bestFeature;
}
};

Expand Down
2 changes: 1 addition & 1 deletion src/mlpack/core/optimizers/scd/scd.hpp
Expand Up @@ -36,7 +36,7 @@ namespace optimization {
* double Evaluate(const arma::mat& coordinates);
* void FeatureGradient(const arma::mat& coordinates,
* const size_t j,
* arma::sp_mat& gradient);
* double& gradient);
*
* NumFeatures() should return the number of features in the decision variable.
* Evaluate gives the value of the loss function at the current decision
Expand Down
4 changes: 2 additions & 2 deletions src/mlpack/core/optimizers/scd/scd_impl.hpp
Expand Up @@ -41,7 +41,7 @@ double SCD<DescentPolicyType>::Optimize(ResolvableFunctionType& function,
double overallObjective = 0;
double lastObjective = DBL_MAX;

arma::sp_mat gradient;
double gradient;

// Start iterating.
for (size_t i = 1; i != maxIterations; ++i)
Expand All @@ -53,7 +53,7 @@ double SCD<DescentPolicyType>::Optimize(ResolvableFunctionType& function,
function.FeatureGradient(iterate, featureIdx, gradient);

// Update the decision variable with the partial gradient.
iterate -= stepSize * gradient;
iterate[featureIdx] -= stepSize * gradient;

// Check for convergence.
if (i % updateInterval == 0)
Expand Down
Expand Up @@ -116,20 +116,20 @@ class LogisticRegressionFunction
* @param parameters Vector of logistic regression parameters.
* @param j Index of the feature with respect to which the gradient is to
* be computed.
* @param gradient Vector to output gradient into.
* @param gradient Double to output gradient into.
*/
void FeatureGradient(const arma::mat& parameters,
const size_t j,
arma::sp_mat& gradient) const;
double& gradient) const;

//! Return the initial point for the optimization, as a read-only reference to
//! the stored initialPoint member.
const arma::mat& GetInitialPoint() const { return initialPoint; }

//! Return the number of separable functions (the number of predictor points),
//! taken as the number of columns of predictors.
size_t NumFunctions() const { return predictors.n_cols; }

//! Return the number of features: the number of rows of predictors, plus one
//! for the intercept term.
size_t NumFeatures() const { return predictors.n_rows + 1; }

private:
//! The initial point, from which to start the optimization.
Expand Down
Expand Up @@ -179,22 +179,21 @@ template <typename MatType>
void LogisticRegressionFunction<MatType>::FeatureGradient(
    const arma::mat& parameters,
    const size_t j,
    double& gradient) const
{
  // Writes the partial derivative with respect to parameter j into the scalar
  // `gradient`. Index 0 addresses the intercept, parameters(0, 0); index
  // j >= 1 addresses the weight paired with row j - 1 of predictors.

  // Sigmoid of the linear model, evaluated for every column of predictors.
  const arma::rowvec sigmoids = (1 / (1 + arma::exp(-parameters(0, 0)
      - parameters.col(0).subvec(1, parameters.n_elem - 1).t() * predictors)));

  arma::mat diffs = responses - sigmoids;

  if (j == 0)
  {
    // Intercept term: no regularization is added on this branch.
    gradient = -arma::accu(diffs);
  }
  else
  {
    // Weight term: data contribution plus the lambda-scaled parameter value.
    double regularization = lambda * parameters(j, 0);
    gradient = arma::dot(-predictors.row(j - 1), diffs) + regularization;
  }
}

} // namespace regression
Expand Down
31 changes: 30 additions & 1 deletion src/mlpack/tests/scd_test.cpp
Expand Up @@ -11,6 +11,7 @@
*/
#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/scd/scd.hpp>
#include <mlpack/core/optimizers/scd/descent_policies/greedy_descent.hpp>
#include <mlpack/core/optimizers/parallel_sgd/sparse_test_function.hpp>
#include <mlpack/methods/logistic_regression/logistic_regression_function.hpp>

Expand All @@ -26,21 +27,29 @@ using namespace mlpack::regression;

BOOST_AUTO_TEST_SUITE(SCDTest);

/**
* Test the correctness of the SCD implementation by using a dataset with a
* precalculated minimum.
*/
BOOST_AUTO_TEST_CASE(PreCalcSCDTest)
{
  // Small fixed dataset: the matrix literal has five rows and three columns,
  // with one response per column.
  arma::mat predictors("0 0 0.4; 0 0 0.6; 0 0.3 0; 0.2 0 0; 0.2 -0.5 0;");
  arma::Row<size_t> responses("1 1 0;");

  LogisticRegressionFunction<arma::mat> f(predictors, responses, 0.0001);

  // NOTE(review): the arguments are presumably step size, iteration limit and
  // tolerance; confirm against the SCD constructor.
  SCD<> s(0.01, 60000, 1e-5);
  arma::mat iterate = f.InitialPoint();

  double objective = s.Optimize(f, iterate);

  // The optimizer must reach an objective no larger than the precalculated
  // bound.
  BOOST_REQUIRE_LE(objective, 0.055);
}

/**
* Test the correctness of the SCD implementation by using the sparse test
* function, with disjoint features which optimize to a precalculated minimum.
*/
BOOST_AUTO_TEST_CASE(DisjointFeatureTest)
{
// The test function for parallel SGD should work with SCD, as the gradients
Expand All @@ -62,4 +71,24 @@ BOOST_AUTO_TEST_CASE(DisjointFeatureTest)
BOOST_REQUIRE_CLOSE(iterate[3], 4, 0.02);
}

/**
* Test the greedy descent policy.
*/
BOOST_AUTO_TEST_CASE(GreedyDescentTest)
{
  SparseTestFunction testFunction;
  GreedyDescent policy;

  // At this point the sparse test function has its maximum gradient at the
  // feature with index 2, so that is the coordinate the policy must pick.
  arma::mat coords("1; 2; 3; 4;");
  const size_t firstPick = policy.DescentFeature(0, coords, testFunction);
  BOOST_REQUIRE_EQUAL(firstPick, 2);

  // Changing the second coordinate to 10 must move the pick to index 1.
  coords[1] = 10;
  const size_t secondPick = policy.DescentFeature(0, coords, testFunction);
  BOOST_REQUIRE_EQUAL(secondPick, 1);
}

BOOST_AUTO_TEST_SUITE_END();

0 comments on commit 8439bf3

Please sign in to comment.