Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Adjusted R2 #2624

Merged
merged 18 commits into from
Dec 31, 2020
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
### mlpack ?.?.?
###### ????-??-??
* Add Adjusted R squared functionality to R2Score::Evaluate (#2624).

* Added an implementation to Stratify Data (#2671).

* Add `BUILD_DOCS` CMake option to control whether Doxygen documentation is
Expand Down
7 changes: 6 additions & 1 deletion src/mlpack/core/cv/metrics/r2_score.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,17 @@ namespace cv {
* where @f$ \bar{y} = \frac{1}{n}\sum_{i=1}^{n} y_i @f$.
* For example, a model having R2Score = 0.85, explains 85 \% variability of
* the response data around its mean.
*
* @tparam AdjustedR2 If true, then the Adjusted R2 score will be used.
* Otherwise, the regular R2 score is used.
*/

rcurtin marked this conversation as resolved.
Show resolved Hide resolved
template<bool AdjustedR2>
class R2Score
{
public:
/**
* Run prediction and calculate the R squared error.
* Run prediction and calculate the R squared or Adjusted R squared error.
*
* @param model A regression model.
* @param data Column-major data containing test items.
Expand Down
20 changes: 16 additions & 4 deletions src/mlpack/core/cv/metrics/r2_score_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
namespace mlpack {
namespace cv {

template<bool AdjustedR2>
template<typename MLAlgorithm, typename DataType, typename ResponsesType>
double R2Score::Evaluate(MLAlgorithm& model,
const DataType& data,
const ResponsesType& responses)
double R2Score<AdjustedR2>::Evaluate(MLAlgorithm& model,
const DataType& data,
const ResponsesType& responses)
{
if (data.n_cols != responses.n_cols)
{
Expand Down Expand Up @@ -46,7 +47,18 @@ double R2Score::Evaluate(MLAlgorithm& model,
if (residualSumSquared == 0.0)
return totalSumSquared ? 1.0 : DBL_MIN;

return 1 - residualSumSquared / totalSumSquared;
if (AdjustedR2)
{
  // Adjusted R-squared:
  //   1 - (1 - R^2) * (n - 1) / (n - p - 1),
  // with n = data.n_cols (data is column-major, one test item per column)
  // and p = data.n_rows (presumably the number of features per item).
  //
  // The dimensions are integral (unsigned `uword` for Armadillo matrices,
  // which is what the tests pass in), so they are converted to double before
  // any arithmetic.  Otherwise the ratio is truncated by integer division
  // (e.g. 5 / 3 == 1) and `n - p - 1` wraps around whenever n <= p.
  const double rsq = 1 - (residualSumSquared / totalSumSquared);
  const double numPoints = static_cast<double>(data.n_cols);
  const double numFeatures = static_cast<double>(data.n_rows);
  return 1 - ((1 - rsq) * ((numPoints - 1) /
      (numPoints - numFeatures - 1)));
}
else
{
  // Regular (unadjusted) R-squared.
  return 1 - residualSumSquared / totalSumSquared;
}
}

} // namespace cv
Expand Down
23 changes: 22 additions & 1 deletion src/mlpack/tests/cv_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,31 @@ TEST_CASE("R2ScoreTest", "[CVTest]")

double expectedR2 = 0.99999779;

REQUIRE(R2Score::Evaluate(lr, data, responses)
REQUIRE(R2Score<false>::Evaluate(lr, data, responses)
== Approx(expectedR2).epsilon(1e-7));
}

/**
 * Check the Adjusted R squared metric on data that a linear model fits
 * exactly.
 */
TEST_CASE("AdjR2ScoreTest", "[CVTest]")
{
  // Two predictors per point; the responses follow f(x1, x2) = x1 + x2.
  arma::mat predictors;
  predictors << 1 << 2 << 3 << 4 << 5 << 6 << arma::endr
             << 2 << 3 << 4 << 5 << 6 << 7 << arma::endr;
  arma::rowvec targets;
  targets << 3 << 5 << 7 << 9 << 11 << 13;

  LinearRegression lr(predictors, targets);

  // The fit is exact, so the Adjusted R squared score is expected to be 1.
  const double expectedScore = 1;
  const double actualScore = R2Score<true>::Evaluate(lr, predictors, targets);
  REQUIRE(std::abs(actualScore - expectedScore) <= 1e-7);
}

/**
* Test the mean squared error with matrix responses.
*/
Expand Down