diff --git a/stan/math/prim/fun.hpp b/stan/math/prim/fun.hpp index f4e51d5d75b..ef110d5fa34 100644 --- a/stan/math/prim/fun.hpp +++ b/stan/math/prim/fun.hpp @@ -102,6 +102,7 @@ #include #include #include +#include #include #include #include diff --git a/stan/math/prim/fun/generalized_inverse.hpp b/stan/math/prim/fun/generalized_inverse.hpp new file mode 100644 index 00000000000..0f6e1bb42bb --- /dev/null +++ b/stan/math/prim/fun/generalized_inverse.hpp @@ -0,0 +1,54 @@ +#ifndef STAN_MATH_PRIM_FUN_GENERALIZED_INVERSE_HPP +#define STAN_MATH_PRIM_FUN_GENERALIZED_INVERSE_HPP + +#include +#include +#include +#include +#include + +namespace stan { +namespace math { + +/** + * Returns the Moore-Penrose generalized inverse of the specified matrix. + * + * The method is based on the Cholesky computation of the transform as specified + * in + * + *
  • Courrieu, Pierre. 2008. Fast Computation of Moore-Penrose Inverse + Matrices. + * arXiv 0804.4809
+ * + * @tparam EigMat type of the matrix (must be derived from `Eigen::MatrixBase`) + * + * @param G specified matrix + * @return Generalized inverse of the matrix (an empty matrix if the specified + * matrix has size zero). + */ +template * = nullptr, + require_not_vt_var* = nullptr> +inline Eigen::Matrix, EigMat::ColsAtCompileTime, + EigMat::RowsAtCompileTime> +generalized_inverse(const EigMat& G) { + using value_t = value_type_t; + + if (G.size() == 0) + return {}; + + if (G.rows() == G.cols()) + return inverse(G); + + const auto& G_ref = to_ref(G); + + if (G.rows() < G.cols()) { + return (G_ref * G_ref.transpose()).ldlt().solve(G_ref).transpose(); + } else { + return (G_ref.transpose() * G_ref).ldlt().solve(G_ref.transpose()); + } +} + +} // namespace math +} // namespace stan + +#endif diff --git a/stan/math/rev/fun.hpp b/stan/math/rev/fun.hpp index 4b9de3df0ba..eaf819cfc2c 100644 --- a/stan/math/rev/fun.hpp +++ b/stan/math/rev/fun.hpp @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/stan/math/rev/fun/generalized_inverse.hpp b/stan/math/rev/fun/generalized_inverse.hpp new file mode 100644 index 00000000000..9ec363dd5d5 --- /dev/null +++ b/stan/math/rev/fun/generalized_inverse.hpp @@ -0,0 +1,93 @@ +#ifndef STAN_MATH_REV_FUN_GENERALIZED_INVERSE_HPP +#define STAN_MATH_REV_FUN_GENERALIZED_INVERSE_HPP + +#include +#include +#include +#include + +namespace stan { +namespace math { + +namespace internal { +/* + * Reverse mode specialization of calculating the generalized inverse of a + * matrix. + *
  • Golub, G.H. and Pereyra, V. The Differentiation of Pseudo-Inverses + * and Nonlinear Least Squares Problems Whose Variables Separate. SIAM + * Journal on Numerical Analysis, Vol. 10, No. 2 (Apr., 1973), pp. + * 413-432
+ */ +template +inline auto generalized_inverse_lambda(T1& G_arena, T2& inv_G) { + return [G_arena, inv_G]() mutable { + G_arena.adj() + += -(inv_G.val_op().transpose() * inv_G.adj_op() + * inv_G.val_op().transpose()) + + (-G_arena.val_op() * inv_G.val_op() + + Eigen::MatrixXd::Identity(G_arena.rows(), inv_G.cols())) + * inv_G.adj_op().transpose() * inv_G.val_op() + * inv_G.val_op().transpose() + + inv_G.val_op().transpose() * inv_G.val_op() + * inv_G.adj_op().transpose() + * (-inv_G.val_op() * G_arena.val_op() + + Eigen::MatrixXd::Identity(inv_G.rows(), G_arena.cols())); + }; +} +} // namespace internal + +/* + * Reverse mode specialization of calculating the generalized inverse of a + * matrix. + * + * @param G specified matrix + * @return Generalized inverse of the matrix (an empty matrix if the specified + * matrix has size zero). + * + * @note For the derivatives of this function to exist the matrix must be + * of constant rank. + * Reverse mode differentiation algorithm reference: + * + *
  • Golub, G.H. and Pereyra, V. The Differentiation of Pseudo-Inverses + * and Nonlinear Least Squares Problems Whose Variables Separate. SIAM + * Journal on Numerical Analysis, Vol. 10, No. 2 (Apr., 1973), pp. + * 413-432
+ * + * Equation 4.12 in the paper + * + * See also + * http://mathoverflow.net/questions/25778/analytical-formula-for-numerical-derivative-of-the-matrix-pseudo-inverse + * + */ +template * = nullptr> +inline auto generalized_inverse(const VarMat& G) { + using value_t = value_type_t; + using ret_type = promote_var_matrix_t; + + if (G.size() == 0) + return ret_type(G); + + if (G.rows() == G.cols()) + return ret_type(inverse(G)); + + if (G.rows() < G.cols()) { + arena_t G_arena(G); + arena_t inv_G((G_arena.val_op() * G_arena.val_op().transpose()) + .ldlt() + .solve(G_arena.val_op()) + .transpose()); + reverse_pass_callback(internal::generalized_inverse_lambda(G_arena, inv_G)); + return ret_type(inv_G); + } else { + arena_t G_arena(G); + arena_t inv_G((G_arena.val_op().transpose() * G_arena.val_op()) + .ldlt() + .solve(G_arena.val_op().transpose())); + reverse_pass_callback(internal::generalized_inverse_lambda(G_arena, inv_G)); + return ret_type(inv_G); + } +} + +} // namespace math +} // namespace stan +#endif diff --git a/test/unit/math/mix/fun/generalized_inverse_test.cpp b/test/unit/math/mix/fun/generalized_inverse_test.cpp new file mode 100644 index 00000000000..f079f1366eb --- /dev/null +++ b/test/unit/math/mix/fun/generalized_inverse_test.cpp @@ -0,0 +1,54 @@ +#include +#include +#include + +TEST(mathMixMatFun, ad_tests) { + using stan::test::expect_ad; + using stan::test::expect_ad_matvar; + + auto f = [](const auto& G) { return stan::math::generalized_inverse(G); }; + + Eigen::MatrixXd t(0, 0); + expect_ad(f, t); + expect_ad_matvar(f, t); + + Eigen::MatrixXd u(1, 1); + u << 2; + expect_ad(f, u); + expect_ad_matvar(f, u); + + Eigen::MatrixXd v(2, 3); + v << 1, 3, 5, 2, 4, 6; + expect_ad(f, v); + expect_ad_matvar(f, v); + + v << 1.9, 1.3, 2.5, 0.4, 1.7, 0.1; + expect_ad(f, v); + expect_ad_matvar(f, v); + + Eigen::MatrixXd s(2, 4); + s << 3.4, 2, 5, 1.2, 2, 1, 3.2, 3.1; + expect_ad(f, s); + expect_ad_matvar(f, s); + + // issues around zero require looser tolerances for hessians + stan::test::ad_tolerances tols; + tols.hessian_hessian_ = 2.0; + tols.hessian_fvar_hessian_ = 2.0; + + Eigen::MatrixXd w(3, 4); + w << 2, 3, 5, 7, 11, 13, 17, 19, 23, 25, 27, 29; + expect_ad(tols, f, w); + expect_ad_matvar(f, w); + + Eigen::MatrixXd z(2, 2); + z << 1, 2, 5, std::numeric_limits::quiet_NaN(); + EXPECT_NO_THROW(stan::math::generalized_inverse(z)); + + // autodiff throws, so following fails (throw behavior must match to pass) + + Eigen::MatrixXd a(2, 2); + a << 1.9, 0.3, 0.3, std::numeric_limits::infinity(); + expect_ad(f, a); + expect_ad_matvar(f, a); +} diff --git a/test/unit/math/prim/fun/generalized_inverse_test.cpp b/test/unit/math/prim/fun/generalized_inverse_test.cpp new file mode 100644 index 00000000000..5f3bd6a7170 --- /dev/null +++ b/test/unit/math/prim/fun/generalized_inverse_test.cpp @@ -0,0 +1,47 @@ +#include +#include +#include + +TEST(MathMatrixPrim, Zero) { + stan::math::matrix_d m0(0, 0); + stan::math::matrix_d ginv = stan::math::generalized_inverse(m0); + EXPECT_EQ(0, ginv.rows()); + EXPECT_EQ(0, ginv.cols()); +} + +TEST(MathMatrixPrim, Singular) { + using stan::math::generalized_inverse; + + stan::math::matrix_d m1(2, 3); + m1 << 1, 2, 1, 2, 4, 2; + + stan::math::matrix_d m2 = m1 * generalized_inverse(m1) * m1; + EXPECT_MATRIX_NEAR(m1, m2, 1e-9); +} + +TEST(MathMatrixPrim, Equal1) { + using stan::math::generalized_inverse; + + stan::math::matrix_d m1(2, 3); + m1 << 1, 3, 5, 2, 4, 6; + + stan::math::matrix_d m2(2, 2); + m2 << 1, 0, 0, 1; + + stan::math::matrix_d m3 = m1 * generalized_inverse(m1); + EXPECT_MATRIX_NEAR(m2, m3, 1e-9); +} + +TEST(MathMatrixPrim, Equal2) { + using stan::math::generalized_inverse; + + stan::math::matrix_d m1(3, 2); + m1 << 1, 2, 2, 4, 1, 2; + + stan::math::matrix_d m2(3, 3); + m2 << 1.0 / 6.0, 1.0 / 3.0, 1.0 / 6.0, 1.0 / 3.0, 2.0 / 3.0, 1.0 / 3.0, + 1.0 / 6.0, 1.0 / 3.0, 1.0 / 6.0; + + stan::math::matrix_d m3 = m1 * generalized_inverse(m1); + EXPECT_MATRIX_NEAR(m2, m3, 1e-9); +}