From bb7242762b900f5576ebf61e1c94a5bae7025339 Mon Sep 17 00:00:00 2001 From: Wuwei Lin Date: Tue, 20 Feb 2018 01:12:23 +0800 Subject: [PATCH] Replace lapack with linalg in MahalanobisDistance (#4085) * Fix broken link in doc * Use linalg in MahalanobisDistance * Apply autoformatter * Remove MehalanobisDistance from LAPACK guarded list * Add lapack guard back * Update license header style * Save ldlt instead of inverse of cov * Replace explict casts and add type check * Remove unnecessary type checks --- cmake/FindMetaExamples.cmake | 1 - src/shogun/distance/MahalanobisDistance.cpp | 51 +++++++------ src/shogun/distance/MahalanobisDistance.h | 76 ++++++++++--------- .../distance/MahalanobisDistance_unittest.cc | 33 ++++++++ 4 files changed, 100 insertions(+), 61 deletions(-) create mode 100644 tests/unit/distance/MahalanobisDistance_unittest.cc diff --git a/cmake/FindMetaExamples.cmake b/cmake/FindMetaExamples.cmake index b107e14c07b..a584cf0f127 100644 --- a/cmake/FindMetaExamples.cmake +++ b/cmake/FindMetaExamples.cmake @@ -35,7 +35,6 @@ function(get_excluded_meta_examples) IF(NOT HAVE_LAPACK) LIST(APPEND EXCLUDED_META_EXAMPLES - distance/mahalanobis.sg ) ENDIF() diff --git a/src/shogun/distance/MahalanobisDistance.cpp b/src/shogun/distance/MahalanobisDistance.cpp index e98d3a0ac37..6ebf8fa24a3 100644 --- a/src/shogun/distance/MahalanobisDistance.cpp +++ b/src/shogun/distance/MahalanobisDistance.cpp @@ -7,14 +7,12 @@ #include -#ifdef HAVE_LAPACK - -#include -#include #include #include +#include +#include #include -#include +#include using namespace shogun; @@ -37,22 +35,31 @@ CMahalanobisDistance::~CMahalanobisDistance() bool CMahalanobisDistance::init(CFeatures* l, CFeatures* r) { - CRealDistance::init(l, r); + // FIXME: See comments in + // https://github.com/shogun-toolbox/shogun/pull/4085#discussion_r166254024 + ASSERT(CRealDistance::init(l, r)); + + SGMatrix cov; + auto feat_l = static_cast*>(l); + auto feat_r = static_cast*>(r); if ( l == r) { - mean = ((CDenseFeatures*) l)->get_mean(); - icov = ((CDenseFeatures*) l)->get_cov(); + mean = feat_l->get_mean(); + cov = feat_r->get_cov(); } else { - mean = ((CDenseFeatures*)l) - ->compute_mean((CDotFeatures*)lhs, (CDotFeatures*)rhs); - icov = CDotFeatures::compute_cov((CDotFeatures*) lhs, (CDotFeatures*) rhs); + mean = feat_l->compute_mean(feat_l, feat_r); + cov = CDotFeatures::compute_cov(feat_l, feat_r); } - SGMatrix::inverse(icov); + auto num_features = cov.num_rows; + chol_cov_L = SGMatrix(num_features, num_features); + chol_cov_d = SGVector(num_features); + chol_cov_p = SGVector(num_features); + linalg::ldlt_factor(cov, chol_cov_L, chol_cov_d, chol_cov_p); return true; } @@ -63,9 +70,10 @@ void CMahalanobisDistance::cleanup() float64_t CMahalanobisDistance::compute(int32_t idx_a, int32_t idx_b) { + auto feat_l = static_cast*>(lhs); + auto feat_r = static_cast*>(rhs); - SGVector bvec = ((CDenseFeatures*) rhs)-> - get_feature_vector(idx_b); + SGVector bvec = feat_r->get_feature_vector(idx_b); SGVector diff; SGVector avec; @@ -74,7 +82,7 @@ float64_t CMahalanobisDistance::compute(int32_t idx_a, int32_t idx_b) diff = mean.clone(); else { - avec = ((CDenseFeatures*) lhs)->get_feature_vector(idx_a); + avec = feat_l->get_feature_vector(idx_a); diff=avec.clone(); } @@ -83,17 +91,13 @@ float64_t CMahalanobisDistance::compute(int32_t idx_a, int32_t idx_b) for (int32_t i=0; i < diff.vlen; i++) diff[i] = bvec.vector[i] - diff[i]; - SGVector v = diff.clone(); - cblas_dgemv(CblasColMajor, CblasNoTrans, - icov.num_rows, icov.num_cols, 1.0, icov.matrix, - diff.vlen, diff.vector, 1, 0.0, v.vector, 1); - - float64_t result = cblas_ddot(v.vlen, v.vector, 1, diff.vector, 1); + auto v = linalg::ldlt_solver(chol_cov_L, chol_cov_d, chol_cov_p, diff); + auto result = linalg::dot(v, diff); if (!use_mean) - ((CDenseFeatures*) lhs)->free_feature_vector(avec, idx_a); + feat_l->free_feature_vector(avec, idx_a); - ((CDenseFeatures*) rhs)->free_feature_vector(bvec, idx_b); + feat_r->free_feature_vector(bvec, idx_b); if (disable_sqrt) return result; @@ -110,4 +114,3 @@ void CMahalanobisDistance::init() m_parameters->add(&use_mean, "use_mean", "If distance shall be computed between mean vector and vector from rhs or between lhs and rhs."); } -#endif /* HAVE_LAPACK */ diff --git a/src/shogun/distance/MahalanobisDistance.h b/src/shogun/distance/MahalanobisDistance.h index 4946b837b54..1344f0f30df 100644 --- a/src/shogun/distance/MahalanobisDistance.h +++ b/src/shogun/distance/MahalanobisDistance.h @@ -10,44 +10,45 @@ #include -#ifdef HAVE_LAPACK - #include #include namespace shogun { -/** @brief class MahalanobisDistance - * - * The Mahalanobis distance for real valued features computes the distance - * between a feature vector and a distribution of features characterized by its - * mean and covariance. - * - * \f[\displaystyle - * D = \sqrt{ (x_i - \mu)^T \Sigma^{-1} (x_i - \mu) } - * \f] - * - * The Mahalanobis Squared distance does not take the square root: - * - * \f[\displaystyle - * D = (x_i - \mu)^T \Sigma^{-1} (x_i - \mu) - * \f] - * - * If use_mean is set to false (which it is by default) the distance is computed - * as - * - * \f[\displaystyle - * D = \sqrt{ (x_i - x_i')^T \Sigma^{-1} (x_i - x_i') } - * \f] - * - * i.e., instead of the mean as reference two vector \f$x_i\f$ and \f$x_i'\f$ - * are compared. - * - * @see - * Wikipedia: Mahalanobis Distance - */ -class CMahalanobisDistance: public CRealDistance -{ + /** @brief class MahalanobisDistance + * + * The Mahalanobis distance for real valued features computes the distance + * between a feature vector and a distribution of features characterized by + * its + * mean and covariance. + * + * \f[\displaystyle + * D = \sqrt{ (x_i - \mu)^T \Sigma^{-1} (x_i - \mu) } + * \f] + * + * The Mahalanobis Squared distance does not take the square root: + * + * \f[\displaystyle + * D = (x_i - \mu)^T \Sigma^{-1} (x_i - \mu) + * \f] + * + * If use_mean is set to false (which it is by default) the distance is + * computed + * as + * + * \f[\displaystyle + * D = \sqrt{ (x_i - x_i')^T \Sigma^{-1} (x_i - x_i') } + * \f] + * + * i.e., instead of the mean as reference two vector \f$x_i\f$ and + * \f$x_i'\f$ + * are compared. + * + * @see + * Wikipedia: Mahalanobis Distance + */ + class CMahalanobisDistance : public CRealDistance + { public: /** default constructor */ CMahalanobisDistance(); @@ -141,10 +142,13 @@ class CMahalanobisDistance: public CRealDistance /** vector mean of the lhs feature vectors */ SGVector mean; - /** inverse of the covariance matrix of lhs feature vectors */ - SGMatrix icov; + + /** LDLT decomposition of the covariance matrix of lhs feature vectors + */ + SGMatrix chol_cov_L; + SGVector chol_cov_d; + SGVector chol_cov_p; }; } // namespace shogun -#endif /* HAVE_LAPACK */ #endif /* _MAHALANOBISDISTANCE_H__ */ diff --git a/tests/unit/distance/MahalanobisDistance_unittest.cc b/tests/unit/distance/MahalanobisDistance_unittest.cc new file mode 100644 index 00000000000..080ec7f1b15 --- /dev/null +++ b/tests/unit/distance/MahalanobisDistance_unittest.cc @@ -0,0 +1,33 @@ +/* + * This software is distributed under BSD 3-clause license (see LICENSE file). + * + * Authors: Wuwei Lin + */ + +#include +#include +#include +#include +#include + +using namespace shogun; + +TEST(MahalanobisDistance, compute_distance) +{ + // create four points (0,0) (2,10) (2,8) (5,5) + SGMatrix rect(2, 4); + rect(0, 0) = 0; + rect(1, 0) = 0; + rect(0, 1) = 2; + rect(1, 1) = 10; + rect(0, 2) = 2; + rect(1, 2) = 8; + rect(0, 3) = 5; + rect(1, 3) = 5; + + auto feature = some>(rect); + auto distance = some(feature, feature); + EXPECT_NEAR(distance->distance(1, 1), 0.0, 1e-10); + EXPECT_NEAR(distance->distance(1, 3), 2.63447126986, 1e-10); + EXPECT_NEAR(distance->distance(2, 3), 2.22834405812, 1e-10); +}