diff --git a/src/shogun/statistical_testing/MMD.cpp b/src/shogun/statistical_testing/MMD.cpp index 9c61d0434ae..217ec5ed7fa 100644 --- a/src/shogun/statistical_testing/MMD.cpp +++ b/src/shogun/statistical_testing/MMD.cpp @@ -89,6 +89,7 @@ void CMMD::Self::create_statistic_job() { const DataManager& dm = owner.get_data_manager(); auto Bx = dm.blocksize_at(0); + auto By = dm.blocksize_at(1); switch (statistic_type) { case EStatisticType::UNBIASED_FULL: @@ -102,7 +103,7 @@ void CMMD::Self::create_statistic_job() break; default : break; }; - permutation_job = mmd::WithinBlockPermutation(Bx, statistic_type); + permutation_job = mmd::WithinBlockPermutation(Bx, By, statistic_type); } void CMMD::Self::create_variance_job() diff --git a/src/shogun/statistical_testing/QuadraticTimeMMD.cpp b/src/shogun/statistical_testing/QuadraticTimeMMD.cpp index 0d1ae1780d0..e55446d0e43 100644 --- a/src/shogun/statistical_testing/QuadraticTimeMMD.cpp +++ b/src/shogun/statistical_testing/QuadraticTimeMMD.cpp @@ -89,6 +89,7 @@ void CQuadraticTimeMMD::Self::create_statistic_job() SG_SDEBUG("Entering\n"); const DataManager& dm=owner.get_data_manager(); auto Nx=dm.num_samples_at(0); + auto Ny=dm.num_samples_at(1); switch (owner.get_statistic_type()) { case EStatisticType::UNBIASED_FULL: @@ -102,7 +103,7 @@ void CQuadraticTimeMMD::Self::create_statistic_job() break; default : break; }; - permutation_job = mmd::WithinBlockPermutation(Nx, owner.get_statistic_type()); + permutation_job = mmd::WithinBlockPermutation(Nx, Ny, owner.get_statistic_type()); SG_SDEBUG("Leaving\n"); } diff --git a/src/shogun/statistical_testing/internals/mmd/WithinBlockPermutation.cpp b/src/shogun/statistical_testing/internals/mmd/WithinBlockPermutation.cpp index 1b0d16fb3bb..e21752d81f8 100644 --- a/src/shogun/statistical_testing/internals/mmd/WithinBlockPermutation.cpp +++ b/src/shogun/statistical_testing/internals/mmd/WithinBlockPermutation.cpp @@ -16,6 +16,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -32,84 +33,89 @@ using namespace shogun; using namespace internal; using namespace mmd; -WithinBlockPermutation::WithinBlockPermutation(index_t n, EStatisticType type) -: n_x(n), stype(type) +WithinBlockPermutation::WithinBlockPermutation(index_t nx, index_t ny, EStatisticType type) +: n_x(nx), n_y(ny), stype(type), terms() { + SG_SDEBUG("number of samples are %d and %d!\n", n_x, n_y); + std::fill(&terms.term[0], &terms.term[2]+1, 0); + std::fill(&terms.diag[0], &terms.diag[2]+1, 0); +} + +void WithinBlockPermutation::add_term(float64_t val, index_t i, index_t j) +{ + if (i=n_x && j>=n_x) + { + SG_SDEBUG("Adding KernelMatrix(%d,%d)=%f to term 2!\n", i, j, val); + terms.term[1]+=val; + if (i==j) + terms.diag[1]+=val; + } + else if (i>=n_x && j km) { SG_SDEBUG("Entering!\n"); - inds.resize(km.num_rows); - std::iota(inds.data(), inds.data()+inds.size(), 0); - SGVector permuted_inds(inds.data(), inds.size(), false); + SGVector permuted_inds(n_x+n_y); + std::iota(permuted_inds.vector, permuted_inds.vector+permuted_inds.vlen, 0); CMath::permute(permuted_inds); - const index_t n_y=km.num_rows-n_x; - SG_SDEBUG("number of samples are %d and %d!\n", n_x, n_y); + std::unordered_map inds; + for (int i=0; ij) - term_1+=km(permuted_inds[i], permuted_inds[j]); - } - } - term_1*=2; - SG_SDEBUG("term_1 sum (without diagonal) = %f!\n", term_1); - if (stype==EStatisticType::BIASED_FULL) - { - for (auto i=0; ij) - term_2+=km(permuted_inds[i], permuted_inds[j]); - } + for (auto i=0; isecond, inds.find(j)->second); } - term_2*=2.0; - SG_SDEBUG("term_2 sum (without diagonal) = %f!\n", term_2); - if (stype==EStatisticType::BIASED_FULL) + + SG_SDEBUG("term_1 sum (with diagonal) = %f!\n", terms.term[0]); + SG_SDEBUG("term_2 sum (with diagonal) = %f!\n", terms.term[1]); + if (stype!=EStatisticType::BIASED_FULL) { - for (auto i=n_x; i kernel_matrix); // return_type operator()(CGPUMatrix kernel_matrix); +private: const index_t n_x; + const index_t n_y; const EStatisticType stype; - std::vector inds; + void add_term(float64_t, index_t, index_t); + struct terms_t + { + float64_t term[3]; + float64_t diag[3]; + }; + terms_t terms; }; } diff --git a/tests/unit/statistical_testing/internals/WithinBlockPermutation_unittest.cc b/tests/unit/statistical_testing/internals/WithinBlockPermutation_unittest.cc index 6b206ac6269..1e9a791a997 100644 --- a/tests/unit/statistical_testing/internals/WithinBlockPermutation_unittest.cc +++ b/tests/unit/statistical_testing/internals/WithinBlockPermutation_unittest.cc @@ -51,16 +51,25 @@ using namespace Eigen; TEST(WithinBlockPermutation, biased_full) { const index_t dim=2; - const index_t n=6; - const index_t m=4; + const index_t n=13; + const index_t m=7; using operation=std::function)>; - SGMatrix data(dim, n+m); - std::iota(data.matrix, data.matrix+dim*(n+m), 1); + SGMatrix data_p(dim, n); + std::iota(data_p.matrix, data_p.matrix+dim*n, 1); + std::for_each(data_p.matrix, data_p.matrix+dim*n, [&n](float64_t& val) { val/=n; }); - auto feats=new CDenseFeatures(data); + SGMatrix data_q(dim, m); + std::iota(data_q.matrix, data_q.matrix+dim*m, n+1); + std::for_each(data_q.matrix, data_q.matrix+dim*m, [&m](float64_t& val) { val/=2*m; }); + + auto feats_p=new CDenseFeatures(data_p); + auto feats_q=new CDenseFeatures(data_q); + auto feats=feats_p->create_merged_copy(feats_q); SG_REF(feats); + SG_UNREF(feats_p); + SG_UNREF(feats_q); auto kernel=some(); kernel->set_width(2.0); @@ -69,7 +78,7 @@ TEST(WithinBlockPermutation, biased_full) auto mat=kernel->get_kernel_matrix(); // compute using within-block-permutation functor - operation compute=shogun::internal::mmd::WithinBlockPermutation(n, EStatisticType::BIASED_FULL); + operation compute=shogun::internal::mmd::WithinBlockPermutation(n, m, EStatisticType::BIASED_FULL); sg_rand->set_seed(12345); auto result_1=compute(mat); @@ -101,22 +110,31 @@ TEST(WithinBlockPermutation, biased_full) EXPECT_NEAR(result_1, result_2, 1E-15); EXPECT_NEAR(result_1, result_3, 1E-15); - kernel->remove_lhs_and_rhs(); + SG_UNREF(feats); } TEST(WithinBlockPermutation, unbiased_full) { const index_t dim=2; - const index_t n=3; - const index_t m=4; + const index_t n=13; + const index_t m=7; using operation=std::function)>; - SGMatrix data(dim, n+m); - std::iota(data.matrix, data.matrix+dim*(n+m), 1); + SGMatrix data_p(dim, n); + std::iota(data_p.matrix, data_p.matrix+dim*n, 1); + std::for_each(data_p.matrix, data_p.matrix+dim*n, [&n](float64_t& val) { val/=n; }); + + SGMatrix data_q(dim, m); + std::iota(data_q.matrix, data_q.matrix+dim*m, n+1); + std::for_each(data_q.matrix, data_q.matrix+dim*m, [&m](float64_t& val) { val/=2*m; }); - auto feats=new CDenseFeatures(data); + auto feats_p=new CDenseFeatures(data_p); + auto feats_q=new CDenseFeatures(data_q); + auto feats=feats_p->create_merged_copy(feats_q); SG_REF(feats); + SG_UNREF(feats_p); + SG_UNREF(feats_q); auto kernel=some(); kernel->set_width(2.0); @@ -125,7 +143,7 @@ TEST(WithinBlockPermutation, unbiased_full) auto mat=kernel->get_kernel_matrix(); // compute using within-block-permutation functor - operation compute=shogun::internal::mmd::WithinBlockPermutation(n, EStatisticType::UNBIASED_FULL); + operation compute=shogun::internal::mmd::WithinBlockPermutation(n, m, EStatisticType::UNBIASED_FULL); sg_rand->set_seed(12345); auto result_1=compute(mat); @@ -157,22 +175,30 @@ TEST(WithinBlockPermutation, unbiased_full) EXPECT_NEAR(result_1, result_2, 1E-15); EXPECT_NEAR(result_1, result_3, 1E-15); - kernel->remove_lhs_and_rhs(); + SG_UNREF(feats); } TEST(WithinBlockPermutation, unbiased_incomplete) { const index_t dim=2; - const index_t n=5; - const index_t m=5; + const index_t n=10; using operation=std::function)>; - SGMatrix data(dim, n+m); - std::iota(data.matrix, data.matrix+dim*(n+m), 1); + SGMatrix data_p(dim, n); + std::iota(data_p.matrix, data_p.matrix+dim*n, 1); + std::for_each(data_p.matrix, data_p.matrix+dim*n, [&n](float64_t& val) { val/=n; }); + + SGMatrix data_q(dim, n); + std::iota(data_q.matrix, data_q.matrix+dim*n, n+1); + std::for_each(data_q.matrix, data_q.matrix+dim*n, [&n](float64_t& val) { val/=2*n; }); - auto feats=new CDenseFeatures(data); + auto feats_p=new CDenseFeatures(data_p); + auto feats_q=new CDenseFeatures(data_q); + auto feats=feats_p->create_merged_copy(feats_q); SG_REF(feats); + SG_UNREF(feats_p); + SG_UNREF(feats_q); auto kernel=some(); kernel->set_width(2.0); @@ -181,7 +207,7 @@ TEST(WithinBlockPermutation, unbiased_incomplete) auto mat=kernel->get_kernel_matrix(); // compute using within-block-permutation functor - operation compute=shogun::internal::mmd::WithinBlockPermutation(n, EStatisticType::UNBIASED_INCOMPLETE); + operation compute=shogun::internal::mmd::WithinBlockPermutation(n, n, EStatisticType::UNBIASED_INCOMPLETE); sg_rand->set_seed(12345); auto result_1=compute(mat); @@ -213,5 +239,5 @@ TEST(WithinBlockPermutation, unbiased_incomplete) EXPECT_NEAR(result_1, result_2, 1E-15); EXPECT_NEAR(result_1, result_3, 1E-15); - kernel->remove_lhs_and_rhs(); + SG_UNREF(feats); }