Skip to content

Commit

Permalink
More refactoring for statistical testing internals
Browse files Browse the repository at this point in the history
  - removed WithinBlockPermutationBatch and MultiKernelPermutationTest
  - Both single kernel and multi-kernel cases are now handled in
  PermutationMMD
  • Loading branch information
lambday committed Jul 8, 2016
1 parent bb80cdb commit 8d4feb5
Show file tree
Hide file tree
Showing 10 changed files with 333 additions and 498 deletions.
53 changes: 45 additions & 8 deletions src/shogun/statistical_testing/MultiKernelQuadraticTimeMMD.cpp
Expand Up @@ -38,7 +38,7 @@
#include <shogun/statistical_testing/internals/FeaturesUtil.h>
#include <shogun/statistical_testing/internals/KernelManager.h>
#include <shogun/statistical_testing/internals/mmd/ComputeMMD.h>
#include <shogun/statistical_testing/internals/mmd/MultiKernelPermutationTest.h>
#include <shogun/statistical_testing/internals/mmd/PermutationMMD.h>

using namespace shogun;
using namespace internal;
Expand All @@ -54,7 +54,8 @@ struct CMultiKernelQuadraticTimeMMD::Self
unique_ptr<CCustomDistance> m_pairwise_distance;
EDistanceType m_dtype;
KernelManager m_kernel_mgr;
ComputeMMD compute_mmd;
ComputeMMD statistic_job;
PermutationMMD permutation_job;
};

CMultiKernelQuadraticTimeMMD::Self::Self(CQuadraticTimeMMD *owner) : m_owner(owner),
Expand Down Expand Up @@ -159,10 +160,10 @@ SGVector<float64_t> CMultiKernelQuadraticTimeMMD::statistic(const KernelManager&
kernel_mgr.set_precomputed_distance(self->m_pairwise_distance.get());
SG_UNREF(distance);

self->compute_mmd.m_n_x=nx;
self->compute_mmd.m_n_y=ny;
self->compute_mmd.m_stype=stype;
SGVector<float64_t> result=self->compute_mmd(kernel_mgr);
self->statistic_job.m_n_x=nx;
self->statistic_job.m_n_y=ny;
self->statistic_job.m_stype=stype;
SGVector<float64_t> result=self->statistic_job(kernel_mgr);

kernel_mgr.unset_precomputed_distance();

Expand All @@ -173,6 +174,39 @@ SGVector<float64_t> CMultiKernelQuadraticTimeMMD::statistic(const KernelManager&
return result;
}

SGMatrix<float32_t> CMultiKernelQuadraticTimeMMD::sample_null(const KernelManager& kernel_mgr)
{
SG_DEBUG("Entering");
REQUIRE(self->m_owner->get_null_approximation_method()==ENullApproximationMethod::NAM_PERMUTATION,
"Multi-kernel tests requires the H0 approximation method to be PERMUTATION!\n");

REQUIRE(kernel_mgr.num_kernels()>0, "Number of kernels (%d) have to be greater than 0!\n", kernel_mgr.num_kernels());

const auto nx=self->m_owner->get_num_samples_p();
const auto ny=self->m_owner->get_num_samples_q();
const auto stype = self->m_owner->get_statistic_type();
const auto num_null_samples = self->m_owner->get_num_null_samples();

CDistance* distance=kernel_mgr.get_distance_instance();
self->update_pairwise_distance(distance);
kernel_mgr.set_precomputed_distance(self->m_pairwise_distance.get());
SG_UNREF(distance);

self->permutation_job.m_n_x=nx;
self->permutation_job.m_n_y=ny;
self->permutation_job.m_num_null_samples=num_null_samples;
self->permutation_job.m_stype=stype;
SGMatrix<float32_t> result=self->permutation_job(kernel_mgr);

kernel_mgr.unset_precomputed_distance();

for (auto i=0; i<result.size(); ++i)
result.matrix[i]=self->m_owner->normalize_statistic(result.matrix[i]);

SG_DEBUG("Leaving");
return result;
}

SGVector<float64_t> CMultiKernelQuadraticTimeMMD::p_values(const KernelManager& kernel_mgr)
{
SG_DEBUG("Entering");
Expand All @@ -191,8 +225,11 @@ SGVector<float64_t> CMultiKernelQuadraticTimeMMD::p_values(const KernelManager&
kernel_mgr.set_precomputed_distance(self->m_pairwise_distance.get());
SG_UNREF(distance);

MultiKernelPermutationTest compute(nx, ny, num_null_samples, stype);
SGVector<float64_t> result=compute(kernel_mgr);
self->permutation_job.m_n_x=nx;
self->permutation_job.m_n_y=ny;
self->permutation_job.m_num_null_samples=num_null_samples;
self->permutation_job.m_stype=stype;
SGVector<float64_t> result=self->permutation_job.p_value(kernel_mgr);

kernel_mgr.unset_precomputed_distance();

Expand Down
2 changes: 2 additions & 0 deletions src/shogun/statistical_testing/MultiKernelQuadraticTimeMMD.h
Expand Up @@ -73,6 +73,7 @@ class CMultiKernelQuadraticTimeMMD : public CSGObject
SGVector<float64_t> variance_h0();
SGVector<float64_t> variance_h1();

SGMatrix<float32_t> sample_null();
SGVector<float64_t> p_values();
SGVector<bool> perform_test(float64_t alpha);

Expand All @@ -81,6 +82,7 @@ class CMultiKernelQuadraticTimeMMD : public CSGObject
struct Self;
std::unique_ptr<Self> self;
SGVector<float64_t> statistic(const internal::KernelManager& kernel_mgr);
SGMatrix<float32_t> sample_null(const internal::KernelManager& kernel_mgr);
SGVector<float64_t> p_values(const internal::KernelManager& kernel_mgr);
};

Expand Down
16 changes: 9 additions & 7 deletions src/shogun/statistical_testing/QuadraticTimeMMD.cpp
Expand Up @@ -47,8 +47,8 @@
#include <shogun/statistical_testing/internals/KernelManager.h>
#include <shogun/statistical_testing/internals/ComputationManager.h>
#include <shogun/statistical_testing/internals/mmd/ComputeMMD.h>
#include <shogun/statistical_testing/internals/mmd/PermutationMMD.h>
#include <shogun/statistical_testing/internals/mmd/FullDirect.h>
#include <shogun/statistical_testing/internals/mmd/WithinBlockPermutationBatch.h>
#include <shogun/statistical_testing/kernelselection/KernelSelectionStrategy.h>

using namespace shogun;
Expand Down Expand Up @@ -78,6 +78,7 @@ struct CQuadraticTimeMMD::Self

std::function<float32_t(const SGMatrix<float32_t>&)> statistic_job;
std::function<float32_t(const SGMatrix<float32_t>&)> variance_job;
PermutationMMD permutation_job;
};

CQuadraticTimeMMD::Self::Self(CQuadraticTimeMMD& mmd) : owner(mmd), num_eigenvalues(10),
Expand Down Expand Up @@ -197,6 +198,7 @@ std::pair<float64_t, float64_t> CQuadraticTimeMMD::Self::compute_statistic_varia
SG_SDEBUG("Entering\n");
SGMatrix<float32_t> kernel_matrix=get_kernel_matrix();

// TODO make sure it works even when precompute is turned-off
ComputationManager cm;
create_computation_jobs();
cm.enqueue_job(statistic_job);
Expand Down Expand Up @@ -225,16 +227,16 @@ SGVector<float64_t> CQuadraticTimeMMD::Self::sample_null()
{
SG_SDEBUG("Entering\n");

const DataManager& data_mgr=owner.get_data_mgr();
auto Nx=data_mgr.num_samples_at(0);
auto Ny=data_mgr.num_samples_at(1);
WithinBlockPermutationBatch compute(Nx, Ny, owner.get_num_null_samples(), owner.get_statistic_type());
permutation_job.m_n_x=owner.get_num_samples_p();
permutation_job.m_n_y=owner.get_num_samples_q();
permutation_job.m_stype=owner.get_statistic_type();
permutation_job.m_num_null_samples=owner.get_num_null_samples();

SGVector<float32_t> result;
if (precompute)
{
SGMatrix<float32_t> kernel_matrix=get_kernel_matrix();
result=compute(kernel_matrix);
result=permutation_job(kernel_matrix);
}
else
{
Expand All @@ -246,7 +248,7 @@ SGVector<float64_t> CQuadraticTimeMMD::Self::sample_null()
SG_SERROR("Precomputed kernel matrix exists!\n");
}
init_kernel();
result=compute(internal::Kernel(kernel));
result=permutation_job(internal::Kernel(kernel));
}

SGVector<float64_t> null_samples(result.vlen);
Expand Down
30 changes: 27 additions & 3 deletions src/shogun/statistical_testing/internals/mmd/ComputeMMD.h
Expand Up @@ -33,13 +33,14 @@

#include <array>
#include <vector>
#include <shogun/io/SGIO.h>
#include <shogun/lib/config.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/kernel/Kernel.h>
#include <shogun/statistical_testing/MMD.h>
#include <shogun/statistical_testing/internals/KernelManager.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/io/SGIO.h>

namespace shogun
{
Expand All @@ -65,8 +66,22 @@ struct ComputeMMD
{
}

template <class Kernel>
float32_t operator()(const Kernel& kernel) const
{
ASSERT(m_n_x>0 && m_n_y>0);
const index_t size=m_n_x+m_n_y;
terms_t terms;
for (auto i=0; i<size; ++i)
{
for (auto j=i; j<size; ++j)
add_term(terms, kernel(i, j), i, j);
}
return compute(terms);
}

template <typename T>
T operator()(const SGMatrix<T>& kernel_matrix) const
float32_t operator()(const SGMatrix<T>& kernel_matrix) const
{
ASSERT(m_n_x>0 && m_n_y>0);
const index_t size=m_n_x+m_n_y;
Expand All @@ -90,7 +105,7 @@ struct ComputeMMD
terms.term[1]=(b_y.sum()-terms.diag[1])/2+terms.diag[1];
terms.term[2]=b_xy.sum();

return static_cast<T>(compute(terms));
return compute(terms);
}

SGVector<float64_t> operator()(const KernelManager& kernel_mgr) const
Expand Down Expand Up @@ -120,6 +135,15 @@ struct ComputeMMD
return result;
}

/**
* Adds the kernel value to to the term that corresponding to K(i, j). It only
* uses the lower triangular half of the matrix to exploit symmetry.
*
* @param terms the terms for computing MMD
* @param kernel_value the kernel value between i-th and j-th features.
* @param i the row index for the Gram matrix
* @param j the col index for the Gram matrix
*/
template <typename T>
inline void add_term(terms_t& terms, T kernel_value, index_t i, index_t j) const
{
Expand Down

0 comments on commit 8d4feb5

Please sign in to comment.