Skip to content

Commit

Permalink
added permutation without storing the kernel matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
lambday authored and karlnapf committed Jul 4, 2016
1 parent e89eab1 commit c4d66b5
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 39 deletions.
120 changes: 87 additions & 33 deletions src/shogun/statistical_testing/QuadraticTimeMMD.cpp
Expand Up @@ -37,6 +37,7 @@
#include <shogun/mathematics/eigen3.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/statistical_testing/QuadraticTimeMMD.h>
#include <shogun/statistical_testing/internals/Kernel.h>
#include <shogun/statistical_testing/internals/FeaturesUtil.h>
#include <shogun/statistical_testing/internals/NextSamples.h>
#include <shogun/statistical_testing/internals/DataManager.h>
Expand All @@ -59,6 +60,7 @@ struct CQuadraticTimeMMD::Self
SGMatrix<float32_t> get_kernel_matrix();
std::pair<float64_t, float64_t> compute_statistic_variance();
SGVector<float64_t> sample_null();
void init_kernel();

void create_statistic_job();
void create_variance_job();
Expand All @@ -68,12 +70,15 @@ struct CQuadraticTimeMMD::Self

CQuadraticTimeMMD& owner;
index_t num_eigenvalues;
bool precompute;
bool is_kernel_initialized;

std::function<float32_t(const SGMatrix<float32_t>&)> statistic_job;
std::function<float32_t(const SGMatrix<float32_t>&)> variance_job;
};

CQuadraticTimeMMD::Self::Self(CQuadraticTimeMMD& mmd) : owner(mmd), num_eigenvalues(10),
precompute(true), is_kernel_initialized(false),
statistic_job(nullptr), variance_job(nullptr)
{
}
Expand Down Expand Up @@ -133,61 +138,68 @@ void CQuadraticTimeMMD::Self::compute_jobs(ComputationManager& cm) const
SG_SDEBUG("Leaving\n");
}

SGMatrix<float32_t> CQuadraticTimeMMD::Self::get_kernel_matrix()
void CQuadraticTimeMMD::Self::init_kernel()
{
SG_SDEBUG("Entering\n");
const KernelManager& km=owner.get_kernel_manager();
auto kernel=km.kernel_at(0);
REQUIRE(kernel!=nullptr, "Kernel is not set!\n");

SGMatrix<float32_t> kernel_matrix;

// check if precomputed kernel is given, no need to do anything in that case
// otherwise, init kernel with data and precompute kernel matrix
if (kernel->get_kernel_type()==K_CUSTOM)
{
auto precomputed_kernel=dynamic_cast<CCustomKernel*>(kernel);
ASSERT(precomputed_kernel!=nullptr);
kernel_matrix=precomputed_kernel->get_float32_kernel_matrix();
}
else
if (!is_kernel_initialized && !(kernel->get_kernel_type()==K_CUSTOM))
{
DataManager& dm=owner.get_data_manager();

// using data manager next() API in order to make it work with
// streaming samples as well.
dm.start();
auto samples=dm.next();
if (!samples.empty())
{
dm.end();

// use 0th block from each distribution (since there is only one block
// for quadratic time MMD
CFeatures *samples_p=samples[0][0].get();
CFeatures *samples_q=samples[1][0].get();
auto samples_p_and_q=FeaturesUtil::create_merged_copy(samples_p, samples_q);
samples.clear();
kernel->init(samples_p_and_q, samples_p_and_q);
try
{
owner.get_kernel_manager().precompute_kernel_at(0);
}
catch (ShogunException e)
{
SG_SERROR("%s, Data is too large! Computing kernel matrix was not possible!\n", e.get_exception_string());
}
kernel->remove_lhs_and_rhs();
auto precomputed_kernel=dynamic_cast<CCustomKernel*>(km.kernel_at(0));
ASSERT(precomputed_kernel!=nullptr);
kernel_matrix=precomputed_kernel->get_float32_kernel_matrix();
is_kernel_initialized=true;
SG_SDEBUG("Kernel is initialized with joint features of %d total samples!\n", samples_p_and_q->get_num_vectors());
}
else
{
dm.end();
SG_SERROR("Could not fetch samples!\n");
}
}
SG_SDEBUG("Leaving\n");
}

SGMatrix<float32_t> CQuadraticTimeMMD::Self::get_kernel_matrix()
{
SG_SDEBUG("Entering\n");
const KernelManager& km=owner.get_kernel_manager();
auto kernel=km.kernel_at(0);
REQUIRE(kernel!=nullptr, "Kernel is not set!\n");

SGMatrix<float32_t> kernel_matrix;
if (kernel->get_kernel_type()==K_CUSTOM)
{
auto precomputed_kernel=dynamic_cast<CCustomKernel*>(kernel);
ASSERT(precomputed_kernel!=nullptr);
kernel_matrix=precomputed_kernel->get_float32_kernel_matrix();
}
else
{
init_kernel();
try
{
owner.get_kernel_manager().precompute_kernel_at(0);
}
catch (ShogunException e)
{
SG_SERROR("%s, Data is too large! Computing kernel matrix was not possible!\n", e.get_exception_string());
}
kernel->remove_lhs_and_rhs();
auto precomputed_kernel=dynamic_cast<CCustomKernel*>(km.kernel_at(0));
ASSERT(precomputed_kernel!=nullptr);
kernel_matrix=precomputed_kernel->get_float32_kernel_matrix();
}

SG_SDEBUG("Leaving\n");
return kernel_matrix;
Expand Down Expand Up @@ -225,17 +237,35 @@ std::pair<float64_t, float64_t> CQuadraticTimeMMD::Self::compute_statistic_varia
SGVector<float64_t> CQuadraticTimeMMD::Self::sample_null()
{
SG_SDEBUG("Entering\n");
SGMatrix<float32_t> kernel_matrix=get_kernel_matrix();

const DataManager& dm=owner.get_data_manager();
auto Nx=dm.num_samples_at(0);
auto Ny=dm.num_samples_at(1);

WithinBlockPermutationBatch compute(Nx, Ny, owner.get_num_null_samples(), owner.get_statistic_type());
SGVector<float32_t> result=compute(kernel_matrix);

SGVector<float32_t> result;
if (precompute)
{
SGMatrix<float32_t> kernel_matrix=get_kernel_matrix();
result=compute(kernel_matrix);
}
else
{
const KernelManager& km=owner.get_kernel_manager();
auto kernel=km.kernel_at(0);
REQUIRE(kernel!=nullptr, "Kernel is not set!\n");
if (kernel->get_kernel_type()==K_CUSTOM)
{
SG_SERROR("Precomputed kernel matrix exists!\n");
}
init_kernel();
result=compute(internal::Kernel(kernel));
}

SGVector<float64_t> null_samples(result.vlen);
for (auto i=0; i<result.vlen; ++i)
null_samples[i]=owner.normalize_statistic(result[i]);

SG_SDEBUG("Leaving\n");
return null_samples;
}
Expand Down Expand Up @@ -485,6 +515,30 @@ SGVector<float64_t> CQuadraticTimeMMD::spectrum_sample_null()
return null_samples;
}

void CQuadraticTimeMMD::precompute_kernel_matrix(bool precompute)
{
if (self->precompute && !precompute)
{
const KernelManager& km=get_kernel_manager();
auto kernel=km.kernel_at(0);
REQUIRE(kernel!=nullptr, "Kernel is not set!\n");

if (kernel->get_kernel_type()==K_CUSTOM)
{
SG_SINFO("Precomputed kernel matrix exists! Removing the existing matrix!\n");
get_kernel_manager().restore_kernel_at(0);
kernel=km.kernel_at(0);
REQUIRE(kernel!=nullptr, "Kernel is not set!\n");
if (kernel->get_kernel_type()==K_CUSTOM)
{
SG_SERROR("The underlying kernel itself is a precomputed kernel!\n");
}
}
self->is_kernel_initialized=false;
}
self->precompute=precompute;
}

const char* CQuadraticTimeMMD::get_name() const
{
return "QuadraticTimeMMD";
Expand Down
2 changes: 2 additions & 0 deletions src/shogun/statistical_testing/QuadraticTimeMMD.h
Expand Up @@ -59,6 +59,8 @@ class CQuadraticTimeMMD : public CMMD
virtual float64_t compute_p_value(float64_t statistic) override;
virtual float64_t compute_threshold(float64_t alpha) override;

void precompute_kernel_matrix(bool precompute);

virtual const char* get_name() const;
private:
struct Self;
Expand Down
Expand Up @@ -22,6 +22,7 @@
#include <shogun/lib/GPUMatrix.h>
#include <shogun/mathematics/Math.h>
#include <shogun/statistical_testing/MMD.h>
#include <shogun/statistical_testing/internals/Kernel.h>
#include <shogun/statistical_testing/internals/mmd/WithinBlockPermutationBatch.h>
#include <shogun/statistical_testing/internals/mmd/BiasedFull.h>
#include <shogun/statistical_testing/internals/mmd/UnbiasedFull.h>
Expand Down Expand Up @@ -73,7 +74,8 @@ void WithinBlockPermutationBatch::add_term(terms_t& t, float32_t val, index_t i,
}
}

SGVector<float32_t> WithinBlockPermutationBatch::operator()(const SGMatrix<float32_t>& km)
template <class Kernel>
SGVector<float32_t> WithinBlockPermutationBatch::operator()(const Kernel& km)
{
SG_SDEBUG("Entering!\n");

Expand Down Expand Up @@ -134,3 +136,6 @@ SGVector<float32_t> WithinBlockPermutationBatch::operator()(const SGMatrix<float
SG_SDEBUG("Leaving!\n");
return result;
}

template SGVector<float32_t> WithinBlockPermutationBatch::operator()(const SGMatrix<float32_t>&);
template SGVector<float32_t> WithinBlockPermutationBatch::operator()(const Kernel&);
Expand Up @@ -42,8 +42,7 @@ class WithinBlockPermutationBatch
using return_type=SGVector<value_type>;
public:
WithinBlockPermutationBatch(index_t, index_t, index_t, EStatisticType);
return_type operator()(const SGMatrix<value_type>& kernel_matrix);
// return_type operator()(const CGPUMatrix<value_type>& kernel_matrix);
template <class Kernel> return_type operator()(const Kernel& kernel);
private:
struct terms_t;
void add_term(terms_t&, float32_t, index_t, index_t);
Expand Down
37 changes: 37 additions & 0 deletions tests/unit/statistical_testing/QuadraticTimeMMD_unittest.cc
Expand Up @@ -456,3 +456,40 @@ TEST(QuadraticTimeMMD, perform_test_spectrum)
p_value_spectrum=mmd->compute_p_value(mmd->compute_statistic());
EXPECT_NEAR(p_value_spectrum, 0.0, 1E-10);
}

TEST(QuadraticTimeMMD, precomputed_vs_nonprecomputed)
{
const index_t m=20;
const index_t n=20;
const index_t dim=3;

float64_t difference=0.5;

auto gen_p=some<CMeanShiftDataGenerator>(0, dim, 0);
auto gen_q=some<CMeanShiftDataGenerator>(difference, dim, 0);

CFeatures* feat_p=gen_p->get_streamed_features(m);
CFeatures* feat_q=gen_q->get_streamed_features(n);

float64_t sigma=2;
float64_t sq_sigma_twice=sigma*sigma*2;
CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice);

auto mmd=some<CQuadraticTimeMMD>(feat_p, feat_q);
mmd->set_kernel(kernel);

index_t num_null_samples=10;
mmd->set_num_null_samples(num_null_samples);
mmd->set_null_approximation_method(NAM_PERMUTATION);

sg_rand->set_seed(12345);
SGVector<float64_t> result_1=mmd->sample_null();

mmd->precompute_kernel_matrix(false);
sg_rand->set_seed(12345);
SGVector<float64_t> result_2=mmd->sample_null();

ASSERT_EQ(result_1.size(), result_2.size());
for (auto i=0; i<result_1.size(); ++i)
EXPECT_NEAR(result_1[i], result_2[i], 1E-6);
}
Expand Up @@ -39,6 +39,7 @@
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/statistical_testing/MMD.h>
#include <shogun/statistical_testing/internals/Kernel.h>
#include <shogun/statistical_testing/internals/mmd/BiasedFull.h>
#include <shogun/statistical_testing/internals/mmd/UnbiasedFull.h>
#include <shogun/statistical_testing/internals/mmd/UnbiasedIncomplete.h>
Expand All @@ -50,9 +51,9 @@ using namespace Eigen;

TEST(WithinBlockPermutationBatch, biased_full)
{
const index_t dim=2;
const index_t n=13;
const index_t m=7;
const index_t dim=3000;
const index_t n=1500;
const index_t m=1500;
const index_t num_null_samples=5;

using operation=std::function<float32_t(SGMatrix<float32_t>)>;
Expand Down Expand Up @@ -280,3 +281,48 @@ TEST(WithinBlockPermutationBatch, unbiased_incomplete)

SG_UNREF(feats);
}

TEST(WithinBlockPermutationBatch, kernel_functor)
{
const index_t dim=2;
const index_t n=8;
const index_t m=8;
const index_t num_null_samples=5;

SGMatrix<float64_t> data_p(dim, n);
std::iota(data_p.matrix, data_p.matrix+dim*n, 1);
std::for_each(data_p.matrix, data_p.matrix+dim*n, [&n](float64_t& val) { val/=n; });

SGMatrix<float64_t> data_q(dim, m);
std::iota(data_q.matrix, data_q.matrix+dim*m, n+1);
std::for_each(data_q.matrix, data_q.matrix+dim*m, [&m](float64_t& val) { val/=2*m; });

auto feats_p=new CDenseFeatures<float64_t>(data_p);
auto feats_q=new CDenseFeatures<float64_t>(data_q);
auto feats=feats_p->create_merged_copy(feats_q);
SG_REF(feats);
SG_UNREF(feats_p);
SG_UNREF(feats_q);

auto kernel=some<CGaussianKernel>();
kernel->set_width(2.0);

kernel->init(feats, feats);
auto mat=kernel->get_kernel_matrix<float32_t>();

// compute using within-block-permutation functor
shogun::internal::mmd::WithinBlockPermutationBatch batch(n, m, num_null_samples, ST_BIASED_FULL);

sg_rand->set_seed(12345);
SGVector<float32_t> result_1=batch(mat);

sg_rand->set_seed(12345);
SGVector<float32_t> result_2=batch(shogun::internal::Kernel(kernel));

EXPECT_TRUE(result_1.size()==result_2.size());
for (auto i=0; i<result_1.size(); ++i)
EXPECT_NEAR(result_1[i], result_2[i], 1E-6);

SG_UNREF(feats);
}

0 comments on commit c4d66b5

Please sign in to comment.