Skip to content

Commit

Permalink
added multi-kernel mmd computation to public API
Browse files Browse the repository at this point in the history
  • Loading branch information
lambday authored and karlnapf committed Jul 4, 2016
1 parent 3009842 commit 0ec7ef4
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 41 deletions.
10 changes: 10 additions & 0 deletions src/shogun/statistical_testing/MMD.cpp
Expand Up @@ -461,6 +461,16 @@ float64_t CMMD::compute_variance()
return self->compute_statistic_variance().second;
}

SGVector<float64_t> CMMD::compute_multiple()
{
return self->compute_statistic_and_Q(self->strategy->get_kernel_mgr()).first;
}

std::shared_ptr<CKernelSelectionStrategy> CMMD::get_strategy()
{
return self->strategy;
}

void CMMD::set_train_test_ratio(float64_t ratio)
{
auto& data_mgr=get_data_mgr();
Expand Down
3 changes: 3 additions & 0 deletions src/shogun/statistical_testing/MMD.h
Expand Up @@ -99,6 +99,8 @@ class CMMD : public CTwoSampleTest
virtual float64_t compute_statistic();
virtual float64_t compute_variance();

virtual SGVector<float64_t> compute_multiple();

virtual SGVector<float64_t> sample_null();

void use_gpu(bool gpu);
Expand All @@ -122,6 +124,7 @@ class CMMD : public CTwoSampleTest
virtual const float64_t normalize_statistic(float64_t statistic) const=0;
virtual const float64_t normalize_variance(float64_t variance) const=0;
bool use_gpu() const;
std::shared_ptr<CKernelSelectionStrategy> get_strategy();
private:
struct Self;
std::unique_ptr<Self> self;
Expand Down
5 changes: 5 additions & 0 deletions src/shogun/statistical_testing/QuadraticTimeMMD.cpp
Expand Up @@ -328,6 +328,11 @@ float64_t CQuadraticTimeMMD::compute_variance()
return self->compute_statistic_variance().second;
}

SGVector<float64_t> CQuadraticTimeMMD::compute_multiple()
{
return compute_statistic(get_strategy()->get_kernel_mgr());
}

float64_t CQuadraticTimeMMD::compute_p_value(float64_t statistic)
{
SG_DEBUG("Entering\n");
Expand Down
2 changes: 2 additions & 0 deletions src/shogun/statistical_testing/QuadraticTimeMMD.h
Expand Up @@ -61,6 +61,8 @@ class CQuadraticTimeMMD : public CMMD
virtual float64_t compute_statistic();
virtual float64_t compute_variance();

virtual SGVector<float64_t> compute_multiple();

virtual SGVector<float64_t> sample_null();
void spectrum_set_num_eigenvalues(index_t num_eigenvalues);

Expand Down
93 changes: 52 additions & 41 deletions src/shogun/statistical_testing/internals/mmd/MultiKernelMMD.cpp
Expand Up @@ -59,14 +59,14 @@ void MultiKernelMMD::set_distance(CCustomDistance* distance)

void MultiKernelMMD::add_term(terms_t& t, float32_t val, index_t i, index_t j) const
{
if (i<n_x && j<n_x && i<=j)
if (i<n_x && j<n_x && i>=j)
{
SG_SDEBUG("Adding Kernel(%d,%d)=%f to term_0!\n", i, j, val);
t.term[0]+=val;
if (i==j)
t.diag[0]+=val;
}
else if (i>=n_x && j>=n_x && i<=j)
else if (i>=n_x && j>=n_x && i>=j)
{
SG_SDEBUG("Adding Kernel(%d,%d)=%f to term_1!\n", i, j, val);
t.term[1]+=val;
Expand All @@ -82,61 +82,72 @@ void MultiKernelMMD::add_term(terms_t& t, float32_t val, index_t i, index_t j) c
}
}

float64_t MultiKernelMMD::compute_mmd(terms_t& t) const
{
t.term[0]=2*(t.term[0]-t.diag[0]);
t.term[1]=2*(t.term[1]-t.diag[1]);
SG_SDEBUG("term_0 sum (without diagonal) = %f!\n", t.term[0]);
SG_SDEBUG("term_1 sum (without diagonal) = %f!\n", t.term[1]);
if (s_type!=ST_BIASED_FULL)
{
t.term[0]/=n_x*(n_x-1);
t.term[1]/=n_y*(n_y-1);
}
else
{
t.term[0]+=t.diag[0];
t.term[1]+=t.diag[1];
SG_SDEBUG("term_0 sum (with diagonal) = %f!\n", t.term[0]);
SG_SDEBUG("term_1 sum (with diagonal) = %f!\n", t.term[1]);
t.term[0]/=n_x*n_x;
t.term[1]/=n_y*n_y;
}
SG_SDEBUG("term_0 (normalized) = %f!\n", t.term[0]);
SG_SDEBUG("term_1 (normalized) = %f!\n", t.term[1]);

SG_SDEBUG("term_2 sum (with diagonal) = %f!\n", t.term[2]);
if (s_type==ST_UNBIASED_INCOMPLETE)
{
t.term[2]-=t.diag[2];
SG_SDEBUG("term_2 sum (without diagonal) = %f!\n", t.term[2]);
t.term[2]/=n_x*(n_x-1);
}
else
t.term[2]/=n_x*n_y;
SG_SDEBUG("term_2 (normalized) = %f!\n", t.term[2]);

auto result=t.term[0]+t.term[1]-2*t.term[2];
SG_SDEBUG("result = %f!\n", result);
return result;
}

SGVector<float64_t> MultiKernelMMD::operator()(const KernelManager& kernel_mgr) const
{
SG_SDEBUG("Entering!\n");
REQUIRE(m_distance, "Distance instace is not set!\n");
kernel_mgr.set_precomputed_distance(m_distance.get());

SGVector<float64_t> result(kernel_mgr.num_kernels());
#pragma omp parallel for
for (size_t k=0; k<kernel_mgr.num_kernels(); ++k)
std::vector<terms_t> terms(result.size());

for (auto j=0; j<n_x+n_y; ++j)
{
terms_t t;
for (auto j=0; j<n_x+n_y; ++j)
for (auto i=j; i<n_x+n_y; ++i)
{
for (auto i=0; i<n_x+n_y; ++i)
for (size_t k=0; k<kernel_mgr.num_kernels(); ++k)
{
auto kernel=kernel_mgr.kernel_at(k)->kernel(i, j);
add_term(t, kernel, i, j);
add_term(terms[k], kernel, i, j);
}
}
}

t.term[0]=2*(t.term[0]-t.diag[0]);
t.term[1]=2*(t.term[1]-t.diag[1]);
SG_SDEBUG("term_0 sum (without diagonal) = %f!\n", t.term[0]);
SG_SDEBUG("term_1 sum (without diagonal) = %f!\n", t.term[1]);
if (s_type!=ST_BIASED_FULL)
{
t.term[0]/=n_x*(n_x-1);
t.term[1]/=n_y*(n_y-1);
}
else
{
t.term[0]+=t.diag[0];
t.term[1]+=t.diag[1];
SG_SDEBUG("term_0 sum (with diagonal) = %f!\n", t.term[0]);
SG_SDEBUG("term_1 sum (with diagonal) = %f!\n", t.term[1]);
t.term[0]/=n_x*n_x;
t.term[1]/=n_y*n_y;
}
SG_SDEBUG("term_0 (normalized) = %f!\n", t.term[0]);
SG_SDEBUG("term_1 (normalized) = %f!\n", t.term[1]);

SG_SDEBUG("term_2 sum (with diagonal) = %f!\n", t.term[2]);
if (s_type==ST_UNBIASED_INCOMPLETE)
{
t.term[2]-=t.diag[2];
SG_SDEBUG("term_2 sum (without diagonal) = %f!\n", t.term[2]);
t.term[2]/=n_x*(n_x-1);
}
else
t.term[2]/=n_x*n_y;
SG_SDEBUG("term_2 (normalized) = %f!\n", t.term[2]);

result[k]=t.term[0]+t.term[1]-2*t.term[2];
for (size_t k=0; k<kernel_mgr.num_kernels(); ++k)
{
result[k]=compute_mmd(terms[k]);
SG_SDEBUG("result[%d] = %f!\n", k, result[k]);
}
terms.resize(0);

kernel_mgr.unset_precomputed_distance();
SG_SDEBUG("Leaving!\n");
Expand Down
Expand Up @@ -59,6 +59,7 @@ class MultiKernelMMD
const EStatisticType s_type;
std::shared_ptr<CCustomDistance> m_distance;
void add_term(terms_t&, float32_t, index_t, index_t) const;
float64_t compute_mmd(terms_t& t) const;
};

}
Expand Down
38 changes: 38 additions & 0 deletions tests/unit/statistical_testing/QuadraticTimeMMD_unittest.cc
Expand Up @@ -493,3 +493,41 @@ TEST(QuadraticTimeMMD, precomputed_vs_nonprecomputed)
for (auto i=0; i<result_1.size(); ++i)
EXPECT_NEAR(result_1[i], result_2[i], 1E-6);
}

TEST(QuadraticTimeMMD, compute_multiple)
{
const index_t m=20;
const index_t n=20;
const index_t dim=1;
const index_t num_kernels=10;

float64_t difference=0.5;
sg_rand->set_seed(12345);

auto gen_p=some<CMeanShiftDataGenerator>(0, dim, 0);
auto gen_q=some<CMeanShiftDataGenerator>(difference, dim, 0);

CFeatures* feat_p=gen_p->get_streamed_features(m);
CFeatures* feat_q=gen_q->get_streamed_features(n);

auto mmd=some<CQuadraticTimeMMD>(feat_p, feat_q);
for (auto i=0, sigma=-5; i<num_kernels; ++i, sigma+=1)
{
float64_t tau=pow(2, sigma);
mmd->add_kernel(new CGaussianKernel(10, tau));
}
SGVector<float64_t> mmd_multiple=mmd->compute_multiple();

SGVector<float64_t> mmd_single(num_kernels);
for (auto i=0, sigma=-5; i<num_kernels; ++i, sigma+=1)
{
auto mmd2=some<CQuadraticTimeMMD>(feat_p, feat_q);
float64_t tau=pow(2, sigma);
mmd2->set_kernel(new CGaussianKernel(10, tau));
mmd_single[i]=mmd2->compute_statistic();
}

ASSERT_EQ(mmd_multiple.size(), mmd_single.size());
for (auto i=0; i<mmd_multiple.size(); ++i)
EXPECT_NEAR(mmd_multiple[i], mmd_single[i], 1E-5);
}

0 comments on commit 0ec7ef4

Please sign in to comment.