Skip to content

Commit

Permalink
added unit test for null spectrum approximation in quadratic time mmd…
Browse files Browse the repository at this point in the history
… and a minor spell correction
  • Loading branch information
lambday committed Mar 26, 2014
1 parent 5b7df93 commit e4fad50
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 85 deletions.
4 changes: 2 additions & 2 deletions doc/ipython-notebooks/statistics/mmd_two_sample_testing.ipynb
Expand Up @@ -545,7 +545,7 @@
"#tell Shogun to use spectrum approximation\n",
"mmd.set_null_approximation_method(MMD2_SPECTRUM)\n",
"mmd.set_num_eigenvalues_spectrum(num_eigen)\n",
"mmd.set_num_samples_sepctrum(num_samples)\n",
"mmd.set_num_samples_spectrum(num_samples)\n",
"\n",
"# the usual test interface\n",
"p_value_spectrum=mmd.perform_test()\n",
Expand Down Expand Up @@ -649,7 +649,7 @@
" mmd=QuadraticTimeMMD(kernel, feat_p, feat_q)\n",
" mmd.set_null_approximation_method(MMD2_SPECTRUM)\n",
" mmd.set_num_eigenvalues_spectrum(num_eigen)\n",
" mmd.set_num_samples_sepctrum(num_samples)\n",
" mmd.set_num_samples_spectrum(num_samples)\n",
" mmd.set_statistic_type(BIASED)\n",
" rejections_spectrum[i]=mmd.perform_test(alpha)\n",
" \n",
Expand Down
Expand Up @@ -61,7 +61,7 @@ void quadratic_time_mmd()
mmd->set_statistic_type(BIASED);
mmd->set_null_approximation_method(MMD2_SPECTRUM);
mmd->set_num_eigenvalues_spectrum(3);
mmd->set_num_samples_sepctrum(250);
mmd->set_num_samples_spectrum(250);
p_value=mmd->perform_test();
/* reject if p-value is smaller than test level */
SG_SPRINT("spectrum: p!=q: %d\n", p_value<alpha);
Expand Down
Expand Up @@ -63,7 +63,7 @@ def statistics_quadratic_time_mmd (m,dim,difference):
mmd.set_statistic_type(BIASED);
mmd.set_null_approximation_method(MMD2_SPECTRUM);
mmd.set_num_eigenvalues_spectrum(3);
mmd.set_num_samples_sepctrum(250);
mmd.set_num_samples_spectrum(250);
p_value_spectrum=mmd.perform_test();
# reject if p-value is smaller than test level
#print "spectrum: p!=q: ", p_value_spectrum<alpha
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/statistics/QuadraticTimeMMD.cpp
Expand Up @@ -514,7 +514,7 @@ SGVector<float64_t> CQuadraticTimeMMD::fit_null_gamma()
return result;
}

void CQuadraticTimeMMD::set_num_samples_sepctrum(index_t
void CQuadraticTimeMMD::set_num_samples_spectrum(index_t
num_samples_spectrum)
{
m_num_samples_spectrum=num_samples_spectrum;
Expand Down
2 changes: 1 addition & 1 deletion src/shogun/statistics/QuadraticTimeMMD.h
Expand Up @@ -260,7 +260,7 @@ class CQuadraticTimeMMD : public CKernelTwoSampleTest
* @param num_samples_spectrum number of samples to draw from
* approximate null-distributrion
*/
void set_num_samples_sepctrum(index_t num_samples_spectrum);
void set_num_samples_spectrum(index_t num_samples_spectrum);

/** setter for number of eigenvalues to use in spectrum based p-value
* computation. Maximum is m_m+m_n-1
Expand Down
199 changes: 120 additions & 79 deletions tests/unit/statistics/QuadraticTimeMMD_unittest.cc
Expand Up @@ -13,9 +13,11 @@
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/streaming/generators/MeanShiftDataGenerator.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/mathematics/eigen3.h>
#include <gtest/gtest.h>

using namespace shogun;
using namespace Eigen;

TEST(QuadraticTimeMMD,test_quadratic_mmd_biased)
{
Expand Down Expand Up @@ -188,6 +190,65 @@ TEST(QuadraticTimeMMD, test_quadratic_mmd_biased_different_num_samples)
SG_UNREF(features_q);
}

#ifdef HAVE_EIGEN3
TEST(QuadraticTimeMMD, null_approximation_spectrum_different_num_samples)
{
const index_t m=20;
const index_t n=30;
const index_t dim=3;

/* use fixed seed */
sg_rand->set_seed(12345);

float64_t difference=0.5;

/* streaming data generator for mean shift distributions */
CMeanShiftDataGenerator* gen_p=new CMeanShiftDataGenerator(0, dim, 0);
CMeanShiftDataGenerator* gen_q=new CMeanShiftDataGenerator(difference, dim, 0);

/* stream some data from generator */
CFeatures* feat_p=gen_p->get_streamed_features(m);
CFeatures* feat_q=gen_q->get_streamed_features(n);

/* shoguns kernel width is different */
float64_t sigma=2;
float64_t sq_sigma_twice=sigma*sigma*2;
CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice);

/* create MMD instance, convienience constructor */
CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, feat_p, feat_q);

index_t num_null_samples=250;
index_t num_eigenvalues=10;
mmd->set_num_samples_spectrum(num_null_samples);
mmd->set_null_approximation_method(MMD2_SPECTRUM);
mmd->set_num_eigenvalues_spectrum(num_eigenvalues);

/* biased case */

/* compute p-value using spectrum approximation for null distribution and
* assert against local machine computed result */
mmd->set_statistic_type(BIASED);
float64_t p_value_spectrum=mmd->perform_test();
EXPECT_NEAR(p_value_spectrum, 0.0, 1E-10);

/* unbiased case */

/* compute p-value using spectrum approximation for null distribution and
* assert against local machine computed result */
mmd->set_statistic_type(UNBIASED);
p_value_spectrum=mmd->perform_test();
EXPECT_NEAR(p_value_spectrum, 0.004, 1E-10);

/* clean up */
SG_UNREF(mmd);
SG_UNREF(feat_p);
SG_UNREF(feat_q);
SG_UNREF(gen_p);
SG_UNREF(gen_q);
}
#endif // HAVE_EIGEN3

TEST(QuadraticTimeMMD,test_quadratic_mmd_precomputed_kernel)
{
index_t m=8;
Expand Down Expand Up @@ -263,6 +324,7 @@ TEST(QuadraticTimeMMD,test_quadratic_mmd_precomputed_kernel)
SG_UNREF(p_and_q);
}

#ifdef HAVE_EIGEN3
TEST(QuadraticTimeMMD,custom_kernel_vs_normal_kernel)
{
/* number of examples kept low in order to make things fast */
Expand All @@ -287,6 +349,15 @@ TEST(QuadraticTimeMMD,custom_kernel_vs_normal_kernel)
* copies p and q and does not reference them */
CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, feat_p, feat_q);

/* set up for a precomputed custom kernel using merged features p_and_q */
CGaussianKernel* kernel2=new CGaussianKernel(10, width);
CFeatures* p_and_q=mmd->get_p_and_q();
kernel2->init(p_and_q, p_and_q);
CCustomKernel* precomputed=new CCustomKernel(kernel2);
CQuadraticTimeMMD* mmd2=new CQuadraticTimeMMD(precomputed, m);
SG_UNREF(p_and_q);
SG_UNREF(kernel2);

/* perform test: compute p-value and test if null-hypothesis is rejected for
* a test level of 0.05 */
float64_t alpha=0.05;
Expand All @@ -295,114 +366,83 @@ TEST(QuadraticTimeMMD,custom_kernel_vs_normal_kernel)
mmd->set_statistic_type(BIASED);
mmd->set_num_null_samples(3);
mmd->set_num_eigenvalues_spectrum(3);
mmd->set_num_samples_sepctrum(250);
mmd->set_num_samples_spectrum(250);

mmd2->set_null_approximation_method(PERMUTATION);
mmd2->set_statistic_type(BIASED);
mmd2->set_num_null_samples(3);
mmd2->set_num_eigenvalues_spectrum(3);
mmd2->set_num_samples_spectrum(250);

/* compute tpye I and II error using normal and precomputed kernel */
index_t num_trials=3;
SGVector<float64_t> type_I_mmds(num_trials);
SGVector<float64_t> type_I_threshs_boot(num_trials);
SGVector<float64_t> type_I_threshs_spectrum(num_trials);
SGVector<float64_t> type_I_threshs_gamma(num_trials);
SGVector<float64_t> type_II_mmds(num_trials);
SGVector<float64_t> type_II_threshs_boot(num_trials);
SGVector<float64_t> type_II_threshs_spectrum(num_trials);
SGVector<float64_t> type_II_threshs_gamma(num_trials);

SGVector<index_t> inds(2*m);
inds.range_fill();
CFeatures* p_and_q=mmd->get_p_and_q();

/* use fixed seed */
CMath::init_random(1);
for (index_t i=0; i<num_trials; ++i)
{
/* this effectively means that p=q - rejecting is tpye I error */
inds.permute();

/* setting seed for Gaussian samples used in spectrum approximation method */
sg_rand->set_seed(1);

/* first, we compute using normal kernel */
p_and_q->add_subset(inds);
type_I_mmds[i]=mmd->compute_statistic();
float64_t type_I_mmds=mmd->compute_statistic();
mmd->set_null_approximation_method(PERMUTATION);
type_I_threshs_boot[i]=mmd->compute_threshold(alpha);
float64_t type_I_threshs_boot=mmd->compute_threshold(alpha);
mmd->set_null_approximation_method(MMD2_SPECTRUM);
type_I_threshs_spectrum[i]=mmd->compute_threshold(alpha);
float64_t type_I_threshs_spectrum=mmd->compute_threshold(alpha);
mmd->set_null_approximation_method(MMD2_GAMMA);
type_I_threshs_gamma[i]=mmd->compute_threshold(alpha);
float64_t type_I_threshs_gamma=mmd->compute_threshold(alpha);
p_and_q->remove_subset();

type_II_mmds[i]=mmd->compute_statistic();
float64_t type_II_mmds=mmd->compute_statistic();
mmd->set_null_approximation_method(PERMUTATION);
type_II_threshs_boot[i]=mmd->compute_threshold(alpha);
float64_t type_II_threshs_boot=mmd->compute_threshold(alpha);
mmd->set_null_approximation_method(MMD2_SPECTRUM);
type_II_threshs_spectrum[i]=mmd->compute_threshold(alpha);
float64_t type_II_threshs_spectrum=mmd->compute_threshold(alpha);
mmd->set_null_approximation_method(MMD2_GAMMA);
type_II_threshs_gamma[i]=mmd->compute_threshold(alpha);
float64_t type_II_threshs_gamma=mmd->compute_threshold(alpha);

}
SG_UNREF(p_and_q);
/* now compute using precomputed custom kernel */

/* setting seed for Gaussian samples used in spectrum approximation method */
sg_rand->set_seed(1);

//SG_SPRINT("normal kernel\n");
//type_I_mmds.display_vector("type_I_mmds");
//type_I_threshs_boot.display_vector("type_I_threshs_boot");
//type_II_mmds.display_vector("type_II_mmds");
//type_II_threshs_boot.display_vector("type_II_threshs_boot");

/* same thing with precomputed kernel */
SGVector<float64_t> type_I_mmds_pre(num_trials);
SGVector<float64_t> type_I_threshs_boot_pre(num_trials);
SGVector<float64_t> type_I_threshs_spectrum_pre(num_trials);
SGVector<float64_t> type_I_threshs_gamma_pre(num_trials);
SGVector<float64_t> type_II_mmds_pre(num_trials);
SGVector<float64_t> type_II_threshs_boot_pre(num_trials);
SGVector<float64_t> type_II_threshs_spectrum_pre(num_trials);
SGVector<float64_t> type_II_threshs_gamma_pre(num_trials);
kernel->init(p_and_q, p_and_q);
CCustomKernel* precomputed=new CCustomKernel(kernel);
CQuadraticTimeMMD* mmd2=new CQuadraticTimeMMD(precomputed, m);
mmd2->set_null_approximation_method(PERMUTATION);
mmd2->set_num_null_samples(3);
inds.range_fill();
CMath::init_random(1);
for (index_t i=0; i<num_trials; ++i)
{
/* this effectively means that p=q - rejecting is tpye I error */
inds.permute();
precomputed->add_row_subset(inds);
precomputed->add_col_subset(inds);
type_I_mmds_pre[i]=mmd2->compute_statistic();
mmd->set_null_approximation_method(PERMUTATION);
type_I_threshs_boot_pre[i]=mmd2->compute_threshold(alpha);
mmd->set_null_approximation_method(MMD2_SPECTRUM);
type_I_threshs_spectrum_pre[i]=mmd2->compute_threshold(alpha);
mmd->set_null_approximation_method(MMD2_GAMMA);
type_I_threshs_gamma_pre[i]=mmd2->compute_threshold(alpha);
float64_t type_I_mmds_pre=mmd2->compute_statistic();
mmd2->set_null_approximation_method(PERMUTATION);
float64_t type_I_threshs_boot_pre=mmd2->compute_threshold(alpha);
mmd2->set_null_approximation_method(MMD2_SPECTRUM);
float64_t type_I_threshs_spectrum_pre=mmd2->compute_threshold(alpha);
mmd2->set_null_approximation_method(MMD2_GAMMA);
float64_t type_I_threshs_gamma_pre=mmd2->compute_threshold(alpha);
precomputed->remove_row_subset();
precomputed->remove_col_subset();

type_II_mmds_pre[i]=mmd2->compute_statistic();
mmd->set_null_approximation_method(PERMUTATION);
type_II_threshs_boot_pre[i]=mmd2->compute_threshold(alpha);
mmd->set_null_approximation_method(MMD2_SPECTRUM);
type_II_threshs_spectrum_pre[i]=mmd2->compute_threshold(alpha);
mmd->set_null_approximation_method(MMD2_GAMMA);
type_II_threshs_gamma_pre[i]=mmd2->compute_threshold(alpha);

}

//SG_SPRINT("precomputed kernel\n");
//type_I_mmds_pre.display_vector("type_I_mmds");
//type_I_threshs_boot_pre.display_vector("type_I_threshs_boot");
//type_II_mmds_pre.display_vector("type_II_mmds");
//type_II_threshs_boot_pre.display_vector("type_II_threshs_boot");

for (index_t i=0; i<num_trials; ++i)
{
EXPECT_LE(CMath::abs(type_I_mmds_pre[i]-type_I_mmds_pre[i]), 10E-15);
EXPECT_LE(CMath::abs(type_I_threshs_boot_pre[i]-type_I_threshs_boot_pre[i]), 10E-15);
EXPECT_LE(CMath::abs(type_I_threshs_spectrum_pre[i]-type_I_threshs_spectrum_pre[i]), 10E-15);
EXPECT_LE(CMath::abs(type_I_threshs_gamma_pre[i]-type_I_threshs_gamma_pre[i]), 10E-15);
EXPECT_LE(CMath::abs(type_II_mmds_pre[i]-type_II_mmds_pre[i]), 10E-15);
EXPECT_LE(CMath::abs(type_II_threshs_boot_pre[i]-type_II_threshs_boot_pre[i]), 10E-15);
EXPECT_LE(CMath::abs(type_II_threshs_spectrum_pre[i]-type_II_threshs_spectrum_pre[i]), 10E-15);
EXPECT_LE(CMath::abs(type_II_threshs_gamma_pre[i]-type_II_threshs_gamma_pre[i]), 10E-15);
float64_t type_II_mmds_pre=mmd2->compute_statistic();
mmd2->set_null_approximation_method(PERMUTATION);
float64_t type_II_threshs_boot_pre=mmd2->compute_threshold(alpha);
mmd2->set_null_approximation_method(MMD2_SPECTRUM);
float64_t type_II_threshs_spectrum_pre=mmd2->compute_threshold(alpha);
mmd2->set_null_approximation_method(MMD2_GAMMA);
float64_t type_II_threshs_gamma_pre=mmd2->compute_threshold(alpha);

/* assert results from both */
EXPECT_NEAR(type_I_mmds, type_I_mmds_pre, 1E-6);
EXPECT_NEAR(type_I_threshs_boot, type_I_threshs_boot_pre, 1E-6);
EXPECT_NEAR(type_I_threshs_spectrum, type_I_threshs_spectrum_pre, 1E-6);
EXPECT_NEAR(type_I_threshs_gamma, type_I_threshs_gamma_pre, 1E-6);
EXPECT_NEAR(type_II_mmds, type_II_mmds_pre, 1E-6);
EXPECT_NEAR(type_II_threshs_boot, type_II_threshs_boot_pre, 1E-6);
EXPECT_NEAR(type_II_threshs_spectrum, type_II_threshs_spectrum_pre, 1E-6);
EXPECT_NEAR(type_II_threshs_gamma, type_II_threshs_gamma_pre, 1E-6);
}

/* clean up */
Expand All @@ -415,3 +455,4 @@ TEST(QuadraticTimeMMD,custom_kernel_vs_normal_kernel)
SG_UNREF(feat_p);
SG_UNREF(feat_q);
}
#endif // HAVE_EIGEN3

0 comments on commit e4fad50

Please sign in to comment.