added unit test for null spectrum approximation in quadratic time mmd…

… and a minor spell correction
shogun-toolbox · Mar 26, 2014 · e4fad50 · e4fad50
1 parent 5b7df93
commit e4fad50
Show file tree

Hide file tree

Showing 6 changed files with 126 additions and 85 deletions.
diff --git a/doc/ipython-notebooks/statistics/mmd_two_sample_testing.ipynb b/doc/ipython-notebooks/statistics/mmd_two_sample_testing.ipynb
@@ -545,7 +545,7 @@
       "#tell Shogun to use spectrum approximation\n",
       "mmd.set_null_approximation_method(MMD2_SPECTRUM)\n",
       "mmd.set_num_eigenvalues_spectrum(num_eigen)\n",
-      "mmd.set_num_samples_sepctrum(num_samples)\n",
+      "mmd.set_num_samples_spectrum(num_samples)\n",
       "\n",
       "# the usual test interface\n",
       "p_value_spectrum=mmd.perform_test()\n",
@@ -649,7 +649,7 @@
       "    mmd=QuadraticTimeMMD(kernel, feat_p, feat_q)\n",
       "    mmd.set_null_approximation_method(MMD2_SPECTRUM)\n",
       "    mmd.set_num_eigenvalues_spectrum(num_eigen)\n",
-      "    mmd.set_num_samples_sepctrum(num_samples)\n",
+      "    mmd.set_num_samples_spectrum(num_samples)\n",
       "    mmd.set_statistic_type(BIASED)\n",
       "    rejections_spectrum[i]=mmd.perform_test(alpha)\n",
       "    \n",

diff --git a/examples/undocumented/libshogun/statistics_quadratic_time_mmd.cpp b/examples/undocumented/libshogun/statistics_quadratic_time_mmd.cpp
@@ -61,7 +61,7 @@ void quadratic_time_mmd()
 	mmd->set_statistic_type(BIASED);
 	mmd->set_null_approximation_method(MMD2_SPECTRUM);
 	mmd->set_num_eigenvalues_spectrum(3);
-	mmd->set_num_samples_sepctrum(250);
+	mmd->set_num_samples_spectrum(250);
 	p_value=mmd->perform_test();
 	/* reject if p-value is smaller than test level */
 	SG_SPRINT("spectrum: p!=q: %d\n", p_value<alpha);

diff --git a/examples/undocumented/python_modular/statistics_quadratic_time_mmd.py b/examples/undocumented/python_modular/statistics_quadratic_time_mmd.py
@@ -63,7 +63,7 @@ def statistics_quadratic_time_mmd (m,dim,difference):
 	mmd.set_statistic_type(BIASED);
 	mmd.set_null_approximation_method(MMD2_SPECTRUM);
 	mmd.set_num_eigenvalues_spectrum(3);
-	mmd.set_num_samples_sepctrum(250);
+	mmd.set_num_samples_spectrum(250);
 	p_value_spectrum=mmd.perform_test();
 	# reject if p-value is smaller than test level
 	#print "spectrum: p!=q: ", p_value_spectrum<alpha

diff --git a/src/shogun/statistics/QuadraticTimeMMD.cpp b/src/shogun/statistics/QuadraticTimeMMD.cpp
@@ -514,7 +514,7 @@ SGVector<float64_t> CQuadraticTimeMMD::fit_null_gamma()
 	return result;
 }
 
-void CQuadraticTimeMMD::set_num_samples_sepctrum(index_t
+void CQuadraticTimeMMD::set_num_samples_spectrum(index_t
 		num_samples_spectrum)
 {
 	m_num_samples_spectrum=num_samples_spectrum;

diff --git a/src/shogun/statistics/QuadraticTimeMMD.h b/src/shogun/statistics/QuadraticTimeMMD.h
@@ -260,7 +260,7 @@ class CQuadraticTimeMMD : public CKernelTwoSampleTest
 	 * @param num_samples_spectrum number of samples to draw from
 	 * approximate null-distributrion
 	 */
-	void set_num_samples_sepctrum(index_t num_samples_spectrum);
+	void set_num_samples_spectrum(index_t num_samples_spectrum);
 
 	/** setter for number of eigenvalues to use in spectrum based p-value
 	 * computation. Maximum is m_m+m_n-1

diff --git a/tests/unit/statistics/QuadraticTimeMMD_unittest.cc b/tests/unit/statistics/QuadraticTimeMMD_unittest.cc
@@ -13,9 +13,11 @@
 #include <shogun/features/DenseFeatures.h>
 #include <shogun/features/streaming/generators/MeanShiftDataGenerator.h>
 #include <shogun/mathematics/Statistics.h>
+#include <shogun/mathematics/eigen3.h>
 #include <gtest/gtest.h>
 
 using namespace shogun;
+using namespace Eigen;
 
 TEST(QuadraticTimeMMD,test_quadratic_mmd_biased)
 {
@@ -188,6 +190,65 @@ TEST(QuadraticTimeMMD, test_quadratic_mmd_biased_different_num_samples)
 	SG_UNREF(features_q);
 }
 
+#ifdef HAVE_EIGEN3
+TEST(QuadraticTimeMMD, null_approximation_spectrum_different_num_samples)
+{
+	const index_t m=20;
+	const index_t n=30;
+	const index_t dim=3;
+
+	/* use fixed seed */
+	sg_rand->set_seed(12345);
+
+	float64_t difference=0.5;
+
+	/* streaming data generator for mean shift distributions */
+	CMeanShiftDataGenerator* gen_p=new CMeanShiftDataGenerator(0, dim, 0);
+	CMeanShiftDataGenerator* gen_q=new CMeanShiftDataGenerator(difference, dim, 0);
+
+	/* stream some data from generator */
+	CFeatures* feat_p=gen_p->get_streamed_features(m);
+	CFeatures* feat_q=gen_q->get_streamed_features(n);
+
+	/* shoguns kernel width is different */
+	float64_t sigma=2;
+	float64_t sq_sigma_twice=sigma*sigma*2;
+	CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice);
+
+	/* create MMD instance, convienience constructor */
+	CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, feat_p, feat_q);
+
+	index_t num_null_samples=250;
+	index_t num_eigenvalues=10;
+	mmd->set_num_samples_spectrum(num_null_samples);
+	mmd->set_null_approximation_method(MMD2_SPECTRUM);
+	mmd->set_num_eigenvalues_spectrum(num_eigenvalues);
+
+	/* biased case */
+
+	/* compute p-value using spectrum approximation for null distribution and
+	 * assert against local machine computed result */
+	mmd->set_statistic_type(BIASED);
+	float64_t p_value_spectrum=mmd->perform_test();
+	EXPECT_NEAR(p_value_spectrum, 0.0, 1E-10);
+
+	/* unbiased case */
+
+	/* compute p-value using spectrum approximation for null distribution and
+	 * assert against local machine computed result */
+	mmd->set_statistic_type(UNBIASED);
+	p_value_spectrum=mmd->perform_test();
+	EXPECT_NEAR(p_value_spectrum, 0.004, 1E-10);
+
+	/* clean up */
+	SG_UNREF(mmd);
+	SG_UNREF(feat_p);
+	SG_UNREF(feat_q);
+	SG_UNREF(gen_p);
+	SG_UNREF(gen_q);
+}
+#endif // HAVE_EIGEN3
+
 TEST(QuadraticTimeMMD,test_quadratic_mmd_precomputed_kernel)
 {
 	index_t m=8;
@@ -263,6 +324,7 @@ TEST(QuadraticTimeMMD,test_quadratic_mmd_precomputed_kernel)
 	SG_UNREF(p_and_q);
 }
 
+#ifdef HAVE_EIGEN3
 TEST(QuadraticTimeMMD,custom_kernel_vs_normal_kernel)
 {
 	/* number of examples kept low in order to make things fast */
@@ -287,6 +349,15 @@ TEST(QuadraticTimeMMD,custom_kernel_vs_normal_kernel)
 	 * copies p and q and does not reference them */
 	CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, feat_p, feat_q);
 
+	/* set up for a precomputed custom kernel using merged features p_and_q */
+	CGaussianKernel* kernel2=new CGaussianKernel(10, width);
+	CFeatures* p_and_q=mmd->get_p_and_q();
+	kernel2->init(p_and_q, p_and_q);
+	CCustomKernel* precomputed=new CCustomKernel(kernel2);
+	CQuadraticTimeMMD* mmd2=new CQuadraticTimeMMD(precomputed, m);
+	SG_UNREF(p_and_q);
+	SG_UNREF(kernel2);
+
 	/* perform test: compute p-value and test if null-hypothesis is rejected for
 	 * a test level of 0.05 */
 	float64_t alpha=0.05;
@@ -295,114 +366,83 @@ TEST(QuadraticTimeMMD,custom_kernel_vs_normal_kernel)
 	mmd->set_statistic_type(BIASED);
 	mmd->set_num_null_samples(3);
 	mmd->set_num_eigenvalues_spectrum(3);
-	mmd->set_num_samples_sepctrum(250);
+	mmd->set_num_samples_spectrum(250);
+
+	mmd2->set_null_approximation_method(PERMUTATION);
+	mmd2->set_statistic_type(BIASED);
+	mmd2->set_num_null_samples(3);
+	mmd2->set_num_eigenvalues_spectrum(3);
+	mmd2->set_num_samples_spectrum(250);
 
 	/* compute tpye I and II error using normal and precomputed kernel */
 	index_t num_trials=3;
-	SGVector<float64_t> type_I_mmds(num_trials);
-	SGVector<float64_t> type_I_threshs_boot(num_trials);
-	SGVector<float64_t> type_I_threshs_spectrum(num_trials);
-	SGVector<float64_t> type_I_threshs_gamma(num_trials);
-	SGVector<float64_t> type_II_mmds(num_trials);
-	SGVector<float64_t> type_II_threshs_boot(num_trials);
-	SGVector<float64_t> type_II_threshs_spectrum(num_trials);
-	SGVector<float64_t> type_II_threshs_gamma(num_trials);
 
 	SGVector<index_t> inds(2*m);
 	inds.range_fill();
-	CFeatures* p_and_q=mmd->get_p_and_q();
 
 	/* use fixed seed */
 	CMath::init_random(1);
 	for (index_t i=0; i<num_trials; ++i)
 	{
 		/* this effectively means that p=q - rejecting is tpye I error */
 		inds.permute();
+
+		/* setting seed for Gaussian samples used in spectrum approximation method */
+		sg_rand->set_seed(1);
+
+		/* first, we compute using normal kernel */
 		p_and_q->add_subset(inds);
-		type_I_mmds[i]=mmd->compute_statistic();
+		float64_t type_I_mmds=mmd->compute_statistic();
 		mmd->set_null_approximation_method(PERMUTATION);
-		type_I_threshs_boot[i]=mmd->compute_threshold(alpha);
+		float64_t type_I_threshs_boot=mmd->compute_threshold(alpha);
 		mmd->set_null_approximation_method(MMD2_SPECTRUM);
-		type_I_threshs_spectrum[i]=mmd->compute_threshold(alpha);
+		float64_t type_I_threshs_spectrum=mmd->compute_threshold(alpha);
 		mmd->set_null_approximation_method(MMD2_GAMMA);
-		type_I_threshs_gamma[i]=mmd->compute_threshold(alpha);
+		float64_t type_I_threshs_gamma=mmd->compute_threshold(alpha);
 		p_and_q->remove_subset();
 
-		type_II_mmds[i]=mmd->compute_statistic();
+		float64_t type_II_mmds=mmd->compute_statistic();
 		mmd->set_null_approximation_method(PERMUTATION);
-		type_II_threshs_boot[i]=mmd->compute_threshold(alpha);
+		float64_t type_II_threshs_boot=mmd->compute_threshold(alpha);
 		mmd->set_null_approximation_method(MMD2_SPECTRUM);
-		type_II_threshs_spectrum[i]=mmd->compute_threshold(alpha);
+		float64_t type_II_threshs_spectrum=mmd->compute_threshold(alpha);
 		mmd->set_null_approximation_method(MMD2_GAMMA);
-		type_II_threshs_gamma[i]=mmd->compute_threshold(alpha);
+		float64_t type_II_threshs_gamma=mmd->compute_threshold(alpha);
 
-	}
-	SG_UNREF(p_and_q);
+		/* now compute using precomputed custom kernel */
+
+		/* setting seed for Gaussian samples used in spectrum approximation method */
+		sg_rand->set_seed(1);
 
-	//SG_SPRINT("normal kernel\n");
-	//type_I_mmds.display_vector("type_I_mmds");
-	//type_I_threshs_boot.display_vector("type_I_threshs_boot");
-	//type_II_mmds.display_vector("type_II_mmds");
-	//type_II_threshs_boot.display_vector("type_II_threshs_boot");
-
-	/* same thing with precomputed kernel */
-	SGVector<float64_t> type_I_mmds_pre(num_trials);
-	SGVector<float64_t> type_I_threshs_boot_pre(num_trials);
-	SGVector<float64_t> type_I_threshs_spectrum_pre(num_trials);
-	SGVector<float64_t> type_I_threshs_gamma_pre(num_trials);
-	SGVector<float64_t> type_II_mmds_pre(num_trials);
-	SGVector<float64_t> type_II_threshs_boot_pre(num_trials);
-	SGVector<float64_t> type_II_threshs_spectrum_pre(num_trials);
-	SGVector<float64_t> type_II_threshs_gamma_pre(num_trials);
-	kernel->init(p_and_q, p_and_q);
-	CCustomKernel* precomputed=new CCustomKernel(kernel);
-	CQuadraticTimeMMD* mmd2=new CQuadraticTimeMMD(precomputed, m);
-	mmd2->set_null_approximation_method(PERMUTATION);
-	mmd2->set_num_null_samples(3);
-	inds.range_fill();
-	CMath::init_random(1);
-	for (index_t i=0; i<num_trials; ++i)
-	{
-		/* this effectively means that p=q - rejecting is tpye I error */
-		inds.permute();
 		precomputed->add_row_subset(inds);
 		precomputed->add_col_subset(inds);
-		type_I_mmds_pre[i]=mmd2->compute_statistic();
-		mmd->set_null_approximation_method(PERMUTATION);
-		type_I_threshs_boot_pre[i]=mmd2->compute_threshold(alpha);
-		mmd->set_null_approximation_method(MMD2_SPECTRUM);
-		type_I_threshs_spectrum_pre[i]=mmd2->compute_threshold(alpha);
-		mmd->set_null_approximation_method(MMD2_GAMMA);
-		type_I_threshs_gamma_pre[i]=mmd2->compute_threshold(alpha);
+		float64_t type_I_mmds_pre=mmd2->compute_statistic();
+		mmd2->set_null_approximation_method(PERMUTATION);
+		float64_t type_I_threshs_boot_pre=mmd2->compute_threshold(alpha);
+		mmd2->set_null_approximation_method(MMD2_SPECTRUM);
+		float64_t type_I_threshs_spectrum_pre=mmd2->compute_threshold(alpha);
+		mmd2->set_null_approximation_method(MMD2_GAMMA);
+		float64_t type_I_threshs_gamma_pre=mmd2->compute_threshold(alpha);
 		precomputed->remove_row_subset();
 		precomputed->remove_col_subset();
 
-		type_II_mmds_pre[i]=mmd2->compute_statistic();
-		mmd->set_null_approximation_method(PERMUTATION);
-		type_II_threshs_boot_pre[i]=mmd2->compute_threshold(alpha);
-		mmd->set_null_approximation_method(MMD2_SPECTRUM);
-		type_II_threshs_spectrum_pre[i]=mmd2->compute_threshold(alpha);
-		mmd->set_null_approximation_method(MMD2_GAMMA);
-		type_II_threshs_gamma_pre[i]=mmd2->compute_threshold(alpha);
-
-	}
-
-	//SG_SPRINT("precomputed kernel\n");
-	//type_I_mmds_pre.display_vector("type_I_mmds");
-	//type_I_threshs_boot_pre.display_vector("type_I_threshs_boot");
-	//type_II_mmds_pre.display_vector("type_II_mmds");
-	//type_II_threshs_boot_pre.display_vector("type_II_threshs_boot");
-
-	for (index_t i=0; i<num_trials; ++i)
-	{
-		EXPECT_LE(CMath::abs(type_I_mmds_pre[i]-type_I_mmds_pre[i]), 10E-15);
-		EXPECT_LE(CMath::abs(type_I_threshs_boot_pre[i]-type_I_threshs_boot_pre[i]), 10E-15);
-		EXPECT_LE(CMath::abs(type_I_threshs_spectrum_pre[i]-type_I_threshs_spectrum_pre[i]), 10E-15);
-		EXPECT_LE(CMath::abs(type_I_threshs_gamma_pre[i]-type_I_threshs_gamma_pre[i]), 10E-15);
-		EXPECT_LE(CMath::abs(type_II_mmds_pre[i]-type_II_mmds_pre[i]), 10E-15);
-		EXPECT_LE(CMath::abs(type_II_threshs_boot_pre[i]-type_II_threshs_boot_pre[i]), 10E-15);
-		EXPECT_LE(CMath::abs(type_II_threshs_spectrum_pre[i]-type_II_threshs_spectrum_pre[i]), 10E-15);
-		EXPECT_LE(CMath::abs(type_II_threshs_gamma_pre[i]-type_II_threshs_gamma_pre[i]), 10E-15);
+		float64_t type_II_mmds_pre=mmd2->compute_statistic();
+		mmd2->set_null_approximation_method(PERMUTATION);
+		float64_t type_II_threshs_boot_pre=mmd2->compute_threshold(alpha);
+		mmd2->set_null_approximation_method(MMD2_SPECTRUM);
+		float64_t type_II_threshs_spectrum_pre=mmd2->compute_threshold(alpha);
+		mmd2->set_null_approximation_method(MMD2_GAMMA);
+		float64_t type_II_threshs_gamma_pre=mmd2->compute_threshold(alpha);
+
+		/* assert results from both */
+		EXPECT_NEAR(type_I_mmds, type_I_mmds_pre, 1E-6);
+		EXPECT_NEAR(type_I_threshs_boot, type_I_threshs_boot_pre, 1E-6);
+		EXPECT_NEAR(type_I_threshs_spectrum, type_I_threshs_spectrum_pre, 1E-6);
+		EXPECT_NEAR(type_I_threshs_gamma, type_I_threshs_gamma_pre, 1E-6);
+		EXPECT_NEAR(type_II_mmds, type_II_mmds_pre, 1E-6);
+		EXPECT_NEAR(type_II_threshs_boot, type_II_threshs_boot_pre, 1E-6);
+		EXPECT_NEAR(type_II_threshs_spectrum, type_II_threshs_spectrum_pre, 1E-6);
+		EXPECT_NEAR(type_II_threshs_gamma, type_II_threshs_gamma_pre, 1E-6);
 	}
 
 	/* clean up */
@@ -415,3 +455,4 @@ TEST(QuadraticTimeMMD,custom_kernel_vs_normal_kernel)
 	SG_UNREF(feat_p);
 	SG_UNREF(feat_q);
 }
+#endif // HAVE_EIGEN3