diff --git a/src/shogun/statistical_testing/QuadraticTimeMMD.cpp b/src/shogun/statistical_testing/QuadraticTimeMMD.cpp index 17d0beee73a..00fecdfa487 100644 --- a/src/shogun/statistical_testing/QuadraticTimeMMD.cpp +++ b/src/shogun/statistical_testing/QuadraticTimeMMD.cpp @@ -442,7 +442,7 @@ SGVector CQuadraticTimeMMD::spectrum_sample_null() * K is matrix for XX, L is matrix for YY, KL is XY, LK is YX * works since X and Y are concatenated here */ SGMatrix precomputed_km=self->get_kernel_matrix(); - SGMatrix K(precomputed_km.matrix, precomputed_km.num_rows, precomputed_km.num_cols); + SGMatrix K(precomputed_km.num_rows, precomputed_km.num_cols); std::copy(precomputed_km.matrix, precomputed_km.matrix+precomputed_km.num_rows*precomputed_km.num_cols, K.matrix); /* center matrix K=H*K*H */ diff --git a/src/shogun/statistical_testing/internals/mmd/UnbiasedIncomplete.cpp b/src/shogun/statistical_testing/internals/mmd/UnbiasedIncomplete.cpp index 45f57a18d58..96b1b1475d0 100644 --- a/src/shogun/statistical_testing/internals/mmd/UnbiasedIncomplete.cpp +++ b/src/shogun/statistical_testing/internals/mmd/UnbiasedIncomplete.cpp @@ -45,7 +45,7 @@ float64_t UnbiasedIncomplete::operator()(SGMatrix km) Block& b_xy = map.block(n, 0, n, n); auto term_3 = b_xy.sum() - b_xy.diagonal().sum(); - auto statistic = term_1/n/(n-1) + term_2/n/(n-1) - 2*term_3/n/n; + auto statistic = term_1/n/(n-1) + term_2/n/(n-1) - 2*term_3/n/(n-1); return statistic; diff --git a/tests/unit/statistical_testing/QuadraticTimeMMD_unittest.cc b/tests/unit/statistical_testing/QuadraticTimeMMD_unittest.cc index 97a71b58978..7b0393bd57e 100644 --- a/tests/unit/statistical_testing/QuadraticTimeMMD_unittest.cc +++ b/tests/unit/statistical_testing/QuadraticTimeMMD_unittest.cc @@ -86,104 +86,7 @@ TEST(QuadraticTimeMMD, biased) EXPECT_NEAR(statistic, 0.17882546486779649, 1E-6); } -/* -TEST(QuadraticTimeMMD,test_quadratic_mmd_biased_DEPRECATED) -{ - index_t m=8; - index_t d=3; - SGMatrix data(d,2*m); - for (index_t i=0; i<2*d*m; ++i) - data.matrix[i]=i; - - // create data matrix for each features (appended is not supported) - SGMatrix data_p(d, m); - memcpy(&(data_p.matrix[0]), &(data.matrix[0]), sizeof(float64_t)*d*m); - - SGMatrix data_q(d, m); - memcpy(&(data_q.matrix[0]), &(data.matrix[d*m]), sizeof(float64_t)*d*m); - - // normalise data - float64_t max_p=data_p.max_single(); - float64_t max_q=data_q.max_single(); - - for (index_t i=0; i* features_p=new CDenseFeatures(data_p); - CDenseFeatures* features_q=new CDenseFeatures(data_q); - - // shoguns kernel width is different - float64_t sigma=2; - float64_t sq_sigma_twice=sigma*sigma*2; - CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice); - - // create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features_p, features_q); - mmd->set_statistic_type(BIASED_DEPRECATED); - - // assert matlab result - float64_t statistic=mmd->compute_statistic(); - //SG_SPRINT("statistic=%f\n", statistic); - EXPECT_NEAR(statistic, 0.357650929735592, 10E-15); - - // clean up - SG_UNREF(mmd); - SG_UNREF(features_p); - SG_UNREF(features_q); -} - -TEST(QuadraticTimeMMD,test_quadratic_mmd_unbiased) -{ - index_t m=8; - index_t d=3; - SGMatrix data(d,2*m); - for (index_t i=0; i<2*d*m; ++i) - data.matrix[i]=i; - - // create data matrix for each features (appended is not supported) - SGMatrix data_p(d, m); - memcpy(&(data_p.matrix[0]), &(data.matrix[0]), sizeof(float64_t)*d*m); - - SGMatrix data_q(d, m); - memcpy(&(data_q.matrix[0]), &(data.matrix[d*m]), sizeof(float64_t)*d*m); - - // normalise data - float64_t max_p=data_p.max_single(); - float64_t max_q=data_q.max_single(); - - for (index_t i=0; i* features_p=new CDenseFeatures(data_p); - CDenseFeatures* features_q=new CDenseFeatures(data_q); - - // shoguns kernel width is different - float64_t sigma=2; - float64_t sq_sigma_twice=sigma*sigma*2; - CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice); - - // create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features_p, features_q); - mmd->set_statistic_type(UNBIASED); - - // assert matlab result - float64_t statistic=mmd->compute_statistic(); - //SG_SPRINT("statistic=%f\n", statistic); - EXPECT_NEAR(statistic, 0.13440094336133723, 1E-15); - - // clean up - SG_UNREF(mmd); - SG_UNREF(features_p); - SG_UNREF(features_q); -} - -TEST(QuadraticTimeMMD,test_quadratic_mmd_unbiased_DEPRECATED) +TEST(QuadraticTimeMMD, unbiased) { index_t m=8; index_t d=3; @@ -217,22 +120,16 @@ TEST(QuadraticTimeMMD,test_quadratic_mmd_unbiased_DEPRECATED) CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice); // create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features_p, features_q); - mmd->set_statistic_type(UNBIASED_DEPRECATED); + auto mmd=some(features_p, features_q); + mmd->set_statistic_type(EStatisticType::UNBIASED_FULL); + mmd->set_kernel(kernel); // assert matlab result float64_t statistic=mmd->compute_statistic(); - //SG_SPRINT("statistic=%f\n", statistic); - float64_t difference=statistic-0.268801886722675; - EXPECT_LE(CMath::abs(difference), 10E-15); - - // clean up - SG_UNREF(mmd); - SG_UNREF(features_p); - SG_UNREF(features_q); + EXPECT_NEAR(statistic, 0.13440094336133723, 1E-6); } -TEST(QuadraticTimeMMD,test_quadratic_mmd_incomplete) +TEST(QuadraticTimeMMD, incomplete) { index_t m=8; index_t d=3; @@ -266,20 +163,16 @@ TEST(QuadraticTimeMMD,test_quadratic_mmd_incomplete) CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice); // create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features_p, features_q); - mmd->set_statistic_type(INCOMPLETE); + auto mmd=some(features_p, features_q); + mmd->set_statistic_type(EStatisticType::UNBIASED_INCOMPLETE); + mmd->set_kernel(kernel); // assert local machine computed result float64_t statistic=mmd->compute_statistic(); - EXPECT_NEAR(statistic, 0.16743977201175841, 1E-15); - - // clean up - SG_UNREF(mmd); - SG_UNREF(features_p); - SG_UNREF(features_q); + EXPECT_NEAR(statistic, 0.16743977201175841, 1E-6); } -TEST(QuadraticTimeMMD, test_quadratic_mmd_unbiased_different_num_samples) +TEST(QuadraticTimeMMD, unbiased_different_num_samples) { const index_t m=5; const index_t n=6; @@ -302,95 +195,17 @@ TEST(QuadraticTimeMMD, test_quadratic_mmd_unbiased_different_num_samples) CGaussianKernel* kernel=new CGaussianKernel(10, 2); // create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features_p, features_q); - mmd->set_statistic_type(UNBIASED); - - // assert python result at - // https://github.com/lambday/shogun-hypothesis-testing/blob/master/mmd.py - float64_t statistic=mmd->compute_statistic(); - EXPECT_NEAR(statistic, -0.037500338130199401, 1E-9); - - // clean up - SG_UNREF(mmd); - SG_UNREF(features_p); - SG_UNREF(features_q); -} - -TEST(QuadraticTimeMMD, test_quadratic_mmd_unbiased_different_num_samples_DEPRECATED) -{ - const index_t m=5; - const index_t n=6; - const index_t d=1; - float64_t data[] = {0.61318059, -0.69222999, 0.94424411, -0.48769626, - -0.00709551, 0.35025598, 0.20741384, -0.63622519, -1.21315264, - -0.77349617, -0.42707091}; - - // create data matrix for each features (appended is not supported) - SGMatrix data_p(d, m); - memcpy(&(data_p.matrix[0]), &(data[0]), sizeof(float64_t)*m); - - SGMatrix data_q(d, n); - memcpy(&(data_q.matrix[0]), &(data[m]), sizeof(float64_t)*n); - - CDenseFeatures* features_p=new CDenseFeatures(data_p); - CDenseFeatures* features_q=new CDenseFeatures(data_q); - - // shoguns kernel width is different - CGaussianKernel* kernel=new CGaussianKernel(10, 2); - - // create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features_p, features_q); - mmd->set_statistic_type(UNBIASED_DEPRECATED); - - // assert python result at - // https://github.com/lambday/shogun-hypothesis-testing/blob/master/mmd.py - float64_t statistic=mmd->compute_statistic(); - EXPECT_NEAR(statistic, -0.151251364436, 1E-9); - - // clean up - SG_UNREF(mmd); - SG_UNREF(features_p); - SG_UNREF(features_q); -} - -TEST(QuadraticTimeMMD, test_quadratic_mmd_biased_different_num_samples) -{ - const index_t m=5; - const index_t n=6; - const index_t d=1; - float64_t data[] = {-0.47616889, -2.1767364, -0.04185537, -1.20787529, - 1.94875193, -0.16695709, 2.51282666, -0.58116389, 1.52366887, - 0.18985099, 0.76120258}; - - // create data matrix for each features (appended is not supported) - SGMatrix data_p(d, m); - memcpy(&(data_p.matrix[0]), &(data[0]), sizeof(float64_t)*m); - - SGMatrix data_q(d, n); - memcpy(&(data_q.matrix[0]), &(data[m]), sizeof(float64_t)*n); - - CDenseFeatures* features_p=new CDenseFeatures(data_p); - CDenseFeatures* features_q=new CDenseFeatures(data_q); - - // shoguns kernel width is different - CGaussianKernel* kernel=new CGaussianKernel(10, 2); - - / create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features_p, features_q); - mmd->set_statistic_type(BIASED); + auto mmd=some(features_p, features_q); + mmd->set_statistic_type(EStatisticType::UNBIASED_FULL); + mmd->set_kernel(kernel); // assert python result at // https://github.com/lambday/shogun-hypothesis-testing/blob/master/mmd.py float64_t statistic=mmd->compute_statistic(); - EXPECT_NEAR(statistic, 0.54418915736201567, 1E-8); - - // clean up - SG_UNREF(mmd); - SG_UNREF(features_p); - SG_UNREF(features_q); + EXPECT_NEAR(statistic, -0.037500338130199401, 1E-8); } -TEST(QuadraticTimeMMD, test_quadratic_mmd_biased_different_num_samples_DEPRECATED) +TEST(QuadraticTimeMMD, biased_different_num_samples) { const index_t m=5; const index_t n=6; @@ -413,18 +228,14 @@ TEST(QuadraticTimeMMD, test_quadratic_mmd_biased_different_num_samples_DEPRECATE CGaussianKernel* kernel=new CGaussianKernel(10, 2); // create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features_p, features_q); - mmd->set_statistic_type(BIASED_DEPRECATED); + auto mmd=some(features_p, features_q); + mmd->set_statistic_type(EStatisticType::BIASED_FULL); + mmd->set_kernel(kernel); // assert python result at // https://github.com/lambday/shogun-hypothesis-testing/blob/master/mmd.py float64_t statistic=mmd->compute_statistic(); - EXPECT_NEAR(statistic, 2.1948962593, 1E-8); - - // clean up - SG_UNREF(mmd); - SG_UNREF(features_p); - SG_UNREF(features_q); + EXPECT_NEAR(statistic, 0.54418915736201567, 1E-8); } TEST(QuadraticTimeMMD,compute_variance_null) @@ -461,84 +272,24 @@ TEST(QuadraticTimeMMD,compute_variance_null) CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice); // create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features_p, features_q); + auto mmd=some(features_p, features_q); + mmd->set_kernel(kernel); // assert local machine computed result - mmd->set_statistic_type(UNBIASED); - float64_t var=mmd->compute_variance_under_null(); + mmd->set_statistic_type(EStatisticType::UNBIASED_FULL); + float64_t var=mmd->compute_variance(); EXPECT_NEAR(var, 0.0064888052500351456, 1E-10); - mmd->set_statistic_type(BIASED); - var=mmd->compute_variance_under_null(); + mmd->set_statistic_type(EStatisticType::BIASED_FULL); + var=mmd->compute_variance(); EXPECT_NEAR(var, 0.0071464012090942663, 1E-10); - mmd->set_statistic_type(INCOMPLETE); - var=mmd->compute_variance_under_null(); + mmd->set_statistic_type(EStatisticType::UNBIASED_INCOMPLETE); + var=mmd->compute_variance(); EXPECT_NEAR(var, 0.0064888052500342575, 1E-10); - - // clean up - SG_UNREF(mmd); - SG_UNREF(features_p); - SG_UNREF(features_q); } -TEST(QuadraticTimeMMD,compute_variance_alternative) -{ - index_t m=8; - index_t d=3; - SGMatrix data(d,2*m); - for (index_t i=0; i<2*d*m; ++i) - data.matrix[i]=i; - - // create data matrix for each features (appended is not supported) - SGMatrix data_p(d, m); - memcpy(&(data_p.matrix[0]), &(data.matrix[0]), sizeof(float64_t)*d*m); - - SGMatrix data_q(d, m); - memcpy(&(data_q.matrix[0]), &(data.matrix[d*m]), sizeof(float64_t)*d*m); - - // normalise data - float64_t max_p=data_p.max_single(); - float64_t max_q=data_q.max_single(); - - for (index_t i=0; i* features_p=new CDenseFeatures(data_p); - CDenseFeatures* features_q=new CDenseFeatures(data_q); - - // shoguns kernel width is different - float64_t sigma=2; - float64_t sq_sigma_twice=sigma*sigma*2; - CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice); - - // create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features_p, features_q); - - // assert local machine computed result - mmd->set_statistic_type(UNBIASED); - float64_t var=mmd->compute_variance_under_alternative(); - EXPECT_NEAR(var, 0.0065377436264417842, 1E-15); - - mmd->set_statistic_type(BIASED); - var=mmd->compute_variance_under_alternative(); - EXPECT_NEAR(var, 0.0065069769045954847, 1E-15); - - mmd->set_statistic_type(INCOMPLETE); - var=mmd->compute_variance_under_alternative(); - EXPECT_NEAR(var, 0.0080742069013913682, 1E-15); - - // clean up - SG_UNREF(mmd); - SG_UNREF(features_p); - SG_UNREF(features_q); -} - -#ifdef HAVE_EIGEN3 -TEST(QuadraticTimeMMD, null_approximation_spectrum_different_num_samples) +TEST(QuadraticTimeMMD, perform_test_permutation) { const index_t m=20; const index_t n=30; @@ -550,8 +301,8 @@ TEST(QuadraticTimeMMD, null_approximation_spectrum_different_num_samples) float64_t difference=0.5; // streaming data generator for mean shift distributions - CMeanShiftDataGenerator* gen_p=new CMeanShiftDataGenerator(0, dim, 0); - CMeanShiftDataGenerator* gen_q=new CMeanShiftDataGenerator(difference, dim, 0); + auto gen_p=some(0, dim, 0); + auto gen_q=some(difference, dim, 0); // stream some data from generator CFeatures* feat_p=gen_p->get_streamed_features(m); @@ -563,39 +314,31 @@ TEST(QuadraticTimeMMD, null_approximation_spectrum_different_num_samples) CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice); // create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, feat_p, feat_q); + auto mmd=some(feat_p, feat_q); + mmd->set_kernel(kernel); index_t num_null_samples=250; - index_t num_eigenvalues=10; - mmd->set_num_samples_spectrum(num_null_samples); - mmd->set_null_approximation_method(MMD2_SPECTRUM); - mmd->set_num_eigenvalues_spectrum(num_eigenvalues); + mmd->set_num_null_samples(num_null_samples); + mmd->set_null_approximation_method(ENullApproximationMethod::PERMUTATION); // biased case - // compute p-value using spectrum approximation for null distribution and + // compute p-value using permutation for null distribution and // assert against local machine computed result - mmd->set_statistic_type(BIASED); - float64_t p_value_spectrum=mmd->perform_test(); - EXPECT_NEAR(p_value_spectrum, 0.0, 1E-10); + mmd->set_statistic_type(EStatisticType::BIASED_FULL); + float64_t p_value=mmd->compute_p_value(mmd->compute_statistic()); + EXPECT_NEAR(p_value, 0.0, 1E-10); // unbiased case - // compute p-value using spectrum approximation for null distribution and + // compute p-value using permutation for null distribution and // assert against local machine computed result - mmd->set_statistic_type(UNBIASED); - p_value_spectrum=mmd->perform_test(); - EXPECT_NEAR(p_value_spectrum, 0.004, 1E-10); - - // clean up - SG_UNREF(mmd); - SG_UNREF(feat_p); - SG_UNREF(feat_q); - SG_UNREF(gen_p); - SG_UNREF(gen_q); + mmd->set_statistic_type(EStatisticType::UNBIASED_FULL); + p_value=mmd->compute_p_value(mmd->compute_statistic()); + EXPECT_NEAR(p_value, 0.0, 1E-10); } -TEST(QuadraticTimeMMD, null_approximation_spectrum_different_num_samples_DEPRECATED) +TEST(QuadraticTimeMMD, perform_test_spectrum) { const index_t m=20; const index_t n=30; @@ -607,8 +350,8 @@ TEST(QuadraticTimeMMD, null_approximation_spectrum_different_num_samples_DEPRECA float64_t difference=0.5; // streaming data generator for mean shift distributions - CMeanShiftDataGenerator* gen_p=new CMeanShiftDataGenerator(0, dim, 0); - CMeanShiftDataGenerator* gen_q=new CMeanShiftDataGenerator(difference, dim, 0); + auto gen_p=some(0, dim, 0); + auto gen_q=some(difference, dim, 0); // stream some data from generator CFeatures* feat_p=gen_p->get_streamed_features(m); @@ -620,244 +363,28 @@ TEST(QuadraticTimeMMD, null_approximation_spectrum_different_num_samples_DEPRECA CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice); // create MMD instance, convienience constructor - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, feat_p, feat_q); + auto mmd=some(feat_p, feat_q); + mmd->set_kernel(kernel); index_t num_null_samples=250; index_t num_eigenvalues=10; - mmd->set_num_samples_spectrum(num_null_samples); - mmd->set_null_approximation_method(MMD2_SPECTRUM_DEPRECATED); - mmd->set_num_eigenvalues_spectrum(num_eigenvalues); + mmd->set_num_null_samples(num_null_samples); + mmd->set_null_approximation_method(ENullApproximationMethod::MMD2_SPECTRUM); + mmd->spectrum_set_num_eigenvalues(num_eigenvalues); // biased case // compute p-value using spectrum approximation for null distribution and // assert against local machine computed result - mmd->set_statistic_type(BIASED_DEPRECATED); - float64_t p_value_spectrum=mmd->perform_test(); + mmd->set_statistic_type(EStatisticType::BIASED_FULL); + float64_t p_value_spectrum=mmd->compute_p_value(mmd->compute_statistic()); EXPECT_NEAR(p_value_spectrum, 0.0, 1E-10); // unbiased case // compute p-value using spectrum approximation for null distribution and // assert against local machine computed result - mmd->set_statistic_type(UNBIASED_DEPRECATED); - p_value_spectrum=mmd->perform_test(); + mmd->set_statistic_type(EStatisticType::UNBIASED_FULL); + p_value_spectrum=mmd->compute_p_value(mmd->compute_statistic()); EXPECT_NEAR(p_value_spectrum, 0.004, 1E-10); - - // clean up - SG_UNREF(mmd); - SG_UNREF(feat_p); - SG_UNREF(feat_q); - SG_UNREF(gen_p); - SG_UNREF(gen_q); } -#endif // HAVE_EIGEN3 - -TEST(QuadraticTimeMMD,test_quadratic_mmd_precomputed_kernel) -{ - index_t m=8; - index_t d=3; - SGMatrix data(d,2*m); - for (index_t i=0; i<2*d*m; ++i) - data.matrix[i]=i; - - // create data matrix for each features (appended is not supported) - SGMatrix data_p(d, m); - memcpy(&(data_p.matrix[0]), &(data.matrix[0]), sizeof(float64_t)*d*m); - - SGMatrix data_q(d, m); - memcpy(&(data_q.matrix[0]), &(data.matrix[d*m]), sizeof(float64_t)*d*m); - - // normalise data - float64_t max_p=data_p.max_single(); - float64_t max_q=data_q.max_single(); - - for (index_t i=0; i* features_p=new CDenseFeatures(data_p); - CDenseFeatures* features_q=new CDenseFeatures(data_q); - CFeatures* p_and_q=features_p->create_merged_copy(features_q); - SG_REF(p_and_q); - - // shoguns kernel width is different - float64_t sigma=2; - float64_t sq_sigma_twice=sigma*sigma*2; - CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice); - - // create MMD instance - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, p_and_q, m); - mmd->set_num_null_samples(10); - - // use fixed seed - sg_rand->set_seed(12345); - SGVector null_samples=mmd->sample_null(); - - float64_t mean=CStatistics::mean(null_samples); - float64_t var=CStatistics::variance(null_samples); - - //SG_SPRINT("mean %f, var %f\n", mean, var); - - // now again but with a precomputed kernel, same features. - // This avoids re-computing the kernel matrix in every permutation - // iteration and should be num_iterations times faster - - // re-init kernel before kernel matrix is computed: this is due to a design - // error in subsets and should be worked on! - kernel->init(p_and_q, p_and_q); - CCustomKernel* precomputed_kernel=new CCustomKernel(kernel); - SG_UNREF(mmd); - mmd=new CQuadraticTimeMMD(precomputed_kernel, p_and_q, m); - mmd->set_num_null_samples(10); - sg_rand->set_seed(12345); - null_samples=mmd->sample_null(); - - // assert that results do not change - //SG_SPRINT("mean %f, var %f\n", CStatistics::mean(null_samples), - // CStatistics::variance(null_samples)); - EXPECT_LE(CMath::abs(mean-CStatistics::mean(null_samples)), 10E-8); - EXPECT_LE(CMath::abs(var-CStatistics::variance(null_samples)), 10E-8); - - SG_UNREF(mmd); - SG_UNREF(features_p); - SG_UNREF(features_q); - SG_UNREF(p_and_q); -} - -#ifdef HAVE_EIGEN3 -TEST(QuadraticTimeMMD,custom_kernel_vs_normal_kernel_DEPRECATED) -{ - // number of examples kept low in order to make things fast - index_t m=20; - index_t dim=2; - float64_t difference=0.5; - - // streaming data generator for mean shift distributions - CMeanShiftDataGenerator* gen_p=new CMeanShiftDataGenerator(0, dim, 0); - CMeanShiftDataGenerator* gen_q=new CMeanShiftDataGenerator(difference, dim, 0); - - // stream some data from generator - CFeatures* feat_p=gen_p->get_streamed_features(m); - CFeatures* feat_q=gen_q->get_streamed_features(m); - - // set kernel a-priori. usually one would do some kernel selection. See - // other examples for this. - float64_t width=10; - CGaussianKernel* kernel=new CGaussianKernel(10, width); - - // create quadratic time mmd instance. Note that this constructor - // copies p and q and does not reference them - CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, feat_p, feat_q); - - // set up for a precomputed custom kernel using merged features p_and_q - CGaussianKernel* kernel2=new CGaussianKernel(10, width); - CFeatures* p_and_q=mmd->get_p_and_q(); - kernel2->init(p_and_q, p_and_q); - CCustomKernel* precomputed=new CCustomKernel(kernel2); - CQuadraticTimeMMD* mmd2=new CQuadraticTimeMMD(precomputed, m); - SG_UNREF(p_and_q); - SG_UNREF(kernel2); - - // perform test: compute p-value and test if null-hypothesis is rejected for - // a test level of 0.05 - float64_t alpha=0.05; - - mmd->set_null_approximation_method(PERMUTATION); - mmd->set_statistic_type(BIASED_DEPRECATED); - mmd->set_num_null_samples(3); - mmd->set_num_eigenvalues_spectrum(3); - mmd->set_num_samples_spectrum(250); - - mmd2->set_null_approximation_method(PERMUTATION); - mmd2->set_statistic_type(BIASED_DEPRECATED); - mmd2->set_num_null_samples(3); - mmd2->set_num_eigenvalues_spectrum(3); - mmd2->set_num_samples_spectrum(250); - - // compute tpye I and II error using normal and precomputed kernel - index_t num_trials=3; - - SGVector inds(2*m); - inds.range_fill(); - - // use fixed seed - CMath::init_random(1); - for (index_t i=0; iset_seed(1); - - // first, we compute using normal kernel - p_and_q->add_subset(inds); - float64_t type_I_mmds=mmd->compute_statistic(); - mmd->set_null_approximation_method(PERMUTATION); - float64_t type_I_threshs_boot=mmd->compute_threshold(alpha); - mmd->set_null_approximation_method(MMD2_SPECTRUM_DEPRECATED); - float64_t type_I_threshs_spectrum=mmd->compute_threshold(alpha); - mmd->set_null_approximation_method(MMD2_GAMMA); - float64_t type_I_threshs_gamma=mmd->compute_threshold(alpha); - p_and_q->remove_subset(); - - float64_t type_II_mmds=mmd->compute_statistic(); - mmd->set_null_approximation_method(PERMUTATION); - float64_t type_II_threshs_boot=mmd->compute_threshold(alpha); - mmd->set_null_approximation_method(MMD2_SPECTRUM_DEPRECATED); - float64_t type_II_threshs_spectrum=mmd->compute_threshold(alpha); - mmd->set_null_approximation_method(MMD2_GAMMA); - float64_t type_II_threshs_gamma=mmd->compute_threshold(alpha); - - // now compute using precomputed custom kernel - - // setting seed for Gaussian samples used in spectrum approximation method - sg_rand->set_seed(1); - - precomputed->add_row_subset(inds); - precomputed->add_col_subset(inds); - float64_t type_I_mmds_pre=mmd2->compute_statistic(); - mmd2->set_null_approximation_method(PERMUTATION); - float64_t type_I_threshs_boot_pre=mmd2->compute_threshold(alpha); - mmd2->set_null_approximation_method(MMD2_SPECTRUM_DEPRECATED); - float64_t type_I_threshs_spectrum_pre=mmd2->compute_threshold(alpha); - mmd2->set_null_approximation_method(MMD2_GAMMA); - float64_t type_I_threshs_gamma_pre=mmd2->compute_threshold(alpha); - precomputed->remove_row_subset(); - precomputed->remove_col_subset(); - - float64_t type_II_mmds_pre=mmd2->compute_statistic(); - mmd2->set_null_approximation_method(PERMUTATION); - float64_t type_II_threshs_boot_pre=mmd2->compute_threshold(alpha); - mmd2->set_null_approximation_method(MMD2_SPECTRUM_DEPRECATED); - float64_t type_II_threshs_spectrum_pre=mmd2->compute_threshold(alpha); - mmd2->set_null_approximation_method(MMD2_GAMMA); - float64_t type_II_threshs_gamma_pre=mmd2->compute_threshold(alpha); - - // assert results from both - EXPECT_NEAR(type_I_mmds, type_I_mmds_pre, 1E-6); - EXPECT_NEAR(type_I_threshs_boot, type_I_threshs_boot_pre, 1E-6); - EXPECT_NEAR(type_I_threshs_spectrum, type_I_threshs_spectrum_pre, 1E-6); - EXPECT_NEAR(type_I_threshs_gamma, type_I_threshs_gamma_pre, 1E-6); - EXPECT_NEAR(type_II_mmds, type_II_mmds_pre, 1E-5); - EXPECT_NEAR(type_II_threshs_boot, type_II_threshs_boot_pre, 1E-6); - EXPECT_NEAR(type_II_threshs_spectrum, type_II_threshs_spectrum_pre, 1E-6); - EXPECT_NEAR(type_II_threshs_gamma, type_II_threshs_gamma_pre, 1E-6); - } - - // clean up - SG_UNREF(mmd); - SG_UNREF(mmd2); - SG_UNREF(gen_p); - SG_UNREF(gen_q); - - // convienience constructor of MMD was used, these were not referenced - SG_UNREF(feat_p); - SG_UNREF(feat_q); -} -#endif // HAVE_EIGEN3 - -*/