From e1aa552774138534e3e27968051e6660f510f492 Mon Sep 17 00:00:00 2001 From: lambday Date: Wed, 5 Mar 2014 18:39:12 +0530 Subject: [PATCH] minor documentation fix --- .../statistics/mmd_two_sample_testing.ipynb | 8 ++++---- src/shogun/statistics/LinearTimeMMD.cpp | 6 ------ src/shogun/statistics/LinearTimeMMD.h | 4 ++-- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/doc/ipython-notebooks/statistics/mmd_two_sample_testing.ipynb b/doc/ipython-notebooks/statistics/mmd_two_sample_testing.ipynb index 3f218bbd3c8..b0a098fdfe4 100644 --- a/doc/ipython-notebooks/statistics/mmd_two_sample_testing.ipynb +++ b/doc/ipython-notebooks/statistics/mmd_two_sample_testing.ipynb @@ -91,7 +91,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Shogun implements statistical testing in the abstract class CTestStatistic. All implemented methods will work with this interface at their most basic level. This class offers methods to\n", + "Shogun implements statistical testing in the abstract class CHypothesisTest. All implemented methods will work with this interface at their most basic level. This class offers methods to\n", "\n", " * compute the implemented test statistic,\n", " * compute p-values for a given value of the test statistic,\n", @@ -99,7 +99,7 @@ " * sampling the null distribution, i.e. perform the permutation test or bootstrappig of the null-distribution, and\n", " * performing a full two-sample test, and either returning a p-value or a binary rejection decision. This method is most useful in practice. 
Note that, depending on the used test statistic, it might be faster to call this than to compute threshold and test statistic seperately with the above methods.\n", " \n", - "There are special subclasses for testing two distributions against each other (CTwoDistributionsTestStatistic), kernel two-sample testing (CKernelTwoSampleTestStatistic), and kernel independence testing (CKernelIndependenceTestStatistic), which however mostly differ in internals and constructors." + "There are special subclasses for testing two distributions against each other (CTwoSampleTest, CIndependenceTest), kernel two-sample testing (CKernelTwoSampleTest), and kernel independence testing (CKernelIndependenceTest), which however mostly differ in internals and constructors." ] }, { @@ -295,7 +295,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Any sub-class of CTwoDistributionsTestStatistic can compute approximate the null distribution using permutation/bootstrapping. This way always is guaranteed to produce constitent results, however, it might take a long time as for each sample of the null distribution, the test statistic has to be computed for a different permutation of the data. Note that each of the below calls samples from the null distribution. It is wise to choose one method in practice. Also not that we set the number of samples from the null distribution to a low value to reduce runtume. Choose larger in practice, it is in fact good to plot the samples." + "Any sub-class of CHypothesisTest can approximate the null distribution using permutation/bootstrapping. This approach is always guaranteed to produce consistent results; however, it might take a long time, since for each sample of the null distribution the test statistic has to be computed for a different permutation of the data. Note that each of the below calls samples from the null distribution. It is wise to choose one method in practice. 
Also note that we set the number of samples from the null distribution to a low value to reduce runtime. Choose a larger value in practice; it is in fact good to plot the samples." ] }, { @@ -380,7 +380,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now let us visualise distribution of MMD statistic under $H_0:p=q$ and $H_A:p\\neq q$. Sample both null and alternative distribution for that. Use the interface of CTwoDistributionsTestStatistic to sample from the null distribution (permutations, re-computing of test statistic is done internally). For the alternative distribution, compute the test statistic for a new sample set of $X$ and $Y$ in a loop. Note that the latter is expensive, as the kernel cannot be precomputed, and infinite data is needed. Though it is not needed in practice but only for illustrational purposes here." + "Now let us visualise the distribution of the MMD statistic under $H_0:p=q$ and $H_A:p\\neq q$. Sample both the null and the alternative distribution for that. Use the interface of CTwoSampleTest to sample from the null distribution (permutations, re-computing of test statistic is done internally). For the alternative distribution, compute the test statistic for a new sample set of $X$ and $Y$ in a loop. Note that the latter is expensive, as the kernel cannot be precomputed, and infinite data is needed. This is not needed in practice, though, and is done here only for illustrative purposes." 
] }, { diff --git a/src/shogun/statistics/LinearTimeMMD.cpp b/src/shogun/statistics/LinearTimeMMD.cpp index 10f2c009417..6bcefa5e9d2 100644 --- a/src/shogun/statistics/LinearTimeMMD.cpp +++ b/src/shogun/statistics/LinearTimeMMD.cpp @@ -190,18 +190,14 @@ void CLinearTimeMMD::compute_statistic_and_variance( * only once */ CKernel* kernel=m_kernel; if (multiple_kernels) - { SG_DEBUG("using multiple kernels\n"); - } /* iterate through all kernels for this data */ for (index_t i=0; iget_kernel(i); - } /* compute kernel matrix diagonals */ kernel->init(p1, p2); @@ -235,9 +231,7 @@ void CLinearTimeMMD::compute_statistic_and_variance( } if (multiple_kernels) - { SG_UNREF(kernel); - } } /* clean up streamed data */ diff --git a/src/shogun/statistics/LinearTimeMMD.h b/src/shogun/statistics/LinearTimeMMD.h index cf51fab6708..59f9260457e 100644 --- a/src/shogun/statistics/LinearTimeMMD.h +++ b/src/shogun/statistics/LinearTimeMMD.h @@ -31,7 +31,7 @@ class CFeatures; * The MMD is the distance of two probability distributions \f$p\f$ and \f$q\f$ * in a RKHS. * \f[ - * \text{MMD}}[\mathcal{F},p,q]^2=\textbf{E}_{x,x'}\left[ k(x,x')\right]- + * \text{MMD}[\mathcal{F},p,q]^2=\textbf{E}_{x,x'}\left[ k(x,x')\right]- * 2\textbf{E}_{x,y}\left[ k(x,y)\right] * +\textbf{E}_{y,y'}\left[ k(y,y')\right]=||\mu_p - \mu_q||^2_\mathcal{F} * \f] @@ -256,7 +256,7 @@ class CLinearTimeMMD: public CKernelTwoSampleTest /** Number of examples processed at once, i.e. in one burst */ index_t m_blocksize; - /** If this is true, samples will be mixed between p and q ind any method + /** If this is true, samples will be mixed between p and q in any method * that computes the statistic */ bool m_simulate_h0; };