removed SVD_QR for Gaussian, too unstable, replaced by ridge on diagonal

shogun-toolbox · Aug 2, 2013 · 13ec4b7 · 13ec4b7
1 parent 37bac96
commit 13ec4b7
Show file tree

Hide file tree

Showing 5 changed files with 48 additions and 308 deletions.
diff --git a/src/shogun/distributions/classical/GaussianDistribution.cpp b/src/shogun/distributions/classical/GaussianDistribution.cpp
@@ -22,8 +22,7 @@ CGaussianDistribution::CGaussianDistribution() : CProbabilityDistribution()
 }
 
 CGaussianDistribution::CGaussianDistribution(SGVector<float64_t> mean,
-		SGMatrix<float64_t> cov,
-		ECovarianceFactorization factorization, bool cov_is_factor) :
+		SGMatrix<float64_t> cov, bool cov_is_factor) :
 				CProbabilityDistribution(mean.vlen)
 {
 	REQUIRE(cov.num_rows==cov.num_cols, "Covariance must be square but is "
@@ -35,10 +34,19 @@ CGaussianDistribution::CGaussianDistribution(SGVector<float64_t> mean,
 	init();
 
 	m_mean=mean;
-	m_factorization=factorization;
 
 	if (!cov_is_factor)
-		compute_covariance_factorization(cov, factorization);
+	{
+		Map<MatrixXd> eigen_cov(cov.matrix, cov.num_rows, cov.num_cols);
+		m_L=SGMatrix<float64_t>(cov.num_rows, cov.num_cols);
+		Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
+
+		LLT<MatrixXd> llt(eigen_cov);
+		if (llt.info()==NumericalIssue)
+			SG_ERROR("Error computing Cholesky\n");
+
+		eigen_L=llt.matrixL();
+	}
 	else
 		m_L=cov;
 }
@@ -101,30 +109,11 @@ SGVector<float64_t> CGaussianDistribution::log_pdf(SGMatrix<float64_t> samples)
 
 	float64_t const_part=-0.5 * m_dimension * CMath::log(2 * CMath::PI);
 
-	/* log-determinant */
+	/* determinant is product of diagonal elements of triangular matrix */
 	float64_t log_det_part=0;
 	Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
-	switch (m_factorization)
-	{
-		case CF_CHOLESKY:
-		{
-			/* determinant is product of diagonal elements of triangular matrix */
-			VectorXd diag=eigen_L.diagonal();
-			log_det_part=-diag.array().log().sum();
-			break;
-		}
-		case CF_SVD_QR:
-		{
-			/* use QR for computing log-determinant, which abs(log-det R),
-			 * note that since covariance is psd, determinant is positive, so
-			 * absolute value here is fine (and necessary, since determinant
-			 * of R might be negative. */
-			Map<MatrixXd> eigen_R(m_R.matrix, m_R.num_rows, m_R.num_cols);
-			VectorXd diag=eigen_R.diagonal();
-			log_det_part=-0.5*diag.array().abs().log().sum();
-			break;
-		}
-	}
+	VectorXd diag=eigen_L.diagonal();
+	log_det_part=-diag.array().log().sum();
 
 	/* sample based part */
 	Map<MatrixXd> eigen_samples(samples.matrix, samples.num_rows,
@@ -142,27 +131,8 @@ SGVector<float64_t> CGaussianDistribution::log_pdf(SGMatrix<float64_t> samples)
 	}
 
 	/* solve the linear system based on factorization */
-	MatrixXd solved;
-	switch (m_factorization)
-	{
-		case CF_CHOLESKY:
-		{
-			/* use triangular solver */
-			solved=eigen_L.triangularView<Lower>().solve(eigen_centred);
-			solved=eigen_L.transpose().triangularView<Upper>().solve(solved);
-			break;
-		}
-		case CF_SVD_QR:
-		{
-			/* use orthogonality of Q and triangular solver */
-			Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
-			Map<MatrixXd> eigen_R(m_R.matrix, m_R.num_rows, m_R.num_cols);
-
-			solved=eigen_Q.transpose()*eigen_centred;
-			solved=eigen_R.triangularView<Upper>().solve(solved);
-			break;
-		}
-	}
+	MatrixXd solved=eigen_L.triangularView<Lower>().solve(eigen_centred);
+	solved=eigen_L.transpose().triangularView<Upper>().solve(solved);
 
 	/* one quadratic part x^T C^-1 x for each sample x */
 	SGVector<float64_t> result(num_samples);
@@ -187,61 +157,9 @@ SGVector<float64_t> CGaussianDistribution::log_pdf(SGMatrix<float64_t> samples)
 
 void CGaussianDistribution::init()
 {
-	m_factorization=CF_CHOLESKY;
-
 	SG_ADD(&m_mean, "mean", "Mean of the Gaussian.", MS_NOT_AVAILABLE);
 	SG_ADD(&m_L, "L", "Lower factor of covariance matrix, "
 			"depending on the factorization type.", MS_NOT_AVAILABLE);
-	SG_ADD(&m_Q, "Q", "Orthogonal Q factor of QR of covariance, if used.",
-			MS_NOT_AVAILABLE);
-	SG_ADD(&m_R, "R", "Triangular R factor of QR of covariance, if used.",
-			MS_NOT_AVAILABLE);
-	SG_ADD((machine_int_t*)&m_factorization, "factorization", "Type of the "
-			"factorization of the covariance matrix.", MS_NOT_AVAILABLE);
-}
-
-void CGaussianDistribution::compute_covariance_factorization(
-		SGMatrix<float64_t> cov, ECovarianceFactorization factorization)
-{
-	Map<MatrixXd> eigen_cov(cov.matrix, cov.num_rows, cov.num_cols);
-	m_L=SGMatrix<float64_t>(cov.num_rows, cov.num_cols);
-	Map<MatrixXd> eigen_factor(m_L.matrix, m_L.num_rows, m_L.num_cols);
-
-	switch (m_factorization)
-	{
-		case CF_CHOLESKY:
-		{
-			LLT<MatrixXd> llt(eigen_cov);
-			if (llt.info()==NumericalIssue)
-				SG_ERROR("Error computing Cholesky\n");
-
-			eigen_factor=llt.matrixL();
-			break;
-		}
-		case CF_SVD_QR:
-		{
-			JacobiSVD<MatrixXd> svd(eigen_cov, ComputeFullU);
-			MatrixXd U=svd.matrixU();
-			VectorXd s=svd.singularValues();
-
-			/* square root of covariance using all eigenvectors */
-			eigen_factor=U.array().rowwise()*s.transpose().array().sqrt();
-
-			/* QR factorization of covariance for log-pdf */
-			m_Q=SGMatrix<float64_t>(cov.num_rows, cov.num_cols);
-			m_R=SGMatrix<float64_t>(cov.num_rows, cov.num_cols);
-			Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
-			Map<MatrixXd> eigen_R(m_R.matrix, m_R.num_rows, m_R.num_cols);
-
-			ColPivHouseholderQR<MatrixXd> qr(eigen_cov);
-			eigen_Q=qr.householderQ();
-			eigen_R=qr.matrixQR().triangularView<Upper>();
-			break;
-		}
-		default:
-			SG_ERROR("Unknown factorization type: %d\n", m_factorization);
-			break;
-	}
 }
 
 #endif // HAVE_EIGEN3
diff --git a/src/shogun/distributions/classical/GaussianDistribution.h b/src/shogun/distributions/classical/GaussianDistribution.h
@@ -17,13 +17,6 @@
 namespace shogun
 {
 
-/** Different types of covariance factorizations. See CGaussianDistribution. */
-enum ECovarianceFactorization
-{
-	CF_CHOLESKY,
-	CF_SVD_QR
-};
-
 
 /** @brief Dense version of the well-known Gaussian probability distribution,
  * defined as
@@ -33,21 +26,9 @@ enum ECovarianceFactorization
  * \exp\left(-\frac{1}{2}(x-\mu)^T\Sigma^{-1}(x-\mu\right)
  * \f]
  *
- * The implementation offers various techniques for representing the covariance
- * matrix \f$\Sigma \f$, such as Cholesky factorisation, SVD-decomposition,
- * QR factorization, etc.
- *
- * All factorizations store a matrix \f$F\f$, such that the covariance can be
- * computed as \f$\Sigma=LL^T\f$.
- *
- * For Cholesky factorization, the lower factor \f$\Sigma=LL^T\f$ is computed
- * with a eigen3's LLT (classic Cholesky.
- * The factor \f$L\f$ can be used for both sampling and computing the log-pdf.
- *
- * For SVD factorization \f$\Sigma=USV^T\f$, the factor \f$U*\text{diag}(S)\f$
- * (column-wise product) is stored for sampling. For evaluating the
- * log-determinant of the log-pdf, a QR factorization of the covariance
- * \f$\Sigma=QR\f$ is used.
+ * The implementation represents the covariance matrix \f$\Sigma \f$, as
+ * Cholesky factorisation, such that the covariance can be computed as
+ * \f$\Sigma=LL^T\f$.
  */
 
 class CGaussianDistribution: public CProbabilityDistribution
@@ -62,13 +43,10 @@ class CGaussianDistribution: public CProbabilityDistribution
 	 *
 	 * @param mean mean of the Gaussian
 	 * @param cov covariance of the Gaussian, or covariance factor
-	 * @param factorization factorization type of covariance matrix (default is
-	 * Cholesky, others are for increased numerical stability)
 	 * @param cov_is_factor whether cov is a factor of the covariance or not
 	 * (default is false). If false, the factorization is explicitly computed
 	 */
 	CGaussianDistribution(SGVector<float64_t> mean, SGMatrix<float64_t> cov,
-			ECovarianceFactorization factorization=CF_CHOLESKY,
 			bool cov_is_factor=false);
 
 	/** Destructor */
@@ -96,12 +74,7 @@ class CGaussianDistribution: public CProbabilityDistribution
 	 *
 	 * where \f$d\f$ is the dimension of the Gaussian.
 	 * The method to compute the log-determinant is based on the factorization
-	 * of the covariance matrix. If a Cholesky based factorization is used, the
-	 * log-determinant is computed using the triangular factor. If an SVD
-	 * factorization is used for sampling, the log-pdf is computed using a QR
-	 * factorization (which is computed once).
-	 *
-	 * The inversion of the covariance is done using the factorization.
+	 * of the covariance matrix.
 	 *
 	 * @param samples samples to compute log-pdf of (column vectors)
 	 * @return vector with log-pdfs of given samples
@@ -119,30 +92,13 @@ class CGaussianDistribution: public CProbabilityDistribution
 	/** Initialses and registers parameters */
 	void init();
 
-	/** Computes and stores the factorization of the covariance matrix
-	 *
-	 * @param cov positive definite covariance matrix to compute factorization of
-	 * @param factorization the factorizaation type to be used
-	 */
-	void compute_covariance_factorization(SGMatrix<float64_t> cov,
-			ECovarianceFactorization factorization);
-
 protected:
 	/** Mean */
 	SGVector<float64_t> m_mean;
 
 	/** Lower factor of covariance matrix (depends on factorization type).
 	 * Covariance (approximation) is given by \f$\Sigma=LL^T\f$ */
 	SGMatrix<float64_t> m_L;
-
-	/** Orthogonal Q factor of \f$\Sigma=QR\f$ factorization of covariance */
-	SGMatrix<float64_t> m_Q;
-
-	/** Triangular R factor of \f$\Sigma=QR\f$ factorization of covariance */
-	SGMatrix<float64_t> m_R;
-
-	/** Type of the factorization of the covariance matrix */
-	ECovarianceFactorization m_factorization;
 };
 
 }

diff --git a/src/shogun/machine/gp/InferenceMethod.cpp b/src/shogun/machine/gp/InferenceMethod.cpp
@@ -93,14 +93,18 @@ void CInferenceMethod::check_members()
 }
 
 float64_t CInferenceMethod::get_log_ml_estimate(
-		int32_t num_importance_samples, ECovarianceFactorization factorization)
+		int32_t num_importance_samples, float64_t ridge_size)
 {
 	/* sample from Gaussian approximation to q(f|y) */
 	SGMatrix<float64_t> cov=get_posterior_approximation_covariance();
+
+	/* add ridge */
+	for (index_t i=0; i<cov.num_rows; ++i)
+		cov(i,i)+=ridge_size;
+
 	SGVector<float64_t> mean=get_posterior_approximation_mean();
 
-	CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov,
-			factorization);
+	CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
 	SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);
 
 	/* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
@@ -120,9 +124,12 @@ float64_t CInferenceMethod::get_log_ml_estimate(
 	for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
 		scaled_kernel.matrix[i]*=CMath::sq(m_scale);
 
+	/* add ridge */
+	for (index_t i=0; i<cov.num_rows; ++i)
+		scaled_kernel(i,i)+=ridge_size;
+
 	CGaussianDistribution* prior=new CGaussianDistribution(
-			m_mean->get_mean_vector(m_feature_matrix), scaled_kernel,
-			factorization);
+			m_mean->get_mean_vector(m_feature_matrix), scaled_kernel);
 	SGVector<float64_t> log_pdf_prior=prior->log_pdf(samples);
 	SG_UNREF(prior);
 	prior=NULL;

diff --git a/src/shogun/machine/gp/InferenceMethod.h b/src/shogun/machine/gp/InferenceMethod.h
@@ -20,7 +20,6 @@
 #include <shogun/machine/gp/LikelihoodModel.h>
 #include <shogun/machine/gp/MeanFunction.h>
 #include <shogun/evaluation/DifferentiableFunction.h>
-#include <shogun/distributions/classical/GaussianDistribution.h>
 
 namespace shogun
 {
@@ -342,14 +341,14 @@ class CInferenceMethod : public CDifferentiableFunction
 	 *
 	 * @param num_importance_samples the number of importance samples \f$n\f$
 	 * from \f$ q(f|y, \theta) \f$.
-	 * @param factorization type to factorize the Gaussian covariances. Default
-	 * is Cholesky but this might be changed to SVD/QR for greater numerical
-	 * stability.
+	 * @param ridge_size scalar that is added to the diagonal of the involved
+	 * Gaussian distribution's covariance of GP prior and posterior approximation
+	 * to stabilise things. Increase if Cholesky factorization fails.
 	 * @return unbiased estimate of the  log of the marginal likelihood
 	 * function \f$ log(p(y|\theta)) \f$
 	 */
 	float64_t get_log_ml_estimate(int32_t num_importance_samples=1,
-			ECovarianceFactorization factorization=CF_CHOLESKY);
+			float64_t ridge_size=1e-15);
 
 protected:
 	/** update alpha matrix */