Skip to content

Commit

Permalink
removed SVD_QR for Gaussian, too unstable, replaced by ridge on diagonal
Browse files Browse the repository at this point in the history
  • Loading branch information
karlnapf committed Aug 2, 2013
1 parent 37bac96 commit 13ec4b7
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 308 deletions.
116 changes: 17 additions & 99 deletions src/shogun/distributions/classical/GaussianDistribution.cpp
Expand Up @@ -22,8 +22,7 @@ CGaussianDistribution::CGaussianDistribution() : CProbabilityDistribution()
}

CGaussianDistribution::CGaussianDistribution(SGVector<float64_t> mean,
SGMatrix<float64_t> cov,
ECovarianceFactorization factorization, bool cov_is_factor) :
SGMatrix<float64_t> cov, bool cov_is_factor) :
CProbabilityDistribution(mean.vlen)
{
REQUIRE(cov.num_rows==cov.num_cols, "Covariance must be square but is "
Expand All @@ -35,10 +34,19 @@ CGaussianDistribution::CGaussianDistribution(SGVector<float64_t> mean,
init();

m_mean=mean;
m_factorization=factorization;

if (!cov_is_factor)
compute_covariance_factorization(cov, factorization);
{
Map<MatrixXd> eigen_cov(cov.matrix, cov.num_rows, cov.num_cols);
m_L=SGMatrix<float64_t>(cov.num_rows, cov.num_cols);
Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);

LLT<MatrixXd> llt(eigen_cov);
if (llt.info()==NumericalIssue)
SG_ERROR("Error computing Cholesky\n");

eigen_L=llt.matrixL();
}
else
m_L=cov;
}
Expand Down Expand Up @@ -101,30 +109,11 @@ SGVector<float64_t> CGaussianDistribution::log_pdf(SGMatrix<float64_t> samples)

float64_t const_part=-0.5 * m_dimension * CMath::log(2 * CMath::PI);

/* log-determinant */
/* determinant is product of diagonal elements of triangular matrix */
float64_t log_det_part=0;
Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
switch (m_factorization)
{
case CF_CHOLESKY:
{
/* determinant is product of diagonal elements of triangular matrix */
VectorXd diag=eigen_L.diagonal();
log_det_part=-diag.array().log().sum();
break;
}
case CF_SVD_QR:
{
/* use QR for computing log-determinant, which abs(log-det R),
* note that since covariance is psd, determinant is positive, so
* absolute value here is fine (and necessary, since determinant
* of R might be negative. */
Map<MatrixXd> eigen_R(m_R.matrix, m_R.num_rows, m_R.num_cols);
VectorXd diag=eigen_R.diagonal();
log_det_part=-0.5*diag.array().abs().log().sum();
break;
}
}
VectorXd diag=eigen_L.diagonal();
log_det_part=-diag.array().log().sum();

/* sample based part */
Map<MatrixXd> eigen_samples(samples.matrix, samples.num_rows,
Expand All @@ -142,27 +131,8 @@ SGVector<float64_t> CGaussianDistribution::log_pdf(SGMatrix<float64_t> samples)
}

/* solve the linear system based on factorization */
MatrixXd solved;
switch (m_factorization)
{
case CF_CHOLESKY:
{
/* use triangular solver */
solved=eigen_L.triangularView<Lower>().solve(eigen_centred);
solved=eigen_L.transpose().triangularView<Upper>().solve(solved);
break;
}
case CF_SVD_QR:
{
/* use orthogonality of Q and triangular solver */
Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
Map<MatrixXd> eigen_R(m_R.matrix, m_R.num_rows, m_R.num_cols);

solved=eigen_Q.transpose()*eigen_centred;
solved=eigen_R.triangularView<Upper>().solve(solved);
break;
}
}
MatrixXd solved=eigen_L.triangularView<Lower>().solve(eigen_centred);
solved=eigen_L.transpose().triangularView<Upper>().solve(solved);

/* one quadratic part x^T C^-1 x for each sample x */
SGVector<float64_t> result(num_samples);
Expand All @@ -187,61 +157,9 @@ SGVector<float64_t> CGaussianDistribution::log_pdf(SGMatrix<float64_t> samples)

void CGaussianDistribution::init()
{
m_factorization=CF_CHOLESKY;

SG_ADD(&m_mean, "mean", "Mean of the Gaussian.", MS_NOT_AVAILABLE);
SG_ADD(&m_L, "L", "Lower factor of covariance matrix, "
"depending on the factorization type.", MS_NOT_AVAILABLE);
SG_ADD(&m_Q, "Q", "Orthogonal Q factor of QR of covariance, if used.",
MS_NOT_AVAILABLE);
SG_ADD(&m_R, "R", "Triangular R factor of QR of covariance, if used.",
MS_NOT_AVAILABLE);
SG_ADD((machine_int_t*)&m_factorization, "factorization", "Type of the "
"factorization of the covariance matrix.", MS_NOT_AVAILABLE);
}

void CGaussianDistribution::compute_covariance_factorization(
SGMatrix<float64_t> cov, ECovarianceFactorization factorization)
{
Map<MatrixXd> eigen_cov(cov.matrix, cov.num_rows, cov.num_cols);
m_L=SGMatrix<float64_t>(cov.num_rows, cov.num_cols);
Map<MatrixXd> eigen_factor(m_L.matrix, m_L.num_rows, m_L.num_cols);

switch (m_factorization)
{
case CF_CHOLESKY:
{
LLT<MatrixXd> llt(eigen_cov);
if (llt.info()==NumericalIssue)
SG_ERROR("Error computing Cholesky\n");

eigen_factor=llt.matrixL();
break;
}
case CF_SVD_QR:
{
JacobiSVD<MatrixXd> svd(eigen_cov, ComputeFullU);
MatrixXd U=svd.matrixU();
VectorXd s=svd.singularValues();

/* square root of covariance using all eigenvectors */
eigen_factor=U.array().rowwise()*s.transpose().array().sqrt();

/* QR factorization of covariance for log-pdf */
m_Q=SGMatrix<float64_t>(cov.num_rows, cov.num_cols);
m_R=SGMatrix<float64_t>(cov.num_rows, cov.num_cols);
Map<MatrixXd> eigen_Q(m_Q.matrix, m_Q.num_rows, m_Q.num_cols);
Map<MatrixXd> eigen_R(m_R.matrix, m_R.num_rows, m_R.num_cols);

ColPivHouseholderQR<MatrixXd> qr(eigen_cov);
eigen_Q=qr.householderQ();
eigen_R=qr.matrixQR().triangularView<Upper>();
break;
}
default:
SG_ERROR("Unknown factorization type: %d\n", m_factorization);
break;
}
}

#endif // HAVE_EIGEN3
52 changes: 4 additions & 48 deletions src/shogun/distributions/classical/GaussianDistribution.h
Expand Up @@ -17,13 +17,6 @@
namespace shogun
{

/** Different types of covariance factorizations. See CGaussianDistribution. */
enum ECovarianceFactorization
{
CF_CHOLESKY,
CF_SVD_QR
};


/** @brief Dense version of the well-known Gaussian probability distribution,
* defined as
Expand All @@ -33,21 +26,9 @@ enum ECovarianceFactorization
* \exp\left(-\frac{1}{2}(x-\mu)^T\Sigma^{-1}(x-\mu\right)
* \f]
*
* The implementation offers various techniques for representing the covariance
* matrix \f$\Sigma \f$, such as Cholesky factorisation, SVD-decomposition,
* QR factorization, etc.
*
* All factorizations store a matrix \f$F\f$, such that the covariance can be
* computed as \f$\Sigma=LL^T\f$.
*
* For Cholesky factorization, the lower factor \f$\Sigma=LL^T\f$ is computed
* with a eigen3's LLT (classic Cholesky.
* The factor \f$L\f$ can be used for both sampling and computing the log-pdf.
*
* For SVD factorization \f$\Sigma=USV^T\f$, the factor \f$U*\text{diag}(S)\f$
* (column-wise product) is stored for sampling. For evaluating the
* log-determinant of the log-pdf, a QR factorization of the covariance
* \f$\Sigma=QR\f$ is used.
* The implementation represents the covariance matrix \f$\Sigma \f$, as
* Cholesky factorisation, such that the covariance can be computed as
* \f$\Sigma=LL^T\f$.
*/

class CGaussianDistribution: public CProbabilityDistribution
Expand All @@ -62,13 +43,10 @@ class CGaussianDistribution: public CProbabilityDistribution
*
* @param mean mean of the Gaussian
* @param cov covariance of the Gaussian, or covariance factor
* @param factorization factorization type of covariance matrix (default is
* Cholesky, others are for increased numerical stability)
* @param cov_is_factor whether cov is a factor of the covariance or not
* (default is false). If false, the factorization is explicitly computed
*/
CGaussianDistribution(SGVector<float64_t> mean, SGMatrix<float64_t> cov,
ECovarianceFactorization factorization=CF_CHOLESKY,
bool cov_is_factor=false);

/** Destructor */
Expand Down Expand Up @@ -96,12 +74,7 @@ class CGaussianDistribution: public CProbabilityDistribution
*
* where \f$d\f$ is the dimension of the Gaussian.
* The method to compute the log-determinant is based on the factorization
* of the covariance matrix. If a Cholesky based factorization is used, the
* log-determinant is computed using the triangular factor. If an SVD
* factorization is used for sampling, the log-pdf is computed using a QR
* factorization (which is computed once).
*
* The inversion of the covariance is done using the factorization.
* of the covariance matrix.
*
* @param samples samples to compute log-pdf of (column vectors)
* @return vector with log-pdfs of given samples
Expand All @@ -119,30 +92,13 @@ class CGaussianDistribution: public CProbabilityDistribution
/** Initialses and registers parameters */
void init();

/** Computes and stores the factorization of the covariance matrix
*
* @param cov positive definite covariance matrix to compute factorization of
* @param factorization the factorizaation type to be used
*/
void compute_covariance_factorization(SGMatrix<float64_t> cov,
ECovarianceFactorization factorization);

protected:
/** Mean */
SGVector<float64_t> m_mean;

/** Lower factor of covariance matrix (depends on factorization type).
* Covariance (approximation) is given by \f$\Sigma=LL^T\f$ */
SGMatrix<float64_t> m_L;

/** Orthogonal Q factor of \f$\Sigma=QR\f$ factorization of covariance */
SGMatrix<float64_t> m_Q;

/** Triangular R factor of \f$\Sigma=QR\f$ factorization of covariance */
SGMatrix<float64_t> m_R;

/** Type of the factorization of the covariance matrix */
ECovarianceFactorization m_factorization;
};

}
Expand Down
17 changes: 12 additions & 5 deletions src/shogun/machine/gp/InferenceMethod.cpp
Expand Up @@ -93,14 +93,18 @@ void CInferenceMethod::check_members()
}

float64_t CInferenceMethod::get_log_ml_estimate(
int32_t num_importance_samples, ECovarianceFactorization factorization)
int32_t num_importance_samples, float64_t ridge_size)
{
/* sample from Gaussian approximation to q(f|y) */
SGMatrix<float64_t> cov=get_posterior_approximation_covariance();

/* add ridge */
for (index_t i=0; i<cov.num_rows; ++i)
cov(i,i)+=ridge_size;

SGVector<float64_t> mean=get_posterior_approximation_mean();

CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov,
factorization);
CGaussianDistribution* post_approx=new CGaussianDistribution(mean, cov);
SGMatrix<float64_t> samples=post_approx->sample(num_importance_samples);

/* evaluate q(f^i|y), p(f^i|\theta), p(y|f^i), i.e.,
Expand All @@ -120,9 +124,12 @@ float64_t CInferenceMethod::get_log_ml_estimate(
for (index_t i=0; i<m_ktrtr.num_rows*m_ktrtr.num_cols; ++i)
scaled_kernel.matrix[i]*=CMath::sq(m_scale);

/* add ridge */
for (index_t i=0; i<cov.num_rows; ++i)
scaled_kernel(i,i)+=ridge_size;

CGaussianDistribution* prior=new CGaussianDistribution(
m_mean->get_mean_vector(m_feature_matrix), scaled_kernel,
factorization);
m_mean->get_mean_vector(m_feature_matrix), scaled_kernel);
SGVector<float64_t> log_pdf_prior=prior->log_pdf(samples);
SG_UNREF(prior);
prior=NULL;
Expand Down
9 changes: 4 additions & 5 deletions src/shogun/machine/gp/InferenceMethod.h
Expand Up @@ -20,7 +20,6 @@
#include <shogun/machine/gp/LikelihoodModel.h>
#include <shogun/machine/gp/MeanFunction.h>
#include <shogun/evaluation/DifferentiableFunction.h>
#include <shogun/distributions/classical/GaussianDistribution.h>

namespace shogun
{
Expand Down Expand Up @@ -342,14 +341,14 @@ class CInferenceMethod : public CDifferentiableFunction
*
* @param num_importance_samples the number of importance samples \f$n\f$
* from \f$ q(f|y, \theta) \f$.
* @param factorization type to factorize the Gaussian covariances. Default
* is Cholesky but this might be changed to SVD/QR for greater numerical
* stability.
* @param ridge_size scalar that is added to the diagonal of the involved
* Gaussian distribution's covariance of GP prior and posterior approximation
* to stabilise things. Increase if Cholesky factorization fails.
* @return unbiased estimate of the log of the marginal likelihood
* function \f$ log(p(y|\theta)) \f$
*/
float64_t get_log_ml_estimate(int32_t num_importance_samples=1,
ECovarianceFactorization factorization=CF_CHOLESKY);
float64_t ridge_size=1e-15);

protected:
/** update alpha matrix */
Expand Down

0 comments on commit 13ec4b7

Please sign in to comment.