Browse files

Mean predictions in GaussianProcessRegression are now solved using Cholesky
decomposition. The predicted covariance matrix is also accessible via
getCovarianceMatrix().
  • Loading branch information...
1 parent 1cfe695 commit 6f128a57d2ec91ba0185ab6a375934cf9a51553a puffin444 committed Apr 10, 2012
View
7 examples/undocumented/libshogun/regression_gaussian_process.cpp
@@ -48,8 +48,14 @@ int main(int argc, char** argv)
//Gaussian Process Regression with sigma = 1.
CGaussianProcessRegression regressor(1.0, kernel, features, labels);
+ regressor.train(features);
//Get mean predictions
CLabels* result = regressor.apply();
+ SG_REF(result);
+
+ SGMatrix<float64_t> cov = regressor.getCovarianceMatrix(features);
+
+ CMath::display_matrix(cov.matrix, cov.num_rows, cov.num_cols, "Covariance Matrix");
// output predictions
for (int32_t i=0; i<3; i++)
@@ -60,6 +66,7 @@ int main(int argc, char** argv)
SG_UNREF(features);
SG_UNREF(labels);
SG_UNREF(kernel);
+ cov.destroy_matrix();
#endif
View
2 examples/undocumented/python_modular/regression_gaussian_process_modular.py
@@ -24,7 +24,7 @@
labels=Labels(trainlab);
gp=GaussianProcessRegression(1.0, kernel, feats_train, labels);
-
+gp.train(feats_train);
out=gp.apply(feats_test).get_labels();
testerr=mean(sign(out)!=testlab)
print testerr
View
219 src/shogun/regression/GaussianProcessRegression.cpp
@@ -53,71 +53,29 @@ void CGaussianProcessRegression::init()
//Computes the predictive mean K(X_test, X_train) * alpha, where alpha was
//precomputed by train_machine() as the solution of
//(K(X_train, X_train) + sigma^2*I) alpha = labels.
CLabels* CGaussianProcessRegression::mean_prediction(CFeatures* data)
{
- if(kernel == NULL) return NULL;
-
- kernel->init(features, features);
-
- //K(X_train, X_train)
- SGMatrix<float64_t> kernel_train_matrix = kernel->get_kernel_matrix();
-
+ //Guards: a kernel must be set and train_machine() must have produced an
+ //alpha whose length matches the number of training labels.
+ if (!kernel)
+ SG_ERROR( "No kernel assigned!\n");
+ if (m_labels->get_num_labels() != features->get_num_vectors())
+ SG_ERROR("Number of training vectors does not match number of labels\n");
+ if (m_labels->get_num_labels() != m_alpha.vlen)
+ SG_ERROR("Machine not properly trained.\n");
+
kernel->init(data, features);
//K(X_test, X_train)
SGMatrix<float64_t> kernel_test_matrix = kernel->get_kernel_matrix();
-
- SGMatrix<float64_t> temp1(kernel_train_matrix.num_rows,
- kernel_train_matrix.num_cols);
-
- SGMatrix<float64_t> temp2(kernel_train_matrix.num_rows,
- kernel_train_matrix.num_cols);
-
- SGVector<float64_t> diagonal(temp1.num_rows);
- CMath::fill_vector(diagonal.vector, temp1.num_rows, 1.0);
-
-
- CMath::create_diagonal_matrix(temp1.matrix, diagonal.vector, temp1.num_rows);
- CMath::create_diagonal_matrix(temp2.matrix, diagonal.vector, temp2.num_rows);
-
+
+ //NOTE(review): result_vector is sized by the number of *training* labels,
+ //but the mean prediction should have one entry per vector in `data` --
+ //this only matches when data has as many vectors as there are labels.
+ //Confirm against callers (the example calls apply() on the training set).
SGVector< float64_t > result_vector(m_labels->get_num_labels());
- SGVector< float64_t > label_vector = m_labels->get_labels();
-
- /* We wish to calculate
- * K(X_test, X_train)*(K(X_train, X_train)+sigma^(2)*I)^-1 * labels
- * for mean predictions.
- */
-
- //Calculate first (K(X_train, X_train)+sigma*I)
- cblas_dsymm(CblasColMajor, CblasLeft, CblasUpper,
- kernel_train_matrix.num_rows, temp2.num_cols, 1.0,
- kernel_train_matrix.matrix, kernel_train_matrix.num_cols,
- temp2.matrix, temp2.num_cols, m_sigma*m_sigma,
- temp1.matrix, temp1.num_cols);
-
- //Take inverse of (K(X_train, X_train)+sigma*I)
- CMath::inverse(temp1);
- //Then multiply K(X_test, X_train) by
- //(K(X_train, X_train) + sigma*I)^-1)
- cblas_dsymm(CblasColMajor, CblasLeft, CblasUpper,
- kernel_test_matrix.num_rows, temp1.num_cols, 1.0,
- kernel_test_matrix.matrix, kernel_test_matrix.num_cols,
- temp1.matrix, temp1.num_cols, 0.0, temp2.matrix,
- temp2.num_cols);
-
- //Finally multiply result by labels to obtain mean predictions on training
- //examples
- cblas_dgemv(CblasColMajor, CblasNoTrans, temp1.num_rows,
- m_labels->get_num_labels(), 1.0, temp1.matrix,
- temp1.num_cols, label_vector.vector, 1, 0.0,
+ //Here we multiply K*^t by alpha to receive the mean predictions.
+ //NOTE(review): for column-major storage lda should be num_rows, but
+ //num_cols is passed; with CblasTrans the vector lengths (m_alpha.vlen vs
+ //kernel_test_matrix.num_rows) also only agree when K(X_test, X_train) is
+ //square, i.e. num_test == num_train -- verify for the general case.
+ cblas_dgemv(CblasColMajor, CblasTrans, kernel_test_matrix.num_rows,
+ m_alpha.vlen, 1.0, kernel_test_matrix.matrix,
+ kernel_test_matrix.num_cols, m_alpha.vector, 1, 0.0,
result_vector.vector, 1);
CLabels* result = new CLabels(result_vector);
-
- kernel_train_matrix.destroy_matrix();
+
kernel_test_matrix.destroy_matrix();
- temp2.destroy_matrix();
- temp1.destroy_matrix();
- diagonal.destroy_vector();
return result;
}
@@ -136,7 +94,7 @@ CLabels* CGaussianProcessRegression::apply(CFeatures* data)
SG_ERROR("No features specified\n");
if (!data->has_property(FP_DOT))
SG_ERROR("Specified features are not of type CDotFeatures\n");
- if (m_labels->get_num_labels() != data->get_num_vectors())
+ if (m_labels->get_num_labels() != features->get_num_vectors())
SG_ERROR("Number of training vectors does not match number of labels\n");
if (data->get_feature_class() != C_SIMPLE)
SG_ERROR("Expected Simple Features\n");
@@ -156,16 +114,159 @@ float64_t CGaussianProcessRegression::apply(int32_t num)
return 0;
}
-/*
+
//Trains the GP regressor: builds K(X_train, X_train) + sigma^2*I, stores its
//lower Cholesky factor in m_L, and solves
//(K(X_train, X_train) + sigma^2*I) alpha = labels for m_alpha.
bool CGaussianProcessRegression::train_machine(CFeatures* data)
{
- return false;
-}*/
+ //Validate feature type/class and label count before touching any state.
+ if (!data->has_property(FP_DOT))
+ SG_ERROR("Specified features are not of type CDotFeatures\n");
+ if (m_labels->get_num_labels() != data->get_num_vectors())
+ SG_ERROR("Number of training vectors does not match number of labels\n");
+ if (data->get_feature_class() != C_SIMPLE)
+ SG_ERROR("Expected Simple Features\n");
+ if (data->get_feature_type() != F_DREAL)
+ SG_ERROR("Expected Real Features\n");
+ if (!kernel)
+ SG_ERROR( "No kernel assigned!\n");
+
+ //NOTE(review): C-style downcast; the FP_DOT check above makes it
+ //plausible, but a checked cast would be safer.
+ set_features((CDotFeatures*)data);
+
+ kernel->init(features, features);
+
+ //K(X_train, X_train)
+ SGMatrix<float64_t> kernel_train_matrix = kernel->get_kernel_matrix();
+
+ SGMatrix<float64_t> temp1(kernel_train_matrix.num_rows,
+ kernel_train_matrix.num_cols);
+
+ SGMatrix<float64_t> temp2(kernel_train_matrix.num_rows,
+ kernel_train_matrix.num_cols);
+
+ //temp1 and temp2 both start out as identity matrices.
+ SGVector<float64_t> diagonal(temp1.num_rows);
+ CMath::fill_vector(diagonal.vector, temp1.num_rows, 1.0);
+
+ CMath::create_diagonal_matrix(temp1.matrix, diagonal.vector, temp1.num_rows);
+ CMath::create_diagonal_matrix(temp2.matrix, diagonal.vector, temp2.num_rows);
+
+ //Calculate first (K(X_train, X_train)+sigma*I)
+ //Since temp2 = I and temp1 = I, this dsymm evaluates
+ //temp1 = 1.0*K*I + sigma^2*I, i.e. K + sigma^2*I.
+ cblas_dsymm(CblasColMajor, CblasLeft, CblasUpper,
+ kernel_train_matrix.num_rows, temp2.num_cols, 1.0,
+ kernel_train_matrix.matrix, kernel_train_matrix.num_cols,
+ temp2.matrix, temp2.num_cols, m_sigma*m_sigma,
+ temp1.matrix, temp1.num_cols);
+
+ //Keep an unfactored copy of K + sigma^2*I in temp2 for dposv below.
+ memcpy(temp2.matrix, temp1.matrix,
+ temp2.num_cols*temp2.num_rows*sizeof(float64_t));
+
+ //Get Lower triangle cholesky decomposition of K(X_train, X_train)+sigma*I)
+ //NOTE(review): the return value of clapack_dpotrf is ignored -- a
+ //non-positive-definite matrix fails silently. Also dpotrf only writes the
+ //lower triangle; the upper triangle of temp1 (and hence m_L) still holds
+ //entries of K + sigma^2*I rather than zeros -- confirm downstream code
+ //treats m_L as strictly triangular.
+ clapack_dpotrf(CblasColMajor, CblasLower,
+ temp1.num_cols, temp1.matrix, temp1.num_cols);
+
+ m_L.destroy_matrix();
+
+ m_L = SGMatrix<float64_t>(temp1.num_rows, temp1.num_cols);
+ memcpy(m_L.matrix, temp1.matrix,
+ temp1.num_cols*temp1.num_rows*sizeof(float64_t));
+
+ SGVector<float64_t> label_vector = m_labels->get_labels();
+
+ //dposv solves in place, so seed m_alpha with the label vector.
+ m_alpha.destroy_vector();
+ m_alpha = SGVector<float64_t>(label_vector.vlen);
+ memcpy(m_alpha.vector, label_vector.vector,
+ label_vector.vlen*sizeof(float64_t));
+
+ //Solve (K(X_train, X_train)+sigma*I) alpha = labels for alpha.
+ //NOTE(review): dposv re-factorizes temp2 from scratch; the Cholesky
+ //factor already computed in temp1 could be reused via dpotrs instead.
+ clapack_dposv(CblasColMajor, CblasLower,
+ temp2.num_cols, 1, temp2.matrix, temp2.num_cols,
+ m_alpha.vector, temp2.num_cols);
+
+ temp1.destroy_matrix();
+ temp2.destroy_matrix();
+ kernel_train_matrix.destroy_matrix();
+ diagonal.destroy_vector();
+
+ return true;
+}
+
+//Returns the predictive covariance K(X_test, X_test) - v^T v, where v solves
+//L v = K(X_test, X_train) for the factor L stored by train_machine().
+//Caller owns the returned matrix (example calls destroy_matrix() on it).
+SGMatrix<float64_t> CGaussianProcessRegression::getCovarianceMatrix(CFeatures* data)
+{
+ if (m_labels->get_num_labels() != features->get_num_vectors())
+ SG_ERROR("Number of training vectors does not match number of labels.\n");
+ if (!kernel)
+ SG_ERROR( "No kernel assigned!\n");
+ if (features->get_num_vectors() != m_L.num_rows)
+ SG_ERROR("Machine not properly trained.\n");
+
+ kernel->init(data, features);
+
+ //K(X_test, X_train)
+ SGMatrix<float64_t> kernel_test_matrix = kernel->get_kernel_matrix();
+
+ kernel->init(data, data);
+
+ //K(X_test, X_test)
+ SGMatrix<float64_t> kernel_star_matrix = kernel->get_kernel_matrix();
+
+ //NOTE(review): the final covariance should be num_test x num_test (the
+ //shape of kernel_star_matrix), but temp3 below is allocated with
+ //kernel_test_matrix's shape (num_test x num_train) -- only correct when
+ //the test and train counts coincide.
+ SGMatrix<float64_t> temp1(kernel_test_matrix.num_rows,
+ kernel_test_matrix.num_cols);
+
+ SGMatrix<float64_t> temp2(kernel_test_matrix.num_rows,
+ kernel_test_matrix.num_cols);
+
+ SGMatrix<float64_t> temp3(kernel_test_matrix.num_rows,
+ kernel_test_matrix.num_cols);
+
+ //Indices used to solve Lv=K(X_test, X_train) for v
+ SGVector< int32_t > ipiv(CMath::min(m_L.num_rows, m_L.num_cols));
+
+ int info;
+
+
+ memcpy(temp1.matrix, kernel_test_matrix.matrix,
+ kernel_test_matrix.num_cols*kernel_test_matrix.num_rows*sizeof(float64_t));
+
+ //Get indices used to solve Lv=K(X_test, X_train) for v
+ //NOTE(review): BUG -- dgetrf_ LU-factorizes m_L *in place* (LAPACK dgetrf
+ //overwrites its input with the factors), destroying the stored Cholesky
+ //factor. After this call the trained state is corrupted, so a second call
+ //to this method (or any later use of m_L) operates on garbage. Factor a
+ //copy instead -- and since m_L is triangular by construction, a triangular
+ //solve (dtrtrs/cblas_dtrsm) would avoid the LU factorization entirely.
+ dgetrf_(&m_L.num_rows, &m_L.num_cols, m_L.matrix, &m_L.num_cols, ipiv.vector, &info);
+
+ //Solve Lv=K(X_test, X_train) for v
+ clapack_dgetrs(CblasColMajor, CblasNoTrans,
+ m_L.num_rows, kernel_test_matrix.num_cols, m_L.matrix, m_L.num_cols,
+ ipiv.vector, temp1.matrix, temp1.num_cols);
+
+ memcpy(temp2.matrix, temp1.matrix, temp1.num_cols*temp1.num_rows*sizeof(float64_t));
+
+ //Store v^t*v in temp3
+ //NOTE(review): for C = A^T*B with A of size r x c, the inner dimension k
+ //must be r (temp1.num_rows); temp1.num_cols is passed, which is only
+ //correct for square temp1 -- verify for num_test != num_train.
+ cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, temp1.num_rows,
+ temp1.num_cols, temp1.num_cols, 1.0, temp1.matrix, temp1.num_cols,
+ temp2.matrix, temp2.num_cols, 0.0, temp3.matrix, temp3.num_cols);
+
+ //Set temp2 to identity matrix
+ SGVector<float64_t> diagonal(temp2.num_rows);
+ CMath::fill_vector(diagonal.vector, temp2.num_rows, 1.0);
+ CMath::create_diagonal_matrix(temp2.matrix, diagonal.vector, temp2.num_rows);
+
+ //Calculate Covariance Matrix = K(X_test, X_test) - v^t*v
+ //(dgemm computes temp3 = 1.0*K_star*I + (-1.0)*temp3)
+ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, kernel_star_matrix.num_rows,
+ kernel_star_matrix.num_cols, kernel_star_matrix.num_cols, 1.0,
+ kernel_star_matrix.matrix, kernel_star_matrix.num_cols,
+ temp2.matrix, temp2.num_cols, -1.0, temp3.matrix, temp3.num_cols);
+
+ temp1.destroy_matrix();
+ temp2.destroy_matrix();
+ ipiv.destroy_vector();
+ kernel_star_matrix.destroy_matrix();
+ kernel_test_matrix.destroy_matrix();
+ diagonal.destroy_vector();
+
+ return temp3;
+}
+
//Releases the kernel/features references and frees the trained model state
//(Cholesky factor m_L and weight vector m_alpha).
CGaussianProcessRegression::~CGaussianProcessRegression()
{
SG_UNREF(kernel);
SG_UNREF(features);
+ m_L.destroy_matrix();
+ m_alpha.destroy_vector();
}
void CGaussianProcessRegression::set_kernel(CKernel* k)
View
25 src/shogun/regression/GaussianProcessRegression.h
@@ -138,9 +138,23 @@ class CGaussianProcessRegression : public CMachine
return CT_GAUSSIANPROCESSREGRESSION;
}
+ /** get covariance matrix
+ *
+ * @return covariance matrix
+ */
+ SGMatrix<float64_t> getCovarianceMatrix(CFeatures* data);
+
/** @return object name */
inline virtual const char* get_name() const { return "GaussianProcessRegression"; }
+ protected:
+ /** train regression
+ *
+ * @param data training data
+ *
+ * @return whether training was successful
+ */
+ virtual bool train_machine(CFeatures* data=NULL);
private:
/** function for initialization*/
@@ -164,6 +178,17 @@ class CGaussianProcessRegression : public CMachine
/** kernel */
CKernel* kernel;
+ /** Lower triangle Cholesky decomposition of
+ * feature matrix
+ */
+ SGMatrix<float64_t> m_L;
+
+ /** Alpha used for calculation of mean predictions,
+ * solves the equation (K(train,train)+sigma^2*I) alpha = labels.
+ */
+ SGVector< float64_t > m_alpha;
+
+
};
}
#endif

0 comments on commit 6f128a5

Please sign in to comment.