diff --git a/albatross/core/distribution.h b/albatross/core/distribution.h index 40edc7ca..db5b4651 100644 --- a/albatross/core/distribution.h +++ b/albatross/core/distribution.h @@ -85,10 +85,18 @@ template struct Distribution { } }; +// A JointDistribution has a dense covariance matrix, which +// contains the covariance between each variable and all others. +using JointDistribution = Distribution; + +// We use a wrapper around DiagonalMatrix in order to make +// the resulting distribution serializable using DiagonalMatrixXd = Eigen::SerializableDiagonalMatrix; -using DenseDistribution = Distribution; -using DiagonalDistribution = Distribution; +// A MarginalDistribution has only a diagonal covariance +// matrix, so in turn only describes the variance of each +// variable independent of all others. +using MarginalDistribution = Distribution; template Distribution subset(const std::vector &indices, diff --git a/albatross/core/model.h b/albatross/core/model.h index f40d4328..3fc7d877 100644 --- a/albatross/core/model.h +++ b/albatross/core/model.h @@ -24,9 +24,6 @@ namespace albatross { -using TargetDistribution = DiagonalDistribution; -using PredictDistribution = DenseDistribution; - /* * A RegressionDataset holds two vectors of data, the features * where a single feature can be any class that contains the information used * ... */ template struct RegressionDataset { std::vector features; - TargetDistribution targets; + MarginalDistribution targets; RegressionDataset(){}; RegressionDataset(const std::vector &features_, - const TargetDistribution &targets_) + const MarginalDistribution &targets_) : features(features_), targets(targets_) { // If the two inputs aren't the same size they clearly aren't // consistent. 
@@ -51,7 +48,7 @@ template struct RegressionDataset { RegressionDataset(const std::vector &features_, const Eigen::VectorXd &targets_) - : RegressionDataset(features_, TargetDistribution(targets_)) {} + : RegressionDataset(features_, MarginalDistribution(targets_)) {} template typename std::enable_if::value, @@ -127,7 +124,7 @@ class RegressionModel : public ParameterHandlingMixin { * predict. */ void fit(const std::vector &features, - const TargetDistribution &targets) { + const MarginalDistribution &targets) { assert(features.size() > 0); assert(static_cast(features.size()) == static_cast(targets.size())); @@ -140,7 +137,7 @@ class RegressionModel : public ParameterHandlingMixin { */ void fit(const std::vector &features, const Eigen::VectorXd &targets) { - fit(features, TargetDistribution(targets)); + fit(features, MarginalDistribution(targets)); } /* @@ -155,28 +152,49 @@ class RegressionModel : public ParameterHandlingMixin { * and makes simple checks to confirm the implementation is returning * properly sized Distribution. 
*/ - PredictDistribution predict(const std::vector &features) const { + JointDistribution predict(const std::vector &features) const { assert(has_been_fit()); - PredictDistribution preds = predict_(features); + JointDistribution preds = predict_(features); assert(static_cast(preds.mean.size()) == static_cast(features.size())); return preds; } - PredictDistribution predict(const FeatureType &feature) const { + JointDistribution predict(const FeatureType &feature) const { std::vector features = {feature}; return predict(features); } + MarginalDistribution + predict_marginal(const std::vector &features) const { + assert(has_been_fit()); + MarginalDistribution preds = predict_marginal_(features); + assert(static_cast(preds.mean.size()) == + static_cast(features.size())); + return preds; + } + + Eigen::VectorXd predict_mean(const std::vector &features) const { + assert(has_been_fit()); + Eigen::VectorXd preds = predict_mean_(features); + assert(static_cast(preds.size()) == static_cast(features.size())); + return preds; + } + + double predict_mean(const FeatureType &feature) const { + std::vector features = {feature}; + return predict_mean(features)[0]; + } + /* * Computes predictions for the test features given set of training * features and targets. In the general case this is simply a call to fit, * follwed by predict but overriding this method may speed up computation for * some models. */ - PredictDistribution + JointDistribution fit_and_predict(const std::vector &train_features, - const TargetDistribution &train_targets, + const MarginalDistribution &train_targets, const std::vector &test_features) { // Fit using the training data, then predict with the test. 
fit(train_features, train_targets); @@ -187,7 +205,7 @@ class RegressionModel : public ParameterHandlingMixin { * A convenience wrapper around fit_and_predict which uses the entries * in a RegressionFold struct */ - PredictDistribution fit_and_predict(const RegressionFold &fold) { + JointDistribution fit_and_predict(const RegressionFold &fold) { return fit_and_predict(fold.train.features, fold.train.targets, fold.test.features); } @@ -228,11 +246,29 @@ class RegressionModel : public ParameterHandlingMixin { protected: virtual void fit_(const std::vector &features, - const TargetDistribution &targets) = 0; + const MarginalDistribution &targets) = 0; - virtual PredictDistribution + virtual JointDistribution predict_(const std::vector &features) const = 0; + virtual MarginalDistribution + predict_marginal_(const std::vector &features) const { + std::cout << "WARNING: A marginal prediction is being made, but in a " + "horribly inefficient way."; + const auto full_distribution = predict_(features); + return MarginalDistribution( + full_distribution.mean, + full_distribution.covariance.diagonal().asDiagonal()); + } + + virtual Eigen::VectorXd + predict_mean_(const std::vector &features) const { + std::cout << "WARNING: A mean prediction is being made, but in a horribly " + "inefficient way."; + const auto full_distribution = predict_(features); + return full_distribution.mean; + } + bool has_been_fit_; }; diff --git a/albatross/core/model_adapter.h b/albatross/core/model_adapter.h index f60ba2dc..aee7de4c 100644 --- a/albatross/core/model_adapter.h +++ b/albatross/core/model_adapter.h @@ -119,7 +119,7 @@ class AdaptedRegressionModel protected: void fit_(const std::vector &features, - const TargetDistribution &targets) override { + const MarginalDistribution &targets) override { this->sub_model_.fit(convert_features(features), targets); } @@ -130,18 +130,28 @@ class AdaptedRegressionModel */ fit_type_if_serializable serializable_fit_(const std::vector &features, - const 
TargetDistribution &targets) const override { + const MarginalDistribution &targets) const override { assert(false && "serializable_fit_ for an adapted model should never be called"); typename fit_type_or_void::type dummy; return dummy; } - PredictDistribution + JointDistribution predict_(const std::vector &features) const override { return sub_model_.predict(convert_features(features)); } + virtual MarginalDistribution + predict_marginal_(const std::vector &features) const override { + return sub_model_.predict_marginal(convert_features(features)); + } + + virtual Eigen::VectorXd + predict_mean_(const std::vector &features) const override { + return sub_model_.predict_mean(convert_features(features)); + } + const std::vector convert_features(const std::vector &parent_features) const { std::vector converted; diff --git a/albatross/core/serialize.h b/albatross/core/serialize.h index 3e5e3ef1..bb96ef05 100644 --- a/albatross/core/serialize.h +++ b/albatross/core/serialize.h @@ -89,13 +89,13 @@ class SerializableRegressionModel : public RegressionModel { protected: void fit_(const std::vector &features, - const TargetDistribution &targets) { + const MarginalDistribution &targets) { model_fit_ = serializable_fit_(features, targets); } virtual ModelFit serializable_fit_(const std::vector &features, - const TargetDistribution &targets) const = 0; + const MarginalDistribution &targets) const = 0; ModelFit model_fit_; }; diff --git a/albatross/crossvalidation.h b/albatross/crossvalidation.h index ac435c7f..3ffcaa75 100644 --- a/albatross/crossvalidation.h +++ b/albatross/crossvalidation.h @@ -26,7 +26,7 @@ namespace albatross { * the quality of the prediction. 
*/ using EvaluationMetric = std::function; + const JointDistribution &prediction, const MarginalDistribution &targets)>; inline FoldIndices get_train_indices(const FoldIndices &test_indices, const int n) { @@ -68,11 +68,11 @@ folds_from_fold_indexer(const RegressionDataset &dataset, std::vector train_features = subset(train_indices, dataset.features); - TargetDistribution train_targets = subset(train_indices, dataset.targets); + MarginalDistribution train_targets = subset(train_indices, dataset.targets); std::vector test_features = subset(test_indices, dataset.features); - TargetDistribution test_targets = subset(test_indices, dataset.targets); + MarginalDistribution test_targets = subset(test_indices, dataset.targets); assert(train_features.size() == train_targets.size()); assert(test_features.size() == test_targets.size()); @@ -151,17 +151,17 @@ static inline std::vector> leave_one_group_out( } /* - * Computes a PredictDistribution for each fold in set of cross validation - * folds. The resulting vector of PredictDistributions can then be used + * Computes a JointDistribution for each fold in set of cross validation + * folds. The resulting vector of JointDistributions can then be used * for things like computing an EvaluationMetric for each fold, or assembling * all the predictions into a single cross validated PredictionDistribution. 
*/ template -static inline std::vector cross_validated_predictions( +static inline std::vector cross_validated_predictions( const std::vector> &folds, RegressionModel *model) { // Iteratively make predictions and assemble the output vector - std::vector predictions; + std::vector predictions; for (std::size_t i = 0; i < folds.size(); i++) { predictions.push_back(model->fit_and_predict( folds[i].train_dataset.features, folds[i].train_dataset.targets, @@ -178,7 +178,7 @@ template static inline Eigen::VectorXd compute_scores(const EvaluationMetric &metric, const std::vector> &folds, - const std::vector &predictions) { + const std::vector &predictions) { // Create a vector of metrics, one for each fold. Eigen::VectorXd metrics(static_cast(folds.size())); // Loop over each fold, making predictions then evaluating them @@ -200,7 +200,7 @@ cross_validated_scores(const EvaluationMetric &metric, const std::vector> &folds, RegressionModel *model) { // Create a vector of predictions. - std::vector predictions = + std::vector predictions = cross_validated_predictions(folds, model); return compute_scores(metric, folds, predictions); } @@ -216,13 +216,13 @@ cross_validated_scores(const EvaluationMetric &metric, * unknown. */ template -static inline PredictDistribution +static inline JointDistribution cross_validated_predict(const std::vector> &folds, RegressionModel *model) { // Get the cross validated predictions, note however that // depending on the type of folds, these predictions may // be shuffled. - const std::vector predictions = + const std::vector predictions = cross_validated_predictions(folds, model); // Create a new prediction mean that will eventually contain // the ordered concatenation of each fold's predictions. 
@@ -240,7 +240,7 @@ cross_validated_predict(const std::vector> &folds, pred.mean[i]; } } - return PredictDistribution(mean); + return JointDistribution(mean); } } // namespace albatross diff --git a/albatross/evaluate.h b/albatross/evaluate.h index 10af6a02..db807e6e 100644 --- a/albatross/evaluate.h +++ b/albatross/evaluate.h @@ -105,9 +105,8 @@ negative_log_likelihood(const Eigen::VectorXd &deviation, */ namespace evaluation_metrics { -static inline double -root_mean_square_error(const PredictDistribution &prediction, - const TargetDistribution &truth) { +static inline double root_mean_square_error(const JointDistribution &prediction, + const MarginalDistribution &truth) { const Eigen::VectorXd error = prediction.mean - truth.mean; double mse = error.dot(error) / static_cast(error.size()); return sqrt(mse); @@ -117,8 +116,8 @@ root_mean_square_error(const PredictDistribution &prediction, * Takes output from a model (PredictionDistribution) * and the corresponding truth and uses them to compute the stddev. */ -static inline double standard_deviation(const PredictDistribution &prediction, - const TargetDistribution &truth) { +static inline double standard_deviation(const JointDistribution &prediction, + const MarginalDistribution &truth) { Eigen::VectorXd error = prediction.mean - truth.mean; const auto n_elements = static_cast(error.size()); const double mean_error = error.sum() / n_elements; @@ -131,8 +130,8 @@ static inline double standard_deviation(const PredictDistribution &prediction, * distribution is multivariate normal. 
*/ static inline double -negative_log_likelihood(const PredictDistribution &prediction, - const TargetDistribution &truth) { +negative_log_likelihood(const JointDistribution &prediction, + const MarginalDistribution &truth) { const Eigen::VectorXd mean = prediction.mean - truth.mean; Eigen::MatrixXd covariance(prediction.covariance); if (truth.has_covariance()) { diff --git a/albatross/models/gp.h b/albatross/models/gp.h index 184cd401..1012c0a7 100644 --- a/albatross/models/gp.h +++ b/albatross/models/gp.h @@ -25,7 +25,7 @@ namespace albatross { -using InspectionDistribution = PredictDistribution; +using InspectionDistribution = JointDistribution; template struct GaussianProcessFit { std::vector train_features; @@ -127,8 +127,9 @@ class GaussianProcessRegression } protected: - FitType serializable_fit_(const std::vector &features, - const TargetDistribution &targets) const override { + FitType + serializable_fit_(const std::vector &features, + const MarginalDistribution &targets) const override { Eigen::MatrixXd cov = symmetric_covariance(covariance_function_, features); FitType model_fit; model_fit.train_features = features; @@ -141,17 +142,42 @@ class GaussianProcessRegression return model_fit; } - PredictDistribution + JointDistribution predict_(const std::vector &features) const override { const auto cross_cov = asymmetric_covariance( covariance_function_, features, this->model_fit_.train_features); - // Then we can use the information vector to determine the posterior const Eigen::VectorXd pred = cross_cov * this->model_fit_.information; Eigen::MatrixXd pred_cov = symmetric_covariance(covariance_function_, features); auto ldlt = this->model_fit_.train_ldlt; pred_cov -= cross_cov * ldlt.solve(cross_cov.transpose()); - return PredictDistribution(pred, pred_cov); + return JointDistribution(pred, pred_cov); + } + + virtual MarginalDistribution + predict_marginal_(const std::vector &features) const override { + const auto cross_cov = asymmetric_covariance( + 
covariance_function_, features, this->model_fit_.train_features); + const Eigen::VectorXd pred = cross_cov * this->model_fit_.information; + // Here we efficiently only compute the diagonal of the posterior + // covariance matrix. + auto ldlt = this->model_fit_.train_ldlt; + Eigen::MatrixXd explained = ldlt.solve(cross_cov.transpose()); + Eigen::VectorXd marginal_variance = + -explained.cwiseProduct(cross_cov.transpose()).array().colwise().sum(); + for (Eigen::Index i = 0; i < pred.size(); i++) { + marginal_variance[i] += covariance_function_(features[i], features[i]); + } + + return MarginalDistribution(pred, marginal_variance.asDiagonal()); + } + + virtual Eigen::VectorXd + predict_mean_(const std::vector &features) const override { + const auto cross_cov = asymmetric_covariance( + covariance_function_, features, this->model_fit_.train_features); + const Eigen::VectorXd pred = cross_cov * this->model_fit_.information; + return pred; } private: diff --git a/albatross/models/least_squares.h b/albatross/models/least_squares.h index 7bbc8583..b60ebf03 100644 --- a/albatross/models/least_squares.h +++ b/albatross/models/least_squares.h @@ -51,7 +51,7 @@ class LeastSquaresRegression LeastSquaresFit serializable_fit_(const std::vector &features, - const TargetDistribution &targets) const override { + const MarginalDistribution &targets) const override { // The way this is currently implemented we assume all targets have the same // variance (or zero variance). 
assert(!targets.has_covariance()); @@ -68,7 +68,7 @@ class LeastSquaresRegression } protected: - PredictDistribution + JointDistribution predict_(const std::vector &features) const override { int n = static_cast(features.size()); Eigen::VectorXd predictions(n); @@ -77,7 +77,7 @@ class LeastSquaresRegression features[static_cast(i)].dot(this->model_fit_.coefs); } - return PredictDistribution(predictions); + return JointDistribution(predictions); } /* diff --git a/tests/test_core_distribution.cc b/tests/test_core_distribution.cc index d1b44a83..d37ba4fa 100644 --- a/tests/test_core_distribution.cc +++ b/tests/test_core_distribution.cc @@ -76,7 +76,7 @@ void expect_subset_equal(const Eigen::DiagonalMatrix &original, template class PolymorphicDistributionTest : public ::testing::Test {}; -typedef ::testing::Types +typedef ::testing::Types DistributionsToTest; TYPED_TEST_CASE(PolymorphicDistributionTest, DistributionsToTest); diff --git a/tests/test_core_model.cc b/tests/test_core_model.cc index 5a5b7262..8ae0e1b1 100644 --- a/tests/test_core_model.cc +++ b/tests/test_core_model.cc @@ -25,7 +25,7 @@ TEST(test_core_model, test_fit_predict) { MockModel m; m.fit(dataset); // We should be able to perfectly predict in this case. 
- PredictDistribution predictions = m.predict(dataset.features); + JointDistribution predictions = m.predict(dataset.features); EXPECT_LT((predictions.mean - dataset.targets.mean).norm(), 1e-10); } diff --git a/tests/test_evaluate.cc b/tests/test_evaluate.cc index 9216b821..c5ca6e5b 100644 --- a/tests/test_evaluate.cc +++ b/tests/test_evaluate.cc @@ -51,7 +51,7 @@ TEST(test_evaluate, test_negative_log_likelihood) { } TEST_F(LinearRegressionTest, test_leave_one_out) { - PredictDistribution preds = model_ptr_->fit_and_predict( + JointDistribution preds = model_ptr_->fit_and_predict( dataset_.features, dataset_.targets, dataset_.features); double in_sample_rmse = root_mean_square_error(preds, dataset_.targets); const auto folds = leave_one_out(dataset_); @@ -90,7 +90,7 @@ bool is_monotonic_increasing(Eigen::VectorXd &x) { TEST_F(LinearRegressionTest, test_cross_validated_predict) { const auto folds = leave_one_group_out(dataset_, group_by_interval); - PredictDistribution preds = cross_validated_predict(folds, model_ptr_.get()); + JointDistribution preds = cross_validated_predict(folds, model_ptr_.get()); // Make sure the group cross validation resulted in folds that // are out of order diff --git a/tests/test_models.cc b/tests/test_models.cc index d5a3ec5b..95eda9c3 100644 --- a/tests/test_models.cc +++ b/tests/test_models.cc @@ -90,4 +90,22 @@ TEST(test_models, test_with_target_distribution) { EXPECT_LE(scores.mean(), scores_without_variance.mean()); } + +TEST(test_models, test_predict_variants) { + auto dataset = make_heteroscedastic_toy_linear_data(); + + auto model = MakeGaussianProcess().create(); + model->fit(dataset); + const auto joint_predictions = model->predict(dataset.features); + const auto marginal_predictions = model->predict_marginal(dataset.features); + const auto mean_predictions = model->predict_mean(dataset.features); + + for (Eigen::Index i = 0; i < joint_predictions.mean.size(); i++) { + EXPECT_NEAR(joint_predictions.mean[i], mean_predictions[i], 
1e-6); + EXPECT_NEAR(joint_predictions.mean[i], marginal_predictions.mean[i], 1e-6); + EXPECT_NEAR(joint_predictions.covariance(i, i), + marginal_predictions.covariance.diagonal()[i], 1e-6); + } +} + } // namespace albatross diff --git a/tests/test_serialize.cc b/tests/test_serialize.cc index 417f1024..9566e080 100644 --- a/tests/test_serialize.cc +++ b/tests/test_serialize.cc @@ -99,36 +99,36 @@ struct EigenMatrixXd : public SerializableType { } }; -struct FullDenseDistribution : public SerializableType { - DenseDistribution create() const override { +struct FullJointDistribution : public SerializableType { + JointDistribution create() const override { Eigen::MatrixXd cov(3, 3); cov << 1., 2., 3., 4., 5., 6., 7, 8, 9; Eigen::VectorXd mean = Eigen::VectorXd::Ones(3); - return DenseDistribution(mean, cov); + return JointDistribution(mean, cov); } }; -struct MeanOnlyDenseDistribution : public SerializableType { - DenseDistribution create() const override { +struct MeanOnlyJointDistribution : public SerializableType { + JointDistribution create() const override { Eigen::MatrixXd mean = Eigen::VectorXd::Ones(3); - return DenseDistribution(mean); + return JointDistribution(mean); } }; -struct FullDiagonalDistribution - : public SerializableType { - DiagonalDistribution create() const override { +struct FullMarginalDistribution + : public SerializableType { + MarginalDistribution create() const override { Eigen::VectorXd diag = Eigen::VectorXd::Ones(3); Eigen::VectorXd mean = Eigen::VectorXd::Ones(3); - return DiagonalDistribution(mean, diag.asDiagonal()); + return MarginalDistribution(mean, diag.asDiagonal()); } }; -struct MeanOnlyDiagonalDistribution - : public SerializableType { - DiagonalDistribution create() const override { +struct MeanOnlyMarginalDistribution + : public SerializableType { + MarginalDistribution create() const override { Eigen::MatrixXd mean = Eigen::VectorXd::Ones(3); - return DiagonalDistribution(mean); + return MarginalDistribution(mean); } }; 
@@ -314,8 +314,8 @@ struct PolymorphicSerializeTest : public ::testing::Test { typedef ::testing::Types< LDLT, SerializableType, SerializableType, EmptyEigenVectorXd, EigenVectorXd, EmptyEigenMatrixXd, EigenMatrixXd, - FullDenseDistribution, MeanOnlyDenseDistribution, FullDiagonalDistribution, - MeanOnlyDiagonalDistribution, ParameterStoreType, + FullJointDistribution, MeanOnlyJointDistribution, FullMarginalDistribution, + MeanOnlyMarginalDistribution, ParameterStoreType, SerializableType, UnfitSerializableModel, FitSerializableModel, FitDirectModel, UnfitDirectModel, UnfitRegressionModel, FitLinearRegressionModel, FitLinearSerializablePointer, diff --git a/tests/test_utils.h b/tests/test_utils.h index 1995e371..6b114f6d 100644 --- a/tests/test_utils.h +++ b/tests/test_utils.h @@ -73,8 +73,9 @@ class MockModel : public SerializableRegressionModel { protected: // builds the map from int to value - MockFit serializable_fit_(const std::vector &features, - const TargetDistribution &targets) const override { + MockFit + serializable_fit_(const std::vector &features, + const MarginalDistribution &targets) const override { int n = static_cast(features.size()); Eigen::VectorXd predictions(n); @@ -87,7 +88,7 @@ class MockModel : public SerializableRegressionModel { } // looks up the prediction in the map - PredictDistribution + JointDistribution predict_(const std::vector &features) const override { int n = static_cast(features.size()); Eigen::VectorXd predictions(n); @@ -97,7 +98,7 @@ class MockModel : public SerializableRegressionModel { predictions[i] = this->model_fit_.train_data.find(index)->second; } - return PredictDistribution(predictions); + return JointDistribution(predictions); } }; @@ -190,7 +191,7 @@ make_heteroscedastic_toy_linear_data(const double a = 5., const double b = 1., auto diag_matrix = variance.asDiagonal(); - TargetDistribution target_dist(targets, diag_matrix); + MarginalDistribution target_dist(targets, diag_matrix); return 
RegressionDataset(dataset.features, target_dist); }