diff --git a/CMakeLists.txt b/CMakeLists.txt index d9b0a5f1..7c9bcb3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,7 +55,6 @@ set(albatross_HEADERS albatross/models/ransac_gp.h albatross/models/least_squares.h albatross/covariance_functions/covariance_function.h - albatross/covariance_functions/covariance_functions.h albatross/covariance_functions/distance_metrics.h albatross/covariance_functions/noise.h albatross/covariance_functions/polynomials.h @@ -68,4 +67,3 @@ add_custom_target(albatross DEPENDS ${albatross_SRCS} ${albatross_HEADERS} COMME add_subdirectory(tests) add_subdirectory(examples) - diff --git a/albatross/Common b/albatross/Common new file mode 100644 index 00000000..dbe3dd3d --- /dev/null +++ b/albatross/Common @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2019 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_COMMON_H +#define ALBATROSS_COMMON_H + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "map_utils.h" +#include "random_utils.h" + +#endif \ No newline at end of file diff --git a/albatross/Core b/albatross/Core new file mode 100644 index 00000000..4090a0e0 --- /dev/null +++ b/albatross/Core @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2019 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. 
+ * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_CORE_H +#define ALBATROSS_CORE_H + +#include "Dataset" + +#include +#include "core/declarations.h" +#include "core/traits.h" +#include "core/priors.h" +#include "core/parameter_handling_mixin.h" +#include "core/parameter_macros.h" +#include "core/fit_model.h" +#include "core/prediction.h" +#include "core/model.h" + +//#include +//#include +//#include +//#include +//#include +//#include +//#include +// +//#include "cereal/cereal.hpp" +//#include "keys.h" +//#include "map_utils.h" +//#include "priors.h" +//#include + +#endif diff --git a/albatross/CovarianceFunctions b/albatross/CovarianceFunctions new file mode 100644 index 00000000..d0534f55 --- /dev/null +++ b/albatross/CovarianceFunctions @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2019 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef ALBATROSS_COVARIANCE_FUNCTIONS_H +#define ALBATROSS_COVARIANCE_FUNCTIONS_H + +#include "Common" + +#include "core/declarations.h" +#include "core/traits.h" +#include "core/priors.h" +#include "core/parameter_handling_mixin.h" +#include "core/parameter_macros.h" + +#include "covariance_functions/traits.h" +#include "covariance_functions/covariance_function.h" +#include "covariance_functions/call_trace.h" +#include "covariance_functions/distance_metrics.h" +#include "covariance_functions/noise.h" +#include "covariance_functions/polynomials.h" +#include "covariance_functions/radial.h" +#include "covariance_functions/scaling_function.h" + +#endif \ No newline at end of file diff --git a/albatross/covariance_functions/covariance_functions.h b/albatross/Dataset similarity index 59% rename from albatross/covariance_functions/covariance_functions.h rename to albatross/Dataset index 00e12aaf..dec7c5b6 100644 --- a/albatross/covariance_functions/covariance_functions.h +++ b/albatross/Dataset @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Swift Navigation Inc. + * Copyright (C) 2019 Swift Navigation Inc. * Contact: Swift Navigation * * This source is subject to the license found in the file 'LICENSE' which must @@ -9,14 +9,12 @@ * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. 
*/ + +#ifndef ALBATROSS_DATASET_H +#define ALBATROSS_DATASET_H -#ifndef ALBATROSS_COVARIANCE_FUNCTIONS_COVARIANCE_FUNCTIONS_H -#define ALBATROSS_COVARIANCE_FUNCTIONS_COVARIANCE_FUNCTIONS_H +#include "Distribution" -#include "covariance_function.h" -#include "noise.h" -#include "polynomials.h" -#include "radial.h" -#include "scaling_function.h" +#include "core/dataset.h" -#endif +#endif \ No newline at end of file diff --git a/albatross/Distribution b/albatross/Distribution new file mode 100644 index 00000000..13572534 --- /dev/null +++ b/albatross/Distribution @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2019 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_DISTRIBUTION_H +#define ALBATROSS_DISTRIBUTION_H + +#include "Common" + +#include "cereal/traits.h" +#include "eigen/serializable_diagonal_matrix.h" + +#include "core/declarations.h" +#include "core/indexing.h" +#include "core/distribution.h" +#include "core/concatenate.h" + +#endif \ No newline at end of file diff --git a/albatross/GP b/albatross/GP new file mode 100644 index 00000000..b81f92a6 --- /dev/null +++ b/albatross/GP @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2019 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#ifndef ALBATROSS_GP_H +#define ALBATROSS_GP_H + +#include "CovarianceFunctions" + +#include "eigen/serializable_ldlt.h" +#include "models/gp.h" + +#endif \ No newline at end of file diff --git a/albatross/cereal/traits.h b/albatross/cereal/traits.h new file mode 100644 index 00000000..ed86730f --- /dev/null +++ b/albatross/cereal/traits.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2018 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_CEREAL_TRAITS_H +#define ALBATROSS_CEREAL_TRAITS_H + +#include "cereal/details/traits.hpp" + +namespace albatross { + +/* + * This little trick was borrowed from cereal, you can think of it as + * a function that will always return false ... but that doesn't + * get resolved until template instantiation, which when combined + * with a static assert lets you include a static assert that + * only triggers when a particular template parameter is used. + */ +template struct delay_static_assert : std::false_type {}; + +/* + * The following helper functions let you inspect a type and cereal Archive + * and determine if the type has a valid serialization method for that Archive + * type.
+ */ +template class valid_output_serializer { + template + static typename std::enable_if< + 1 == cereal::traits::detail::count_output_serializers::value, + std::true_type>::type + test(int); + template static std::false_type test(...); + +public: + static constexpr bool value = decltype(test(0))::value; +}; + +template class valid_input_serializer { + template + static typename std::enable_if< + 1 == cereal::traits::detail::count_input_serializers::value, + std::true_type>::type + test(int); + template static std::false_type test(...); + +public: + static constexpr bool value = decltype(test(0))::value; +}; + +template class valid_in_out_serializer { + template + static typename std::enable_if::value && + valid_output_serializer::value, + std::true_type>::type + test(int); + template static std::false_type test(...); + +public: + static constexpr bool value = decltype(test(0))::value; +}; +} + +#endif diff --git a/albatross/core/concatenate.h b/albatross/core/concatenate.h index 0b6db188..64af59b7 100644 --- a/albatross/core/concatenate.h +++ b/albatross/core/concatenate.h @@ -13,9 +13,6 @@ #ifndef ALBATROSS_CORE_CONCATENATE_H #define ALBATROSS_CORE_CONCATENATE_H -#include "core/dataset.h" -#include "map_utils.h" - namespace albatross { template diff --git a/albatross/core/dataset.h b/albatross/core/dataset.h index f8e6e93b..b21b32b7 100644 --- a/albatross/core/dataset.h +++ b/albatross/core/dataset.h @@ -13,28 +13,8 @@ #ifndef ALBATROSS_CORE_DATASET_H #define ALBATROSS_CORE_DATASET_H -#include "core/distribution.h" -#include "core/traits.h" -#include -#include -#include -#include - namespace albatross { -// A JointDistribution has a dense covariance matrix, which -// contains the covariance between each variable and all others. 
-using JointDistribution = Distribution; - -// We use a wrapper around DiagonalMatrix in order to make -// the resulting distribution serializable -using DiagonalMatrixXd = - Eigen::SerializableDiagonalMatrix; -// A MarginalDistribution has only a digaonal covariance -// matrix, so in turn only describes the variance of each -// variable independent of all others. -using MarginalDistribution = Distribution; - /* * A RegressionDataset holds two vectors of data, the features * where a single feature can be any class that contains the information used @@ -67,6 +47,8 @@ template struct RegressionDataset { metadata == other.metadata); } + std::size_t size() const { return features.size(); } + template typename std::enable_if::value, void>::type @@ -86,6 +68,17 @@ template struct RegressionDataset { } }; +/* + * Convenience method which subsets the features and targets of a dataset. + */ +template +inline RegressionDataset +subset(const std::vector &indices, + const RegressionDataset &dataset) { + return RegressionDataset(subset(indices, dataset.features), + subset(indices, dataset.targets)); +} + } // namespace albatross #endif diff --git a/albatross/core/declarations.h b/albatross/core/declarations.h index a7e55c11..ba220be0 100644 --- a/albatross/core/declarations.h +++ b/albatross/core/declarations.h @@ -13,13 +13,6 @@ #ifndef ALBATROSS_CORE_DECLARATIONS_H #define ALBATROSS_CORE_DECLARATIONS_H -#include -#include -#include -#include - -#include - namespace Eigen { template @@ -31,15 +24,32 @@ namespace albatross { /* * Model */ -template class RegressionModel; +template class ModelBase; + template struct RegressionDataset; -template struct RegressionFold; -template -class SerializableRegressionModel; -template -using RegressionModelCreator = - std::function>()>; +template struct PredictTypeIdentity; + +template class Prediction; + +template class FitModel; + +template class Fit {}; + +/* + * Parameter Handling + */ +class Prior; +struct Parameter; + +using 
ParameterKey = std::string; +// If you change the way these are stored, be sure there's +// a corresponding cereal type included or you'll get some +// really impressive compilation errors. +using ParameterPrior = std::shared_ptr; +using ParameterValue = double; + +using ParameterStore = std::map; /* * Distributions @@ -51,27 +61,40 @@ using DiagonalMatrixXd = Eigen::SerializableDiagonalMatrix; using MarginalDistribution = Distribution; +/* + * Models + */ +template +class GaussianProcessBase; + +template +class GaussianProcessRegression; + +struct NullLeastSquaresImpl {}; + +template +class LeastSquares; + + + /* * Cross Validation */ -using FoldIndices = std::vector; -using FoldName = std::string; -using FoldIndexer = std::map; -template -using IndexerFunction = + using FoldIndices = std::vector; + using FoldName = std::string; + using FoldIndexer = std::map; + + template + using IndexerFunction = std::function &)>; + template + class CrossValidation; + /* * RANSAC */ -template class GenericRansac; -template -std::unique_ptr> -make_generic_ransac_model(ModelType *model, double inlier_threshold, - std::size_t min_inliers, - std::size_t random_sample_size, - std::size_t max_iterations, - const IndexerFunction &indexer_function); + template class Ransac; } #endif diff --git a/albatross/core/distribution.h b/albatross/core/distribution.h index d29a6fd7..d3a38970 100644 --- a/albatross/core/distribution.h +++ b/albatross/core/distribution.h @@ -13,13 +13,6 @@ #ifndef ALBATROSS_CORE_DISTRIBUTION_H #define ALBATROSS_CORE_DISTRIBUTION_H -#include "cereal/cereal.hpp" -#include "core/traits.h" -#include "eigen/serializable_diagonal_matrix.h" -#include -#include -#include - namespace albatross { /* @@ -30,37 +23,33 @@ namespace albatross { template struct Distribution { Eigen::VectorXd mean; CovarianceType covariance; - // Sometimes it can be helpful to keep track of some - // auxillary information regarding how a distribution was - // derived, that can be stored in this 
map. - std::map metadata; - - std::size_t size() const { - // If the covariance is defined it must have the same number - // of rows and columns which should be the same size as the mean. - assert_valid(); - return mean.size(); - } - - void assert_valid() const { - if (covariance.size() > 0) { - assert(covariance.rows() == covariance.cols()); - assert(mean.size() == covariance.rows()); - } - } - - bool has_covariance() const { - assert_valid(); - return covariance.size() > 0; - } Distribution() : mean(), covariance(){}; Distribution(const Eigen::VectorXd &mean_) : mean(mean_), covariance(){}; Distribution(const Eigen::VectorXd &mean_, const CovarianceType &covariance_) : mean(mean_), covariance(covariance_){}; - double get_diagonal(Eigen::Index i) const { - return has_covariance() ? covariance.diagonal()[i] : NAN; + std::size_t size() const; + + void assert_valid() const; + + bool has_covariance() const; + + double get_diagonal(Eigen::Index i) const; + + template + Distribution + subset(const std::vector &indices) const; + + bool operator==(const Distribution &other) const { + return (mean == other.mean && covariance == other.covariance); + } + + template + typename std::enable_if< + !std::is_same::value, bool>::type + operator==(const Distribution &other) const { + return false; } /* @@ -72,7 +61,6 @@ template struct Distribution { serialize(Archive &archive) { archive(cereal::make_nvp("mean", mean)); archive(cereal::make_nvp("covariance", covariance)); - archive(cereal::make_nvp("metadata", metadata)); } /* @@ -87,25 +75,46 @@ template struct Distribution { "In order to serialize a Distribution the corresponding " "CovarianceType must be serializable."); } - - bool operator==(const Distribution &other) const { - return (mean == other.mean && covariance == other.covariance && - metadata == other.metadata); - } }; -// A JointDistribution has a dense covariance matrix, which -// contains the covariance between each variable and all others. 
-using JointDistribution = Distribution; - -// We use a wrapper around DiagonalMatrix in order to make -// the resulting distribution serializable -using DiagonalMatrixXd = - Eigen::SerializableDiagonalMatrix; -// A MarginalDistribution has only a digaonal covariance -// matrix, so in turn only describes the variance of each -// variable independent of all others. -using MarginalDistribution = Distribution; +template +std::size_t Distribution::size() const { + // If the covariance is defined it must have the same number + // of rows and columns which should be the same size as the mean. + assert_valid(); + return static_cast(mean.size()); +} + +template +void Distribution::assert_valid() const { + if (covariance.size() > 0) { + assert(covariance.rows() == covariance.cols()); + assert(mean.size() == covariance.rows()); + } +} + +template +bool Distribution::has_covariance() const { + assert_valid(); + return covariance.size() > 0; +} + +template +double Distribution::get_diagonal(Eigen::Index i) const { + return has_covariance() ? covariance.diagonal()[i] : NAN; +} + +template +Distribution subset(const std::vector &indices, + const Distribution &dist) { + auto subset_mean = albatross::subset(indices, Eigen::VectorXd(dist.mean)); + if (dist.has_covariance()) { + auto subset_cov = albatross::symmetric_subset(indices, dist.covariance); + return Distribution(subset_mean, subset_cov); + } else { + return Distribution(subset_mean); + } +} } // namespace albatross diff --git a/albatross/core/fit_model.h b/albatross/core/fit_model.h new file mode 100644 index 00000000..07ee8159 --- /dev/null +++ b/albatross/core/fit_model.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2019 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. 
+ * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_CORE_FIT_MODEL_H +#define ALBATROSS_CORE_FIT_MODEL_H + +namespace albatross { + +template +class FitModel { + public: + template + friend class Prediction; + + static_assert(std::is_move_constructible::value, + "Fit type must be move constructible to avoid unexpected copying."); + + FitModel(const ModelType &model, + Fit &&fit) + : model_(model), fit_(std::move(fit)) {} + + template + Prediction + get_prediction(const std::vector &features) const { + return Prediction(*this, features); + } + + private: + const ModelType model_; + const Fit fit_; + +}; + +} +#endif diff --git a/albatross/core/functional_model.h b/albatross/core/functional_model.h index c7917aa4..e7b95ff9 100644 --- a/albatross/core/functional_model.h +++ b/albatross/core/functional_model.h @@ -13,8 +13,6 @@ #ifndef ALBATROSS_CORE_FUNCTIONAL_MODEL_H #define ALBATROSS_CORE_FUNCTIONAL_MODEL_H -#include "core/model.h" - namespace albatross { // This struct is just a type helper to make it obvious that diff --git a/albatross/core/indexing.h b/albatross/core/indexing.h index 9aa6fd54..6056c590 100644 --- a/albatross/core/indexing.h +++ b/albatross/core/indexing.h @@ -13,22 +13,8 @@ #ifndef ALBATROSS_CORE_INDEXING_H #define ALBATROSS_CORE_INDEXING_H -#include "core/dataset.h" -#include -#include -#include -#include - namespace albatross { -using FoldIndices = std::vector; -using FoldName = std::string; -using FoldIndexer = std::map; - -template -using IndexerFunction = - std::function &)>; - /* * Extract a subset of a standard vector. */ @@ -56,17 +42,6 @@ inline Eigen::VectorXd subset(const std::vector &indices, return out; } -/* - * Convenience method which subsets the features and targets of a dataset. 
- */ -template -inline RegressionDataset -subset(const std::vector &indices, - const RegressionDataset &dataset) { - return RegressionDataset(subset(indices, dataset.features), - subset(indices, dataset.targets)); -} - /* * Extracts a subset of columns from an Eigen::Matrix */ @@ -105,8 +80,7 @@ inline Eigen::MatrixXd subset(const std::vector &row_indices, /* * Takes a symmetric subset of an Eigen::Matrix. Ie, it'll index the same rows - * and - * columns. + * and columns. */ template inline Eigen::MatrixXd symmetric_subset(const std::vector &indices, @@ -125,35 +99,6 @@ symmetric_subset(const std::vector &indices, return subset(indices, v.diagonal()).asDiagonal(); } -template -Distribution subset(const std::vector &indices, - const Distribution &dist) { - auto mean = subset(indices, Eigen::VectorXd(dist.mean)); - if (dist.has_covariance()) { - auto cov = symmetric_subset(indices, dist.covariance); - return Distribution(mean, cov); - } else { - return Distribution(mean); - } -} - -/* - * A combination of training and testing datasets, typically used in cross - * validation. 
- */ -template struct RegressionFold { - RegressionDataset train_dataset; - RegressionDataset test_dataset; - FoldName name; - FoldIndices test_indices; - - RegressionFold(const RegressionDataset &train_dataset_, - const RegressionDataset &test_dataset_, - const FoldName &name_, const FoldIndices &test_indices_) - : train_dataset(train_dataset_), test_dataset(test_dataset_), name(name_), - test_indices(test_indices_){}; -}; - template inline std::vector vector_set_difference(const std::vector &x, const std::vector &y) { @@ -170,128 +115,13 @@ inline std::vector vector_set_difference(const std::vector &x, * In other words, indices and indices_complement(indices) should * contain all the numbers between 0 and n-1 */ -inline FoldIndices indices_complement(const FoldIndices &indices, const int n) { - FoldIndices all_indices(n); +inline std::vector indices_complement(const std::vector &indices, + const std::size_t n) { + std::vector all_indices(n); std::iota(all_indices.begin(), all_indices.end(), 0); return vector_set_difference(all_indices, indices); } -/* - * Each flavor of cross validation can be described by a set of - * FoldIndices, which store which indices should be used for the - * test cases. This function takes a map from FoldName to - * FoldIndices and a dataset and creates the resulting folds. - */ -template -static inline std::vector> -folds_from_fold_indexer(const RegressionDataset &dataset, - const FoldIndexer &groups) { - // For a dataset with n features, we'll have n folds. - const std::size_t n = dataset.features.size(); - std::vector> folds; - // For each fold, partition into train and test sets. - for (const auto &pair : groups) { - // These get exposed inside the returned RegressionFold and because - // we'd like to prevent modification of the output from this function - // from changing the input FoldIndexer we perform a copy here. 
- const FoldName group_name(pair.first); - const FoldIndices test_indices(pair.second); - const auto train_indices = indices_complement(test_indices, n); - - std::vector train_features = - subset(train_indices, dataset.features); - MarginalDistribution train_targets = subset(train_indices, dataset.targets); - - std::vector test_features = - subset(test_indices, dataset.features); - MarginalDistribution test_targets = subset(test_indices, dataset.targets); - - assert(train_features.size() == train_targets.size()); - assert(test_features.size() == test_targets.size()); - assert(test_targets.size() + train_targets.size() == n); - - const RegressionDataset train_split(train_features, - train_targets); - const RegressionDataset test_split(test_features, - test_targets); - folds.push_back(RegressionFold(train_split, test_split, - group_name, test_indices)); - } - return folds; -} - -template -static inline FoldIndexer -leave_one_out_indexer(const RegressionDataset &dataset) { - FoldIndexer groups; - for (std::size_t i = 0; i < dataset.features.size(); i++) { - FoldName group_name = std::to_string(i); - groups[group_name] = {i}; - } - return groups; -} - -/* - * Splits a dataset into cross validation folds where each fold contains all but - * one predictor/target pair. - */ -template -static inline FoldIndexer leave_one_group_out_indexer( - const std::vector &features, - const std::function &get_group_name) { - FoldIndexer groups; - for (std::size_t i = 0; i < features.size(); i++) { - const std::string k = get_group_name(features[i]); - // Get the existing indices if we've already encountered this group_name - // otherwise initialize a new one. - FoldIndices indices; - if (groups.find(k) == groups.end()) { - indices = FoldIndices(); - } else { - indices = groups[k]; - } - // Add the current index. 
- indices.push_back(i); - groups[k] = indices; - } - return groups; -} - -/* - * Splits a dataset into cross validation folds where each fold contains all but - * one predictor/target pair. - */ -template -static inline FoldIndexer leave_one_group_out_indexer( - const RegressionDataset &dataset, - const std::function &get_group_name) { - return leave_one_group_out_indexer(dataset.features, get_group_name); -} - -/* - * Generates cross validation folds which represent leave one out - * cross validation. - */ -template -static inline std::vector> -leave_one_out(const RegressionDataset &dataset) { - return folds_from_fold_indexer( - dataset, leave_one_out_indexer(dataset)); -} - -/* - * Uses a `get_group_name` function to bucket each FeatureType into - * a group, then holds out one group at a time. - */ -template -static inline std::vector> leave_one_group_out( - const RegressionDataset &dataset, - const std::function &get_group_name) { - const FoldIndexer indexer = - leave_one_group_out_indexer(dataset, get_group_name); - return folds_from_fold_indexer(dataset, indexer); -} - } // namespace albatross #endif diff --git a/albatross/core/keys.h b/albatross/core/keys.h index 2e6ff964..ff0b63e7 100644 --- a/albatross/core/keys.h +++ b/albatross/core/keys.h @@ -13,8 +13,6 @@ #ifndef ALBATROSS_CORE_KEYS_H #define ALBATROSS_CORE_KEYS_H -#include - namespace albatross { namespace keys { const std::string YAML_MODEL_NAME = "model_name"; diff --git a/albatross/core/model.h b/albatross/core/model.h index 42b98054..5c5559dc 100644 --- a/albatross/core/model.h +++ b/albatross/core/model.h @@ -13,266 +13,116 @@ #ifndef ALBATROSS_CORE_MODEL_H #define ALBATROSS_CORE_MODEL_H -#include "core/dataset.h" -#include "core/indexing.h" -#include "core/parameter_handling_mixin.h" -#include "map_utils.h" -#include "traits.h" -#include -#include -#include -#include - namespace albatross { -namespace detail { -// This is effectively just a container that allows us to develop methods -// which 
behave different conditional on the type of predictions desired. -template struct PredictTypeIdentity { typedef T type; }; -} - -// This can be used to make intentions more obvious when calling -// predict variants for which you only want the mean. -using PredictMeanOnly = Eigen::VectorXd; using Insights = std::map; -/* - * A model that uses a single Feature to estimate the value of a double typed - * target. - */ -template -class RegressionModel : public ParameterHandlingMixin { -public: - using Feature = FeatureType; - RegressionModel() : ParameterHandlingMixin(), has_been_fit_(){}; - virtual ~RegressionModel(){}; - - virtual bool operator==(const RegressionModel &other) const { - // If the fit method has been called it's possible that some unknown - // class members may have been modified. As such, if a model has been - // fit we fail hard to avoid possibly unexpected behavior. Any - // implementation that wants a functional equality operator after - // having been fit will need to override this one. - assert(!has_been_fit()); - return (get_name() == other.get_name() && - get_params() == other.get_params() && - has_been_fit() == other.has_been_fit()); - } - - /* - * Provides a wrapper around the implementation `fit_` which performs - * simple size checks and makes sure the fit method is called before - * predict. - */ - void fit(const std::vector &features, - const MarginalDistribution &targets) { - assert(features.size() > 0); - assert(features.size() == static_cast(targets.size())); - has_been_fit_ = true; - insights_["input_feature_count"] = std::to_string(features.size()); - fit_(features, targets); - } - - /* - * Convenience function which assumes zero target covariance. - */ - void fit(const std::vector &features, - const Eigen::VectorXd &targets) { - return fit(features, MarginalDistribution(targets)); - } - - /* - * Convenience function which unpacks a dataset into features and targets. 
- */ - void fit(const RegressionDataset &dataset) { - return fit(dataset.features, dataset.targets); - } - - /* - * Similar to fit, this predict methods wrap the implementation `predict_*_` - * and makes simple checks to confirm the implementation is returning - * properly sized Distribution. - */ - template - PredictType predict(const std::vector &features) const { - return predict(features, detail::PredictTypeIdentity()); - } - - template - PredictType predict(const FeatureType &feature) const { - std::vector features = {feature}; - return predict(features); - } - - template - std::vector - cross_validated_predictions(const RegressionDataset &dataset, - const FoldIndexer &fold_indexer) { - return cross_validated_predictions_( - dataset, fold_indexer, detail::PredictTypeIdentity()); - } - - // Because cross validation can never properly produce a full - // joint distribution it is common to only use the marginal - // predictions, hence the different default from predict. - template - std::vector cross_validated_predictions( - const std::vector> &folds) { - // Iteratively make predictions and assemble the output vector - std::vector predictions; - for (std::size_t i = 0; i < folds.size(); i++) { - fit(folds[i].train_dataset); - predictions.push_back( - predict(folds[i].test_dataset.features)); - } - return predictions; - } - - std::string pretty_string() const { - std::ostringstream ss; - ss << get_name() << std::endl; - ss << ParameterHandlingMixin::pretty_string(); - return ss.str(); - } - - virtual bool has_been_fit() const { return has_been_fit_; } +template class ModelBase : public ParameterHandlingMixin { - virtual std::string get_name() const = 0; + template + friend class Prediction; - virtual Insights get_insights() const { return insights_; } + template + friend class fit_model_type; - virtual void add_insights(const Insights &insights) { - for (const auto &insight : insights) { - insights_[insight.first] = insight.second; - } - }; - - virtual 
std::unique_ptr> - ransac_model(double inlier_threshold, std::size_t min_inliers, - std::size_t random_sample_size, std::size_t max_iterations) { - static_assert( - is_complete>::value, - "ransac methods aren't complete yet, be sure you've included ransac.h"); - return make_generic_ransac_model( - this, inlier_threshold, min_inliers, random_sample_size, max_iterations, - leave_one_out_indexer); - } + private: + // Declaring these private makes it impossible to accidentally do things like: + // class A : public ModelBase {} + // or + // using A = ModelBase; + // + // which if unchecked can lead to some very strange behavior. + ModelBase() : insights_(){}; + friend ModelType; + Insights insights_; /* - * Here we define the serialization routines. Note that while in most - * cases we could use the cereal method `serialize`, in this case we don't - * know for sure where the parameters are stored. The - * GaussianProcessRegression - * model, for example, derives its parameters from its covariance function, - * so it's `params_` are actually empty. As a result we need to use the - * save/load cereal variant and deal with parameters through the get/set - * interface. 
+ * Fit */ - template void save(Archive &archive) const { - auto params = get_params(); - archive(cereal::make_nvp("parameters", params)); - archive(cereal::make_nvp("has_been_fit", has_been_fit_)); - } - - template void load(Archive &archive) { - auto params = get_params(); - archive(cereal::make_nvp("parameters", params)); - archive(cereal::make_nvp("has_been_fit", has_been_fit_)); - set_params(params); - } + template < + typename FeatureType, + typename std::enable_if::value, + int>::type = 0> + auto + fit_(const std::vector &features, + const MarginalDistribution &targets) const { + auto fit = derived().fit(features, targets); + return FitModel(derived(), std::move(fit)); + } + + template ::value && + !has_valid_fit::value, + int>::type = 0> + void + fit_(const std::vector &features, + const MarginalDistribution &targets) const = delete; // Invalid fit_impl_ + + template ::value && + !has_valid_fit::value, + int>::type = 0> + void + fit_(const std::vector &features, + const MarginalDistribution &targets) const = delete; // No fit_impl_ found. + + template ::value, + int>::type = 0> + PredictType predict_(const std::vector &features, + const FitType &fit, + PredictTypeIdentity &&) const { + return derived().predict(features, fit, PredictTypeIdentity()); + } + + template ::value, + int>::type = 0> + PredictType predict_(const std::vector &features, + const FitType &fit, + PredictTypeIdentity &&) const = delete; // No valid predict. 
-protected: - virtual void fit_(const std::vector &features, - const MarginalDistribution &targets) = 0; /* - * Predict specializations + * CRTP Helpers */ - - JointDistribution - predict(const std::vector &features, - detail::PredictTypeIdentity &&) const { - assert(has_been_fit()); - JointDistribution preds = predict_(features); - assert(static_cast(preds.mean.size()) == features.size()); - return preds; - } - - MarginalDistribution - predict(const std::vector &features, - detail::PredictTypeIdentity &&) const { - assert(has_been_fit()); - MarginalDistribution preds = predict_marginal_(features); - assert(static_cast(preds.mean.size()) == features.size()); - return preds; + ModelType &derived() { return *static_cast(this); } + const ModelType &derived() const { + return *static_cast(this); } - Eigen::VectorXd - predict(const std::vector &features, - detail::PredictTypeIdentity &&) const { - assert(has_been_fit()); - Eigen::VectorXd preds = predict_mean_(features); - assert(static_cast(preds.size()) == features.size()); - return preds; - } - - /* - * Cross validation specializations - * - * Note the naming here uses a trailing underscore. 
This is to avoid - * name hiding when implementing one of these methods in a derived - * class: - * - * https://stackoverflow.com/questions/1628768/why-does-an-overridden-function-in-the-derived-class-hide-other-overloads-of-the - */ - virtual std::vector cross_validated_predictions_( - const RegressionDataset &dataset, - const FoldIndexer &fold_indexer, - const detail::PredictTypeIdentity &) { - const auto folds = folds_from_fold_indexer(dataset, fold_indexer); - return cross_validated_predictions(folds); - } +public: - virtual std::vector cross_validated_predictions_( - const RegressionDataset &dataset, - const FoldIndexer &fold_indexer, - const detail::PredictTypeIdentity &) { - const auto folds = folds_from_fold_indexer(dataset, fold_indexer); - return cross_validated_predictions(folds); + template ::value, int>::type = 0> + std::string get_name() { + return typeid(ModelType).name(); } - virtual std::vector cross_validated_predictions_( - const RegressionDataset &dataset, - const FoldIndexer &fold_indexer, - const detail::PredictTypeIdentity &) { - const auto folds = folds_from_fold_indexer(dataset, fold_indexer); - return cross_validated_predictions(folds); + template ::value, int>::type = 0> + std::string get_name() { + return derived().name(); } - virtual JointDistribution - predict_(const std::vector &features) const = 0; - - virtual MarginalDistribution - predict_marginal_(const std::vector &features) const { - const auto full_distribution = predict_(features); - return MarginalDistribution( - full_distribution.mean, - full_distribution.covariance.diagonal().asDiagonal()); + template + auto + get_fit_model(const std::vector &features, + const MarginalDistribution &targets) const { + return fit_(features, targets); } - virtual Eigen::VectorXd - predict_mean_(const std::vector &features) const { - const auto marginal_distribution = predict_marginal_(features); - return marginal_distribution.mean; + template + auto + get_fit_model(const RegressionDataset 
&dataset) const { + return fit_(dataset.features, dataset.targets); } - bool has_been_fit_; - Insights insights_; }; -template -using RegressionModelCreator = - std::function>()>; -} // namespace albatross - +} #endif diff --git a/albatross/core/model_adapter.h b/albatross/core/model_adapter.h index 5307ff73..711f5240 100644 --- a/albatross/core/model_adapter.h +++ b/albatross/core/model_adapter.h @@ -13,11 +13,6 @@ #ifndef ALBATROSS_CORE_MODEL_ADAPTER_H #define ALBATROSS_CORE_MODEL_ADAPTER_H -#include "functional_model.h" -#include "model.h" -#include "serialize.h" -#include "traits.h" - namespace albatross { /* @@ -31,39 +26,16 @@ namespace albatross { * to adapt something that has extended RegressionModel. */ template -class AdaptedRegressionModel - : public choose_regression_model_implementation::type { - +class AdaptedRegressionModel : public ModelBase { public: - using SubFeature = typename SubModelType::Feature; - using RegressionModelImplementation = - typename choose_regression_model_implementation::type; - - static_assert(std::is_same>::value || - std::is_base_of, - RegressionModelImplementation>::value, - "The template parameter RegressionModelImplementation must be " - "derived from RegressionModel"); - - static_assert( - !has_fit_type::value || - std::is_same< - typename fit_type_or_void::type, - typename fit_type_or_void::type>::value, - "If the RegressionModelImplementation is serializable, it must have the " - "same FitType as the sub_model"); AdaptedRegressionModel() : sub_model_(){}; AdaptedRegressionModel(const SubModelType &sub_model) : sub_model_(sub_model){}; - virtual ~AdaptedRegressionModel(){}; // This function will often be required by AdaptedModels // The default implementation is a null operation. 
- virtual SubFeature + template -#include -#include -#include -#include -#include -#include - -#include "cereal/cereal.hpp" -#include "keys.h" -#include "map_utils.h" -#include "priors.h" -#include - namespace albatross { -using ParameterKey = std::string; -// If you change the way these are stored, be sure there's -// a corresponding cereal type included or you'll get some -// really impressive compilation errors. -using ParameterPrior = std::shared_ptr; -using ParameterValue = double; - struct TunableParameters { std::vector values; std::vector lower_bounds; @@ -89,8 +68,6 @@ struct Parameter { } }; -using ParameterStore = std::map; - /* * Prints out a set of parameters in a way that is both * readable and can be easily copy/pasted into code. diff --git a/albatross/core/parameter_macros.h b/albatross/core/parameter_macros.h index da4771c0..086ef53c 100644 --- a/albatross/core/parameter_macros.h +++ b/albatross/core/parameter_macros.h @@ -13,15 +13,6 @@ #ifndef ALBATROSS_CORE_PARAMETER_MACROS_H #define ALBATROSS_CORE_PARAMETER_MACROS_H -#include "core/parameter_handling_mixin.h" -#include -#include - -using albatross::Parameter; -using albatross::ParameterKey; -using albatross::ParameterValue; -using albatross::ParameterStore; - /* * The for each functionality was taken from: * https://codecraft.co/2014/11/25/variadic-macros-tricks/ @@ -82,7 +73,7 @@ using albatross::ParameterStore; * $1 = value; * } else if (key == "$2") { * $2 = value; - * } + * } else if { * ... * } else { * assert(false); diff --git a/albatross/core/prediction.h b/albatross/core/prediction.h new file mode 100644 index 00000000..15cac69c --- /dev/null +++ b/albatross/core/prediction.h @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2019 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. 
+ * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_CORE_PREDICTION_H +#define ALBATROSS_CORE_PREDICTION_H + +namespace albatross { + +// This is effectively just a container that allows us to develop methods +// which behave different conditional on the type of predictions desired. +template struct PredictTypeIdentity { typedef T type; }; + +template +class Prediction { + +public: + Prediction(const FitModel &fit_model, + const std::vector &features) + : fit_model_(fit_model), features_(features) {} + + /* + * MEAN + */ + template < + typename DummyType = FeatureType, + typename std::enable_if< + has_valid_predict_mean::value, int>::type = 0> + Eigen::VectorXd mean() const { + static_assert(std::is_same::value, + "never do prediction.mean()"); + return fit_model_.model_.predict_(features_, fit_model_.fit_, PredictTypeIdentity()); + } + + template ::value && + has_valid_predict_marginal::value, + int>::type = 0> + Eigen::VectorXd mean() const { + static_assert(std::is_same::value, + "never do prediction.mean()"); + return fit_model_.model_.predict_(features_, fit_model_.fit_, PredictTypeIdentity()) + .mean; + } + + template ::value && + !has_valid_predict_marginal::value && + has_valid_predict_joint::value, + int>::type = 0> + Eigen::VectorXd mean() const { + static_assert(std::is_same::value, + "never do prediction.mean()"); + return fit_model_.model_.predict_(features_, fit_model_.fit_, PredictTypeIdentity()) + .mean; + } + + /* + * MARGINAL + */ + template ::value, + int>::type = 0> + MarginalDistribution marginal() const { + static_assert(std::is_same::value, + "never do prediction.marginal()"); + return fit_model_.model_.predict_(features_, + fit_model_.fit_, + PredictTypeIdentity()); + } + + template ::value && + has_valid_predict_joint::value, + 
int>::type = 0> + MarginalDistribution marginal() const { + static_assert(std::is_same::value, + "never do prediction.marginal()"); + const auto joint_pred = + fit_model_.model_.predict_(features_, fit_model_.fit_, PredictTypeIdentity()); + if (joint_pred.has_covariance()) { + Eigen::VectorXd diag = joint_pred.covariance.diagonal(); + return MarginalDistribution(joint_pred.mean, diag.asDiagonal()); + } else { + return MarginalDistribution(joint_pred.mean); + } + } + + /* + * JOINT + */ + template < + typename DummyType = FeatureType, + typename std::enable_if< + has_valid_predict_joint::value, int>::type = 0> + JointDistribution joint() const { + static_assert(std::is_same::value, + "never do prediction.joint()"); + return fit_model_.model_.predict_(features_, fit_model_.fit_, PredictTypeIdentity()); + } + + /* + * CATCH FAILURE MODES + */ + template ::value && + !has_valid_predict_marginal::value && + !has_valid_predict_joint::value, + int>::type = 0> + Eigen::VectorXd mean() const = delete; // No valid predict method found. + + template ::value && + !has_valid_predict_joint::value, + int>::type = 0> + Eigen::VectorXd marginal() const = delete; // No valid predict marginal method found. + + template ::value, + int>::type = 0> + Eigen::VectorXd joint() const = delete; // No valid predict joint method found. 
+ +private: + const FitModel &fit_model_; + const std::vector &features_; +}; + +} +#endif diff --git a/albatross/core/priors.h b/albatross/core/priors.h index 97f258c4..97e603c0 100644 --- a/albatross/core/priors.h +++ b/albatross/core/priors.h @@ -13,11 +13,6 @@ #ifndef ALBATROSS_CORE_PRIORS_H #define ALBATROSS_CORE_PRIORS_H -#include -#include -#include -#include - namespace albatross { constexpr double LOG_2PI_ = 1.8378770664093453; diff --git a/albatross/core/serialize.h b/albatross/core/serialize.h index 3106cc1d..24d29f4f 100644 --- a/albatross/core/serialize.h +++ b/albatross/core/serialize.h @@ -13,14 +13,6 @@ #ifndef ALBATROSS_CORE_SERIALIZE_H #define ALBATROSS_CORE_SERIALIZE_H -#include "core/traits.h" -#include -#include -#include -#include - -#include - namespace albatross { template diff --git a/albatross/core/traits.h b/albatross/core/traits.h index 92884a44..2d1a0e5e 100644 --- a/albatross/core/traits.h +++ b/albatross/core/traits.h @@ -13,268 +13,170 @@ #ifndef ALBATROSS_CORE_TRAITS_H #define ALBATROSS_CORE_TRAITS_H -#include "cereal/details/traits.hpp" -#include "core/declarations.h" -#include - namespace albatross { /* - * This little trick was borrowed from cereal, you an think of it as - * a function that will always return false ... but that doesn't - * get resolved until template instantiation, which when combined - * with a static assert let's you include a static assert that - * only triggers with a particular template parameter is used. + * Checks if a class type is complete by using sizeof. 
+ * + * https://stackoverflow.com/questions/25796126/static-assert-that-template-typename-t-is-not-complete */ -template struct delay_static_assert : std::false_type {}; +template class is_complete { + template + static std::true_type test(int); + template static std::false_type test(...); -/* - * In CovarianceFunction we frequently inspect for definitions of - * call_impl_ which MUST be defined for const references to objects - * (so that repeated covariance matrix evaluations return the same thing - * and so the computations are not repeatedly copying.) - * This type conversion utility will turn a type `T` into `const T&` - */ -template struct call_impl_arg_type { - typedef - typename std::add_lvalue_reference::type>::type - type; +public: + static constexpr bool value = decltype(test(0))::value; }; /* - * This determines whether or not a class has a method defined for, - * `operator() (const X &x, const Y &y, const Z &z, ...)` - * The result of the inspection gets stored in the member `value`. + * This determines whether or not a class, T, has a method, + * `std::string T.name() const` */ -template class has_call_operator { +template class has_name { + template ().name())> + static + typename std::enable_if::value, std::true_type>::type + test(int); - template ()( - std::declval::type>()...))> - static std::true_type test(C *); - template static std::false_type test(...); + template static std::false_type test(...); public: static constexpr bool value = decltype(test(0))::value; }; /* - * This determines whether or not a class has a method defined for, - * `double call_impl_(const X &x, const Y &y, const Z &z, ...)` - * The result of the inspection gets stored in the member `value`. + * Like std::is_base_of except compares the first template parameter. 
Ie, + * + * first_template_param_is_base_of, C>::value == is_base_of::value */ -template class has_valid_call_impl { +template +struct first_template_param_is_base_of : public std::false_type {}; - template - static typename std::is_same< - decltype(std::declval().call_impl_( - std::declval::type>()...)), - double>::type - test(C *); - template static std::false_type test(...); +template