diff --git a/.gitmodules b/.gitmodules index 4b5acef5..c35d88e7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,18 +1,18 @@ [submodule "third_party/eigen"] path = third_party/eigen url = https://github.com/PX4/eigen.git -[submodule "third_party/yaml-cpp"] - path = third_party/yaml-cpp - url = https://github.com/jbeder/yaml-cpp.git [submodule "third_party/gflags"] path = third_party/gflags url = https://github.com/gflags/gflags.git -[submodule "third_party/rapidcheck"] - path = third_party/rapidcheck - url = https://github.com/emil-e/rapidcheck.git [submodule "third_party/googletest"] path = third_party/googletest url = https://github.com/google/googletest.git [submodule "third_party/fast-cpp-csv-parser"] path = third_party/fast-cpp-csv-parser url = https://github.com/ben-strasser/fast-cpp-csv-parser.git +[submodule "third_party/cereal"] + path = third_party/cereal + url = https://github.com/USCiLab/cereal.git +[submodule "third_party/Optional"] + path = third_party/Optional + url = https://github.com/akrzemi1/Optional.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d5e58dc..e3ab2f93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,6 @@ project(albatross) cmake_minimum_required(VERSION 2.8.7) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") -include(ClangTools) -include(SanitizeTargets) # Enable Clang auto-formatting, defaults to on. option(ENABLE_AUTOFORMAT "Enable automatic formatting and replacement with clang-format." ON) @@ -13,12 +11,11 @@ option(ENABLE_AUTOLINT "Enable automatic linting and warning with clang-tidy. " option(ENABLE_STACK_ANALYSIS "Enable stack analysis. Requires gcc." 
OFF) add_subdirectory(third_party/googletest) -add_subdirectory(third_party/rapidcheck) -add_subdirectory(third_party/rapidcheck/extras/gtest) include_directories(SYSTEM ${gtest_SOURCE_DIR} ${gtest_SOURCE_DIR}/include) include_directories(SYSTEM "${PROJECT_SOURCE_DIR}/third_party/eigen/") -include_directories(SYSTEM "${PROJECT_SOURCE_DIR}/third_party/yaml-cpp/include") +include_directories(SYSTEM "${PROJECT_SOURCE_DIR}/third_party/cereal/include") +include_directories(SYSTEM "${PROJECT_SOURCE_DIR}/third_party/Optional/") include_directories(SYSTEM "${PROJECT_SOURCE_DIR}/third_party/fast-cpp-csv-parser/") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -fexceptions -frtti") @@ -27,7 +24,6 @@ set(CMAKE_CXX_FLAGS "-std=c++14 -Wshadow -Wswitch-default -Wswitch-enum -Wundef include_directories("${PROJECT_SOURCE_DIR}/albatross") include(${PROJECT_SOURCE_DIR}/cmake/FindGFlags.cmake) -include(${PROJECT_SOURCE_DIR}/cmake/FindYaml-Cpp.cmake) set(albatross_SRCS CACHE INTERNAL "" @@ -40,13 +36,14 @@ set(albatross_HEADERS albatross/covariance_functions/noise.h albatross/covariance_functions/polynomials.h albatross/covariance_functions/radial.h - albatross/gp/gp.h + albatross/models/gp.h albatross/core/keys.h albatross/core/model.h albatross/core/static_inspection.h albatross/core/parameter_handling_mixin.h albatross/evaluate.h albatross/map_utils.h + tests/test_utils.h CACHE INTERNAL "" ) diff --git a/albatross/core/model.h b/albatross/core/model.h index dbdd0eed..165be5ac 100644 --- a/albatross/core/model.h +++ b/albatross/core/model.h @@ -16,9 +16,13 @@ #include #include #include +#include "optional.hpp" #include "map_utils.h" +#include "static_inspection.h" #include "parameter_handling_mixin.h" +using std::experimental::optional; + namespace albatross { struct PredictionDistribution { @@ -39,12 +43,12 @@ struct PredictionDistribution { * it is assumed that each feature is regressed to a single double typed * target. 
*/ -template +template struct RegressionDataset { - std::vector features; + std::vector features; Eigen::VectorXd targets; - RegressionDataset(const std::vector &features_, + RegressionDataset(const std::vector &features_, const Eigen::VectorXd &targets_) : features(features_), targets(targets_) { // If the two inputs aren't the same size they clearly aren't @@ -63,15 +67,15 @@ using FoldIndexer = std::map; * A combination of training and testing datasets, typically used in cross * validation. */ -template +template struct RegressionFold { - RegressionDataset train; - RegressionDataset test; + RegressionDataset train; + RegressionDataset test; FoldName name; FoldIndices test_indices; - RegressionFold(const RegressionDataset &train_, - const RegressionDataset &test_, + RegressionFold(const RegressionDataset &train_, + const RegressionDataset &test_, const FoldName &name_, const FoldIndices &test_indices_) : train(train_), test(test_), name(name_), test_indices(test_indices_){}; }; @@ -80,10 +84,11 @@ struct RegressionFold { * A model that uses a single Feature to estimate the value of a double typed * target. */ -template +template class RegressionModel : public ParameterHandlingMixin { public: - RegressionModel() : ParameterHandlingMixin(), has_been_fit_(false){}; + typedef FeatureType Feature; + RegressionModel() : ParameterHandlingMixin(), model_fit_() {}; virtual ~RegressionModel(){}; /* @@ -91,19 +96,20 @@ class RegressionModel : public ParameterHandlingMixin { * simple size checks and makes sure the fit method is called before * predict. */ - void fit(const std::vector &features, + ModelFit fit(const std::vector &features, const Eigen::VectorXd &targets) { assert(static_cast(features.size()) == static_cast(targets.size())); - fit_(features, targets); - has_been_fit_ = true; + auto model_fit = fit_(features, targets); + model_fit_ = model_fit; + return model_fit; } /* * Convenience function which unpacks a dataset into features and targets. 
*/ - void fit(const RegressionDataset &dataset) { - fit(dataset.features, dataset.targets); + ModelFit fit(const RegressionDataset &dataset) { + return fit(dataset.features, dataset.targets); } /* @@ -112,8 +118,8 @@ class RegressionModel : public ParameterHandlingMixin { * properly sized PredictionDistributions. */ PredictionDistribution predict( - const std::vector &features) const { - assert(has_been_fit_); + const std::vector &features) const { + assert(model_fit_); PredictionDistribution preds = predict_(features); assert(static_cast(preds.mean.size()) == static_cast(features.size())); @@ -127,9 +133,9 @@ class RegressionModel : public ParameterHandlingMixin { * some models. */ PredictionDistribution fit_and_predict( - const std::vector &train_features, + const std::vector &train_features, const Eigen::VectorXd &train_targets, - const std::vector &test_features) { + const std::vector &test_features) { // Fit using the training data, then predict with the test. fit(train_features, train_targets); return predict(test_features); @@ -140,24 +146,31 @@ class RegressionModel : public ParameterHandlingMixin { * in a RegressionFold struct */ PredictionDistribution fit_and_predict( - const RegressionFold &fold) { + const RegressionFold &fold) { return fit_and_predict(fold.train.features, fold.train.targets, fold.test.features); } + virtual std::string get_name() const = 0; + protected: - virtual void fit_(const std::vector &features, - const Eigen::VectorXd &targets) = 0; + /* + * These methods are required from an implementing class; notice that + * the methods are marked `const`. Anything that needs to be stored + * in the model should be returned in the `ModelFit` type. 
+ */ + virtual ModelFit fit_(const std::vector &features, + const Eigen::VectorXd &targets) const = 0; virtual PredictionDistribution predict_( - const std::vector &features) const = 0; + const std::vector &features) const = 0; - bool has_been_fit_ = false; + optional model_fit_; }; -template +template using RegressionModelCreator = - std::function>()>; + std::function>()>; } #endif diff --git a/albatross/core/parameter_handling_mixin.h b/albatross/core/parameter_handling_mixin.h index b05396b8..14714ace 100644 --- a/albatross/core/parameter_handling_mixin.h +++ b/albatross/core/parameter_handling_mixin.h @@ -13,7 +13,6 @@ #ifndef ALBATROSS_CORE_PARAMETER_HANDLING_MIXIN_H #define ALBATROSS_CORE_PARAMETER_HANDLING_MIXIN_H -#include #include #include #include @@ -34,50 +33,6 @@ class ParameterHandlingMixin { virtual ~ParameterHandlingMixin(){}; - virtual std::string get_name() const = 0; - - YAML::Node to_yaml() const { - YAML::Node yaml_model; - yaml_model[keys::YAML_MODEL_NAME] = get_name(); - - YAML::Node yaml_params; - for (const auto &pair : get_params()) { - yaml_params[pair.first] = pair.second; - } - yaml_model[keys::YAML_MODEL_PARAMS] = yaml_params; - return yaml_model; - } - - std::string to_string() const { return YAML::Dump(to_yaml()); } - - void to_file(const std::string &path) const { - std::ofstream output_file; - output_file.open(path); - output_file << to_string(); - output_file.close(); - } - - void from_string(const std::string &serialized_string) { - // Load the YAML config file - const YAML::Node yaml_params = YAML::Load(serialized_string); - from_yaml(yaml_params); - } - - void from_yaml(const YAML::Node &yaml_input) { - YAML::Node yaml_params = yaml_input; - if (YAML::Node model_name = yaml_params[keys::YAML_MODEL_NAME]) { - assert(model_name.as() == get_name()); - yaml_params = yaml_params[keys::YAML_MODEL_PARAMS].as(); - } - - ParameterStore params; - for (YAML::const_iterator it = yaml_params.begin(); it != yaml_params.end(); - ++it) { - 
params[it->first.as()] = it->second.as(); - } - set_params(params); - } - /* * Provides a safe interface to the parameter values */ @@ -98,10 +53,9 @@ class ParameterHandlingMixin { * Prints out a set of parameters in a way that is both * readable and can be easily copy/pasted into code. */ - std::string pretty_params() { + std::string to_string() const { std::stringstream ss; - ss << "name = " << get_name() << std::endl; - ss << "params = {" << std::endl; + ss << "{" << std::endl; for (const auto &pair : get_params()) { ss << " {\"" << pair.first << "\", " << pair.second << "}," << std::endl; @@ -152,6 +106,7 @@ class ParameterHandlingMixin { protected: ParameterStore params_; }; + } #endif diff --git a/albatross/covariance_functions/covariance_functions.h b/albatross/covariance_functions/covariance_functions.h index 3c6300c8..ad80637e 100644 --- a/albatross/covariance_functions/covariance_functions.h +++ b/albatross/covariance_functions/covariance_functions.h @@ -68,25 +68,14 @@ struct CovarianceFunction { */ inline auto get_name() const { return covariance_term.get_name(); }; - inline auto to_string() const { return covariance_term.to_string(); }; inline auto get_params() const { return covariance_term.get_params(); }; - inline auto to_yaml() const { return covariance_term.to_yaml(); }; - inline auto to_file(const std::string &path) const { - return covariance_term.to_file(path); - }; - inline auto from_string(const std::string &serialized_string) { - return covariance_term.from_string(serialized_string); - }; - inline auto from_yaml(const YAML::Node &yaml_input) { - return covariance_term.from_yaml(yaml_input); - } inline auto set_params(const ParameterStore ¶ms) { return covariance_term.set_params(params); }; inline auto set_param(const ParameterKey &key, const ParameterValue &value) { return covariance_term.set_param(key, value); }; - inline auto pretty_params() const { return covariance_term.pretty_params(); }; + inline auto to_string() const { return 
covariance_term.to_string(); }; inline auto get_params_as_vector() const { return covariance_term.get_params_as_vector(); }; diff --git a/albatross/covariance_functions/covariance_term.h b/albatross/covariance_functions/covariance_term.h index 5f65a522..19b879b0 100644 --- a/albatross/covariance_functions/covariance_term.h +++ b/albatross/covariance_functions/covariance_term.h @@ -29,6 +29,8 @@ class CovarianceTerm : public ParameterHandlingMixin { public: CovarianceTerm() : ParameterHandlingMixin(){}; virtual ~CovarianceTerm(){}; + + virtual std::string get_name() const = 0; }; /* diff --git a/albatross/covariance_functions/distance_metrics.h b/albatross/covariance_functions/distance_metrics.h index ea37df07..6d1f10d0 100644 --- a/albatross/covariance_functions/distance_metrics.h +++ b/albatross/covariance_functions/distance_metrics.h @@ -23,6 +23,7 @@ class DistanceMetric : public ParameterHandlingMixin { DistanceMetric(){}; virtual ~DistanceMetric(){}; + virtual std::string get_name() const = 0; protected: }; diff --git a/albatross/evaluate.h b/albatross/evaluate.h index 023522df..8247be3a 100644 --- a/albatross/evaluate.h +++ b/albatross/evaluate.h @@ -54,12 +54,12 @@ static inline double standard_deviation(const PredictionDistribution& prediction * test cases. This function takes a map from FoldName to * FoldIndices and a dataset and creates the resulting folds. */ -template -static inline std::vector> folds_from_fold_indexer( - const RegressionDataset& dataset, const FoldIndexer& groups) { +template +static inline std::vector> folds_from_fold_indexer( + const RegressionDataset& dataset, const FoldIndexer& groups) { // For a dataset with n features, we'll have n folds. const s32 n = static_cast(dataset.features.size()); - std::vector> folds; + std::vector> folds; // For each fold, partition into train and test sets. 
for (const auto& pair : groups) { // These get exposed inside the returned RegressionFold and because @@ -69,9 +69,9 @@ static inline std::vector> folds_from_fold_indexer( const FoldIndices indices(pair.second); const s32 k = static_cast(indices.size()); - std::vector train_features(static_cast(n - k)); + std::vector train_features(static_cast(n - k)); Eigen::VectorXd train_targets(n - k); - std::vector test_features(static_cast(k)); + std::vector test_features(static_cast(k)); Eigen::VectorXd test_targets(k); s32 train_cnt = 0; @@ -95,18 +95,18 @@ static inline std::vector> folds_from_fold_indexer( } assert(test_cnt == k); assert(train_cnt == n - k); - const RegressionDataset train_split(train_features, + const RegressionDataset train_split(train_features, train_targets); - const RegressionDataset test_split(test_features, + const RegressionDataset test_split(test_features, test_targets); - folds.push_back(RegressionFold(train_split, test_split, + folds.push_back(RegressionFold(train_split, test_split, group_name, indices)); } return folds; } -template -static inline FoldIndexer leave_one_out_indexer(const RegressionDataset& dataset) { +template +static inline FoldIndexer leave_one_out_indexer(const RegressionDataset& dataset) { FoldIndexer groups; for (s32 i = 0; i < static_cast(dataset.features.size()); i++) { FoldName group_name = std::to_string(i); @@ -119,10 +119,10 @@ static inline FoldIndexer leave_one_out_indexer(const RegressionDataset +template static inline FoldIndexer leave_one_group_out_indexer( - const RegressionDataset& dataset, - const std::function& get_group_name) { + const RegressionDataset& dataset, + const std::function& get_group_name) { FoldIndexer groups; for (s32 i = 0; i < static_cast(dataset.features.size()); i++) { const std::string k = @@ -146,24 +146,24 @@ static inline FoldIndexer leave_one_group_out_indexer( * Generates cross validation folds which represent leave one out * cross validation. 
*/ -template -static inline std::vector> leave_one_out( - const RegressionDataset& dataset) { - return folds_from_fold_indexer( - dataset, leave_one_out_indexer(dataset)); +template +static inline std::vector> leave_one_out( + const RegressionDataset& dataset) { + return folds_from_fold_indexer( + dataset, leave_one_out_indexer(dataset)); } /* - * Uses a `get_group_name` function to bucket each Predictor into + * Uses a `get_group_name` function to bucket each FeatureType into * a group, then holds out one group at a time. */ -template -static inline std::vector> leave_one_group_out( - const RegressionDataset& dataset, - const std::function& get_group_name) { +template +static inline std::vector> leave_one_group_out( + const RegressionDataset& dataset, + const std::function& get_group_name) { const FoldIndexer indexer = - leave_one_group_out_indexer(dataset, get_group_name); - return folds_from_fold_indexer(dataset, indexer); + leave_one_group_out_indexer(dataset, get_group_name); + return folds_from_fold_indexer(dataset, indexer); } /* @@ -172,10 +172,10 @@ static inline std::vector> leave_one_group_out( * for things like computing an EvaluationMetric for each fold, or assembling * all the predictions into a single cross validated PredictionDistribution. */ -template +template static inline std::vector cross_validated_predictions( - const std::vector>& folds, - RegressionModel* model) { + const std::vector>& folds, + RegressionModel* model) { // Iteratively make predictions and assemble the output vector std::vector predictions; for (std::size_t i = 0; i < folds.size(); i++) { @@ -190,9 +190,9 @@ static inline std::vector cross_validated_predictions( * Iterates over previously computed predictions for each fold and * returns a vector of scores for each fold. 
*/ -template +template static inline Eigen::VectorXd compute_scores( - const std::vector>& folds, + const std::vector>& folds, const EvaluationMetric& metric, const std::vector& predictions) { // Create a vector of metrics, one for each fold. @@ -210,13 +210,13 @@ static inline Eigen::VectorXd compute_scores( * Iterates over each fold in a cross validation set and fits/predicts and * scores the fold, returning a vector of scores for each fold. */ -template +template static inline Eigen::VectorXd cross_validated_scores( - const std::vector>& folds, - const EvaluationMetric& metric, RegressionModel* model) { + const std::vector>& folds, + const EvaluationMetric& metric, RegressionModel* model) { // Create a vector of predictions. std::vector predictions = - cross_validated_predictions(folds, model); + cross_validated_predictions(folds, model); return compute_scores(folds, metric, predictions); } @@ -230,15 +230,15 @@ static inline Eigen::VectorXd cross_validated_scores( * a time, so the full dense prediction covariance is * unknown. */ -template +template static inline PredictionDistribution cross_validated_predict( - const std::vector>& folds, - RegressionModel* model) { + const std::vector>& folds, + RegressionModel* model) { // Get the cross validated predictions, note however that // depending on the type of folds, these predictions may // be shuffled. const std::vector predictions = - cross_validated_predictions(folds, model); + cross_validated_predictions(folds, model); // Create a new prediction mean that will eventually contain // the ordered concatenation of each fold's predictions. 
s32 n = 0; diff --git a/albatross/gp/gp.h b/albatross/models/gp.h similarity index 67% rename from albatross/gp/gp.h rename to albatross/models/gp.h index 7349748a..db1aa82f 100644 --- a/albatross/gp/gp.h +++ b/albatross/models/gp.h @@ -22,41 +22,47 @@ namespace albatross { +template +struct GaussianProcessFit { + std::vector train_features; + Eigen::VectorXd information; + Eigen::LDLT ldlt; +}; + template -class GaussianProcessRegression : public RegressionModel { +class GaussianProcessRegression : public RegressionModel> { public: GaussianProcessRegression(CovarianceFunction& covariance_function) - : covariance_function_(covariance_function), - train_features_(), - ldlt_(), - information_(){}; + : covariance_function_(covariance_function) {}; ~GaussianProcessRegression(){}; std::string get_name() const override { return "gaussian_process_regression"; }; - void fit_(const std::vector& features, - const Eigen::VectorXd& targets) override { - train_features_ = features; - Eigen::MatrixXd cov = symmetric_covariance(covariance_function_, train_features_); + GaussianProcessFit fit_(const std::vector& features, + const Eigen::VectorXd& targets) const override { + GaussianProcessFit model_fit; + Eigen::MatrixXd cov = symmetric_covariance(covariance_function_, features); // Precompute the information vector which is all we need in // order to make predictions. 
- ldlt_ = cov.ldlt(); - information_ = ldlt_.solve(targets); + model_fit.train_features = features; + model_fit.information = cov.ldlt().solve(targets); + model_fit.ldlt = cov.ldlt(); + return model_fit; } PredictionDistribution predict_( const std::vector& features) const override { const auto cross_cov = asymmetric_covariance(covariance_function_, - features, train_features_); - + features, + this->model_fit_->train_features); // Then we can use the information vector to determine the posterior - const Eigen::VectorXd pred = cross_cov * information_; + const Eigen::VectorXd pred = cross_cov * this->model_fit_->information; Eigen::MatrixXd pred_cov = symmetric_covariance(covariance_function_, features); - pred_cov -= cross_cov * ldlt_.solve(cross_cov.transpose()); + pred_cov -= cross_cov * this->model_fit_->ldlt.solve(cross_cov.transpose()); return PredictionDistribution(pred, pred_cov); } @@ -64,13 +70,14 @@ class GaussianProcessRegression : public RegressionModel { template PredictionDistribution inspect( const std::vector& features) const { - assert(this->has_been_fit_); + assert(this->model_fit_); const auto cross_cov = asymmetric_covariance(covariance_function_, - features, train_features_); + features, + this->model_fit_->train_features); // Then we can use the information vector to determine the posterior - const Eigen::VectorXd pred = cross_cov * information_; + const Eigen::VectorXd pred = cross_cov * this->model_fit_->information; Eigen::MatrixXd pred_cov = symmetric_covariance(covariance_function_, features); - pred_cov -= cross_cov * ldlt_.solve(cross_cov.transpose()); + pred_cov -= cross_cov * this->model_fit_->ldlt.solve(cross_cov.transpose()); assert(static_cast(pred.size()) == static_cast(features.size())); return PredictionDistribution(pred, pred_cov); @@ -87,9 +94,6 @@ class GaussianProcessRegression : public RegressionModel { private: CovarianceFunction covariance_function_; - std::vector train_features_; - Eigen::LDLT ldlt_; - Eigen::VectorXd 
information_; }; template diff --git a/albatross/models/linear_regression.h b/albatross/models/linear_regression.h new file mode 100644 index 00000000..db8b815c --- /dev/null +++ b/albatross/models/linear_regression.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2018 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_MODELS_LINEAR_REGRESSION_H +#define ALBATROSS_MODELS_LINEAR_REGRESSION_H + +/* + * Here we define a LinearRegression model, not so much because + * it'll be super useful on its own (albatross is probably overkill + * if all you need is linear regression) but because it serves as an + * example of extending `RegressionModel` to non-Gaussian-process models. 
+ */ + +#include +#include +#include +#include +#include + +namespace albatross { + +struct LinearRegressionCoefs { + Eigen::VectorXd coefs; +}; + +class LinearRegression : public RegressionModel { + public: + LinearRegression() {}; + std::string get_name() const { return "linear_regression"; }; + + private: + + LinearRegressionCoefs fit_(const std::vector &features, + const Eigen::VectorXd &targets) const override { + int m = static_cast(features.size()); + int n = static_cast(features[0].size()); + + Eigen::MatrixXd A(m, n); + for (int i = 0; i < m; i++) { + A.row(i) = features[static_cast(i)]; + } + + Eigen::VectorXd rhs = A.transpose() * targets; + LinearRegressionCoefs model_fit; + model_fit.coefs = (A.transpose() * A).ldlt().solve(rhs); + return model_fit; + } + + PredictionDistribution predict_(const std::vector &features) const { + int n = static_cast(features.size()); + Eigen::VectorXd predictions(n); + + for (s32 i = 0; i < n; i++) { + predictions(i) = features[static_cast(i)].dot(model_fit_->coefs); + } + + return PredictionDistribution(predictions); + } + + Eigen::VectorXd coefs_; +}; + +} + +#endif diff --git a/cmake/ClangTools.cmake b/cmake/ClangTools.cmake deleted file mode 100644 index dac5375a..00000000 --- a/cmake/ClangTools.cmake +++ /dev/null @@ -1,118 +0,0 @@ -# CMake script searches for clang-tidy and clang-format and sets the following -# variables: -# -# CLANG_TIDY_PATH : Fully-qualified path to the clang-tidy executable -# CLANG_FORMAT_PATH : Fully-qualified path to the clang-format executable -# -# Additionally defines the following targets: -# -# clang-tidy-all : Run clang-tidy over all files. -# clang-tidy-diff : Run clang-tidy over all files differing from master. -# clang-format-all : Run clang-format over all files. -# clang-format-diff : Run clang-format over all files differing from master. -# cppcheck-all : Run cppcheck over all files. - -# Do not use clang tooling when cross compiling. 
-if(CMAKE_CROSSCOMPILING) - return() -endif(CMAKE_CROSSCOMPILING) - -################################################################################ -# Search for tools. -################################################################################ - -# Check for Clang Tidy -set(CLANG_TIDY_PATH "NOTSET" CACHE STRING "Absolute path to the clang-tidy executable") -if("${CLANG_TIDY_PATH}" STREQUAL "NOTSET") - find_program(CLANG_TIDY NAMES - clang-tidy39 clang-tidy-3.9 - clang-tidy38 clang-tidy-3.8 - clang-tidy37 clang-tidy-3.7 - clang-tidy36 clang-tidy-3.6 - clang-tidy35 clang-tidy-3.5 - clang-tidy34 clang-tidy-3.4 - clang-tidy) - if("${CLANG_TIDY}" STREQUAL "CLANG_TIDY-NOTFOUND") - message(WARNING "Could not find 'clang-tidy' please set CLANG_TIDY_PATH:STRING") - else() - set(CLANG_TIDY_PATH ${CLANG_TIDY}) - message(STATUS "Found: ${CLANG_TIDY_PATH}") - endif() -else() - if(NOT EXISTS ${CLANG_TIDY_PATH}) - message(WARNING "Could not find 'clang-tidy': ${CLANG_TIDY_PATH}") - else() - message(STATUS "Found: ${CLANG_TIDY_PATH}") - endif() -endif() - -# Check for Clang format -set(CLANG_FORMAT_PATH "NOTSET" CACHE STRING "Absolute path to the clang-format executable") -if("${CLANG_FORMAT_PATH}" STREQUAL "NOTSET") - find_program(CLANG_FORMAT NAMES - clang-format39 clang-format-3.9 - clang-format38 clang-format-3.8 - clang-format37 clang-format-3.7 - clang-format36 clang-format-3.6 - clang-format35 clang-format-3.5 - clang-format34 clang-format-3.4 - clang-format) - if("${CLANG_FORMAT}" STREQUAL "CLANG_FORMAT-NOTFOUND") - message(WARNING "Could not find 'clang-format' please set CLANG_FORMAT_PATH:STRING") - else() - set(CLANG_FORMAT_PATH ${CLANG_FORMAT}) - message(STATUS "Found: ${CLANG_FORMAT_PATH}") - endif() -else() - if(NOT EXISTS ${CLANG_FORMAT_PATH}) - message(WARNING "Could not find 'clang-format': ${CLANG_FORMAT_PATH}") - else() - message(STATUS "Found: ${CLANG_FORMAT_PATH}") - endif() -endif() - 
-################################################################################ -# Conditionally add targets. -################################################################################ - -if (EXISTS ${CLANG_TIDY_PATH}) - # Tidy all files .cc files (and their headers) in project - # Second stage of pipeline makes an absolute path for each file. Note that - # git ls-files and diff-tree behave differently in prepending the file path. - add_custom_target(clang-tidy-all - COMMAND git ls-files -- '../src/*.cc' - | sed 's/^...//' | sed 's\#\^\#${CMAKE_SOURCE_DIR}/\#' - | xargs -P 2 -I file "${CLANG_TIDY_PATH}" - -export-fixes="${CMAKE_SOURCE_DIR}/fixes.yaml" file -- -stdlib=libc++ -std=c++14 "-I${CMAKE_SOURCE_DIR}/include/" "-isystem${CMAKE_SOURCE_DIR}/third_party/eigen/" "-I${CMAKE_SOURCE_DIR}/libfec/include/" "-isystem${CMAKE_SOURCE_DIR}/third_party/Optional" "-isystem${CMAKE_SOURCE_DIR}/third_party/variant/include" "-I${CMAKE_SOURCE_DIR}/refactor/common" "-I${CMAKE_SOURCE_DIR}/include/libswiftnav" "-isystem${CMAKE_SOURCE_DIR}/third_party/json/src" - ) - # Lint *.cc files that differ from master, and are not listed as being - # DELETED. 
- add_custom_target(clang-tidy-diff - COMMAND git diff --diff-filter=ACMRTUXB --name-only master -- '../src/*.cc' - | sed 's\#\^\#${CMAKE_SOURCE_DIR}/\#' - | xargs -P 2 -I file "${CLANG_TIDY_PATH}" file -- -std=c++14 -stdlib=libc++ - "-I${CMAKE_SOURCE_DIR}/include/" "-isystem${CMAKE_SOURCE_DIR}/third_party/eigen/" "-I${CMAKE_SOURCE_DIR}/libfec/include/" "-isystem${CMAKE_SOURCE_DIR}/third_party/Optional" "-isystem${CMAKE_SOURCE_DIR}/third_party/variant/include" "-I${CMAKE_SOURCE_DIR}/refactor/common" "-I${CMAKE_SOURCE_DIR}/include/libswiftnav" "-isystem${CMAKE_SOURCE_DIR}/third_party/json/src" - ) -endif() -if (EXISTS ${CLANG_FORMAT_PATH}) - # Format all files .cc files (and their headers) in project - add_custom_target(clang-format-all - COMMAND git ls-files -- '../*.cc' '../include/libswiftnav/pvt_engine/*.h' '../test_pvt_engine/*.h' - | sed 's/^...//' | sed 's\#\^\#${CMAKE_SOURCE_DIR}/\#' - | xargs "${CLANG_FORMAT_PATH}" -i - ) - # In-place format *.cc files that differ from master, and are not listed as - # being DELETED. 
- add_custom_target(clang-format-diff - COMMAND git diff --diff-filter=ACMRTUXB --name-only master -- '../*.cc' '../include/libswiftnav/pvt_engine/*.h' '../test_pvt_engine/*.h' - | sed 's\#\^\#${CMAKE_SOURCE_DIR}/\#' - | xargs "${CLANG_FORMAT_PATH}" -i - ) -endif() -if (EXISTS ${CPPCHECK_PATH}) - add_custom_target(cppcheck-all - COMMAND git ls-files -- '../*.cc' '../include/libswiftnav/pvt_engine/*.h' '../test_pvt_engine/*.h' - | ${CPPCHECK_PATH} --enable=all --std=c++14 -I../include -I../test_pvt_engine/include -isystem../third_party/eigen -isystem../third_party/Optional -isystem../third_party/googletest/googletest/include -isystem../third_party/variant/include -isystem../third_party/json/src -q --file-list=- - ) -endif() - diff --git a/cmake/FindYaml-Cpp.cmake b/cmake/FindYaml-Cpp.cmake deleted file mode 100644 index 8cb71fb6..00000000 --- a/cmake/FindYaml-Cpp.cmake +++ /dev/null @@ -1,50 +0,0 @@ -cmake_minimum_required(VERSION 2.8) - -# This brings in the external project support in cmake -include(ExternalProject) - -set(SWIFT_LIBYAML_CMAKE_CXX_FLAGS "") -if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(SWIFT_LIBYAML_CMAKE_CXX_FLAGS "-stdlib=libc++") -endif() - -# This adds yaml-cpp as an external project with the specified parameters. -ExternalProject_Add(libyaml-cpp - # We use SOURCE_DIR because we use version control to track the - # version of this library instead of using the build tool - SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/yaml-cpp - # We don't want to install this globally; we just want to use it in - # place. - INSTALL_COMMAND cmake -E echo "Not installing yaml-cpp globally." - # This determines the subdirectory of the build directory in which - # yaml-cpp gets built. - PREFIX yaml-cpp - # This simply passes down cmake arguments, which allows us to define - # yaml-cpp-specific cmake flags as arguments to the toplevel cmake - # invocation. 
- CMAKE_ARGS -DCMAKE_CXX_FLAGS=${SWIFT_LIBYAML_CMAKE_CXX_FLAGS} -DYAML_CPP_BUILD_TOOLS=OFF -DYAML_CPP_BUILD_CONTRIB=OFF ${CMAKE_ARGS}) - -# This pulls out the variables `source_dir` and `binary_dir` from the -# yaml-cpp project, so we can refer to them below. -ExternalProject_Get_Property(libyaml-cpp source_dir binary_dir) - -# This tells later `target_link_libraries` commands about the yaml-cpp -# library. -add_library(yaml-cpp STATIC IMPORTED GLOBAL) - -# This tells where the static yaml-cpp binary will end up. I have no -# idea how to control this and just found it with `locate`. -set_property(TARGET yaml-cpp - PROPERTY IMPORTED_LOCATION "${binary_dir}/libyaml-cpp.a") - -# This makes the yaml-cpp library depend on the yaml-cpp external -# project, so that when you ask to link against yaml-cpp, the external -# project will get built. -add_dependencies(yaml-cpp libyaml-cpp) - -# This tells where the yaml-cpp headers generated during the build -# process will end up. I have no idea how to control this and just -# found it with `locate`. Note that any targets specified after this -# file fragment is included will now include yaml-cpp headers as part of -# their compile commands. -include_directories(SYSTEM "${binary_dir}") diff --git a/cmake/SanitizeTargets.cmake b/cmake/SanitizeTargets.cmake deleted file mode 100644 index 270d0569..00000000 --- a/cmake/SanitizeTargets.cmake +++ /dev/null @@ -1,87 +0,0 @@ -# Runtime analysis using Clang sanitization flags. - -option(ENABLE_SANITIZERS "Enable sanitizers." OFF) -option(ANALYZE_ADDRESS "Enable address sanitizer." OFF) -option(ANALYZE_LEAK "Enable leak sanitizer." OFF) -option(ANALYZE_MEMORY "Enable memory sanitizer." OFF) -option(ANALYZE_THREAD "Enable thread sanitizer." OFF) -option(ANALYZE_UNDEFINED "Enable undefined behavior sanitizer." OFF) -option(ANALYZE_DATAFLOW "Enable dataflow sanitizer." OFF) - -if (ENABLE_SANITIZERS) - # Some of these options can't be used simultaneously. 
- # - if (ANALYZE_ADDRESS AND ANALYZE_MEMORY ) - message(WARNING "Can't -fsanitize address/memory simultaneously.") - endif () - if (ANALYZE_MEMORY AND ANALYZE_THREAD ) - message(WARNING "Can't -fsanitize memory/thread simultaneously.") - endif () - if (ANALYZE_ADDRESS AND ANALYZE_THREAD ) - message(WARNING "Can't -fsanitize address/thread simultaneously.") - endif () - # Instantiate C/C++ and C++-specific flags. - # - set(SANITIZE_FLAGS "") - # Dispatch sanitizer options based on compiler. - # - message(STATUS "Enabling runtime analysis sanitizers!") - if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - # See http://clang.llvm.org/docs and - # http://clang.llvm.org/docs/UsersManual.html#controlling-code-generation - # for more details. - set(SANITIZE_FLAGS "-g -O0 -fno-omit-frame-pointer") - if (ANALYZE_ADDRESS) - message(STATUS "Enabling address sanitizer.") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize=address") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fno-optimize-sibling-calls") - elseif (ANALYZE_MEMORY) - message(STATUS "Enabling memory sanitizer.") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize=memory") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fno-optimize-sibling-calls") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize-memory-track-origins=2") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize-memory-use-after-dtor") - elseif (ANALYZE_THREAD) - message(STATUS "Enabling thread sanitizer.") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize=thread") - endif () - if (ANALYZE_LEAK) - message(STATUS "Enabling leak sanitizer.") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize=leak") - endif () - if (ANALYZE_UNDEFINED) - message(STATUS "Enabling undefined behavior sanitizer.") - # The `vptr` sanitizer won't work with `-fno-rtti`. 
- set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize=undefined -fno-sanitize=vptr") - endif () - elseif (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.8) - # See: https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html - # - # We seem to need `-fuse-ld=gold` on Travis. - set(SANITIZE_FLAGS "-g -O0") - if (ANALYZE_ADDRESS) - message(STATUS "Enabling address sanitizer.") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize=address") - elseif (ANALYZE_MEMORY) - message(STATUS "Enabling memory sanitizer.") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize=memory") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize-memory-track-origins=2") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize-memory-use-after-dtor") - elseif (ANALYZE_THREAD) - message(STATUS "Enabling thread sanitizer.") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize=thread") - elseif (ANALYZE_LEAK) - message(STATUS "Enabling leak sanitizer.") - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize=leak") - endif () - if (ANALYZE_UNDEFINED) - message(STATUS "Enabling undefined behavior sanitizer.") - # The `vptr` sanitizer won't work with `-fno-rtti`. - set(SANITIZE_FLAGS "${SANITIZE_FLAGS} -fsanitize=undefined -fno-sanitize=vptr") - endif () - else () - message(FATAL_ERROR "Oh noes! 
We don't support your compiler.") - endif () - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SANITIZE_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SANITIZE_FLAGS}") -endif () diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 00552f16..f54ad838 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -12,7 +12,7 @@ add_dependencies(sinc_example albatross ) -target_link_libraries(sinc_example m gflags yaml-cpp pthread) +target_link_libraries(sinc_example m gflags pthread) add_custom_target( run_sinc_example ALL @@ -34,7 +34,7 @@ add_dependencies(inspection_example albatross ) -target_link_libraries(inspection_example m gflags yaml-cpp pthread) +target_link_libraries(inspection_example m gflags pthread) add_custom_target( run_inspection_example ALL diff --git a/examples/example_utils.h b/examples/example_utils.h index 4ce915f9..ea95be27 100644 --- a/examples/example_utils.h +++ b/examples/example_utils.h @@ -20,7 +20,7 @@ #include #include "csv.h" -#include "gp/gp.h" +#include "models/gp.h" #include "core/model.h" #include "covariance_functions/covariance_functions.h" @@ -174,8 +174,9 @@ void maybe_create_training_data(std::string input_path, } } +template void write_predictions_to_csv(const std::string output_path, - const albatross::RegressionModel &model, + const albatross::RegressionModel &model, const double low, const double high) { std::ofstream output; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 663b7734..858d7fb8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -9,6 +9,7 @@ test_core_model.cc test_covariance_functions.cc test_evaluate.cc test_map_utils.cc +test_serialize.cc test_parameter_handling_mixin.cc ) @@ -16,7 +17,7 @@ add_dependencies(albatross_unit_tests albatross ) -target_link_libraries(albatross_unit_tests m gtest gtest_main pthread gflags yaml-cpp) +target_link_libraries(albatross_unit_tests m gtest gtest_main pthread gflags) add_custom_target( run_albatross_unit_tests ALL diff 
--git a/tests/test_core_model.cc b/tests/test_core_model.cc index d3b20110..6ccea67f 100644 --- a/tests/test_core_model.cc +++ b/tests/test_core_model.cc @@ -12,54 +12,9 @@ #include #include "core/model.h" +#include "test_utils.h" namespace albatross { - -// A simple predictor which is effectively just an integer. -struct MockPredictor { - int value; - MockPredictor(int v) : value(v){}; -}; - -/* - * A simple model which builds a map from MockPredict (aka, int) - * to a double value. - */ -class MockModel : public RegressionModel { - public: - MockModel() : train_data_(){}; - - std::string get_name() const { return "mock_model"; }; - - private: - // builds the map from int to value - void fit_(const std::vector &features, - const Eigen::VectorXd &targets) { - int n = static_cast(features.size()); - Eigen::VectorXd predictions(n); - - for (int i = 0; i < n; i++) { - train_data_[features[static_cast(i)].value] = targets[i]; - } - } - - // looks up the prediction in the map - PredictionDistribution predict_( - const std::vector &features) const { - int n = static_cast(features.size()); - Eigen::VectorXd predictions(n); - - for (int i = 0; i < n; i++) { - int index = features[static_cast(i)].value; - predictions[i] = train_data_.find(index)->second; - } - - return PredictionDistribution(predictions); - } - - std::map train_data_; -}; - /* * Simply makes sure that a BaseModel that should be able to * make perfect predictions compiles and runs as expected. @@ -74,9 +29,19 @@ TEST(test_base_model, test_base_model) { } MockModel m; - m.fit(features, targets); - PredictionDistribution predictions = m.predict(features); + RegressionDataset dataset(features, targets); + + auto model_fit_direct = m.fit(features, targets); + auto model_fit_dataset = m.fit(dataset); + // It shouldn't matter how we call fit. + EXPECT_EQ(model_fit_direct, model_fit_dataset); + + // We should be able to perfectly predict in this case.
+ PredictionDistribution predictions = m.predict(features); EXPECT_LT((predictions.mean - targets).norm(), 1e-10); } + + + } diff --git a/tests/test_evaluate.cc b/tests/test_evaluate.cc index 3a72af04..bc52d94b 100644 --- a/tests/test_evaluate.cc +++ b/tests/test_evaluate.cc @@ -16,84 +16,13 @@ #include #include #include "evaluate.h" +#include "models/linear_regression.h" -namespace albatross { - -class LinearRegression : public RegressionModel { - public: - LinearRegression() : params_(){}; - std::string get_name() const { return "linear_regression"; }; - - private: - // builds the map from int to value - void fit_(const std::vector &features, - const Eigen::VectorXd &targets) { - (void)targets; - s32 n = static_cast(features.size()); - - Eigen::VectorXd x(n); - for (s32 i = 0; i < n; i++) { - x[i] = features[static_cast(i)]; - } - - Eigen::MatrixXd A(n, 2); - A << Eigen::VectorXd::Ones(n), x; - - // use the normal equations to solve for the params - Eigen::VectorXd rhs = A.transpose() * targets; - params_ = (A.transpose() * A).ldlt().solve(rhs); - } - - // looks up the prediction in the map - PredictionDistribution predict_(const std::vector &features) const { - s32 n = static_cast(features.size()); - Eigen::VectorXd predictions(n); - - Eigen::VectorXd x(n); - for (s32 i = 0; i < n; i++) { - x[i] = features[static_cast(i)]; - } +#include "test_utils.h" - Eigen::MatrixXd A(n, 2); - A << Eigen::VectorXd::Ones(n), x; - - return PredictionDistribution(A * params_); - } - - Eigen::VectorXd params_; -}; - -class LinearModelTest : public ::testing::Test { - public: - LinearModelTest() : model_ptr_(), dataset_({}, {}) { - double a = 5.; - double b = 1.; - double sigma = 0.1; - - std::random_device rd{}; - std::mt19937 gen{rd()}; - gen.seed(3); - std::normal_distribution<> d{0., sigma}; - - s32 n = 10; - std::vector features(static_cast(n)); - Eigen::VectorXd targets(n); - - std::map hist{}; - for (s32 i = 0; i < n; i++) { - features[static_cast(i)] = static_cast(i); - 
targets[i] = a + b * features[static_cast(i)] + d(gen); - } - - model_ptr_ = std::make_unique(); - dataset_ = RegressionDataset(features, targets); - }; - - std::unique_ptr> model_ptr_; - RegressionDataset dataset_; -}; +namespace albatross { -TEST_F(LinearModelTest, test_leave_one_out) { +TEST_F(LinearRegressionTest, test_leave_one_out) { PredictionDistribution preds = model_ptr_->fit_and_predict( dataset_.features, dataset_.targets, dataset_.features); double in_sample_rmse = root_mean_square_error(preds, dataset_.targets); @@ -111,10 +40,10 @@ TEST_F(LinearModelTest, test_leave_one_out) { // Group values by interval, but return keys that once sorted won't be // in order -std::string group_by_interval(double x) { - if (x <= 3) { +std::string group_by_interval(Eigen::VectorXd x) { + if (x[1] <= 3) { return "2"; - } else if (x <= 6) { + } else if (x[1] <= 6) { return "3"; } else { return "1"; @@ -130,8 +59,8 @@ bool is_monotonic_increasing(Eigen::VectorXd &x) { return true; } -TEST_F(LinearModelTest, test_cross_validated_predict) { - const auto folds = leave_one_group_out(dataset_, group_by_interval); +TEST_F(LinearRegressionTest, test_cross_validated_predict) { + const auto folds = leave_one_group_out(dataset_, group_by_interval); PredictionDistribution preds = cross_validated_predict(folds, model_ptr_.get()); @@ -143,8 +72,8 @@ TEST_F(LinearModelTest, test_cross_validated_predict) { EXPECT_TRUE(is_monotonic_increasing(preds.mean)); } -TEST_F(LinearModelTest, test_leave_one_group_out) { - const auto folds = leave_one_group_out(dataset_, group_by_interval); +TEST_F(LinearRegressionTest, test_leave_one_group_out) { + const auto folds = leave_one_group_out(dataset_, group_by_interval); Eigen::VectorXd rmses = cross_validated_scores(folds, root_mean_square_error, model_ptr_.get()); diff --git a/tests/test_parameter_handling_mixin.cc b/tests/test_parameter_handling_mixin.cc index db8f6246..80d55b35 100644 --- a/tests/test_parameter_handling_mixin.cc +++ 
b/tests/test_parameter_handling_mixin.cc @@ -19,7 +19,6 @@ class MockParameterHandler : public ParameterHandlingMixin { public: MockParameterHandler(const ParameterStore &params) : ParameterHandlingMixin(params){}; - std::string get_name() const { return "mock_parameter_handler"; }; }; class TestParameterHandler : public ParameterHandlingMixin { @@ -27,7 +26,6 @@ class TestParameterHandler : public ParameterHandlingMixin { TestParameterHandler() : ParameterHandlingMixin() { params_ = {{"A", 1.}, {"B", 2.}}; }; - std::string get_name() const { return "test_parameter_handler"; }; }; /* @@ -123,25 +121,35 @@ TEST(test_parameter_handler, test_get_set_from_vector) { * Tests to make sure we can serialize from one parameter handler to * another. */ -TEST(test_parameter_handler, test_serialization) { +TEST(test_parameter_handler, test_to_string) { const ParameterStore original = {{"2", 2.}, {"1", 1.}, {"3", 3.}}; const std::vector original_param_vector = {1., 2., 3.}; MockParameterHandler original_handler(original); - // Make another handler that starts with different parameters - MockParameterHandler new_handler({{"2", 4.}, {"1", 5.}, {"3", 6.}}); - // can serialize + // can print as string std::ostringstream os; os << original_handler.to_string() << std::endl; - - // can deserialize - new_handler.from_string(original_handler.to_string()); - - // deserialized has the same paremters - expect_params_equal(original, new_handler.get_params()); - - // And the same parameter vector (ie, order was preserved) - expect_parameter_vector_equal(original_param_vector, - new_handler.get_params_as_vector()); } + +///* +// * Tests to make sure we can serialize from one parameter handler to +// * another.
+// */ +//TEST(test_parameter_handler, test_serialization) { +// const ParameterStore original = {{"2", 2.}, {"1", 1.}, {"3", 3.}}; +// const std::vector original_param_vector = {1., 2., 3.}; +// MockParameterHandler original_handler(original); +// // Make another handler that starts with different parameters +// MockParameterHandler new_handler({{"2", 4.}, {"1", 5.}, {"3", 6.}}); +// +// // can deserialize +// new_handler.from_string(original_handler.to_string()); +// +// // deserialized has the same parameters +// expect_params_equal(original, new_handler.get_params()); +// +// // And the same parameter vector (ie, order was preserved) +// expect_parameter_vector_equal(original_param_vector, +// new_handler.get_params_as_vector()); +//} } diff --git a/tests/test_utils.h b/tests/test_utils.h new file mode 100644 index 00000000..1a535453 --- /dev/null +++ b/tests/test_utils.h @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2018 Swift Navigation Inc. + * Contact: Swift Navigation + * + * This source is subject to the license found in the file 'LICENSE' which must + * be distributed together with this source. All other rights reserved. + * + * THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, + * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef ALBATROSS_TESTS_TEST_UTILS_H +#define ALBATROSS_TESTS_TEST_UTILS_H + +#include +#include +#include +#include +#include +#include "evaluate.h" +#include "models/linear_regression.h" + +namespace albatross { + +// A simple predictor which is effectively just an integer. +struct MockPredictor { + int value; + MockPredictor(int v) : value(v){}; +}; + +struct MockFit { + std::map train_data; + + bool operator == (const MockFit &other) const { + return train_data == other.train_data; + }; +}; + +/* + * A simple model which builds a map from MockPredictor (aka, int) + * to a double value.
+ */ +class MockModel : public RegressionModel { + public: + MockModel() {}; + + std::string get_name() const override{ return "mock_model"; }; + + private: + // builds the map from int to value + MockFit fit_(const std::vector &features, + const Eigen::VectorXd &targets) const override { + int n = static_cast(features.size()); + Eigen::VectorXd predictions(n); + + MockFit model_fit; + for (int i = 0; i < n; i++) { + model_fit.train_data[features[static_cast(i)].value] = targets[i]; + } + return model_fit; + } + + // looks up the prediction in the map; at() throws on an unseen feature + PredictionDistribution predict_( + const std::vector &features) const { + int n = static_cast(features.size()); + Eigen::VectorXd predictions(n); + + for (int i = 0; i < n; i++) { + int index = features[static_cast(i)].value; + predictions[i] = fit_storage_->train_data.at(index); + } + + return PredictionDistribution(predictions); + } +}; + +static inline RegressionDataset make_toy_linear_regression_data(const double a = 5., + const double b = 1., + const double sigma = 0.1) { + + std::random_device rd{}; + std::mt19937 gen{rd()}; + gen.seed(3); + std::normal_distribution<> d{0., sigma}; + + s32 n = 10; + std::vector features; + Eigen::VectorXd targets(n); + Eigen::VectorXd coefs(2); + coefs << a, b; + + for (s32 i = 0; i < n; i++) { + double x = static_cast(i); + auto feature = Eigen::VectorXd(2); + feature << 1., x; + features.push_back(feature); + targets[i] = feature.dot(coefs) + d(gen); + } + + return RegressionDataset(features, targets); +} + +class LinearRegressionTest : public ::testing::Test { + public: + LinearRegressionTest() : model_ptr_(), dataset_({}, {}) { + + model_ptr_ = std::make_unique(); + dataset_ = make_toy_linear_regression_data(); + }; + + std::unique_ptr model_ptr_; + RegressionDataset dataset_; +}; + +} + +#endif diff --git a/third_party/Optional b/third_party/Optional new file mode 160000 index 00000000..f27e7908 --- /dev/null +++ b/third_party/Optional @@ -0,0 +1 @@ +Subproject
commit f27e79084a9176672ed1eae50b3397fa8035d50d diff --git a/third_party/cereal b/third_party/cereal new file mode 160000 index 00000000..51cbda5f --- /dev/null +++ b/third_party/cereal @@ -0,0 +1 @@ +Subproject commit 51cbda5f30e56c801c07fe3d3aba5d7fb9e6cca4 diff --git a/third_party/rapidcheck b/third_party/rapidcheck deleted file mode 160000 index 10fc0cba..00000000 --- a/third_party/rapidcheck +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 10fc0cbaa46a765d8134e99995de9c81c048a331 diff --git a/third_party/yaml-cpp b/third_party/yaml-cpp deleted file mode 160000 index 562aefc1..00000000 --- a/third_party/yaml-cpp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 562aefc114938e388457e6a531ed7b54d9dc1b62