From db059c44510e8abca950fc96e9f194284d17a59a Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 9 Mar 2018 18:07:16 +0100 Subject: [PATCH 001/150] Add a first implementation of KDE Just a preview of Kernel Density Estimation algorithm implemented with dual trees --- src/mlpack/methods/CMakeLists.txt | 1 + src/mlpack/methods/kde/CMakeLists.txt | 20 ++++ src/mlpack/methods/kde/kde.hpp | 65 +++++++++++ src/mlpack/methods/kde/kde_impl.hpp | 108 ++++++++++++++++++ src/mlpack/methods/kde/kde_main.cpp | 69 ++++++++++++ src/mlpack/methods/kde/kde_rules.hpp | 102 +++++++++++++++++ src/mlpack/methods/kde/kde_rules_impl.hpp | 129 ++++++++++++++++++++++ 7 files changed, 494 insertions(+) create mode 100644 src/mlpack/methods/kde/CMakeLists.txt create mode 100644 src/mlpack/methods/kde/kde.hpp create mode 100644 src/mlpack/methods/kde/kde_impl.hpp create mode 100644 src/mlpack/methods/kde/kde_main.cpp create mode 100644 src/mlpack/methods/kde/kde_rules.hpp create mode 100644 src/mlpack/methods/kde/kde_rules_impl.hpp diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index 4e6fc3df987..739557bd95a 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -66,6 +66,7 @@ set(DIRS sparse_autoencoder sparse_coding sparse_svm + kde ) foreach(dir ${DIRS}) diff --git a/src/mlpack/methods/kde/CMakeLists.txt b/src/mlpack/methods/kde/CMakeLists.txt new file mode 100644 index 00000000000..268fb55b8dc --- /dev/null +++ b/src/mlpack/methods/kde/CMakeLists.txt @@ -0,0 +1,20 @@ +# Define the files we need to compile. +# Anything not in this list will not be compiled into mlpack. +set(SOURCES + kde.hpp + kde_impl.hpp + kde_rules.hpp + kde_rules_impl.hpp +) + +# Add directory name to sources. +set(DIR_SRCS) +foreach(file ${SOURCES}) + set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file}) +endforeach() +# Append sources (with directory name) to list of all mlpack sources (used at +# the parent scope). 
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE) + +add_cli_executable(kde) +add_python_binding(kde) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp new file mode 100644 index 00000000000..8453c9df0cd --- /dev/null +++ b/src/mlpack/methods/kde/kde.hpp @@ -0,0 +1,65 @@ +/** + * @file kde.hpp + * @author Roberto Hueso (robertohueso96@gmail.com) + * + * Kernel Density Estimation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#ifndef MLPACK_METHODS_KDE_KDE_HPP +#define MLPACK_METHODS_KDE_KDE_HPP + +#include +#include +#include + +namespace mlpack { +namespace kde /** Kernel Density Estimation. */ { + +template class TreeType = tree::KDTree> +class KDE +{ + public: + + typedef TreeType Tree; + + KDE(const MatType& referenceSet, + const double error = 1e-8, + const double bandwidth = 1.0, + const size_t leafSize = 2); + + ~KDE(); + + void Evaluate(const MatType& query, arma::vec& estimations); + + private: + + const MatType& referenceSet; + + KernelType* kernel; + + Tree* referenceTree; + + double error; + + double bandwidth; + + int leafSize; +}; + +} // namespace kde +} // namespace mlpack + +// Include implementation. +#include "kde_impl.hpp" + +#endif // MLPACK_METHODS_KDE_KDE_HPP diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp new file mode 100644 index 00000000000..ee59b9c121a --- /dev/null +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -0,0 +1,108 @@ +/** + * @file kde_impl.hpp + * @author Roberto Hueso (robertohueso96@gmail.com) + * + * Implementation of Kernel Density Estimation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. 
You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#include "kde.hpp" +#include "kde_rules.hpp" +#include + +namespace mlpack { +namespace kde { + +template class TreeType> +KDE:: +KDE(const MatType& referenceSet, + const double error, + const double bandwidth, + const size_t leafSize) : + referenceSet(referenceSet) +{ + this->referenceTree = new Tree(referenceSet, leafSize); + this->kernel = new KernelType(bandwidth); + this->error = error; + this->bandwidth = bandwidth; + this->leafSize = leafSize; +} + +template class TreeType> +KDE::~KDE() +{ + delete this->referenceTree; + delete this->kernel; +} + +template class TreeType> +void KDE:: +Evaluate(const MatType& query, arma::vec& estimations) +{ + Tree* queryTree = new Tree(query, leafSize); + MetricType metric = MetricType(); + + typedef KDERules RuleType; + RuleType rules = RuleType(this->referenceSet, + query, + estimations, + error, + metric, + *kernel); + // SingleTreeTraverser + /* + typename Tree::template SingleTreeTraverser traverser(rules); + for(size_t i = 0; i < query.n_cols; ++i) + traverser.Traverse(i, *referenceTree); + */ + + //DualTreeTraverser + typename Tree::template DualTreeTraverser traverser(rules); + traverser.Traverse(*queryTree, *referenceTree); + + estimations /= referenceSet.n_cols; + + delete queryTree; + + //Brute force + /*arma::vec result = arma::vec(query.n_cols); + result = arma::zeros(query.n_cols); + + for(size_t i = 0; i < query.n_cols; ++i) + { + arma::vec density = arma::zeros(referenceSet.n_cols); + + for(size_t j = 0; j < this->referenceSet.n_cols; ++j) + { + density(j) = this->kernel.Evaluate(query.col(i), + this->referenceSet.col(j)); + } + result(i) = arma::trunc_log(arma::sum(density)) - + std::log(referenceSet.n_cols); + //this->kernel.Normalizer(query.n_rows); + //result(i) = (1/referenceSet.n_cols)*(accumulated); + } + return result;*/ +} + 
+} // namespace kde +} // namespace mlpack diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp new file mode 100644 index 00000000000..2c700d5a6eb --- /dev/null +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -0,0 +1,69 @@ +/** + * @file kde_main.cpp + * @author Roberto Hueso (robertohueso96@gmail.com) + * + * Executable for running Kernel Density Estimation. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#include +#include +#include +#include + +#include "kde.hpp" + +using namespace mlpack; +using namespace mlpack::kde; +using namespace mlpack::util; +using namespace std; + +// Define parameters for the executable. +PROGRAM_INFO("Kernel Density Estimation", "This program performs a Kernel " + "Density Estimation for a given reference dataset."); + +// Required options. +PARAM_DOUBLE_IN_REQ("bandwidth", "Bandwidth of the kernel", "b"); +PARAM_MATRIX_IN_REQ("reference", "Input dataset to KDE on.", "i"); +PARAM_MATRIX_IN_REQ("query", "Query dataset to KDE on.", "q"); + +// Configuration options +PARAM_STRING_IN("kernel", "Kernel to use for the estimation" + "('gaussian').", "k", "gaussian"); +PARAM_STRING_IN("tree", "Tree to use for the estimation" + "('kd-tree', 'ball-tree).", "t", "kd-tree"); +PARAM_STRING_IN("metric", "Metric to use for the estimation" + "('euclidean').", "m", "euclidean"); +PARAM_INT_IN("leaf-size", "Leaf size to use for the tree", "l", 2); +PARAM_DOUBLE_IN("error", "Relative error tolerance for the result" , "e", 1e-8); +PARAM_FLAG("breadth-first", "Use breadth-first traversal instead of depth" + "first.", "w"); + +// Output options. 
+PARAM_MATRIX_OUT("output", "Matrix to store output estimations.", + "o"); + +static void mlpackMain() +{ + arma::mat reference = CLI::GetParam("reference"); + arma::mat query = CLI::GetParam("query"); + double error = CLI::GetParam("error"); + double bandwidth = CLI::GetParam("bandwidth"); + int leafSize = CLI::GetParam("leaf-size"); + + arma::vec estimations = arma::vec(reference.n_cols, arma::fill::zeros); + kde::KDE + model = kde::KDE<>(reference, error, bandwidth, leafSize); + + model.Evaluate(query, estimations); + //Just for testing purposes. + std::cout.precision(40); + estimations.raw_print(std::cout); +} diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp new file mode 100644 index 00000000000..cb12cb94638 --- /dev/null +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -0,0 +1,102 @@ +/** + * @file kde_rules.hpp + * @author Roberto Hueso (robertohueso96@gmail.com) + * + * Rules Kernel Density estimation, so that it can be done with arbitrary tree + * types. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ + +#ifndef MLPACK_METHODS_KDE_RULES_HPP +#define MLPACK_METHODS_KDE_RULES_HPP + +#include + +namespace mlpack { +namespace kde { + +template +class KDERules +{ + public: + + KDERules(const arma::mat& referenceSet, + const arma::mat& querySet, + arma::vec& densities, + const double error, + MetricType& metric, + const KernelType& kernel); + + double BaseCase(const size_t queryIndex, const size_t referenceIndex); + + //SingleTree + double Score(const size_t queryIndex, TreeType& referenceNode); + + //SingleTree + double Rescore(const size_t queryIndex, + TreeType& referenceNode, + const double oldScore) const; + + //DoubleTree + double Score(TreeType& queryNode, TreeType& referenceNode); + + //DoubleTree + double Rescore(TreeType& queryNode, + TreeType& referenceNode, + const double oldScore) const; + + typedef typename tree::TraversalInfo TraversalInfoType; + + const TraversalInfoType& TraversalInfo() const { return traversalInfo; } + + TraversalInfoType& TraversalInfo() { return traversalInfo; } + + //! Get the number of base cases. + size_t BaseCases() const { return baseCases; } + + //! Get the number of scores. + size_t Scores() const { return scores; } + + private: + //! The reference set. + const arma::mat& referenceSet; + + //! The query set. + const arma::mat& querySet; + + //! Density values + arma::vec& densities; + + const double error; + + //! The instantiated metric. + MetricType& metric; + + const KernelType& kernel; + + //! The last query index. + size_t lastQueryIndex; + + //! The last reference index. + size_t lastReferenceIndex; + + TraversalInfoType traversalInfo; + + //! The number of base cases. + size_t baseCases; + + //! The number of scores. + size_t scores; +}; + +} // namespace kde +} // namespace mlpack + +// Include implementation. 
+#include "kde_rules_impl.hpp" + +#endif diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp new file mode 100644 index 00000000000..5190039e3f8 --- /dev/null +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -0,0 +1,129 @@ +/** + * @file kde_rules_impl.hpp + * @author Roberto Hueso (robertohueso96@gmail.com) + * + * Implementation of rules for Kernel Density Estimation with generic trees. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ + +#ifndef MLPACK_METHODS_KDE_RULES_IMPL_HPP +#define MLPACK_METHODS_KDE_RULES_IMPL_HPP + +// In case it hasn't been included yet. +#include "kde_rules.hpp" + +namespace mlpack { +namespace kde { + +template +KDERules::KDERules( + const arma::mat& referenceSet, + const arma::mat& querySet, + arma::vec& densities, + const double error, + MetricType& metric, + const KernelType& kernel) : + referenceSet(referenceSet), + querySet(querySet), + densities(densities), + error(error), + metric(metric), + kernel(kernel), + lastQueryIndex(querySet.n_cols), + lastReferenceIndex(referenceSet.n_cols), + baseCases(0), + scores(0) +{ + // Nothing to do. +} + +//! The base case. +template +inline force_inline +double KDERules::BaseCase( + const size_t queryIndex, + const size_t referenceIndex) +{ + double distance = metric.Evaluate(querySet.col(queryIndex), + referenceSet.col(referenceIndex)); + densities(queryIndex) += kernel.Evaluate(distance); + + ++baseCases; + lastQueryIndex = queryIndex; + lastReferenceIndex = referenceIndex; + return distance; +} + +//! Single-tree scoring function. 
+template +double KDERules:: +Score(const size_t /* queryIndex */, TreeType& /* referenceNode */) +{ + ++scores; + traversalInfo.LastScore() = 0.0; + return 0.0; +} + +template +double KDERules::Rescore( + const size_t /* queryIndex */, + TreeType& /* referenceNode */, + const double oldScore) const +{ + // If it's pruned it continues to be pruned. + return oldScore; +} + +//! Double-tree scoring function. +template +double KDERules:: +Score(TreeType& queryNode, TreeType& referenceNode) +{ + double score, bound; + bound = kernel.Evaluate(queryNode.MinDistance(referenceNode)) - + kernel.Evaluate(queryNode.MaxDistance(referenceNode)); + + if (bound <= (error / referenceSet.n_cols)) + { + //std::cout << referenceNode.Point(0) << "\n"; + arma::vec center = arma::vec(); + referenceNode.Center(center); + for (size_t i = 0; i < queryNode.NumDescendants(); ++i) + { + densities(queryNode.Point(i)) += + referenceNode.NumDescendants() * + kernel.Evaluate(metric.Evaluate(querySet.col(queryNode.Point(i)), + center)); + } + score = DBL_MAX; + } + else + { + score = queryNode.MinDistance(referenceNode); + } + + ++scores; + traversalInfo.LastQueryNode() = &queryNode; + traversalInfo.LastReferenceNode() = &referenceNode; + traversalInfo.LastScore() = score; + return score; +} + +//! 
Double-tree +template +double KDERules:: +Rescore(TreeType& /*queryNode*/, + TreeType& /*referenceNode*/, + const double oldScore) const +{ + return oldScore; +} + +} // namespace kde +} // namespace mlpack + +#endif From 245e3c39878d192e2a6160163c7d3e8f0ff6dd54 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 6 Apr 2018 16:58:30 +0200 Subject: [PATCH 002/150] Style fix --- src/mlpack/methods/kde/kde.hpp | 8 +++----- src/mlpack/methods/kde/kde_impl.hpp | 11 ++++------ src/mlpack/methods/kde/kde_main.cpp | 4 ++-- src/mlpack/methods/kde/kde_rules.hpp | 25 +++++++++++------------ src/mlpack/methods/kde/kde_rules_impl.hpp | 14 ++++++------- 5 files changed, 27 insertions(+), 35 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 8453c9df0cd..111531fa8c5 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -29,20 +29,18 @@ template Tree; - + KDE(const MatType& referenceSet, const double error = 1e-8, const double bandwidth = 1.0, const size_t leafSize = 2); ~KDE(); - + void Evaluate(const MatType& query, arma::vec& estimations); - - private: + private: const MatType& referenceSet; KernelType* kernel; diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index ee59b9c121a..1756fcaefd6 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -59,8 +59,7 @@ void KDE:: Evaluate(const MatType& query, arma::vec& estimations) { Tree* queryTree = new Tree(query, leafSize); - MetricType metric = MetricType(); - + MetricType metric = MetricType(); typedef KDERules RuleType; RuleType rules = RuleType(this->referenceSet, query, @@ -75,15 +74,13 @@ Evaluate(const MatType& query, arma::vec& estimations) traverser.Traverse(i, *referenceTree); */ - //DualTreeTraverser + // DualTreeTraverser typename Tree::template DualTreeTraverser traverser(rules); traverser.Traverse(*queryTree, *referenceTree); - estimations /= 
referenceSet.n_cols; - delete queryTree; - - //Brute force + + // Brute force /*arma::vec result = arma::vec(query.n_cols); result = arma::zeros(query.n_cols); diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 2c700d5a6eb..c63183f991e 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -54,7 +54,7 @@ static void mlpackMain() double error = CLI::GetParam("error"); double bandwidth = CLI::GetParam("bandwidth"); int leafSize = CLI::GetParam("leaf-size"); - + arma::vec estimations = arma::vec(reference.n_cols, arma::fill::zeros); kde::KDE(reference, error, bandwidth, leafSize); model.Evaluate(query, estimations); - //Just for testing purposes. + // Just for testing purposes. std::cout.precision(40); estimations.raw_print(std::cout); } diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index cb12cb94638..fcb0b96cdeb 100644 --- a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -19,32 +19,31 @@ namespace mlpack { namespace kde { -template +template class KDERules { public: - KDERules(const arma::mat& referenceSet, const arma::mat& querySet, arma::vec& densities, const double error, MetricType& metric, const KernelType& kernel); - + double BaseCase(const size_t queryIndex, const size_t referenceIndex); - //SingleTree + // SingleTree double Score(const size_t queryIndex, TreeType& referenceNode); - //SingleTree + // SingleTree double Rescore(const size_t queryIndex, TreeType& referenceNode, const double oldScore) const; - - //DoubleTree + + // DoubleTree double Score(TreeType& queryNode, TreeType& referenceNode); - //DoubleTree + // DoubleTree double Rescore(TreeType& queryNode, TreeType& referenceNode, const double oldScore) const; @@ -57,7 +56,7 @@ class KDERules //! Get the number of base cases. size_t BaseCases() const { return baseCases; } - + //! Get the number of scores. 
size_t Scores() const { return scores; } @@ -70,9 +69,9 @@ class KDERules //! Density values arma::vec& densities; - + const double error; - + //! The instantiated metric. MetricType& metric; @@ -83,12 +82,12 @@ class KDERules //! The last reference index. size_t lastReferenceIndex; - + TraversalInfoType traversalInfo; //! The number of base cases. size_t baseCases; - + //! The number of scores. size_t scores; }; diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 5190039e3f8..0ef774b00ca 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -42,7 +42,7 @@ KDERules::KDERules( } //! The base case. -template +template inline force_inline double KDERules::BaseCase( const size_t queryIndex, @@ -51,7 +51,6 @@ double KDERules::BaseCase( double distance = metric.Evaluate(querySet.col(queryIndex), referenceSet.col(referenceIndex)); densities(queryIndex) += kernel.Evaluate(distance); - ++baseCases; lastQueryIndex = queryIndex; lastReferenceIndex = referenceIndex; @@ -59,7 +58,7 @@ double KDERules::BaseCase( } //! Single-tree scoring function. -template +template double KDERules:: Score(const size_t /* queryIndex */, TreeType& /* referenceNode */) { @@ -79,7 +78,7 @@ double KDERules::Rescore( } //! Double-tree scoring function. 
-template +template double KDERules:: Score(TreeType& queryNode, TreeType& referenceNode) { @@ -89,14 +88,13 @@ Score(TreeType& queryNode, TreeType& referenceNode) if (bound <= (error / referenceSet.n_cols)) { - //std::cout << referenceNode.Point(0) << "\n"; arma::vec center = arma::vec(); referenceNode.Center(center); for (size_t i = 0; i < queryNode.NumDescendants(); ++i) { - densities(queryNode.Point(i)) += + densities(queryNode.Descendant(i)) += referenceNode.NumDescendants() * - kernel.Evaluate(metric.Evaluate(querySet.col(queryNode.Point(i)), + kernel.Evaluate(metric.Evaluate(querySet.col(queryNode.Descendant(i)), center)); } score = DBL_MAX; @@ -114,7 +112,7 @@ Score(TreeType& queryNode, TreeType& referenceNode) } //! Double-tree -template +template double KDERules:: Rescore(TreeType& /*queryNode*/, TreeType& /*referenceNode*/, From f973e8d825f94ef1a1f3c6ad7022f633f3dc427a Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 6 Apr 2018 18:00:09 +0200 Subject: [PATCH 003/150] Add KDE output to file --- src/mlpack/methods/kde/kde_impl.hpp | 4 ++-- src/mlpack/methods/kde/kde_main.cpp | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 1756fcaefd6..b6556cb6c18 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -59,7 +59,7 @@ void KDE:: Evaluate(const MatType& query, arma::vec& estimations) { Tree* queryTree = new Tree(query, leafSize); - MetricType metric = MetricType(); + MetricType metric = MetricType(); typedef KDERules RuleType; RuleType rules = RuleType(this->referenceSet, query, @@ -79,7 +79,7 @@ Evaluate(const MatType& query, arma::vec& estimations) traverser.Traverse(*queryTree, *referenceTree); estimations /= referenceSet.n_cols; delete queryTree; - + // Brute force /*arma::vec result = arma::vec(query.n_cols); result = arma::zeros(query.n_cols); diff --git a/src/mlpack/methods/kde/kde_main.cpp 
b/src/mlpack/methods/kde/kde_main.cpp index c63183f991e..86a16799930 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -63,7 +63,14 @@ static void mlpackMain() model = kde::KDE<>(reference, error, bandwidth, leafSize); model.Evaluate(query, estimations); - // Just for testing purposes. - std::cout.precision(40); - estimations.raw_print(std::cout); + // Output estimations to file if defined. + if (CLI::HasParam("output")) + { + CLI::GetParam("output") = std::move(estimations); + } + else + { + std::cout.precision(40); + estimations.raw_print(std::cout); + } } From 28d0d7681424cbe0e8081a5255f02a9f3d9eb16a Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 7 Apr 2018 03:49:07 +0200 Subject: [PATCH 004/150] Add KDE simple test Also fix small compilation error on KDE Python binding --- src/mlpack/methods/kde/kde_main.cpp | 8 ++-- src/mlpack/tests/CMakeLists.txt | 1 + src/mlpack/tests/kde_test.cpp | 59 +++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 4 deletions(-) create mode 100644 src/mlpack/tests/kde_test.cpp diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 86a16799930..cdff99aa198 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -38,9 +38,9 @@ PARAM_STRING_IN("tree", "Tree to use for the estimation" "('kd-tree', 'ball-tree).", "t", "kd-tree"); PARAM_STRING_IN("metric", "Metric to use for the estimation" "('euclidean').", "m", "euclidean"); -PARAM_INT_IN("leaf-size", "Leaf size to use for the tree", "l", 2); +PARAM_INT_IN("leaf_size", "Leaf size to use for the tree", "l", 2); PARAM_DOUBLE_IN("error", "Relative error tolerance for the result" , "e", 1e-8); -PARAM_FLAG("breadth-first", "Use breadth-first traversal instead of depth" +PARAM_FLAG("breadth_first", "Use breadth-first traversal instead of depth" "first.", "w"); // Output options. 
@@ -53,9 +53,9 @@ static void mlpackMain() arma::mat query = CLI::GetParam("query"); double error = CLI::GetParam("error"); double bandwidth = CLI::GetParam("bandwidth"); - int leafSize = CLI::GetParam("leaf-size"); + int leafSize = CLI::GetParam("leaf_size"); - arma::vec estimations = arma::vec(reference.n_cols, arma::fill::zeros); + arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); kde::KDE + +#include +#include + +#include +#include "test_tools.hpp" + +using namespace mlpack; +using namespace mlpack::kde; +using namespace mlpack::metric; +using namespace mlpack::tree; +using namespace mlpack::kernel; + +BOOST_AUTO_TEST_SUITE(KDETest); + +/** + * Test if simple case is correct. + */ +BOOST_AUTO_TEST_CASE(KDESimpleTest) +{ + // Transposed reference and query sets because it's easier to read. + arma::mat reference = { {-1.0, -1.0}, + {-2.0, -1.0}, + {-3.0, -2.0}, + { 1.0, 1.0}, + { 2.0, 1.0}, + { 3.0, 2.0} }; + arma::mat query = { { 0.0, 0.5}, + { 0.4, -3.0}, + { 0.0, 0.0}, + {-2.1, 1.0} }; + arma::inplace_trans(reference); + arma::inplace_trans(query); + arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec estimations_result = {0.07008107430791211955867225924521335400641, + 0.0001001563617562331180753723569587521069479, + 0.07658867126520703394465527935608406551182, + 0.01028120384800740999553525512055784929544}; + KDE + kde = KDE<>(reference, 1e-8, 0.8, 2); + kde.Evaluate(query, estimations); + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_EQUAL(estimations[i], estimations_result[i]); +} + +BOOST_AUTO_TEST_SUITE_END(); From 5dcbcb8b4277886ab01cfb0a5409c505b79e55c7 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 14 Apr 2018 01:30:37 +0200 Subject: [PATCH 005/150] Fix KDE dual-tree algorithm There was a problem with trees using a wrong dataset --- src/mlpack/methods/kde/kde.hpp | 2 +- src/mlpack/methods/kde/kde_impl.hpp | 15 ++++++++++----- src/mlpack/methods/kde/kde_rules.hpp | 6 +++++- 
src/mlpack/methods/kde/kde_rules_impl.hpp | 19 +++++++++++-------- src/mlpack/tests/kde_test.cpp | 4 ++-- 5 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 111531fa8c5..58769f9b1b5 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -51,7 +51,7 @@ class KDE double bandwidth; - int leafSize; + const int leafSize; }; } // namespace kde diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index b6556cb6c18..98ae21c1787 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -28,13 +28,13 @@ KDE(const MatType& referenceSet, const double error, const double bandwidth, const size_t leafSize) : - referenceSet(referenceSet) + referenceSet(referenceSet), + leafSize(leafSize) { this->referenceTree = new Tree(referenceSet, leafSize); this->kernel = new KernelType(bandwidth); this->error = error; this->bandwidth = bandwidth; - this->leafSize = leafSize; } template:: Evaluate(const MatType& query, arma::vec& estimations) { - Tree* queryTree = new Tree(query, leafSize); + std::vector* oldFromNewQueries; + Tree* queryTree; + oldFromNewQueries = new std::vector(query.n_cols); + queryTree = new Tree(query, *oldFromNewQueries, leafSize); MetricType metric = MetricType(); typedef KDERules RuleType; - RuleType rules = RuleType(this->referenceSet, - query, + RuleType rules = RuleType(this->referenceTree->Dataset(), + queryTree->Dataset(), estimations, error, + *oldFromNewQueries, metric, *kernel); // SingleTreeTraverser @@ -78,6 +82,7 @@ Evaluate(const MatType& query, arma::vec& estimations) typename Tree::template DualTreeTraverser traverser(rules); traverser.Traverse(*queryTree, *referenceTree); estimations /= referenceSet.n_cols; + delete oldFromNewQueries; delete queryTree; // Brute force diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index 
fcb0b96cdeb..2ac85ca2032 100644 --- a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -27,6 +27,7 @@ class KDERules const arma::mat& querySet, arma::vec& densities, const double error, + const std::vector& oldFromNewQueries, MetricType& metric, const KernelType& kernel); @@ -67,11 +68,14 @@ class KDERules //! The query set. const arma::mat& querySet; - //! Density values + //! Density values. arma::vec& densities; const double error; + //! New query dataset order. + const std::vector& oldFromNewQueries; + //! The instantiated metric. MetricType& metric; diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 0ef774b00ca..c7771b9a4e8 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -25,12 +25,14 @@ KDERules::KDERules( const arma::mat& querySet, arma::vec& densities, const double error, + const std::vector& oldFromNewQueries, MetricType& metric, const KernelType& kernel) : referenceSet(referenceSet), querySet(querySet), densities(densities), error(error), + oldFromNewQueries(oldFromNewQueries), metric(metric), kernel(kernel), lastQueryIndex(querySet.n_cols), @@ -50,7 +52,7 @@ double KDERules::BaseCase( { double distance = metric.Evaluate(querySet.col(queryIndex), referenceSet.col(referenceIndex)); - densities(queryIndex) += kernel.Evaluate(distance); + densities(oldFromNewQueries.at(queryIndex)) += kernel.Evaluate(distance); ++baseCases; lastQueryIndex = queryIndex; lastReferenceIndex = referenceIndex; @@ -86,16 +88,17 @@ Score(TreeType& queryNode, TreeType& referenceNode) bound = kernel.Evaluate(queryNode.MinDistance(referenceNode)) - kernel.Evaluate(queryNode.MaxDistance(referenceNode)); - if (bound <= (error / referenceSet.n_cols)) + if (bound <= error / referenceSet.n_cols) { - arma::vec center = arma::vec(); - referenceNode.Center(center); + arma::vec queryCenter, referenceCenter; + referenceNode.Center(referenceCenter); + 
queryNode.Center(queryCenter); + const double kernelValue = kernel.Evaluate(metric.Evaluate(referenceCenter, + queryCenter)); for (size_t i = 0; i < queryNode.NumDescendants(); ++i) { - densities(queryNode.Descendant(i)) += - referenceNode.NumDescendants() * - kernel.Evaluate(metric.Evaluate(querySet.col(queryNode.Descendant(i)), - center)); + densities(oldFromNewQueries.at(queryNode.Descendant(i))) += + referenceNode.NumDescendants() * kernelValue; } score = DBL_MAX; } diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 4bdf38b2e6b..9ce0fd07f76 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -42,8 +42,8 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) arma::inplace_trans(reference); arma::inplace_trans(query); arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); - arma::vec estimations_result = {0.07008107430791211955867225924521335400641, - 0.0001001563617562331180753723569587521069479, + arma::vec estimations_result = {0.08323668699564207296148765635734889656305, + 0.00167470061366603324010116082831700623501, 0.07658867126520703394465527935608406551182, 0.01028120384800740999553525512055784929544}; KDE Date: Wed, 25 Apr 2018 20:04:03 +0200 Subject: [PATCH 006/150] Avoid matrix copy in KDE main --- src/mlpack/methods/kde/kde_main.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index cdff99aa198..edca04a0155 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -49,13 +49,13 @@ PARAM_MATRIX_OUT("output", "Matrix to store output estimations.", static void mlpackMain() { - arma::mat reference = CLI::GetParam("reference"); - arma::mat query = CLI::GetParam("query"); + arma::mat reference = std::move(CLI::GetParam("reference")); + arma::mat query = std::move(CLI::GetParam("query")); double error = CLI::GetParam("error"); double bandwidth = 
CLI::GetParam("bandwidth"); int leafSize = CLI::GetParam("leaf_size"); - arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec estimations = std::move(arma::vec(query.n_cols, arma::fill::zeros)); kde::KDE Date: Thu, 26 Apr 2018 18:11:46 +0200 Subject: [PATCH 007/150] Delete unused variable --- src/mlpack/methods/kde/kde.hpp | 2 -- src/mlpack/methods/kde/kde_impl.hpp | 1 - 2 files changed, 3 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 58769f9b1b5..602d6361ada 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -49,8 +49,6 @@ class KDE double error; - double bandwidth; - const int leafSize; }; diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 98ae21c1787..ebd0d04cc66 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -34,7 +34,6 @@ KDE(const MatType& referenceSet, this->referenceTree = new Tree(referenceSet, leafSize); this->kernel = new KernelType(bandwidth); this->error = error; - this->bandwidth = bandwidth; } template Date: Thu, 26 Apr 2018 18:44:42 +0200 Subject: [PATCH 008/150] Delete leafSize parameter for KDE trees A new constructor with a tree as a parameter will handle different leaf sizes --- src/mlpack/methods/kde/kde.hpp | 5 +---- src/mlpack/methods/kde/kde_impl.hpp | 10 ++++------ src/mlpack/methods/kde/kde_main.cpp | 2 +- src/mlpack/tests/kde_test.cpp | 4 ++-- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 602d6361ada..7d6f72f8261 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -33,8 +33,7 @@ class KDE KDE(const MatType& referenceSet, const double error = 1e-8, - const double bandwidth = 1.0, - const size_t leafSize = 2); + const double bandwidth = 1.0); ~KDE(); @@ -48,8 +47,6 @@ class KDE Tree* referenceTree; double error; - - const int 
leafSize; }; } // namespace kde diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index ebd0d04cc66..dc06cbb50f7 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -26,12 +26,10 @@ template:: KDE(const MatType& referenceSet, const double error, - const double bandwidth, - const size_t leafSize) : - referenceSet(referenceSet), - leafSize(leafSize) + const double bandwidth) : + referenceSet(referenceSet) { - this->referenceTree = new Tree(referenceSet, leafSize); + this->referenceTree = new Tree(referenceSet); this->kernel = new KernelType(bandwidth); this->error = error; } @@ -60,7 +58,7 @@ Evaluate(const MatType& query, arma::vec& estimations) std::vector* oldFromNewQueries; Tree* queryTree; oldFromNewQueries = new std::vector(query.n_cols); - queryTree = new Tree(query, *oldFromNewQueries, leafSize); + queryTree = new Tree(query, *oldFromNewQueries); MetricType metric = MetricType(); typedef KDERules RuleType; RuleType rules = RuleType(this->referenceTree->Dataset(), diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index edca04a0155..47dc585f17e 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -60,7 +60,7 @@ static void mlpackMain() arma::mat, kernel::GaussianKernel, tree::KDTree> - model = kde::KDE<>(reference, error, bandwidth, leafSize); + model = kde::KDE<>(reference, error, bandwidth); model.Evaluate(query, estimations); // Output estimations to file if defined. 
diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 9ce0fd07f76..0e135a9943f 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -50,10 +50,10 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) arma::mat, GaussianKernel, KDTree> - kde = KDE<>(reference, 1e-8, 0.8, 2); + kde = KDE<>(reference, 1e-8, 0.8); kde.Evaluate(query, estimations); for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_EQUAL(estimations[i], estimations_result[i]); + BOOST_REQUIRE_CLOSE(estimations[i], estimations_result[i], 1e-8); } BOOST_AUTO_TEST_SUITE_END(); From ba9f83de55246e98101fa5b69ca569282ef65e28 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 3 May 2018 01:04:40 +0200 Subject: [PATCH 009/150] Improve KDE API --- src/mlpack/methods/kde/kde.hpp | 27 ++++-- src/mlpack/methods/kde/kde_impl.hpp | 112 +++++++++++++++++----- src/mlpack/methods/kde/kde_main.cpp | 7 +- src/mlpack/methods/kde/kde_rules.hpp | 7 +- src/mlpack/methods/kde/kde_rules_impl.hpp | 21 ++-- src/mlpack/tests/kde_test.cpp | 3 +- 6 files changed, 135 insertions(+), 42 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 7d6f72f8261..b2e4c441006 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -31,22 +31,35 @@ class KDE public: typedef TreeType Tree; - KDE(const MatType& referenceSet, - const double error = 1e-8, - const double bandwidth = 1.0); + KDE(const double bandwidth = 1.0, + const double relError = 1e-5, + const double absError = 0, + const bool breadthFirst = false); ~KDE(); - void Evaluate(const MatType& query, arma::vec& estimations); + void Train(const MatType& referenceSet); - private: - const MatType& referenceSet; + void Train(const Tree& referenceTree); + + void Evaluate(const MatType& querySet, arma::vec& estimations); + void Evaluate(const Tree& queryTree, arma::vec& estimations); + + private: KernelType* kernel; Tree* referenceTree; - double error; + double 
relError; + + double absError; + + bool breadthFirst; + + bool ownsReferenceTree; + + bool trained; }; } // namespace kde diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index dc06cbb50f7..8449e64d39a 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -24,14 +24,17 @@ template class TreeType> KDE:: -KDE(const MatType& referenceSet, - const double error, - const double bandwidth) : - referenceSet(referenceSet) +KDE(const double bandwidth, + const double relError, + const double absError, + const bool breadthFirst) { - this->referenceTree = new Tree(referenceSet); this->kernel = new KernelType(bandwidth); - this->error = error; + this->relError = relError; + this->absError = absError; + this->breadthFirst = breadthFirst; + this->ownsReferenceTree = false; + this->trained = false; } template class TreeType> KDE::~KDE() { - delete this->referenceTree; + if (ownsReferenceTree) + delete this->referenceTree; delete this->kernel; } @@ -53,39 +57,72 @@ template class TreeType> void KDE:: -Evaluate(const MatType& query, arma::vec& estimations) +Train(const MatType& referenceSet) +{ + this->ownsReferenceTree = true; + this->referenceTree = new Tree(referenceSet); + this->trained = true; +} + +template class TreeType> +void KDE:: +Train(const Tree& referenceTree) +{ + if (this->ownsReferenceTree == true) + delete this->referenceTree; + this->ownsReferenceTree = false; + this->referenceTree = referenceTree; + this->trained = true; +} + +template class TreeType> +void KDE:: +Evaluate(const MatType& querySet, arma::vec& estimations) { + // TODO Manage trees that don't rearrange datasets std::vector* oldFromNewQueries; Tree* queryTree; - oldFromNewQueries = new std::vector(query.n_cols); - queryTree = new Tree(query, *oldFromNewQueries); + oldFromNewQueries = new std::vector(querySet.n_cols); + queryTree = new Tree(querySet, *oldFromNewQueries); MetricType metric = MetricType(); typedef KDERules 
RuleType; RuleType rules = RuleType(this->referenceTree->Dataset(), queryTree->Dataset(), estimations, - error, + relError, + absError, *oldFromNewQueries, metric, *kernel); - // SingleTreeTraverser - /* - typename Tree::template SingleTreeTraverser traverser(rules); - for(size_t i = 0; i < query.n_cols; ++i) - traverser.Traverse(i, *referenceTree); - */ - // DualTreeTraverser typename Tree::template DualTreeTraverser traverser(rules); traverser.Traverse(*queryTree, *referenceTree); - estimations /= referenceSet.n_cols; + estimations /= referenceTree->Dataset().n_cols; delete oldFromNewQueries; delete queryTree; + // Ideas for the future... + // SingleTreeTraverser + /* + typename Tree::template SingleTreeTraverser traverser(rules); + for(size_t i = 0; i < query.n_cols; ++i) + traverser.Traverse(i, *referenceTree); + */ // Brute force - /*arma::vec result = arma::vec(query.n_cols); + /* + arma::vec result = arma::vec(query.n_cols); result = arma::zeros(query.n_cols); - + for(size_t i = 0; i < query.n_cols; ++i) { arma::vec density = arma::zeros(referenceSet.n_cols); @@ -100,8 +137,39 @@ Evaluate(const MatType& query, arma::vec& estimations) //this->kernel.Normalizer(query.n_rows); //result(i) = (1/referenceSet.n_cols)*(accumulated); } - return result;*/ + return result; + */ } +// TODO Implement +/* +template class TreeType> +void KDE:: +Evaluate(const Tree& queryTree, arma::vec& estimations) +{ + std::vector* oldFromNewQueries; + //Tree* queryTree; + oldFromNewQueries = new std::vector(querySet.n_cols); + queryTree = new Tree(querySet, *oldFromNewQueries); + MetricType metric = MetricType(); + typedef KDERules RuleType; + RuleType rules = RuleType(this->referenceTree->Dataset(), + queryTree->Dataset(), + estimations, + relError, + absError, + *oldFromNewQueries, + metric, + *kernel); + // DualTreeTraverser + typename Tree::template DualTreeTraverser traverser(rules); + traverser.Traverse(*queryTree, *referenceTree); + estimations /= 
referenceTree->Dataset().n_cols;} + */ } // namespace kde } // namespace mlpack diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 47dc585f17e..75b02c25489 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -38,7 +38,6 @@ PARAM_STRING_IN("tree", "Tree to use for the estimation" "('kd-tree', 'ball-tree).", "t", "kd-tree"); PARAM_STRING_IN("metric", "Metric to use for the estimation" "('euclidean').", "m", "euclidean"); -PARAM_INT_IN("leaf_size", "Leaf size to use for the tree", "l", 2); PARAM_DOUBLE_IN("error", "Relative error tolerance for the result" , "e", 1e-8); PARAM_FLAG("breadth_first", "Use breadth-first traversal instead of depth" "first.", "w"); @@ -53,15 +52,15 @@ static void mlpackMain() arma::mat query = std::move(CLI::GetParam("query")); double error = CLI::GetParam("error"); double bandwidth = CLI::GetParam("bandwidth"); - int leafSize = CLI::GetParam("leaf_size"); + bool breadthFirst = CLI::GetParam("breadth_first"); arma::vec estimations = std::move(arma::vec(query.n_cols, arma::fill::zeros)); kde::KDE - model = kde::KDE<>(reference, error, bandwidth); - + model(bandwidth, 0.0, error, breadthFirst); + model.Train(reference); model.Evaluate(query, estimations); // Output estimations to file if defined. if (CLI::HasParam("output")) diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index 2ac85ca2032..1232f3451d8 100644 --- a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -26,7 +26,8 @@ class KDERules KDERules(const arma::mat& referenceSet, const arma::mat& querySet, arma::vec& densities, - const double error, + const double relError, + const double absError, const std::vector& oldFromNewQueries, MetricType& metric, const KernelType& kernel); @@ -71,7 +72,9 @@ class KDERules //! Density values. arma::vec& densities; - const double error; + const double absError; + + const double relError; //! 
New query dataset order. const std::vector& oldFromNewQueries; diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index c7771b9a4e8..a553d24594f 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -24,14 +24,16 @@ KDERules::KDERules( const arma::mat& referenceSet, const arma::mat& querySet, arma::vec& densities, - const double error, + const double relError, + const double absError, const std::vector& oldFromNewQueries, MetricType& metric, const KernelType& kernel) : referenceSet(referenceSet), querySet(querySet), densities(densities), - error(error), + absError(absError), + relError(relError), oldFromNewQueries(oldFromNewQueries), metric(metric), kernel(kernel), @@ -52,7 +54,10 @@ double KDERules::BaseCase( { double distance = metric.Evaluate(querySet.col(queryIndex), referenceSet.col(referenceIndex)); - densities(oldFromNewQueries.at(queryIndex)) += kernel.Evaluate(distance); + if (tree::TreeTraits::RearrangesDataset) + densities(oldFromNewQueries.at(queryIndex)) += kernel.Evaluate(distance); + else + densities(queryIndex) += kernel.Evaluate(distance); ++baseCases; lastQueryIndex = queryIndex; lastReferenceIndex = referenceIndex; @@ -88,7 +93,7 @@ Score(TreeType& queryNode, TreeType& referenceNode) bound = kernel.Evaluate(queryNode.MinDistance(referenceNode)) - kernel.Evaluate(queryNode.MaxDistance(referenceNode)); - if (bound <= error / referenceSet.n_cols) + if (bound <= absError / referenceSet.n_cols) { arma::vec queryCenter, referenceCenter; referenceNode.Center(referenceCenter); @@ -97,8 +102,12 @@ Score(TreeType& queryNode, TreeType& referenceNode) queryCenter)); for (size_t i = 0; i < queryNode.NumDescendants(); ++i) { - densities(oldFromNewQueries.at(queryNode.Descendant(i))) += - referenceNode.NumDescendants() * kernelValue; + if (tree::TreeTraits::RearrangesDataset) + densities(oldFromNewQueries.at(queryNode.Descendant(i))) += + 
referenceNode.NumDescendants() * kernelValue; + else + densities(queryNode.Descendant(i)) += + referenceNode.NumDescendants() * kernelValue; } score = DBL_MAX; } diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 0e135a9943f..f670691393b 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -50,7 +50,8 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) arma::mat, GaussianKernel, KDTree> - kde = KDE<>(reference, 1e-8, 0.8); + kde(0.8, 0.0, 1e-8, false); + kde.Train(reference); kde.Evaluate(query, estimations); for (size_t i = 0; i < query.n_cols; ++i) BOOST_REQUIRE_CLOSE(estimations[i], estimations_result[i], 1e-8); From ead87a5d349d03d07643619cd252bfc5823bac94 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 3 May 2018 21:05:12 +0200 Subject: [PATCH 010/150] Handle FirstPointIsCentroid and RearrangesDataset --- src/mlpack/methods/kde/kde_impl.hpp | 22 ++++++++++++++++++---- src/mlpack/methods/kde/kde_rules_impl.hpp | 12 ++++++++++-- src/mlpack/tests/kde_test.cpp | 1 + 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 8449e64d39a..ea87eb6dfed 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -89,11 +89,21 @@ template:: Evaluate(const MatType& querySet, arma::vec& estimations) { - // TODO Manage trees that don't rearrange datasets std::vector* oldFromNewQueries; Tree* queryTree; - oldFromNewQueries = new std::vector(querySet.n_cols); - queryTree = new Tree(querySet, *oldFromNewQueries); + // Check whether Tree has a constructor that allows to handle rearrangements + // of the dataset or not on compile time. 
+ if constexpr(std::is_constructible&>::value) + { + oldFromNewQueries = new std::vector(querySet.n_cols); + queryTree = new Tree(querySet, *oldFromNewQueries); + } + else + { + queryTree = new Tree(querySet); + } MetricType metric = MetricType(); typedef KDERules RuleType; RuleType rules = RuleType(this->referenceTree->Dataset(), @@ -108,7 +118,11 @@ Evaluate(const MatType& querySet, arma::vec& estimations) typename Tree::template DualTreeTraverser traverser(rules); traverser.Traverse(*queryTree, *referenceTree); estimations /= referenceTree->Dataset().n_cols; - delete oldFromNewQueries; + //TODO Handle better oldFromNewQueries when not used + if constexpr(std::is_constructible&>::value) + delete oldFromNewQueries; delete queryTree; // Ideas for the future... diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index a553d24594f..fcfc989126f 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -96,8 +96,16 @@ Score(TreeType& queryNode, TreeType& referenceNode) if (bound <= absError / referenceSet.n_cols) { arma::vec queryCenter, referenceCenter; - referenceNode.Center(referenceCenter); - queryNode.Center(queryCenter); + if (tree::TreeTraits::FirstPointIsCentroid) + { + queryCenter = queryNode.Dataset().col(queryNode.Point(0)); + referenceCenter = referenceNode.Dataset().col(referenceNode.Point(0)); + } + else + { + referenceNode.Center(referenceCenter); + queryNode.Center(queryCenter); + } const double kernelValue = kernel.Evaluate(metric.Evaluate(referenceCenter, queryCenter)); for (size_t i = 0; i < queryNode.NumDescendants(); ++i) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index f670691393b..229dcacfa2d 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include "test_tools.hpp" From df4e030b5cc929056ab8e5d78e12a6402dcdee55 Mon Sep 17 00:00:00 2001 
From: Roberto Hueso Gomez Date: Fri, 4 May 2018 00:48:34 +0200 Subject: [PATCH 011/150] Fix tree building --- src/mlpack/methods/kde/kde_impl.hpp | 42 ++++++++++++++++++----------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index ea87eb6dfed..d49d124caa3 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -17,6 +17,28 @@ namespace mlpack { namespace kde { +//! Construct tree that rearranges the dataset +template +TreeType* BuildTree( + MatType&& dataset, + std::vector& oldFromNew, + const typename std::enable_if< + tree::TreeTraits::RearrangesDataset>::type* = 0) +{ + return new TreeType(std::forward(dataset), oldFromNew); +} + +//! Construct tree that doesn't rearrange the dataset +template +TreeType* BuildTree( + MatType&& dataset, + const std::vector& /* oldFromNew */, + const typename std::enable_if< + !tree::TreeTraits::RearrangesDataset>::type* = 0) +{ + return new TreeType(std::forward(dataset)); +} + template* oldFromNewQueries; Tree* queryTree; - // Check whether Tree has a constructor that allows to handle rearrangements - // of the dataset or not on compile time. 
- if constexpr(std::is_constructible&>::value) - { + // If the tree rearranges the dataset, the new mapping is needed + if (tree::TreeTraits::RearrangesDataset) oldFromNewQueries = new std::vector(querySet.n_cols); - queryTree = new Tree(querySet, *oldFromNewQueries); - } - else - { - queryTree = new Tree(querySet); - } + queryTree = BuildTree(querySet, *oldFromNewQueries); MetricType metric = MetricType(); typedef KDERules RuleType; RuleType rules = RuleType(this->referenceTree->Dataset(), @@ -118,10 +131,7 @@ Evaluate(const MatType& querySet, arma::vec& estimations) typename Tree::template DualTreeTraverser traverser(rules); traverser.Traverse(*queryTree, *referenceTree); estimations /= referenceTree->Dataset().n_cols; - //TODO Handle better oldFromNewQueries when not used - if constexpr(std::is_constructible&>::value) + if (tree::TreeTraits::RearrangesDataset) delete oldFromNewQueries; delete queryTree; From 9fa129bd1524c7a0be28f2dedf554221cb875ad1 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 May 2018 11:17:42 +0200 Subject: [PATCH 012/150] Fix uninitialized pointer --- src/mlpack/methods/kde/kde_impl.hpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index d49d124caa3..d3ee56d26cf 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -111,12 +111,8 @@ template:: Evaluate(const MatType& querySet, arma::vec& estimations) { - std::vector* oldFromNewQueries; - Tree* queryTree; - // If the tree rearranges the dataset, the new mapping is needed - if (tree::TreeTraits::RearrangesDataset) - oldFromNewQueries = new std::vector(querySet.n_cols); - queryTree = BuildTree(querySet, *oldFromNewQueries); + std::vector oldFromNewQueries; + Tree* queryTree = BuildTree(querySet, oldFromNewQueries); MetricType metric = MetricType(); typedef KDERules RuleType; RuleType rules = RuleType(this->referenceTree->Dataset(), 
@@ -124,15 +120,13 @@ Evaluate(const MatType& querySet, arma::vec& estimations) estimations, relError, absError, - *oldFromNewQueries, + oldFromNewQueries, metric, *kernel); // DualTreeTraverser typename Tree::template DualTreeTraverser traverser(rules); traverser.Traverse(*queryTree, *referenceTree); estimations /= referenceTree->Dataset().n_cols; - if (tree::TreeTraits::RearrangesDataset) - delete oldFromNewQueries; delete queryTree; // Ideas for the future... From 2cca9f55a4acdbaf1203208957a6ef0910371e72 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 May 2018 13:04:03 +0200 Subject: [PATCH 013/150] Implement relative error tolerance --- src/mlpack/methods/kde/kde_impl.hpp | 19 ++++++++++++------- src/mlpack/methods/kde/kde_rules.hpp | 14 +++++++++----- src/mlpack/methods/kde/kde_rules_impl.hpp | 9 +++++---- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index d3ee56d26cf..3dbf4180bdd 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -49,14 +49,19 @@ KDE:: KDE(const double bandwidth, const double relError, const double absError, - const bool breadthFirst) + const bool breadthFirst) : + kernel(new KernelType(bandwidth)), + relError(relError), + absError(absError), + breadthFirst(breadthFirst), + ownsReferenceTree(false), + trained(false) { - this->kernel = new KernelType(bandwidth); - this->relError = relError; - this->absError = absError; - this->breadthFirst = breadthFirst; - this->ownsReferenceTree = false; - this->trained = false; + if (relError > 0 && absError > 0) + Log::Warn << "Absolute and relative error tolerances will be sumed up" + << std::endl; + if (relError < 0 || absError < 0) + Log::Fatal << "Error tolerance can't be less than 0" << std::endl; } template& oldFromNewQueries; - //! The instantiated metric. + //! Instantiated metric. MetricType& metric; + //! 
Instantiated kernel const KernelType& kernel; //! The last query index. diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index fcfc989126f..f7f885d9490 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -89,11 +89,12 @@ template double KDERules:: Score(TreeType& queryNode, TreeType& referenceNode) { - double score, bound; - bound = kernel.Evaluate(queryNode.MinDistance(referenceNode)) - - kernel.Evaluate(queryNode.MaxDistance(referenceNode)); + const double maxKernel = kernel.Evaluate(queryNode.MinDistance(referenceNode)); + const double minKernel = kernel.Evaluate(queryNode.MaxDistance(referenceNode)); + const double bound = maxKernel - minKernel; + double score; - if (bound <= absError / referenceSet.n_cols) + if (bound <= (absError + relError * minKernel) / referenceSet.n_cols) { arma::vec queryCenter, referenceCenter; if (tree::TreeTraits::FirstPointIsCentroid) From 8398686570bcd3c26549788fe379125a6d6a3adf Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 May 2018 14:26:09 +0200 Subject: [PATCH 014/150] Implement Evaluate(Tree...) 
Also add TreeAsArguments test --- src/mlpack/methods/kde/kde.hpp | 6 ++-- src/mlpack/methods/kde/kde_impl.hpp | 29 ++++++++--------- src/mlpack/methods/kde/kde_rules_impl.hpp | 6 ++-- src/mlpack/tests/kde_test.cpp | 38 +++++++++++++++++++++++ 4 files changed, 59 insertions(+), 20 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index b2e4c441006..5304b18c3a2 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -40,11 +40,13 @@ class KDE void Train(const MatType& referenceSet); - void Train(const Tree& referenceTree); + void Train(Tree& referenceTree); void Evaluate(const MatType& querySet, arma::vec& estimations); - void Evaluate(const Tree& queryTree, arma::vec& estimations); + void Evaluate(Tree& queryTree, + const std::vector& oldFromNewQueries, + arma::vec& estimations); private: KernelType* kernel; diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 3dbf4180bdd..c198a945d39 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -98,12 +98,12 @@ template class TreeType> void KDE:: -Train(const Tree& referenceTree) +Train(Tree& referenceTree) { if (this->ownsReferenceTree == true) delete this->referenceTree; this->ownsReferenceTree = false; - this->referenceTree = referenceTree; + this->referenceTree = &referenceTree; this->trained = true; } @@ -120,7 +120,7 @@ Evaluate(const MatType& querySet, arma::vec& estimations) Tree* queryTree = BuildTree(querySet, oldFromNewQueries); MetricType metric = MetricType(); typedef KDERules RuleType; - RuleType rules = RuleType(this->referenceTree->Dataset(), + RuleType rules = RuleType(referenceTree->Dataset(), queryTree->Dataset(), estimations, relError, @@ -164,8 +164,6 @@ Evaluate(const MatType& querySet, arma::vec& estimations) */ } -// TODO Implement -/* template class TreeType> void KDE:: -Evaluate(const Tree& queryTree, arma::vec& estimations) +Evaluate(Tree& queryTree, 
+ const std::vector& oldFromNewQueries, + arma::vec& estimations) { - std::vector* oldFromNewQueries; - //Tree* queryTree; - oldFromNewQueries = new std::vector(querySet.n_cols); - queryTree = new Tree(querySet, *oldFromNewQueries); MetricType metric = MetricType(); typedef KDERules RuleType; - RuleType rules = RuleType(this->referenceTree->Dataset(), - queryTree->Dataset(), + RuleType rules = RuleType(referenceTree->Dataset(), + queryTree.Dataset(), estimations, relError, absError, - *oldFromNewQueries, + oldFromNewQueries, metric, *kernel); // DualTreeTraverser typename Tree::template DualTreeTraverser traverser(rules); - traverser.Traverse(*queryTree, *referenceTree); - estimations /= referenceTree->Dataset().n_cols;} - */ + traverser.Traverse(queryTree, *referenceTree); + estimations /= referenceTree->Dataset().n_cols; +} + } // namespace kde } // namespace mlpack diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index f7f885d9490..23f919d7555 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -89,8 +89,10 @@ template double KDERules:: Score(TreeType& queryNode, TreeType& referenceNode) { - const double maxKernel = kernel.Evaluate(queryNode.MinDistance(referenceNode)); - const double minKernel = kernel.Evaluate(queryNode.MaxDistance(referenceNode)); + const double maxKernel = + kernel.Evaluate(queryNode.MinDistance(referenceNode)); + const double minKernel = + kernel.Evaluate(queryNode.MaxDistance(referenceNode)); const double bound = maxKernel - minKernel; double score; diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 229dcacfa2d..80d7a2993ed 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -58,4 +58,42 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) BOOST_REQUIRE_CLOSE(estimations[i], estimations_result[i], 1e-8); } +/** + * Test Train(Tree...) and Evaluate(Tree...) 
+ */ +BOOST_AUTO_TEST_CASE(KDETreeAsArguments) +{ + // Transposed reference and query sets because it's easier to read. + arma::mat reference = { {-1.0, -1.0}, + {-2.0, -1.0}, + {-3.0, -2.0}, + { 1.0, 1.0}, + { 2.0, 1.0}, + { 3.0, 2.0} }; + arma::mat query = { { 0.0, 0.5}, + { 0.4, -3.0}, + { 0.0, 0.0}, + {-2.1, 1.0} }; + arma::inplace_trans(reference); + arma::inplace_trans(query); + arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec estimations_result = {0.08323668699564207296148765635734889656305, + 0.00167470061366603324010116082831700623501, + 0.07658867126520703394465527935608406551182, + 0.01028120384800740999553525512055784929544}; + typedef KDTree Tree; + std::vector oldFromNewQueries; + Tree queryTree = Tree(query, oldFromNewQueries, 2); + Tree referenceTree = Tree(reference, 2); + KDE + kde(0.8, 0.0, 1e-8, false); + kde.Train(referenceTree); + kde.Evaluate(queryTree, oldFromNewQueries, estimations); + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(estimations[i], estimations_result[i], 1e-8); +} + BOOST_AUTO_TEST_SUITE_END(); From c1dedccf8138d66c813bb0847b420fa274c5e5e4 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 May 2018 18:47:36 +0200 Subject: [PATCH 015/150] Add methods to get and modify KDE parameters --- src/mlpack/methods/kde/kde.hpp | 30 +++++++++++++++++++++++++++ src/mlpack/methods/kde/kde_impl.hpp | 32 +++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 5304b18c3a2..628af8379a0 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -48,6 +48,36 @@ class KDE const std::vector& oldFromNewQueries, arma::vec& estimations); + const KernelType& Kernel() const { return kernel; } + + KernelType& Kernel() { return kernel; } + + const Tree& ReferenceTree() const { return referenceTree; } + + //! Get relative error tolerance. 
+ double RelativeError() const { return relError; } + + //! Modify relative error tolerance. + void RelativeError(const double newError); + + //! Get absolute error tolerance. + double AbsoluteError() const { return absError; } + + //! Modify absolute error tolerance. + void AbsoluteError(const double newError); + + //! Get whether breadth-first traversal is being used. + bool BreadthFirst() const { return breadthFirst; } + + //! Modify whether breadth-first traversal is being used. + bool& BreadthFirst() { return breadthFirst; } + + //! Check if reference tree is owned by the KDE model. + bool OwnsReferenceTree() const { return ownsReferenceTree; } + + //! Check if KDE model is trained or not. + bool IsTrained() const { return trained; } + private: KernelType* kernel; diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index c198a945d39..e4bd5de4fc8 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -191,5 +191,37 @@ Evaluate(Tree& queryTree, estimations /= referenceTree->Dataset().n_cols; } +template class TreeType> +void KDE:: +RelativeError(const double newError) +{ + if (newError < 0 || newError > 1) + Log::Fatal << "Relative error tolerance must be a value between 0 and 1" + << std::endl; + else + this->relError = newError; +} + +template class TreeType> +void KDE:: +AbsoluteError(const double newError) +{ + if (newError < 0) + Log::Fatal << "Absolute error tolerance must be a value greater or equal " + << "to 0" << std::endl; + else + this->absError = newError; +} + } // namespace kde } // namespace mlpack From 1bd1eec282b6fbca6e95061be89e2ce0d6cfe123 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 May 2018 19:33:26 +0200 Subject: [PATCH 016/150] Add KDE copy constructor --- src/mlpack/methods/kde/kde.hpp | 2 ++ src/mlpack/methods/kde/kde_impl.hpp | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/mlpack/methods/kde/kde.hpp 
b/src/mlpack/methods/kde/kde.hpp index 628af8379a0..6f51aa641cc 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -36,6 +36,8 @@ class KDE const double absError = 0, const bool breadthFirst = false); + KDE(const KDE& other); + ~KDE(); void Train(const MatType& referenceSet); diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index e4bd5de4fc8..6bdad8e254b 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -64,6 +64,30 @@ KDE(const double bandwidth, Log::Fatal << "Error tolerance can't be less than 0" << std::endl; } +template class TreeType> +KDE:: +KDE(const KDE& other) : + kernel(new KernelType(other.kernel)), + relError(other.relError), + absError(other.absError), + breadthFirst(other.breadthFirst), + ownsReferenceTree(other.ownsReferenceTree), + trained(other.trained) +{ + if (trained) + { + if (ownsReferenceTree) + referenceTree = new Tree(other.referenceTree); + else + referenceTree = other.referenceTree; + } +} + template Date: Sat, 5 May 2018 01:03:38 +0200 Subject: [PATCH 017/150] Add KDE operator= --- src/mlpack/methods/kde/kde.hpp | 2 ++ src/mlpack/methods/kde/kde_impl.hpp | 29 +++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 6f51aa641cc..13a1a8196e1 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -38,6 +38,8 @@ class KDE KDE(const KDE& other); + KDE& operator=(KDE other); + ~KDE(); void Train(const MatType& referenceSet); diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 6bdad8e254b..0ca37bfab12 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -70,8 +70,7 @@ template class TreeType> -KDE:: -KDE(const KDE& other) : +KDE::KDE(const KDE& other) : kernel(new KernelType(other.kernel)), relError(other.relError), 
absError(other.absError), @@ -88,6 +87,32 @@ KDE(const KDE& other) : } } +template class TreeType> +KDE& +KDE::operator=(KDE other) +{ + // Clean memory + if (ownsReferenceTree) + delete referenceTree; + delete kernel; + + // Move + this->kernel = std::move(other.kernel); + this->referenceTree = std::move(other.referenceTree); + this->relError = other.relError; + this->absError = other.absError; + this->breadthFirst = other.breadthFirst; + this->ownsReferenceTree = other.ownsReferenceTree; + this->trained = other.trained; + + return *this; +} + template Date: Sat, 5 May 2018 12:33:31 +0200 Subject: [PATCH 018/150] Add KDE move constructor --- src/mlpack/methods/kde/kde.hpp | 2 ++ src/mlpack/methods/kde/kde_impl.hpp | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 13a1a8196e1..1e4582da7d4 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -38,6 +38,8 @@ class KDE KDE(const KDE& other); + KDE(KDE&& other); + KDE& operator=(KDE other); ~KDE(); diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 0ca37bfab12..dda38a0489e 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -87,6 +87,27 @@ KDE::KDE(const KDE& other) : } } +template class TreeType> +KDE::KDE(KDE&& other) : + kernel(other.kernel), + referenceTree(other.referenceTree), + relError(other.relError), + absError(other.absError), + breadthFirst(other.breadthFirst), + ownsReferenceTree(other.ownsReferenceTree), + trained(other.trained) +{ + other.kernel = new KernelType(); + other.referenceTree = nullptr; + other.ownsReferenceTree = false; + other.trained = false; +} + template Date: Thu, 10 May 2018 17:59:52 +0200 Subject: [PATCH 019/150] Remove const requirement from KernelType --- src/mlpack/methods/kde/kde_rules.hpp | 4 ++-- src/mlpack/methods/kde/kde_rules_impl.hpp | 2 +- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index 6b2f59e2edc..9c9a5707eb0 100644 --- a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -30,7 +30,7 @@ class KDERules const double absError, const std::vector& oldFromNewQueries, MetricType& metric, - const KernelType& kernel); + KernelType& kernel); //! Base Case double BaseCase(const size_t queryIndex, const size_t referenceIndex); @@ -86,7 +86,7 @@ class KDERules MetricType& metric; //! Instantiated kernel - const KernelType& kernel; + KernelType& kernel; //! The last query index. size_t lastQueryIndex; diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 23f919d7555..bccfed7d211 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -28,7 +28,7 @@ KDERules::KDERules( const double absError, const std::vector& oldFromNewQueries, MetricType& metric, - const KernelType& kernel) : + KernelType& kernel) : referenceSet(referenceSet), querySet(querySet), densities(densities), From 3052579c3eeb476ab86411bd01a8b13c8323c5b3 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 10 May 2018 18:27:22 +0200 Subject: [PATCH 020/150] Use unsafe_col to speed up KDE score --- src/mlpack/methods/kde/kde_rules_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index bccfed7d211..4105e9302e5 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -101,8 +101,8 @@ Score(TreeType& queryNode, TreeType& referenceNode) arma::vec queryCenter, referenceCenter; if (tree::TreeTraits::FirstPointIsCentroid) { - queryCenter = queryNode.Dataset().col(queryNode.Point(0)); - referenceCenter = referenceNode.Dataset().col(referenceNode.Point(0)); + queryCenter = 
querySet.unsafe_col(queryNode.Point(0)); + referenceCenter = referenceSet.unsafe_col(referenceNode.Point(0)); } else { From d172dbfc4d18ccac2e1e2f2f2960db6a3b25918b Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 10 May 2018 20:34:22 +0200 Subject: [PATCH 021/150] Handle kernel and metric as KDE member objects --- src/mlpack/methods/kde/kde.hpp | 6 ++++++ src/mlpack/methods/kde/kde_impl.hpp | 31 ++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 1e4582da7d4..7e2ca90605c 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -87,6 +87,8 @@ class KDE private: KernelType* kernel; + MetricType* metric; + Tree* referenceTree; double relError; @@ -95,6 +97,10 @@ class KDE bool breadthFirst; + bool ownsKernel; + + bool ownsMetric; + bool ownsReferenceTree; bool trained; diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index dda38a0489e..34c22c6ba2a 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -51,9 +51,12 @@ KDE(const double bandwidth, const double absError, const bool breadthFirst) : kernel(new KernelType(bandwidth)), + metric(new MetricType()), relError(relError), absError(absError), breadthFirst(breadthFirst), + ownsKernel(false), + ownsMetric(false), ownsReferenceTree(false), trained(false) { @@ -72,9 +75,12 @@ template class TreeType> KDE::KDE(const KDE& other) : kernel(new KernelType(other.kernel)), + metric(new MetricType(other.metric)), relError(other.relError), absError(other.absError), breadthFirst(other.breadthFirst), + ownsKernel(other.ownsKernel), + ownsMetric(other.ownsMetric), ownsReferenceTree(other.ownsReferenceTree), trained(other.trained) { @@ -95,14 +101,18 @@ template class TreeType> KDE::KDE(KDE&& other) : kernel(other.kernel), + metric(other.metric), referenceTree(other.referenceTree), relError(other.relError), 
absError(other.absError), breadthFirst(other.breadthFirst), + ownsKernel(other.ownsKernel), + ownsMetric(other.ownsMetric), ownsReferenceTree(other.ownsReferenceTree), trained(other.trained) { other.kernel = new KernelType(); + other.metric = new MetricType(); other.referenceTree = nullptr; other.ownsReferenceTree = false; other.trained = false; @@ -118,16 +128,22 @@ KDE& KDE::operator=(KDE other) { // Clean memory + if (ownsKernel) + delete kernel; + if (ownsMetric) + delete metric; if (ownsReferenceTree) delete referenceTree; - delete kernel; // Move this->kernel = std::move(other.kernel); + this->metric = std::move(other.metric); this->referenceTree = std::move(other.referenceTree); this->relError = other.relError; this->absError = other.absError; this->breadthFirst = other.breadthFirst; + this->ownsKernel = other.ownsKernel; + this->ownsMetric = other.ownsMetric; this->ownsReferenceTree = other.ownsReferenceTree; this->trained = other.trained; @@ -142,9 +158,12 @@ template class TreeType> KDE::~KDE() { + if (ownsKernel) + delete kernel; + if (ownsMetric) + delete metric; if (ownsReferenceTree) - delete this->referenceTree; - delete this->kernel; + delete referenceTree; } template oldFromNewQueries; Tree* queryTree = BuildTree(querySet, oldFromNewQueries); - MetricType metric = MetricType(); typedef KDERules RuleType; RuleType rules = RuleType(referenceTree->Dataset(), queryTree->Dataset(), @@ -196,7 +214,7 @@ Evaluate(const MatType& querySet, arma::vec& estimations) relError, absError, oldFromNewQueries, - metric, + *metric, *kernel); // DualTreeTraverser typename Tree::template DualTreeTraverser traverser(rules); @@ -245,7 +263,6 @@ Evaluate(Tree& queryTree, const std::vector& oldFromNewQueries, arma::vec& estimations) { - MetricType metric = MetricType(); typedef KDERules RuleType; RuleType rules = RuleType(referenceTree->Dataset(), queryTree.Dataset(), @@ -253,7 +270,7 @@ Evaluate(Tree& queryTree, relError, absError, oldFromNewQueries, - metric, + *metric, 
*kernel); // DualTreeTraverser typename Tree::template DualTreeTraverser traverser(rules); From c3dd7fa27f39141d85f5a7196392b947338d6b40 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 10 May 2018 20:54:00 +0200 Subject: [PATCH 022/150] Fix small mistake --- src/mlpack/methods/kde/kde_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 34c22c6ba2a..dfd20c35f1a 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -55,8 +55,8 @@ KDE(const double bandwidth, relError(relError), absError(absError), breadthFirst(breadthFirst), - ownsKernel(false), - ownsMetric(false), + ownsKernel(true), + ownsMetric(true), ownsReferenceTree(false), trained(false) { From 838208306ea3fa7c8d251787db8cec34786e4677 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 11 May 2018 16:48:27 +0200 Subject: [PATCH 023/150] Add KDE custom kernel and metric constructor --- src/mlpack/methods/kde/kde.hpp | 6 ++++++ src/mlpack/methods/kde/kde_impl.hpp | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 7e2ca90605c..4e8f6a31697 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -36,6 +36,12 @@ class KDE const double absError = 0, const bool breadthFirst = false); + KDE(MetricType& metric = MetricType(), + KernelType& kernel = KernelType(), + const double relError = 1e-5, + const double absError = 0, + const bool breadthFirst = false); + KDE(const KDE& other); KDE(KDE&& other); diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index dfd20c35f1a..f0fce2ed2b6 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -67,6 +67,35 @@ KDE(const double bandwidth, Log::Fatal << "Error tolerance can't be less than 0" << std::endl; } 
+template class TreeType> +KDE:: +KDE(MetricType& metric, + KernelType& kernel, + const double relError, + const double absError, + const bool breadthFirst) : + kernel(kernel), + metric(metric), + relError(relError), + absError(absError), + breadthFirst(breadthFirst), + ownsKernel(false), + ownsMetric(false), + ownsReferenceTree(false), + trained(false) +{ + if (relError > 0 && absError > 0) + Log::Warn << "Absolute and relative error tolerances will be sumed up" + << std::endl; + if (relError < 0 || absError < 0) + Log::Fatal << "Error tolerance can't be less than 0" << std::endl; +} + template Date: Fri, 11 May 2018 17:13:04 +0200 Subject: [PATCH 024/150] Add KDE breadth-first support --- src/mlpack/methods/kde/kde_impl.hpp | 32 +++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index f0fce2ed2b6..981bfdd6b19 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -245,9 +245,19 @@ Evaluate(const MatType& querySet, arma::vec& estimations) oldFromNewQueries, *metric, *kernel); - // DualTreeTraverser - typename Tree::template DualTreeTraverser traverser(rules); - traverser.Traverse(*queryTree, *referenceTree); + if (breadthFirst) + { + // DualTreeTraverser Breadth-First + typename Tree::template BreadthFirstDualTreeTraverser + traverser(rules); + traverser.Traverse(*queryTree, *referenceTree); + } + else + { + // DualTreeTraverser Depth-First + typename Tree::template DualTreeTraverser traverser(rules); + traverser.Traverse(*queryTree, *referenceTree); + } estimations /= referenceTree->Dataset().n_cols; delete queryTree; @@ -301,9 +311,19 @@ Evaluate(Tree& queryTree, oldFromNewQueries, *metric, *kernel); - // DualTreeTraverser - typename Tree::template DualTreeTraverser traverser(rules); - traverser.Traverse(queryTree, *referenceTree); + if (breadthFirst) + { + // DualTreeTraverser Breadth-First + typename 
Tree::template BreadthFirstDualTreeTraverser + traverser(rules); + traverser.Traverse(queryTree, *referenceTree); + } + else + { + // DualTreeTraverser Depth-First + typename Tree::template DualTreeTraverser traverser(rules); + traverser.Traverse(queryTree, *referenceTree); + } estimations /= referenceTree->Dataset().n_cols; } From 9c39afb71a815f296f15b8efdff381ff4716edd1 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 8 Jul 2018 14:23:58 +0200 Subject: [PATCH 025/150] Fix constructor error Kernel and metric pass by reference --- src/mlpack/methods/kde/kde_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 981bfdd6b19..abb253bba2a 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -79,8 +79,8 @@ KDE(MetricType& metric, const double relError, const double absError, const bool breadthFirst) : - kernel(kernel), - metric(metric), + kernel(&kernel), + metric(&metric), relError(relError), absError(absError), breadthFirst(breadthFirst), From 4c9aaffdf35315913379aaf7debf7a92edfc4476 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 8 Jul 2018 15:18:36 +0200 Subject: [PATCH 026/150] Add gaussian kernel support in KDE main --- src/mlpack/methods/kde/kde_main.cpp | 74 ++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 17 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 75b02c25489..7f0f3f8e4a1 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -27,19 +27,26 @@ PROGRAM_INFO("Kernel Density Estimation", "This program performs a Kernel " "Density Estimation for a given reference dataset."); // Required options. 
-PARAM_DOUBLE_IN_REQ("bandwidth", "Bandwidth of the kernel", "b"); -PARAM_MATRIX_IN_REQ("reference", "Input dataset to KDE on.", "i"); +PARAM_MATRIX_IN_REQ("reference", "Input dataset to KDE on.", "r"); PARAM_MATRIX_IN_REQ("query", "Query dataset to KDE on.", "q"); +PARAM_DOUBLE_IN_REQ("bandwidth", "Bandwidth of the kernel", "b"); // Configuration options PARAM_STRING_IN("kernel", "Kernel to use for the estimation" - "('gaussian').", "k", "gaussian"); + "('gaussian', 'epanechnikov').", "k", "gaussian"); PARAM_STRING_IN("tree", "Tree to use for the estimation" - "('kd-tree', 'ball-tree).", "t", "kd-tree"); + "('kd-tree', 'ball-tree').", "t", "kd-tree"); PARAM_STRING_IN("metric", "Metric to use for the estimation" "('euclidean').", "m", "euclidean"); -PARAM_DOUBLE_IN("error", "Relative error tolerance for the result" , "e", 1e-8); -PARAM_FLAG("breadth_first", "Use breadth-first traversal instead of depth" +PARAM_DOUBLE_IN("rel-error", + "Relative error tolerance for the result", + "e", + 1e-8); +PARAM_DOUBLE_IN("abs-error", + "Relative error tolerance for the result", + "E", + 0.0); +PARAM_FLAG("breadth-first", "Use breadth-first traversal instead of depth" "first.", "w"); // Output options. @@ -48,20 +55,53 @@ PARAM_MATRIX_OUT("output", "Matrix to store output estimations.", static void mlpackMain() { + // Get all parameters. 
arma::mat reference = std::move(CLI::GetParam("reference")); arma::mat query = std::move(CLI::GetParam("query")); - double error = CLI::GetParam("error"); - double bandwidth = CLI::GetParam("bandwidth"); - bool breadthFirst = CLI::GetParam("breadth_first"); - + const double bandwidth = CLI::GetParam("bandwidth"); + const std::string kernelStr = CLI::GetParam("kernel"); + const std::string treeStr = CLI::GetParam("tree"); + const std::string metricStr = CLI::GetParam("metric"); + const double relError = CLI::GetParam("rel-error"); + const double absError = CLI::GetParam("abs-error"); + const bool breadthFirst = CLI::GetParam("breadth-first"); + // Initialize results vector. arma::vec estimations = std::move(arma::vec(query.n_cols, arma::fill::zeros)); - kde::KDE - model(bandwidth, 0.0, error, breadthFirst); - model.Train(reference); - model.Evaluate(query, estimations); + + // Handle KD-Tree, Gaussian, Euclidean KDE. + if (treeStr == "kd-tree" && + kernelStr == "gaussian" && + metricStr == "euclidean") + { + kernel::GaussianKernel kernel(bandwidth); + metric::EuclideanDistance metric; + kde::KDE + model(metric, kernel, relError, absError, breadthFirst); + model.Train(reference); + model.Evaluate(query, estimations); + estimations = estimations / (kernel.Normalizer(query.n_rows)); + } + + // Handle Ball-Tree, Gaussian, Euclidean KDE. + else if (treeStr == "ball-tree" && + kernelStr == "gaussian" && + metricStr == "euclidean") + { + kernel::GaussianKernel kernel(bandwidth); + metric::EuclideanDistance metric; + kde::KDE + model(metric, kernel, relError, absError, breadthFirst); + model.Train(reference); + model.Evaluate(query, estimations); + estimations = estimations / (kernel.Normalizer(query.n_rows)); + } + // Output estimations to file if defined. 
if (CLI::HasParam("output")) { From 6b4733d1b1b040f493d84510598cbebf7d0ac344 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 8 Jul 2018 19:16:55 +0200 Subject: [PATCH 027/150] Fix KDE main typo --- src/mlpack/methods/kde/kde_main.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 7f0f3f8e4a1..0cf56142384 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -38,15 +38,15 @@ PARAM_STRING_IN("tree", "Tree to use for the estimation" "('kd-tree', 'ball-tree').", "t", "kd-tree"); PARAM_STRING_IN("metric", "Metric to use for the estimation" "('euclidean').", "m", "euclidean"); -PARAM_DOUBLE_IN("rel-error", +PARAM_DOUBLE_IN("rel_error", "Relative error tolerance for the result", "e", 1e-8); -PARAM_DOUBLE_IN("abs-error", +PARAM_DOUBLE_IN("abs_error", "Relative error tolerance for the result", "E", 0.0); -PARAM_FLAG("breadth-first", "Use breadth-first traversal instead of depth" +PARAM_FLAG("breadth_first", "Use breadth-first traversal instead of depth" "first.", "w"); // Output options. @@ -62,9 +62,9 @@ static void mlpackMain() const std::string kernelStr = CLI::GetParam("kernel"); const std::string treeStr = CLI::GetParam("tree"); const std::string metricStr = CLI::GetParam("metric"); - const double relError = CLI::GetParam("rel-error"); - const double absError = CLI::GetParam("abs-error"); - const bool breadthFirst = CLI::GetParam("breadth-first"); + const double relError = CLI::GetParam("rel_error"); + const double absError = CLI::GetParam("abs_error"); + const bool breadthFirst = CLI::GetParam("breadth_first"); // Initialize results vector. 
arma::vec estimations = std::move(arma::vec(query.n_cols, arma::fill::zeros)); From 18fb7141e1e8b6f99d2a303602d9c05338748a3d Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 8 Jul 2018 19:35:01 +0200 Subject: [PATCH 028/150] Add epanechnikov kernel support in KDE main --- src/mlpack/methods/kde/kde_main.cpp | 40 +++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 0cf56142384..4683ea8b6d9 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -102,6 +102,46 @@ static void mlpackMain() estimations = estimations / (kernel.Normalizer(query.n_rows)); } + // Handle KD-Tree, Epanechnikov, Euclidean KDE. + else if (treeStr == "kd-tree" && + kernelStr == "epanechnikov" && + metricStr == "euclidean") + { + kernel::EpanechnikovKernel kernel(bandwidth); + metric::EuclideanDistance metric; + kde::KDE + model(metric, kernel, relError, absError, breadthFirst); + model.Train(reference); + model.Evaluate(query, estimations); + estimations = estimations / (kernel.Normalizer(query.n_rows)); + } + + // Handle Ball-Tree, Epanechnikov, Euclidean KDE. + else if (treeStr == "ball-tree" && + kernelStr == "epanechnikov" && + metricStr == "euclidean") + { + kernel::EpanechnikovKernel kernel(bandwidth); + metric::EuclideanDistance metric; + kde::KDE + model(metric, kernel, relError, absError, breadthFirst); + model.Train(reference); + model.Evaluate(query, estimations); + estimations = estimations / (kernel.Normalizer(query.n_rows)); + } + + // Input parameters are wrong or are not supported yet. + else + { + Log::Fatal << "Input parameters are not valid or are not supported yet." + << std::endl; + } // Output estimations to file if defined. 
if (CLI::HasParam("output")) { From 8734d3c3667bf2f7c1cd459a01ad9bc47a69ef1f Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 16 Jul 2018 20:55:20 +0200 Subject: [PATCH 029/150] Add brute force gaussian KDE algorithm Just a function to test KDE implementation --- src/mlpack/tests/kde_test.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 80d7a2993ed..ebb05602a93 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -24,6 +24,25 @@ using namespace mlpack::kernel; BOOST_AUTO_TEST_SUITE(KDETest); +// Brute force gaussian KDE +void BruteForceGaussianKDE(const arma::mat& reference, + const arma::mat& query, + arma::vec& densities, + const double bandwidth) +{ + metric::EuclideanDistance metric; + kernel::GaussianKernel kernel(bandwidth); + for (size_t i = 0; i < query.n_cols; ++i) + { + for (size_t j = 0; j < reference.n_cols; ++j) + { + double distance = metric.Evaluate(query.col(i),reference.col(j)); + densities(i) += kernel.Evaluate(distance); + } + } + densities /= reference.n_cols; +} + /** * Test if simple case is correct. */ From 88de6b2e9303cef87b16706118c44b6671a46b8b Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 16 Jul 2018 20:58:27 +0200 Subject: [PATCH 030/150] Add gaussian KDE brute force test --- src/mlpack/tests/kde_test.cpp | 50 ++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index ebb05602a93..af32a90ec83 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -44,7 +44,7 @@ void BruteForceGaussianKDE(const arma::mat& reference, } /** - * Test if simple case is correct. + * Test if simple case is correct according to manually calculated results. 
*/ BOOST_AUTO_TEST_CASE(KDESimpleTest) { @@ -62,6 +62,7 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) arma::inplace_trans(reference); arma::inplace_trans(query); arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + // Manually calculated results. arma::vec estimations_result = {0.08323668699564207296148765635734889656305, 0.00167470061366603324010116082831700623501, 0.07658867126520703394465527935608406551182, @@ -96,10 +97,13 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) arma::inplace_trans(reference); arma::inplace_trans(query); arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); - arma::vec estimations_result = {0.08323668699564207296148765635734889656305, - 0.00167470061366603324010116082831700623501, - 0.07658867126520703394465527935608406551182, - 0.01028120384800740999553525512055784929544}; + arma::vec estimationsResult = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.8; + + // Get brute force results. + BruteForceGaussianKDE(reference, query, estimationsResult, kernelBandwidth); + + // Get dual-tree results. typedef KDTree Tree; std::vector oldFromNewQueries; Tree queryTree = Tree(query, oldFromNewQueries, 2); @@ -108,11 +112,43 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) arma::mat, GaussianKernel, KDTree> - kde(0.8, 0.0, 1e-8, false); + kde(kernelBandwidth, 0.0, 1e-8, false); kde.Train(referenceTree); kde.Evaluate(queryTree, oldFromNewQueries, estimations); for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(estimations[i], estimations_result[i], 1e-8); + BOOST_REQUIRE_CLOSE(estimations[i], estimationsResult[i], 1e-8); +} + +/** + * Test dual-tree implementation results against brute force results. + */ +BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) +{ + // Transposed reference and query sets because it's easier to read. 
+ arma::mat reference = arma::randu(2, 200); + arma::mat query = arma::randu(2, 60); + arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.3; + const double relError = 1e-8; + + // Brute force KDE + BruteForceGaussianKDE(reference, query, bfEstimations, kernelBandwidth); + + // Optimized KDE + metric::EuclideanDistance metric; + kernel::GaussianKernel kernel(kernelBandwidth); + KDE + kde(metric, kernel, relError, 0.0, false); + kde.Train(reference); + kde.Evaluate(query, treeEstimations); + + // Check wether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } BOOST_AUTO_TEST_SUITE_END(); From ca70157a80a9ae6300316d00fb7ac7c7db26459e Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 17 Jul 2018 02:22:32 +0200 Subject: [PATCH 031/150] Generic KDE brute force for all kernels --- src/mlpack/tests/kde_test.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index af32a90ec83..9e0e0d458d4 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -25,13 +25,13 @@ using namespace mlpack::kernel; BOOST_AUTO_TEST_SUITE(KDETest); // Brute force gaussian KDE -void BruteForceGaussianKDE(const arma::mat& reference, - const arma::mat& query, - arma::vec& densities, - const double bandwidth) +template +void BruteForceKDE(const arma::mat& reference, + const arma::mat& query, + arma::vec& densities, + T& kernel) { metric::EuclideanDistance metric; - kernel::GaussianKernel kernel(bandwidth); for (size_t i = 0; i < query.n_cols; ++i) { for (size_t j = 0; j < reference.n_cols; ++j) @@ -101,7 +101,11 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) const double kernelBandwidth = 0.8; // Get brute force results. 
- BruteForceGaussianKDE(reference, query, estimationsResult, kernelBandwidth); + GaussianKernel kernel(kernelBandwidth); + BruteForceKDE(reference, + query, + estimationsResult, + kernel); // Get dual-tree results. typedef KDTree Tree; @@ -133,11 +137,15 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) const double relError = 1e-8; // Brute force KDE - BruteForceGaussianKDE(reference, query, bfEstimations, kernelBandwidth); + GaussianKernel kernel(kernelBandwidth); + BruteForceKDE(reference, + query, + bfEstimations, + kernel); // Optimized KDE metric::EuclideanDistance metric; - kernel::GaussianKernel kernel(kernelBandwidth); + kernel = GaussianKernel(kernelBandwidth); KDE Date: Tue, 17 Jul 2018 02:24:30 +0200 Subject: [PATCH 032/150] Add KDE gaussian ball-tree test --- src/mlpack/tests/kde_test.cpp | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 9e0e0d458d4..673aef5195b 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -159,4 +159,42 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } +/** + * Test BallTree dual-tree implementation results against brute force results. + */ +BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) +{ + // Transposed reference and query sets because it's easier to read. 
+ arma::mat reference = arma::randu(2, 200); + arma::mat query = arma::randu(2, 60); + arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.4; + const double relError = 1e-5; + + // Brute force KDE + GaussianKernel kernel(kernelBandwidth); + BruteForceKDE(reference, + query, + bfEstimations, + kernel); + + // BallTree KDE + typedef BallTree Tree; + std::vector oldFromNewQueries; + Tree queryTree = Tree(query, oldFromNewQueries, 2); + Tree referenceTree = Tree(reference, 2); + KDE + kde(kernelBandwidth, relError, 0.0, false); + kde.Train(referenceTree); + kde.Evaluate(queryTree, oldFromNewQueries, treeEstimations); + + // Check wether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); +} + BOOST_AUTO_TEST_SUITE_END(); From 91ad4f7209181eb8018e9a8d32bb6262955e4919 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 17 Jul 2018 15:15:43 +0200 Subject: [PATCH 033/150] Add duplicated reference value KDE test --- src/mlpack/tests/kde_test.cpp | 51 ++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 7 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 673aef5195b..eac3a59b9dd 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -110,8 +110,8 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) // Get dual-tree results. 
typedef KDTree Tree; std::vector oldFromNewQueries; - Tree queryTree = Tree(query, oldFromNewQueries, 2); - Tree referenceTree = Tree(reference, 2); + Tree queryTree(query, oldFromNewQueries, 2); + Tree referenceTree(reference, 2); KDE Tree; std::vector oldFromNewQueries; - Tree queryTree = Tree(query, oldFromNewQueries, 2); - Tree referenceTree = Tree(reference, 2); + Tree queryTree(query, oldFromNewQueries, 2); + Tree referenceTree(reference, 2); KDE(reference, + query, + bfEstimations, + kernel); + + // Dual-tree KDE + typedef KDTree Tree; + std::vector oldFromNewQueries; + Tree queryTree(query, oldFromNewQueries, 2); + Tree referenceTree(reference, 2); + KDE + kde(kernelBandwidth, relError, 0.0, false); + kde.Train(referenceTree); + kde.Evaluate(queryTree, oldFromNewQueries, treeEstimations); + + // Check wether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); +} + BOOST_AUTO_TEST_SUITE_END(); From a58a72d1f5181030e70d4e807f5dc798fb9e5fcd Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 17 Jul 2018 15:30:12 +0200 Subject: [PATCH 034/150] Add duplicated query value KDE test --- src/mlpack/tests/kde_test.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index eac3a59b9dd..855d3cdab1f 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -234,4 +234,35 @@ BOOST_AUTO_TEST_CASE(DuplicatedReferenceSampleKDETest) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } +/** + * Test duplicated value in query matrix. 
+ */ +BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) +{ + arma::mat reference = arma::randu(2, 30); + arma::mat query = arma::randu(2, 10); + arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.4; + const double relError = 1e-5; + + // Duplicate value + query.col(2) = query.col(3); + + // Dual-tree KDE + typedef KDTree Tree; + std::vector oldFromNewQueries; + Tree queryTree(query, oldFromNewQueries, 2); + Tree referenceTree(reference, 2); + KDE + kde(kernelBandwidth, relError, 0.0, false); + kde.Train(referenceTree); + kde.Evaluate(queryTree, oldFromNewQueries, estimations); + + // Check wether results are equal. + BOOST_REQUIRE_CLOSE(estimations[2], estimations[3], relError); +} + BOOST_AUTO_TEST_SUITE_END(); From ae18fff6bca0b0f2f1691c02130cd417d35cfcf7 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 17 Jul 2018 15:40:54 +0200 Subject: [PATCH 035/150] Add breadth-first KDE test --- src/mlpack/tests/kde_test.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 855d3cdab1f..4fc792d1e22 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -265,4 +265,39 @@ BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) BOOST_REQUIRE_CLOSE(estimations[2], estimations[3], relError); } +/** + * Test dual-tree breadth-first implementation results against brute force + * results. 
+ */ +BOOST_AUTO_TEST_CASE(BreadthFirstKDETest) +{ + arma::mat reference = arma::randu(2, 200); + arma::mat query = arma::randu(2, 60); + arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.8; + const double relError = 1e-8; + + // Brute force KDE + GaussianKernel kernel(kernelBandwidth); + BruteForceKDE(reference, + query, + bfEstimations, + kernel); + + // Breadth-First KDE + metric::EuclideanDistance metric; + KDE + kde(metric, kernel, relError, 0.0, true); + kde.Train(reference); + kde.Evaluate(query, treeEstimations); + + // Check wether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); +} + BOOST_AUTO_TEST_SUITE_END(); From 3b5bd3779dbc983f3b03e30f6638ba4079090eec Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 17 Jul 2018 16:16:01 +0200 Subject: [PATCH 036/150] Add 1D KDE test --- src/mlpack/tests/kde_test.cpp | 44 +++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 4fc792d1e22..dfbf2f5e03e 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -152,7 +152,7 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) kde.Train(reference); kde.Evaluate(query, treeEstimations); - // Check wether results are equal. + // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } @@ -189,7 +189,7 @@ BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) kde.Train(referenceTree); kde.Evaluate(queryTree, oldFromNewQueries, treeEstimations); - // Check wether results are equal. + // Check whether results are equal. 
for (size_t i = 0; i < query.n_cols; ++i) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } @@ -229,7 +229,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedReferenceSampleKDETest) kde.Train(referenceTree); kde.Evaluate(queryTree, oldFromNewQueries, treeEstimations); - // Check wether results are equal. + // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } @@ -261,7 +261,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) kde.Train(referenceTree); kde.Evaluate(queryTree, oldFromNewQueries, estimations); - // Check wether results are equal. + // Check whether results are equal. BOOST_REQUIRE_CLOSE(estimations[2], estimations[3], relError); } @@ -295,7 +295,41 @@ BOOST_AUTO_TEST_CASE(BreadthFirstKDETest) kde.Train(reference); kde.Evaluate(query, treeEstimations); - // Check wether results are equal. + // Check whether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); +} + +/** + * Test 1-dimensional implementation results against brute force results. + */ +BOOST_AUTO_TEST_CASE(OneDimensionalTest) +{ + arma::mat reference = arma::randu(1, 200); + arma::mat query = arma::randu(1, 60); + arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.7; + const double relError = 1e-8; + + // Brute force KDE + GaussianKernel kernel(kernelBandwidth); + BruteForceKDE(reference, + query, + bfEstimations, + kernel); + + // Optimized KDE + metric::EuclideanDistance metric; + KDE + kde(metric, kernel, relError, 0.0, false); + kde.Train(reference); + kde.Evaluate(query, treeEstimations); + + // Check whether results are equal. 
for (size_t i = 0; i < query.n_cols; ++i) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } From cbee486dee69491821bdbf4e7265399c24b8dc6b Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 17 Jul 2018 21:21:43 +0200 Subject: [PATCH 037/150] Handle empty reference dataset in KDE training --- src/mlpack/methods/kde/kde_impl.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index abb253bba2a..ff814e1f1d0 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -204,6 +204,10 @@ template:: Train(const MatType& referenceSet) { + // Check if referenceSet is not an empty set. + if (referenceSet.n_cols == 0) + throw std::invalid_argument("cannot train KDE model with an empty " + "reference set"); this->ownsReferenceTree = true; this->referenceTree = new Tree(referenceSet); this->trained = true; @@ -218,6 +222,10 @@ template:: Train(Tree& referenceTree) { + // Check if referenceTree dataset is not an empty set. 
+ if (referenceTree.Dataset().n_cols == 0) + throw std::invalid_argument("cannot train KDE model with an empty " + "reference set"); if (this->ownsReferenceTree == true) delete this->referenceTree; this->ownsReferenceTree = false; From cca7d36d90309665561602c5f3ee5b68a5c22f24 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 17 Jul 2018 21:22:42 +0200 Subject: [PATCH 038/150] Add empty reference dataset KDE test --- src/mlpack/tests/kde_test.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index dfbf2f5e03e..719bf5d8443 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -334,4 +334,30 @@ BOOST_AUTO_TEST_CASE(OneDimensionalTest) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } +BOOST_AUTO_TEST_CASE(EmptyReferenceTest) +{ + arma::mat reference; + arma::mat query = arma::randu(1, 10); + arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.7; + const double relError = 1e-8; + + // KDE + metric::EuclideanDistance metric; + GaussianKernel kernel(kernelBandwidth); + KDE + kde(metric, kernel, relError, 0.0, false); + + // When training using the dataset matrix + BOOST_CHECK_THROW(kde.Train(reference), std::invalid_argument); + + // When training using a tree + typedef KDTree Tree; + Tree referenceTree(reference, 2); + BOOST_CHECK_THROW(kde.Train(referenceTree), std::invalid_argument); +} + BOOST_AUTO_TEST_SUITE_END(); From 6123075618332f42afef337b6a278786796fb35c Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Wed, 18 Jul 2018 18:06:08 +0200 Subject: [PATCH 039/150] Handle dimension mismatch in KDE evaluation --- src/mlpack/methods/kde/kde_impl.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index ff814e1f1d0..8d8d2ba7d54 100644 --- 
a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -242,6 +242,12 @@ template:: Evaluate(const MatType& querySet, arma::vec& estimations) { + // Check whether dimensions match. + if (querySet.n_rows != referenceTree->Dataset().n_rows) + throw std::invalid_argument("cannot train KDE model: querySet and " + "referenceSet dimensions don't match"); + + // Evaluate std::vector oldFromNewQueries; Tree* queryTree = BuildTree(querySet, oldFromNewQueries); typedef KDERules RuleType; @@ -310,6 +316,12 @@ Evaluate(Tree& queryTree, const std::vector& oldFromNewQueries, arma::vec& estimations) { + // Check whether dimensions match. + if (queryTree.Dataset().n_rows != referenceTree->Dataset().n_rows) + throw std::invalid_argument("cannot train KDE model: querySet and " + "referenceSet dimensions don't match"); + + // Evaluate typedef KDERules RuleType; RuleType rules = RuleType(referenceTree->Dataset(), queryTree.Dataset(), From 07f0df13636342583abaca61c515baf564041645 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Wed, 18 Jul 2018 18:07:10 +0200 Subject: [PATCH 040/150] Add dimension mismatch KDE test --- src/mlpack/tests/kde_test.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 719bf5d8443..31f348e7200 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -360,4 +360,34 @@ BOOST_AUTO_TEST_CASE(EmptyReferenceTest) BOOST_CHECK_THROW(kde.Train(referenceTree), std::invalid_argument); } +BOOST_AUTO_TEST_CASE(EvaluationMatchDimensionsTest) +{ + arma::mat reference = arma::randu(3, 10); + arma::mat query = arma::randu(1, 10); + arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.7; + const double relError = 1e-8; + + // KDE + metric::EuclideanDistance metric; + GaussianKernel kernel(kernelBandwidth); + KDE + kde(metric, kernel, relError, 0.0, false); + 
kde.Train(reference); + + // When evaluating using the query dataset matrix + BOOST_CHECK_THROW(kde.Evaluate(query, estimations), + std::invalid_argument); + + // When evaluating using a query tree + typedef KDTree Tree; + std::vector oldFromNewQueries; + Tree queryTree(query, oldFromNewQueries, 3); + BOOST_CHECK_THROW(kde.Evaluate(queryTree, oldFromNewQueries, estimations), + std::invalid_argument); +} + BOOST_AUTO_TEST_SUITE_END(); From e9efbd6c4b8977eee22cd20da947c46affbcfd6e Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Wed, 18 Jul 2018 18:40:08 +0200 Subject: [PATCH 041/150] Handle empty querySet in KDE evaluation --- src/mlpack/methods/kde/kde_impl.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 8d8d2ba7d54..565eea9dd3c 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -242,6 +242,12 @@ template:: Evaluate(const MatType& querySet, arma::vec& estimations) { + // Check querySet has at least 1 element to evaluate. + if (querySet.n_cols == 0) + { + Log::Warn << "querySet is empty" << std::endl; + return; + } // Check whether dimensions match. if (querySet.n_rows != referenceTree->Dataset().n_rows) throw std::invalid_argument("cannot train KDE model: querySet and " @@ -316,6 +322,12 @@ Evaluate(Tree& queryTree, const std::vector& oldFromNewQueries, arma::vec& estimations) { + // Check querySet has at least 1 element to evaluate. + if (queryTree.Dataset().n_cols == 0) + { + Log::Warn << "querySet is empty" << std::endl; + return; + } // Check whether dimensions match. 
if (queryTree.Dataset().n_rows != referenceTree->Dataset().n_rows) throw std::invalid_argument("cannot train KDE model: querySet and " From 128e176b686a1605aa666a786fc47d120b9b1d1c Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Wed, 18 Jul 2018 18:41:05 +0200 Subject: [PATCH 042/150] Add empty querySet KDE test --- src/mlpack/tests/kde_test.cpp | 46 ++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 31f348e7200..a9179b97f25 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -334,6 +334,9 @@ BOOST_AUTO_TEST_CASE(OneDimensionalTest) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } +/** + * Test a case where an empty reference set is given to train the model. + */ BOOST_AUTO_TEST_CASE(EmptyReferenceTest) { arma::mat reference; @@ -352,14 +355,17 @@ BOOST_AUTO_TEST_CASE(EmptyReferenceTest) kde(metric, kernel, relError, 0.0, false); // When training using the dataset matrix - BOOST_CHECK_THROW(kde.Train(reference), std::invalid_argument); + BOOST_REQUIRE_THROW(kde.Train(reference), std::invalid_argument); // When training using a tree typedef KDTree Tree; Tree referenceTree(reference, 2); - BOOST_CHECK_THROW(kde.Train(referenceTree), std::invalid_argument); + BOOST_REQUIRE_THROW(kde.Train(referenceTree), std::invalid_argument); } +/** + * Tests when reference set values and query set values dimensions don't match. 
+ */ BOOST_AUTO_TEST_CASE(EvaluationMatchDimensionsTest) { arma::mat reference = arma::randu(3, 10); @@ -379,15 +385,47 @@ BOOST_AUTO_TEST_CASE(EvaluationMatchDimensionsTest) kde.Train(reference); // When evaluating using the query dataset matrix - BOOST_CHECK_THROW(kde.Evaluate(query, estimations), + BOOST_REQUIRE_THROW(kde.Evaluate(query, estimations), std::invalid_argument); // When evaluating using a query tree typedef KDTree Tree; std::vector oldFromNewQueries; Tree queryTree(query, oldFromNewQueries, 3); - BOOST_CHECK_THROW(kde.Evaluate(queryTree, oldFromNewQueries, estimations), + BOOST_REQUIRE_THROW(kde.Evaluate(queryTree, oldFromNewQueries, estimations), std::invalid_argument); } +/** + * Tests when an empty query set is given to be evaluated. + */ +BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) +{ + arma::mat reference = arma::randu(1, 10); + arma::mat query; + arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.7; + const double relError = 1e-8; + + // KDE + metric::EuclideanDistance metric; + GaussianKernel kernel(kernelBandwidth); + KDE + kde(metric, kernel, relError, 0.0, false); + kde.Train(reference); + + // When evaluating using the query dataset matrix + BOOST_REQUIRE_NO_THROW(kde.Evaluate(query, estimations)); + + // When evaluating using a query tree + typedef KDTree Tree; + std::vector oldFromNewQueries; + Tree queryTree(query, oldFromNewQueries, 3); + BOOST_REQUIRE_NO_THROW( + kde.Evaluate(queryTree, oldFromNewQueries, estimations)); +} + BOOST_AUTO_TEST_SUITE_END(); From be84c736dfc772e28bd823e650faab1f79129b21 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 20 Jul 2018 18:55:18 +0200 Subject: [PATCH 043/150] Assert KDE trees have not HasDuplicatedPoints --- src/mlpack/methods/kde/kde_rules.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index 9c9a5707eb0..0309edd83dc 100644 --- 
a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -101,6 +101,10 @@ class KDERules //! The number of scores. size_t scores; + + // Check TreeType is supported. + static_assert(!tree::TreeTraits::HasDuplicatedPoints, + "TreeType must not have duplicated points."); }; } // namespace kde From 2dce2ca3a953c64c452bf0ed781f3b829994ace2 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 21 Jul 2018 00:41:29 +0200 Subject: [PATCH 044/150] Assert KDE trees have UniqueNumDescendants --- src/mlpack/methods/kde/kde_rules.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index 0309edd83dc..c91d667962c 100644 --- a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -105,6 +105,8 @@ class KDERules // Check TreeType is supported. static_assert(!tree::TreeTraits::HasDuplicatedPoints, "TreeType must not have duplicated points."); + static_assert(tree::TreeTraits::UniqueNumDescendants, + "TreeType must provide a number of unique descendants."); }; } // namespace kde From d00bb337198bf8b45535b20b4ad5085d4801b2e8 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 21 Jul 2018 00:46:05 +0200 Subject: [PATCH 045/150] Add KDEStat as a TreeStatType for KDE --- src/mlpack/methods/kde/CMakeLists.txt | 1 + src/mlpack/methods/kde/kde_stat.hpp | 56 +++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 src/mlpack/methods/kde/kde_stat.hpp diff --git a/src/mlpack/methods/kde/CMakeLists.txt b/src/mlpack/methods/kde/CMakeLists.txt index 268fb55b8dc..5ca3039f232 100644 --- a/src/mlpack/methods/kde/CMakeLists.txt +++ b/src/mlpack/methods/kde/CMakeLists.txt @@ -5,6 +5,7 @@ set(SOURCES kde_impl.hpp kde_rules.hpp kde_rules_impl.hpp + kde_stat.hpp ) # Add directory name to sources. 
diff --git a/src/mlpack/methods/kde/kde_stat.hpp b/src/mlpack/methods/kde/kde_stat.hpp new file mode 100644 index 00000000000..91dbed243ad --- /dev/null +++ b/src/mlpack/methods/kde/kde_stat.hpp @@ -0,0 +1,56 @@ +/** + * @file kde_stat.hpp + * @author Roberto Hueso + * + * Defines TreeStatType for KDE. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_KDE_STAT_HPP +#define MLPACK_METHODS_KDE_STAT_HPP + +#include + +namespace mlpack { +namespace kde { + +/** + * Extra data for each node in the tree. + */ +class KDEStat +{ + public: + //! Initialize the statistic. + KDEStat() : + lastKernelValue(0.0) { } + + //! Initialization for a fully initialized node. + template + KDEStat(TreeType& /* node */) : + lastKernelValue(0.0) { } + + //! Get the last kernel value calculation. + double LastKernelValue() const { return lastKernelValue; } + + //! Modify the last kernel value calculation. + double& LastKernelValue() { return lastKernelValue; } + + //! Serialize the statistic to/from an archive. + template + void serialize(Archive& ar, const unsigned int /* version */) + { + ar & BOOST_SERIALIZATION_NVP(lastKernelValue); + } + + private: + //! Last kernel value evaluation. 
+ double lastKernelValue; +}; + +} // namespace kde +} // namespace mlpack + +#endif From 6ca57882433083fc99f3a521fd8a510a1e4a3eef Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 21 Jul 2018 00:48:40 +0200 Subject: [PATCH 046/150] Add EvaluateKernel for KDE rules --- src/mlpack/methods/kde/kde_rules.hpp | 4 ++++ src/mlpack/methods/kde/kde_rules_impl.hpp | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index c91d667962c..c7d924a56a4 100644 --- a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -64,6 +64,10 @@ class KDERules size_t Scores() const { return scores; } private: + //! Evaluate kernel value of 2 points. + double EvaluateKernel(const size_t queryIndex, + const size_t referenceIndex) const; + //! The reference set. const arma::mat& referenceSet; diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 4105e9302e5..fb4a0c6a882 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -144,6 +144,16 @@ Rescore(TreeType& /*queryNode*/, return oldScore; } +template +double KDERules:: +EvaluateKernel(const size_t queryIndex, + const size_t referenceIndex) const +{ + return kernel.Evaluate(metric.Evaluate(querySet.unsafe_col(queryIndex), + referenceSet.unsafe_col(referenceIndex) + )); +} + } // namespace kde } // namespace mlpack From f25eb27d50da9e252954e81baac5b3d00dc15089 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 21 Jul 2018 00:50:49 +0200 Subject: [PATCH 047/150] Improve KDE dual-tree score using stats Makes use of KDEStat --- src/mlpack/methods/kde/kde_rules_impl.hpp | 27 ++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index fb4a0c6a882..6279cd9472d 100644 --- 
a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -98,19 +98,36 @@ Score(TreeType& queryNode, TreeType& referenceNode) if (bound <= (absError + relError * minKernel) / referenceSet.n_cols) { - arma::vec queryCenter, referenceCenter; + double kernelValue; + // If calculating a center is not required. if (tree::TreeTraits::FirstPointIsCentroid) { - queryCenter = querySet.unsafe_col(queryNode.Point(0)); - referenceCenter = referenceSet.unsafe_col(referenceNode.Point(0)); + // If a child center is the same as a parent center. + if (tree::TreeTraits::HasSelfChildren) + { + if ((referenceNode.Parent() != NULL) && + (referenceNode.Point(0) == referenceNode.Parent()->Point(0))) + kernelValue = referenceNode.Parent()->Stat().LastKernelValue(); + else + kernelValue = EvaluateKernel(queryNode.Point(0), + referenceNode.Point(0)); + } + else + kernelValue = EvaluateKernel(queryNode.Point(0), + referenceNode.Point(0)); } else { + arma::vec queryCenter, referenceCenter; referenceNode.Center(referenceCenter); queryNode.Center(queryCenter); + kernelValue = kernel.Evaluate(metric.Evaluate(referenceCenter, + queryCenter)); } - const double kernelValue = kernel.Evaluate(metric.Evaluate(referenceCenter, - queryCenter)); + + // Update lastKernelValue + referenceNode.Stat().LastKernelValue() = kernelValue; + for (size_t i = 0; i < queryNode.NumDescendants(); ++i) { if (tree::TreeTraits::RearrangesDataset) From 4daecf6cb6ae4a1778e26afb4a53f2c2c33e2b61 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 21 Jul 2018 00:52:16 +0200 Subject: [PATCH 048/150] Adjust existing code to KDEStat --- src/mlpack/methods/kde/kde.hpp | 4 +++- src/mlpack/tests/kde_test.cpp | 14 +++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 4e8f6a31697..6df064e6677 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -17,6 +17,8 @@ 
#include #include +#include "kde_stat.hpp" + namespace mlpack { namespace kde /** Kernel Density Estimation. */ { @@ -29,7 +31,7 @@ template Tree; + typedef TreeType Tree; KDE(const double bandwidth = 1.0, const double relError = 1e-5, diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index a9179b97f25..e7a0b61353d 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -108,7 +108,7 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) kernel); // Get dual-tree results. - typedef KDTree Tree; + typedef KDTree Tree; std::vector oldFromNewQueries; Tree queryTree(query, oldFromNewQueries, 2); Tree referenceTree(reference, 2); @@ -177,7 +177,7 @@ BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) kernel); // BallTree KDE - typedef BallTree Tree; + typedef BallTree Tree; std::vector oldFromNewQueries; Tree queryTree(query, oldFromNewQueries, 2); Tree referenceTree(reference, 2); @@ -217,7 +217,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedReferenceSampleKDETest) kernel); // Dual-tree KDE - typedef KDTree Tree; + typedef KDTree Tree; std::vector oldFromNewQueries; Tree queryTree(query, oldFromNewQueries, 2); Tree referenceTree(reference, 2); @@ -249,7 +249,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) query.col(2) = query.col(3); // Dual-tree KDE - typedef KDTree Tree; + typedef KDTree Tree; std::vector oldFromNewQueries; Tree queryTree(query, oldFromNewQueries, 2); Tree referenceTree(reference, 2); @@ -358,7 +358,7 @@ BOOST_AUTO_TEST_CASE(EmptyReferenceTest) BOOST_REQUIRE_THROW(kde.Train(reference), std::invalid_argument); // When training using a tree - typedef KDTree Tree; + typedef KDTree Tree; Tree referenceTree(reference, 2); BOOST_REQUIRE_THROW(kde.Train(referenceTree), std::invalid_argument); } @@ -389,7 +389,7 @@ BOOST_AUTO_TEST_CASE(EvaluationMatchDimensionsTest) std::invalid_argument); // When evaluating using a query tree - typedef KDTree Tree; + typedef KDTree Tree; std::vector oldFromNewQueries; Tree queryTree(query, 
oldFromNewQueries, 3); BOOST_REQUIRE_THROW(kde.Evaluate(queryTree, oldFromNewQueries, estimations), @@ -421,7 +421,7 @@ BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) BOOST_REQUIRE_NO_THROW(kde.Evaluate(query, estimations)); // When evaluating using a query tree - typedef KDTree Tree; + typedef KDTree Tree; std::vector oldFromNewQueries; Tree queryTree(query, oldFromNewQueries, 3); BOOST_REQUIRE_NO_THROW( From 8e9573ecadf7e876df208692c2bbb9eeaddf36bf Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 21 Jul 2018 11:44:57 +0200 Subject: [PATCH 049/150] Add KDE default constructor --- src/mlpack/methods/kde/kde.hpp | 8 +++++--- src/mlpack/methods/kde/kde_impl.hpp | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 6df064e6677..437f7658cf6 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -33,13 +33,15 @@ class KDE public: typedef TreeType Tree; - KDE(const double bandwidth = 1.0, + KDE(); + + KDE(const double bandwidth, const double relError = 1e-5, const double absError = 0, const bool breadthFirst = false); - KDE(MetricType& metric = MetricType(), - KernelType& kernel = KernelType(), + KDE(MetricType& metric, + KernelType& kernel, const double relError = 1e-5, const double absError = 0, const bool breadthFirst = false); diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 565eea9dd3c..eff93236d1b 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -39,6 +39,23 @@ TreeType* BuildTree( return new TreeType(std::forward(dataset)); } +template class TreeType> +KDE::KDE() : + kernel(new KernelType()), + metric(new MetricType()), + relError(1e-8), + absError(0.0), + breadthFirst(false), + ownsKernel(true), + ownsMetric(true), + ownsReferenceTree(false), + trained(false) { } + template Date: Sat, 21 Jul 2018 11:46:01 +0200 Subject: [PATCH 050/150] Add 
KDE serialization method --- src/mlpack/methods/kde/kde.hpp | 4 ++++ src/mlpack/methods/kde/kde_impl.hpp | 37 +++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 437f7658cf6..7ff2f326917 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -94,6 +94,10 @@ class KDE //! Check if KDE model is trained or not. bool IsTrained() const { return trained; } + //! Serialize the model. + template + void serialize(Archive& ar, const unsigned int /* version */); + private: KernelType* kernel; diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index eff93236d1b..c0507d19402 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -408,5 +408,42 @@ AbsoluteError(const double newError) this->absError = newError; } +template class TreeType> +template +void KDE:: +serialize(Archive& ar, const unsigned int /* version */) +{ + // Serialize preferences. + ar & BOOST_SERIALIZATION_NVP(relError); + ar & BOOST_SERIALIZATION_NVP(absError); + ar & BOOST_SERIALIZATION_NVP(breadthFirst); + ar & BOOST_SERIALIZATION_NVP(trained); + + // If we are loading, clean up memory if necessary. + if (Archive::is_loading::value) + { + if (ownsKernel && kernel) + delete kernel; + if (ownsMetric && metric) + delete metric; + if (ownsReferenceTree && referenceTree) + delete referenceTree; + // After loading kernel, metric and tree, we own it. + ownsKernel = true; + ownsMetric = true; + ownsReferenceTree = true; + } + + // Serialize the rest of values. 
+ ar & BOOST_SERIALIZATION_NVP(kernel); + ar & BOOST_SERIALIZATION_NVP(metric); + ar & BOOST_SERIALIZATION_NVP(referenceTree); +} + } // namespace kde } // namespace mlpack From bebf37f31ceb5bbd35a695315a754aa0e161ba37 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 21 Jul 2018 11:46:26 +0200 Subject: [PATCH 051/150] Add KDE serialization test --- src/mlpack/tests/kde_test.cpp | 68 +++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index e7a0b61353d..ee3087b591c 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -15,6 +15,7 @@ #include #include "test_tools.hpp" +#include "serialization.hpp" using namespace mlpack; using namespace mlpack::kde; @@ -22,6 +23,8 @@ using namespace mlpack::metric; using namespace mlpack::tree; using namespace mlpack::kernel; +using namespace boost::serialization; + BOOST_AUTO_TEST_SUITE(KDETest); // Brute force gaussian KDE @@ -428,4 +431,69 @@ BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) kde.Evaluate(queryTree, oldFromNewQueries, estimations)); } +/** + * Tests serialiation of KDE models. + */ +BOOST_AUTO_TEST_CASE(SerializationTest) +{ + // Initial KDE model to me serialized. + const double relError = 0.25; + const double absError = 0.0; + const bool bf = false; + arma::mat reference = arma::randu(4, 800); + KDE + kde(0.25, relError, absError, bf); + kde.Train(reference); + + // Initialize serialized objects. + KDE kdeXml, kdeText, kdeBinary; + SerializeObjectAll(kde, kdeXml, kdeText, kdeBinary); + + // Check everything is correct. 
+ BOOST_REQUIRE_CLOSE(kde.RelativeError(), relError, 1e-8); + BOOST_REQUIRE_CLOSE(kdeXml.RelativeError(), relError, 1e-8); + BOOST_REQUIRE_CLOSE(kdeText.RelativeError(), relError, 1e-8); + BOOST_REQUIRE_CLOSE(kdeBinary.RelativeError(), relError, 1e-8); + + BOOST_REQUIRE_CLOSE(kde.AbsoluteError(), absError, 1e-8); + BOOST_REQUIRE_CLOSE(kdeXml.AbsoluteError(), absError, 1e-8); + BOOST_REQUIRE_CLOSE(kdeText.AbsoluteError(), absError, 1e-8); + BOOST_REQUIRE_CLOSE(kdeBinary.AbsoluteError(), absError, 1e-8); + + BOOST_REQUIRE_EQUAL(kde.BreadthFirst(), bf); + BOOST_REQUIRE_EQUAL(kdeXml.BreadthFirst(), bf); + BOOST_REQUIRE_EQUAL(kdeText.BreadthFirst(), bf); + BOOST_REQUIRE_EQUAL(kdeBinary.BreadthFirst(), bf); + + BOOST_REQUIRE_EQUAL(kde.IsTrained(), true); + BOOST_REQUIRE_EQUAL(kdeXml.IsTrained(), true); + BOOST_REQUIRE_EQUAL(kdeText.IsTrained(), true); + BOOST_REQUIRE_EQUAL(kdeBinary.IsTrained(), true); + + // Test if execution gives the same result. + arma::mat query = arma::randu(4, 100);; + arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec xmlEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec textEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec binEstimations = arma::vec(query.n_cols, arma::fill::zeros); + + kde.Evaluate(query, estimations); + kde.Evaluate(query, xmlEstimations); + kde.Evaluate(query, textEstimations); + kde.Evaluate(query, binEstimations); + + for (size_t i = 0; i < query.n_cols; ++i) + { + BOOST_REQUIRE_CLOSE(estimations[i], xmlEstimations[i], relError); + BOOST_REQUIRE_CLOSE(estimations[i], textEstimations[i], relError); + BOOST_REQUIRE_CLOSE(estimations[i], binEstimations[i], relError); + } +} + BOOST_AUTO_TEST_SUITE_END(); From 8c0f61bf149f46f2b545273a3f0a5ffc64e6e83c Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 22 Jul 2018 19:03:56 +0200 Subject: [PATCH 052/150] Prepare estimation vectors on KDE evaluate --- src/mlpack/methods/kde/kde_impl.hpp | 10 ++++++++++ 
src/mlpack/tests/kde_test.cpp | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index c0507d19402..3709707101a 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -270,6 +270,11 @@ Evaluate(const MatType& querySet, arma::vec& estimations) throw std::invalid_argument("cannot train KDE model: querySet and " "referenceSet dimensions don't match"); + // Get estimations vector ready. + estimations.clear(); + estimations.resize(querySet.n_cols); + estimations.fill(arma::fill::zeros); + // Evaluate std::vector oldFromNewQueries; Tree* queryTree = BuildTree(querySet, oldFromNewQueries); @@ -350,6 +355,11 @@ Evaluate(Tree& queryTree, throw std::invalid_argument("cannot train KDE model: querySet and " "referenceSet dimensions don't match"); + // Get estimations vector ready. + estimations.clear(); + estimations.resize(queryTree.Dataset().n_cols); + estimations.fill(arma::fill::zeros); + // Evaluate typedef KDERules RuleType; RuleType rules = RuleType(referenceTree->Dataset(), diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index ee3087b591c..9b5ace20546 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -64,7 +64,7 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) {-2.1, 1.0} }; arma::inplace_trans(reference); arma::inplace_trans(query); - arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec estimations; // Manually calculated results. 
arma::vec estimations_result = {0.08323668699564207296148765635734889656305, 0.00167470061366603324010116082831700623501, From cdabad0b32c3bd641761cee387376c55e4d6892f Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 22 Jul 2018 19:39:33 +0200 Subject: [PATCH 053/150] Add KDE documentation --- src/mlpack/methods/kde/kde.hpp | 121 +++++++++++++++++++++++++++- src/mlpack/methods/kde/kde_impl.hpp | 2 +- 2 files changed, 118 insertions(+), 5 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 7ff2f326917..e973433202c 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -22,6 +22,18 @@ namespace mlpack { namespace kde /** Kernel Density Estimation. */ { +/** + * The KDE class is a template class for performing Kernel Density Estimations. + * In statistics, kernel density estimation, is a way to estimate the + * probability density function of a variable in a non parametric way. + * This implementation performs this estimation using a tree-independent + * dual-tree algorithm. Details about this algorithm are available in KDERules. + * + * @tparam MetricType Metric to use for KDE calculations. + * @tparam MatType Type of data to use. + * @tparam KernelType Kernel function to use for KDE calculations. + * @tparam TreeType Type of tree to use; must satisfy the TreeType policy API. + */ template Tree; + /** + * Initialize KDE object with the default Kernel and Metric parameters. + * Relative error tolernce is initialized to 1e-6, absolute error tolerance + * is 0.0 and uses a depth-first approach. + */ KDE(); + /** + * Initialize KDE object using the default Metric parameters and a given + * Kernel bandwidth (only for kernels that require a bandwidth and are + * constructed like kernel(bandwidth)). + * + * @param bandwidth Bandwidth of the kernel. + * @param relError Relative error tolerance of the model. + * @param absError Absolute error tolerance of the model. 
+ * @param breadthFirst Whether the tree should be traversed using a + * breadth-first approach. + */ KDE(const double bandwidth, - const double relError = 1e-5, + const double relError = 1e-6, const double absError = 0, const bool breadthFirst = false); + /** + * Initialize KDE object using custom instantiated Metric and Kernel objects. + * + * @param metric Instantiated metric object. + * @param kernel Instantiated kernel object. + * @param relError Relative error tolerance of the model. + * @param absError Absolute error tolerance of the model. + * @param breadthFirst Whether the tree should be traversed using a + * breadth-first approach. + */ KDE(MetricType& metric, KernelType& kernel, - const double relError = 1e-5, + const double relError = 1e-6, const double absError = 0, const bool breadthFirst = false); + /** + * Construct KDE object as a copy of the given model. This may be + * computationally intensive! + * + * @param other KDE object to copy. + */ KDE(const KDE& other); + /** + * Construct KDE object taking ownership of the given model. + * + * @param other KDE object to take ownership of. + */ KDE(KDE&& other); + /** + * Copy a KDE model. + * + * Use std::move if the object to copy is no longer needed. + * + * @param other KDE model to copy. + */ KDE& operator=(KDE other); + /** + * Destroy the KDE object. If this object created any trees, they will be + * deleted. If you created the trees then you have to delete them yourself. + */ ~KDE(); + /** + * Trains the KDE model. It builds a tree using a reference set. + * + * Use std::move if the reference set is no longer needed. + * + * @param referenceSet Set of reference data. + */ void Train(const MatType& referenceSet); + /** + * Trains the KDE model. Sets the reference tree to an already created tree. + * + * @param referenceTree New already created reference tree. + */ void Train(Tree& referenceTree); + /** + * Estimate density of each point in the query set given the data of the + * reference set. 
The result is stored in an estimations vector. + * + * - Dimension of each point in the query set must match the dimension of each + * point in the reference set. + * + * - Use std::move if the query set is no longer needed. + * + * @pre The model has to be previously trained. + * @param querySet Set of query points to get the density of. + * @param estimations Object which will hold the density of each query point. + */ void Evaluate(const MatType& querySet, arma::vec& estimations); + /** + * Estimate density of each point in the query set given the data of an + * already created query tree. The result is stored in an estimations vector. + * + * - Dimension of each point in the queryTree dataset must match the dimension + * of each point in the reference set. + * + * - Use std::move if the query tree is no longer needed. + * + * @pre The model has to be previously trained. + * @param queryTree Tree of query points to get the density of. + * @param oldFromNewQueries Mappings of query points to the tree dataset. + * @param estimations Object which will hold the density of each query point. + */ void Evaluate(Tree& queryTree, const std::vector& oldFromNewQueries, arma::vec& estimations); + //! Get the kernel. const KernelType& Kernel() const { return kernel; } + //! Modify the kernel. KernelType& Kernel() { return kernel; } + //! Get the reference tree. const Tree& ReferenceTree() const { return referenceTree; } //! Get relative error tolerance. @@ -88,10 +191,10 @@ class KDE //! Modify whether breadth-first traversal is being used. bool& BreadthFirst() { return breadthFirst; } - //! Check if reference tree is owned by the KDE model. + //! Check whether reference tree is owned by the KDE model. bool OwnsReferenceTree() const { return ownsReferenceTree; } - //! Check if KDE model is trained or not. + //! Check whether KDE model is trained or not. bool IsTrained() const { return trained; } //! Serialize the model. 
@@ -99,24 +202,34 @@ class KDE void serialize(Archive& ar, const unsigned int /* version */); private: + //! Kernel. KernelType* kernel; + //! Metric. MetricType* metric; + //! Reference tree. Tree* referenceTree; + //! Relative error tolerance. double relError; + //! Absolute error tolerance. double absError; + //! If true, a breadth-first approach is used when evaluating. bool breadthFirst; + //! If true, the KDE object is responsible for deleting the kernel. bool ownsKernel; + //! If true, the KDE object is responsible for deleting the metric. bool ownsMetric; + //! If true, the KDE object is responsible for deleting the reference tree. bool ownsReferenceTree; + //! If true, the KDE object is trained. bool trained; }; diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 3709707101a..75f4f148bc7 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -48,7 +48,7 @@ template::KDE() : kernel(new KernelType()), metric(new MetricType()), - relError(1e-8), + relError(1e-6), absError(0.0), breadthFirst(false), ownsKernel(true), From dcec680c7b4a90a1c698b14e96b3bf4bf31aa0ad Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 22 Jul 2018 20:19:15 +0200 Subject: [PATCH 054/150] Improve KDE error tolerance handling --- src/mlpack/methods/kde/kde.hpp | 7 +++-- src/mlpack/methods/kde/kde_impl.hpp | 46 ++++++++++++++++------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index e973433202c..fe38f95fed9 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -176,13 +176,13 @@ class KDE //! Get relative error tolerance. double RelativeError() const { return relError; } - //! Modify relative error tolerance. + //! Modify relative error tolerance (0 <= newError <= 1). void RelativeError(const double newError); //! Get absolute error tolerance. 
double AbsoluteError() const { return absError; } - //! Modify absolute error tolerance. + //! Modify absolute error tolerance (0 <= newError). void AbsoluteError(const double newError); //! Get whether breadth-first traversal is being used. @@ -231,6 +231,9 @@ class KDE //! If true, the KDE object is trained. bool trained; + + //! Check whether absolute and relative error values are compatible. + void CheckErrorValues(const double relError, const double absError) const; }; } // namespace kde diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 75f4f148bc7..49d863c0171 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -77,11 +77,7 @@ KDE(const double bandwidth, ownsReferenceTree(false), trained(false) { - if (relError > 0 && absError > 0) - Log::Warn << "Absolute and relative error tolerances will be sumed up" - << std::endl; - if (relError < 0 || absError < 0) - Log::Fatal << "Error tolerance can't be less than 0" << std::endl; + CheckErrorValues(relError, absError); } template 0 && absError > 0) - Log::Warn << "Absolute and relative error tolerances will be sumed up" - << std::endl; - if (relError < 0 || absError < 0) - Log::Fatal << "Error tolerance can't be less than 0" << std::endl; + CheckErrorValues(relError, absError); } template:: RelativeError(const double newError) { - if (newError < 0 || newError > 1) - Log::Fatal << "Relative error tolerance must be a value between 0 and 1" - << std::endl; - else - this->relError = newError; + CheckErrorValues(newError, absError); + relError = newError; } template:: AbsoluteError(const double newError) { - if (newError < 0) - Log::Fatal << "Absolute error tolerance must be a value greater or equal " - << "to 0" << std::endl; - else - this->absError = newError; + CheckErrorValues(relError, newError); + absError = newError; } template class TreeType> +void KDE:: +CheckErrorValues(const double relError, const double absError) const +{ + if 
(relError < 0 || relError > 1) + throw std::invalid_argument("Relative error tolerance must be a value " + "between 0 and 1"); + if (absError < 0) + throw std::invalid_argument("Absolute error tolerance must be a value " + "greater or equal to 0"); + if (relError > 0 && absError > 0) + Log::Warn << "Absolute and relative error tolerances will be sumed up" + << std::endl; +} + } // namespace kde } // namespace mlpack From 3dcec6314b6dc205affe567f42c5ae400e165b60 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 22 Jul 2018 20:59:37 +0200 Subject: [PATCH 055/150] Improve KDE api to fit #1021 --- src/mlpack/methods/kde/kde.hpp | 8 +++--- src/mlpack/methods/kde/kde_impl.hpp | 22 ++++++++--------- src/mlpack/tests/kde_test.cpp | 38 ++++++++++++++++++++--------- 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index fe38f95fed9..6bb9ce0ebbb 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -122,14 +122,14 @@ class KDE * * @param referenceSet Set of reference data. */ - void Train(const MatType& referenceSet); + void Train(MatType referenceSet); /** * Trains the KDE model. Sets the reference tree to an already created tree. * * @param referenceTree New already created reference tree. */ - void Train(Tree& referenceTree); + void Train(Tree* referenceTree); /** * Estimate density of each point in the query set given the data of the @@ -160,7 +160,7 @@ class KDE * @param oldFromNewQueries Mappings of query points to the tree dataset. * @param estimations Object which will hold the density of each query point. */ - void Evaluate(Tree& queryTree, + void Evaluate(Tree* queryTree, const std::vector& oldFromNewQueries, arma::vec& estimations); @@ -171,7 +171,7 @@ class KDE KernelType& Kernel() { return kernel; } //! Get the reference tree. - const Tree& ReferenceTree() const { return referenceTree; } + Tree* ReferenceTree() { return referenceTree; } //! 
Get relative error tolerance. double RelativeError() const { return relError; } diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 49d863c0171..557de6d078e 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -211,7 +211,7 @@ template class TreeType> void KDE:: -Train(const MatType& referenceSet) +Train(MatType referenceSet) { // Check if referenceSet is not an empty set. if (referenceSet.n_cols == 0) @@ -229,16 +229,16 @@ template class TreeType> void KDE:: -Train(Tree& referenceTree) +Train(Tree* referenceTree) { // Check if referenceTree dataset is not an empty set. - if (referenceTree.Dataset().n_cols == 0) + if (referenceTree->Dataset().n_cols == 0) throw std::invalid_argument("cannot train KDE model with an empty " "reference set"); if (this->ownsReferenceTree == true) delete this->referenceTree; this->ownsReferenceTree = false; - this->referenceTree = &referenceTree; + this->referenceTree = referenceTree; this->trained = true; } @@ -332,30 +332,30 @@ template class TreeType> void KDE:: -Evaluate(Tree& queryTree, +Evaluate(Tree* queryTree, const std::vector& oldFromNewQueries, arma::vec& estimations) { // Check querySet has at least 1 element to evaluate. - if (queryTree.Dataset().n_cols == 0) + if (queryTree->Dataset().n_cols == 0) { Log::Warn << "querySet is empty" << std::endl; return; } // Check whether dimensions match. - if (queryTree.Dataset().n_rows != referenceTree->Dataset().n_rows) + if (queryTree->Dataset().n_rows != referenceTree->Dataset().n_rows) throw std::invalid_argument("cannot train KDE model: querySet and " "referenceSet dimensions don't match"); // Get estimations vector ready. 
estimations.clear(); - estimations.resize(queryTree.Dataset().n_cols); + estimations.resize(queryTree->Dataset().n_cols); estimations.fill(arma::fill::zeros); // Evaluate typedef KDERules RuleType; RuleType rules = RuleType(referenceTree->Dataset(), - queryTree.Dataset(), + queryTree->Dataset(), estimations, relError, absError, @@ -367,13 +367,13 @@ Evaluate(Tree& queryTree, // DualTreeTraverser Breadth-First typename Tree::template BreadthFirstDualTreeTraverser traverser(rules); - traverser.Traverse(queryTree, *referenceTree); + traverser.Traverse(*queryTree, *referenceTree); } else { // DualTreeTraverser Depth-First typename Tree::template DualTreeTraverser traverser(rules); - traverser.Traverse(queryTree, *referenceTree); + traverser.Traverse(*queryTree, *referenceTree); } estimations /= referenceTree->Dataset().n_cols; } diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 9b5ace20546..35dbf2693fa 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -113,8 +113,8 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) // Get dual-tree results. 
typedef KDTree Tree; std::vector oldFromNewQueries; - Tree queryTree(query, oldFromNewQueries, 2); - Tree referenceTree(reference, 2); + Tree* queryTree = new Tree(query, oldFromNewQueries, 2); + Tree* referenceTree = new Tree(reference, 2); KDE Tree; std::vector oldFromNewQueries; - Tree queryTree(query, oldFromNewQueries, 2); - Tree referenceTree(reference, 2); + Tree* queryTree = new Tree(query, oldFromNewQueries, 2); + Tree* referenceTree = new Tree(reference, 2); KDE Tree; std::vector oldFromNewQueries; - Tree queryTree(query, oldFromNewQueries, 2); - Tree referenceTree(reference, 2); + Tree* queryTree = new Tree(query, oldFromNewQueries, 2); + Tree* referenceTree = new Tree(reference, 2); KDE Tree; std::vector oldFromNewQueries; - Tree queryTree(query, oldFromNewQueries, 2); - Tree referenceTree(reference, 2); + Tree* queryTree = new Tree(query, oldFromNewQueries, 2); + Tree* referenceTree = new Tree(reference, 2); KDE Tree; - Tree referenceTree(reference, 2); + Tree* referenceTree = new Tree(reference, 2); BOOST_REQUIRE_THROW(kde.Train(referenceTree), std::invalid_argument); + + delete referenceTree; } /** @@ -394,9 +407,10 @@ BOOST_AUTO_TEST_CASE(EvaluationMatchDimensionsTest) // When evaluating using a query tree typedef KDTree Tree; std::vector oldFromNewQueries; - Tree queryTree(query, oldFromNewQueries, 3); + Tree* queryTree = new Tree(query, oldFromNewQueries, 3); BOOST_REQUIRE_THROW(kde.Evaluate(queryTree, oldFromNewQueries, estimations), std::invalid_argument); + delete queryTree; } /** @@ -426,9 +440,11 @@ BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) // When evaluating using a query tree typedef KDTree Tree; std::vector oldFromNewQueries; - Tree queryTree(query, oldFromNewQueries, 3); + Tree* queryTree = new Tree(query, oldFromNewQueries, 3); BOOST_REQUIRE_NO_THROW( kde.Evaluate(queryTree, oldFromNewQueries, estimations)); + + delete queryTree; } /** From dbc368b2a98dc8fff2c577a95b116b81b2443f27 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: 
Tue, 24 Jul 2018 21:26:17 +0200 Subject: [PATCH 056/150] Small simplification --- src/mlpack/methods/kde/kde_main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 4683ea8b6d9..eb0809c73c1 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -66,7 +66,7 @@ static void mlpackMain() const double absError = CLI::GetParam("abs_error"); const bool breadthFirst = CLI::GetParam("breadth_first"); // Initialize results vector. - arma::vec estimations = std::move(arma::vec(query.n_cols, arma::fill::zeros)); + arma::vec estimations; // Handle KD-Tree, Gaussian, Euclidean KDE. if (treeStr == "kd-tree" && From 55338210027bc094b4245dd92d5a19fd85b6e3bd Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 26 Jul 2018 17:08:39 +0200 Subject: [PATCH 057/150] Normalize in KDE module --- src/mlpack/methods/kde/kde_impl.hpp | 9 +++++++++ src/mlpack/tests/kde_test.cpp | 10 ++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 557de6d078e..e1ef2bf7943 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -293,6 +293,11 @@ Evaluate(const MatType& querySet, arma::vec& estimations) traverser.Traverse(*queryTree, *referenceTree); } estimations /= referenceTree->Dataset().n_cols; + + // Normalize if required. + if (kernel::KernelTraits::IsNormalized) + estimations /= kernel->Normalizer(querySet.n_rows); + delete queryTree; // Ideas for the future... @@ -376,6 +381,10 @@ Evaluate(Tree* queryTree, traverser.Traverse(*queryTree, *referenceTree); } estimations /= referenceTree->Dataset().n_cols; + + // Normalize if required. 
+ if (kernel::KernelTraits::IsNormalized) + estimations /= kernel->Normalizer(queryTree->Dataset().n_rows); } template::IsNormalized) + densities /= kernel.Normalizer(query.n_rows); } /** @@ -66,10 +68,10 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) arma::inplace_trans(query); arma::vec estimations; // Manually calculated results. - arma::vec estimations_result = {0.08323668699564207296148765635734889656305, - 0.00167470061366603324010116082831700623501, - 0.07658867126520703394465527935608406551182, - 0.01028120384800740999553525512055784929544}; + arma::vec estimations_result = {0.02069926590929581, + 0.00041646387634996807, + 0.019046040026090477, + 0.002556725645852806}; KDE Date: Thu, 26 Jul 2018 21:22:18 +0200 Subject: [PATCH 058/150] Add KDEModel a KDE api abstraction --- src/mlpack/methods/kde/kde_model.hpp | 190 +++++++++++++++++++++ src/mlpack/methods/kde/kde_model_impl.hpp | 195 ++++++++++++++++++++++ 2 files changed, 385 insertions(+) create mode 100644 src/mlpack/methods/kde/kde_model.hpp create mode 100644 src/mlpack/methods/kde/kde_model_impl.hpp diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp new file mode 100644 index 00000000000..9052b942977 --- /dev/null +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -0,0 +1,190 @@ +/** + * @file kde_model.hpp + * @author Roberto Hueso + * + * Model for KDE. It abstracts different types of tree, kernels, etc. + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_KDE_MODEL_HPP +#define MLPACK_METHODS_KDE_MODEL_HPP + +// Include trees +#include + +// Include kernels +#include +#include +#include + +// Remaining includes +#include +#include "kde.hpp" + +namespace mlpack { +namespace kde { + +//! Alias template. 
+template class TreeType> +using KDEType = KDE; + +class DualTreeVisitor : public boost::static_visitor +{ + private: + const arma::mat& querySet; + + arma::vec& estimations; + + public: + //! Alias template necessary for visual C++ compiler. + template class TreeType> + using KDETypeT = KDEType; + + template class TreeType> + void operator()(KDETypeT* kde) const; + + // TODO Implement specific cases where a leaf size can be selected. + + DualTreeVisitor(const arma::mat& querySet, + arma::vec& estimations); +}; + +class TrainVisitor : public boost::static_visitor +{ + private: + arma::mat&& referenceSet; + + public: + //! Alias template necessary for visual C++ compiler. + template class TreeType> + using KDETypeT = KDEType; + + template class TreeType> + void operator()(KDETypeT* kde) const; + + // TODO Implement specific cases where a leaf size can be selected. + + TrainVisitor(arma::mat&& referenceSet); +}; + +class DeleteVisitor : public boost::static_visitor +{ + public: + template + void operator()(KDEType* kde) const; +}; + +class KDEModel +{ + public: + enum TreeTypes + { + KD_TREE, + BALL_TREE + }; + + enum KernelTypes + { + GAUSSIAN_KERNEL, + EPANECHNIKOV_KERNEL + }; + + private: + //! Bandwidth of the kernel. + double bandwidth; + + //! Relative error tolerance. + double relError; + + //! Absolute error tolerance. + double absError; + + //! If true, a breadth-first approach is used when evaluating. 
+ bool breadthFirst; + + KernelTypes kernelType; + + TreeTypes treeType; + + boost::variant*, + KDEType*, + KDEType*, + KDEType*> kdeModel; + + public: + KDEModel(const double bandwidth = 1.0, + const double relError = 1e-6, + const double absError = 0, + const bool breadthFirst = false, + const KernelTypes kernelType = KernelTypes::GAUSSIAN_KERNEL, + const TreeTypes treeType = TreeTypes::KD_TREE); + + KDEModel(const KDEModel& other); + + KDEModel(KDEModel&& other); + + KDEModel& operator=(KDEModel other); + + ~KDEModel(); + + template + void serialize(Archive& ar, const unsigned int /* version */); + + double Bandwidth() const { return bandwidth; } + + double& Bandwidth() { return bandwidth; } + + double RelativeError() const { return relError; } + + double& RelativeError() { return relError; } + + double AbsoluteError() const { return absError; } + + double& AbsoluteError() { return absError; } + + //! Get whether breadth-first traversal is being used. + bool BreadthFirst() const { return breadthFirst; } + + //! Modify whether breadth-first traversal is being used. + bool& BreadthFirst() { return breadthFirst; } + + TreeTypes TreeType() const { return treeType; } + + TreeTypes& TreeType() { return treeType; } + + KernelTypes KernelType() const { return kernelType; } + + KernelTypes& KernelType() { return kernelType; } + + void BuildModel(arma::mat&& referenceSet); + + void Evaluate(arma::mat&& querySet, arma::vec& estimations); + + private: + void CleanMemory(); +}; + +} // namespace kde +} // namespace mlpack + +#include "kde_model_impl.hpp" + +#endif diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp new file mode 100644 index 00000000000..4cd74cc4946 --- /dev/null +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -0,0 +1,195 @@ +/** + * @file kde_model_impl.hpp + * @author Roberto Hueso + * + * Implementation of KDE Model. 
+ * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. + */ +#ifndef MLPACK_METHODS_KDE_MODEL_IMPL_HPP +#define MLPACK_METHODS_KDE_MODEL_IMPL_HPP + +// In case it hasn't been included yet. +#include "kde_model.hpp" + +#include + +namespace mlpack { +namespace kde { + +//! Initialize the KDEModel with the given parameters. +inline KDEModel::KDEModel(const double bandwidth, + const double relError, + const double absError, + const bool breadthFirst, + const KernelTypes kernelType, + const TreeTypes treeType) : + bandwidth(bandwidth), + relError(relError), + absError(absError), + breadthFirst(breadthFirst), + kernelType(kernelType), + treeType(treeType) +{ + // Nothing to do +} + +// Copy constructor. +inline KDEModel::KDEModel(const KDEModel& other) : + bandwidth(other.bandwidth), + relError(other.relError), + absError(other.absError), + breadthFirst(other.breadthFirst), + kernelType(other.kernelType), + treeType(other.treeType) +{ + // Nothing to do +} + +// Move constructor. 
+inline KDEModel::KDEModel(KDEModel&& other) : + bandwidth(other.bandwidth), + relError(other.relError), + absError(other.absError), + breadthFirst(other.breadthFirst), + kernelType(other.kernelType), + treeType(other.treeType), + kdeModel(std::move(other.kdeModel)) +{ + // Reset other model + other.bandwidth = 1.0; + other.relError = 1e-6; + other.absError = 0; + other.breadthFirst = false; + other.kernelType = KernelTypes::GAUSSIAN_KERNEL; + other.treeType = TreeTypes::KD_TREE; + other.kdeModel = decltype(other.kdeModel)(); +} + +inline KDEModel& KDEModel::operator=(KDEModel other) +{ + boost::apply_visitor(DeleteVisitor(), kdeModel); + bandwidth = other.bandwidth; + relError = other.relError; + absError = other.absError; + breadthFirst = other.breadthFirst; + kernelType = other.kernelType; + treeType = other.treeType; + kdeModel = std::move(other.kdeModel); + return *this; +} + +// Clean memory +inline KDEModel::~KDEModel() +{ + boost::apply_visitor(DeleteVisitor(), kdeModel); +} + +inline void KDEModel::BuildModel(arma::mat&& referenceSet) +{ + // Clean memory, if necessary. 
+ boost::apply_visitor(DeleteVisitor(), kdeModel); + + if (kernelType == GAUSSIAN_KERNEL && treeType == KD_TREE) + kdeModel = new KDEType + (bandwidth, relError, absError, breadthFirst); + + else if (kernelType == GAUSSIAN_KERNEL && treeType == BALL_TREE) + kdeModel = new KDEType + (bandwidth, relError, absError, breadthFirst); + + else if (kernelType == EPANECHNIKOV_KERNEL && treeType == KD_TREE) + kdeModel = new KDEType + (bandwidth, relError, absError, breadthFirst); + + else if (kernelType == EPANECHNIKOV_KERNEL && treeType == BALL_TREE) + kdeModel = new KDEType + (bandwidth, relError, absError, breadthFirst); + + TrainVisitor train(std::move(referenceSet)); + boost::apply_visitor(train, kdeModel); +} + +// Perform evaluation +inline void KDEModel::Evaluate(arma::mat&& querySet, arma::vec& estimations) +{ + DualTreeVisitor eval(querySet, estimations); + boost::apply_visitor(eval, kdeModel); +} + +// Clean memory +inline void KDEModel::CleanMemory() +{ + boost::apply_visitor(DeleteVisitor(), kdeModel); +} + +// Parameters for KDE evaluation +DualTreeVisitor::DualTreeVisitor(const arma::mat& querySet, + arma::vec& estimations): + querySet(querySet), + estimations(estimations) +{} + +// Default KDE evaluation +template class TreeType> +void DualTreeVisitor::operator()(KDETypeT* kde) const +{ + if (kde) + kde->Evaluate(querySet, estimations); + else + throw std::runtime_error("no KDE model initialized"); +} + +// Parameters for Train. +TrainVisitor::TrainVisitor(arma::mat&& referenceSet) : + referenceSet(std::move(referenceSet)) +{} + +// Default Train +template class TreeType> +void TrainVisitor::operator()(KDETypeT* kde) const +{ + if (kde) + kde->Train(std::move(referenceSet)); + else + throw std::runtime_error("no KDE model initialized"); +} + +// Delete model +template +void DeleteVisitor::operator()(KDEType* kde) const +{ + if (kde) + delete kde; +} + +// Serialize the model. 
+template +void KDEModel::serialize(Archive& ar, const unsigned int /* version */) +{ + ar & BOOST_SERIALIZATION_NVP(bandwidth); + ar & BOOST_SERIALIZATION_NVP(relError); + ar & BOOST_SERIALIZATION_NVP(absError); + ar & BOOST_SERIALIZATION_NVP(breadthFirst); + ar & BOOST_SERIALIZATION_NVP(kernelType); + ar & BOOST_SERIALIZATION_NVP(treeType); + + if (Archive::is_loading::value) + boost::apply_visitor(DeleteVisitor(), kdeModel); + + ar & BOOST_SERIALIZATION_NVP(kdeModel); +} + +} // namespace kde +} // namespace mlpack + +#endif From 51ad93cf3e308e8318bc5ddfb139632dfc7680db Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 26 Jul 2018 21:23:12 +0200 Subject: [PATCH 059/150] Add KDEModel to CMake --- src/mlpack/methods/kde/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mlpack/methods/kde/CMakeLists.txt b/src/mlpack/methods/kde/CMakeLists.txt index 5ca3039f232..fa5977534b9 100644 --- a/src/mlpack/methods/kde/CMakeLists.txt +++ b/src/mlpack/methods/kde/CMakeLists.txt @@ -6,6 +6,8 @@ set(SOURCES kde_rules.hpp kde_rules_impl.hpp kde_stat.hpp + kde_model.hpp + kde_model_impl.hpp ) # Add directory name to sources. From b86ec2a65645e2a88abca711510761ece72fb005 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 26 Jul 2018 21:24:58 +0200 Subject: [PATCH 060/150] Rewrite KDE main to make use of KDEModel --- src/mlpack/methods/kde/kde_main.cpp | 158 +++++++++++++++------------- 1 file changed, 84 insertions(+), 74 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index eb0809c73c1..0d2d17d10a5 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -16,6 +16,7 @@ #include #include "kde.hpp" +#include "kde_model.hpp" using namespace mlpack; using namespace mlpack::kde; @@ -23,21 +24,49 @@ using namespace mlpack::util; using namespace std; // Define parameters for the executable. 
-PROGRAM_INFO("Kernel Density Estimation", "This program performs a Kernel " - "Density Estimation for a given reference dataset."); +PROGRAM_INFO("Kernel Density Estimation", + "This program performs a Kernel Density Estimation. KDE is a " + "non-parametric way of estimating probability density function. " + "For each query point the program will estimate its probability density " + "by applying a kernel function to each reference point. Computational " + " complexity is O(n^2) but it is optimized by making use of dual-trees. " + "\n\n" + "For example, the following will run KDE using the points in " + "reference_set.csv and query_set.csv. It will apply an Epanechnikov kernel " + "with a 0.2 bandwidth to each reference point and use a KD-Tree for the " + "dual-tree optimization. The result will be stored in a densities.csv file " + "with a maximum error of 5%" + "$ kde --reference reference_set.csv --query query_set.csv --bandwidth 0.2 " + "--kernel epanechnikov --tree kd-tree --rel_error 0.05 --output " + "densities.csv" + "\n\n" + "Dual-tree optimization allows to avoid lots of barely relevant " + "calculations (as kernel function values decrease with distance) if you " + "can afford a little error (you can define how much is the maximum you are " + "willing to afford) over the final result. This program runs using an " + "Euclidean metric. If no output file is specified then it will output the " + "result to standard output."); // Required options. -PARAM_MATRIX_IN_REQ("reference", "Input dataset to KDE on.", "r"); +PARAM_MATRIX_IN("reference", "Input dataset to KDE on.", "r"); PARAM_MATRIX_IN_REQ("query", "Query dataset to KDE on.", "q"); -PARAM_DOUBLE_IN_REQ("bandwidth", "Bandwidth of the kernel", "b"); +PARAM_DOUBLE_IN("bandwidth", "Bandwidth of the kernel", "b", 1.0); + +// Load or save models. 
+PARAM_MODEL_IN(KDEModel, + "input_model", + "File containing pre-trained KDE model.", + "m"); +PARAM_MODEL_OUT(KDEModel, + "output_model", + "If specified, the KDE model will be saved to the given file.", + "M"); // Configuration options PARAM_STRING_IN("kernel", "Kernel to use for the estimation" "('gaussian', 'epanechnikov').", "k", "gaussian"); PARAM_STRING_IN("tree", "Tree to use for the estimation" "('kd-tree', 'ball-tree').", "t", "kd-tree"); -PARAM_STRING_IN("metric", "Metric to use for the estimation" - "('euclidean').", "m", "euclidean"); PARAM_DOUBLE_IN("rel_error", "Relative error tolerance for the result", "e", @@ -48,6 +77,7 @@ PARAM_DOUBLE_IN("abs_error", 0.0); PARAM_FLAG("breadth_first", "Use breadth-first traversal instead of depth" "first.", "w"); +// Maybe in the future it could be interesting to implement different metrics. // Output options. PARAM_MATRIX_OUT("output", "Matrix to store output estimations.", @@ -55,93 +85,66 @@ PARAM_MATRIX_OUT("output", "Matrix to store output estimations.", static void mlpackMain() { + const size_t output_precision = 40; // Get all parameters. arma::mat reference = std::move(CLI::GetParam("reference")); arma::mat query = std::move(CLI::GetParam("query")); const double bandwidth = CLI::GetParam("bandwidth"); const std::string kernelStr = CLI::GetParam("kernel"); const std::string treeStr = CLI::GetParam("tree"); - const std::string metricStr = CLI::GetParam("metric"); const double relError = CLI::GetParam("rel_error"); const double absError = CLI::GetParam("abs_error"); const bool breadthFirst = CLI::GetParam("breadth_first"); // Initialize results vector. arma::vec estimations; - // Handle KD-Tree, Gaussian, Euclidean KDE. 
- if (treeStr == "kd-tree" && - kernelStr == "gaussian" && - metricStr == "euclidean") - { - kernel::GaussianKernel kernel(bandwidth); - metric::EuclideanDistance metric; - kde::KDE - model(metric, kernel, relError, absError, breadthFirst); - model.Train(reference); - model.Evaluate(query, estimations); - estimations = estimations / (kernel.Normalizer(query.n_rows)); - } + // You can only specify reference data or a pre-trained model. + RequireOnlyOnePassed({ "reference", "input_model" }, true); + ReportIgnoredParam({{ "input_model", true }}, "tree"); + ReportIgnoredParam({{ "input_model", true }}, "kernel"); + ReportIgnoredParam({{ "input_model", true }}, "metric"); + ReportIgnoredParam({{ "input_model", true }}, "rel_error"); + ReportIgnoredParam({{ "input_model", true }}, "abs_error"); + ReportIgnoredParam({{ "input_model", true }}, "breadth_first"); - // Handle Ball-Tree, Gaussian, Euclidean KDE. - else if (treeStr == "ball-tree" && - kernelStr == "gaussian" && - metricStr == "euclidean") - { - kernel::GaussianKernel kernel(bandwidth); - metric::EuclideanDistance metric; - kde::KDE - model(metric, kernel, relError, absError, breadthFirst); - model.Train(reference); - model.Evaluate(query, estimations); - estimations = estimations / (kernel.Normalizer(query.n_rows)); - } + KDEModel* kde = new KDEModel(); - // Handle KD-Tree, Epanechnikov, Euclidean KDE. 
- else if (treeStr == "kd-tree" && - kernelStr == "epanechnikov" && - metricStr == "euclidean") + if (CLI::HasParam("reference")) { - kernel::EpanechnikovKernel kernel(bandwidth); - metric::EuclideanDistance metric; - kde::KDE - model(metric, kernel, relError, absError, breadthFirst); - model.Train(reference); - model.Evaluate(query, estimations); - estimations = estimations / (kernel.Normalizer(query.n_rows)); - } + // Set parameters + kde->Bandwidth() = bandwidth; + kde->RelativeError() = relError; + kde->AbsoluteError() = absError; + kde->BreadthFirst() = breadthFirst; - // Handle Ball-Tree, Epanechnikov, Euclidean KDE. - else if (treeStr == "ball-tree" && - kernelStr == "epanechnikov" && - metricStr == "euclidean") - { - kernel::EpanechnikovKernel kernel(bandwidth); - metric::EuclideanDistance metric; - kde::KDE - model(metric, kernel, relError, absError, breadthFirst); - model.Train(reference); - model.Evaluate(query, estimations); - estimations = estimations / (kernel.Normalizer(query.n_rows)); - } + // Set KernelType + if (kernelStr == "gaussian") + kde->KernelType() = KDEModel::GAUSSIAN_KERNEL; + else if (kernelStr == "epanechnikov") + kde->KernelType() = KDEModel::EPANECHNIKOV_KERNEL; + else + Log::Fatal << "Input kernel is not valid or not supported yet." + << std::endl; + + // Set TreeType + if (treeStr == "kd-tree") + kde->TreeType() = KDEModel::KD_TREE; + else if (treeStr == "ball-tree") + kde->TreeType() = KDEModel::BALL_TREE; + else + Log::Fatal << "Input tree is not valid or not supported yet." + << std::endl; - // Input parameters are wrong or are not supported yet. + // Build model + kde->BuildModel(std::move(reference)); + } else { - Log::Fatal << "Input parameters are not valid or are not supported yet." - << std::endl; + kde = CLI::GetParam("input_model"); } + + kde->Evaluate(std::move(query), estimations); + // Output estimations to file if defined. 
if (CLI::HasParam("output")) { @@ -149,7 +152,14 @@ static void mlpackMain() } else { - std::cout.precision(40); + std::cout.precision(output_precision); estimations.raw_print(std::cout); } + + // Save output model. + if (CLI::HasParam("output_model")) + CLI::GetParam("output_model") = kde; + + // Delete model. + delete kde; } From 60029107d0bc9212c20eabeeb836fbdccf295133 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 27 Jul 2018 14:02:11 +0200 Subject: [PATCH 061/150] Add load/save KDE models --- src/mlpack/methods/kde/kde_main.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 0d2d17d10a5..9dc0a526d2e 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -36,6 +36,7 @@ PROGRAM_INFO("Kernel Density Estimation", "with a 0.2 bandwidth to each reference point and use a KD-Tree for the " "dual-tree optimization. The result will be stored in a densities.csv file " "with a maximum error of 5%" + "\n\n" "$ kde --reference reference_set.csv --query query_set.csv --bandwidth 0.2 " "--kernel epanechnikov --tree kd-tree --rel_error 0.05 --output " "densities.csv" @@ -86,8 +87,7 @@ PARAM_MATRIX_OUT("output", "Matrix to store output estimations.", static void mlpackMain() { const size_t output_precision = 40; - // Get all parameters. - arma::mat reference = std::move(CLI::GetParam("reference")); + // Get some parameters. 
arma::mat query = std::move(CLI::GetParam("query")); const double bandwidth = CLI::GetParam("bandwidth"); const std::string kernelStr = CLI::GetParam("kernel"); @@ -102,15 +102,17 @@ static void mlpackMain() RequireOnlyOnePassed({ "reference", "input_model" }, true); ReportIgnoredParam({{ "input_model", true }}, "tree"); ReportIgnoredParam({{ "input_model", true }}, "kernel"); - ReportIgnoredParam({{ "input_model", true }}, "metric"); ReportIgnoredParam({{ "input_model", true }}, "rel_error"); ReportIgnoredParam({{ "input_model", true }}, "abs_error"); ReportIgnoredParam({{ "input_model", true }}, "breadth_first"); - KDEModel* kde = new KDEModel(); + KDEModel* kde; if (CLI::HasParam("reference")) { + arma::mat reference = std::move(CLI::GetParam("reference")); + + kde = new KDEModel(); // Set parameters kde->Bandwidth() = bandwidth; kde->RelativeError() = relError; @@ -140,6 +142,7 @@ static void mlpackMain() } else { + // Load model kde = CLI::GetParam("input_model"); } @@ -156,10 +159,7 @@ static void mlpackMain() estimations.raw_print(std::cout); } - // Save output model. + // Save model. if (CLI::HasParam("output_model")) CLI::GetParam("output_model") = kde; - - // Delete model. - delete kde; } From 7ad80222600728617b5e5c01a4cb2c68fcd2697c Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 27 Jul 2018 20:17:14 +0200 Subject: [PATCH 062/150] Improve KDERules style --- src/mlpack/methods/kde/kde_rules.hpp | 6 +++++- src/mlpack/methods/kde/kde_rules_impl.hpp | 16 +++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index c7d924a56a4..4a6a8da2f8f 100644 --- a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -64,10 +64,14 @@ class KDERules size_t Scores() const { return scores; } private: - //! Evaluate kernel value of 2 points. + //! Evaluate kernel value of 2 points given their indexes. 
double EvaluateKernel(const size_t queryIndex, const size_t referenceIndex) const; + //! Evaluate kernel value of 2 points. + double EvaluateKernel(const arma::vec& query, + const arma::vec& reference) const; + //! The reference set. const arma::mat& referenceSet; diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 6279cd9472d..1e74a23e636 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -86,7 +86,7 @@ double KDERules::Rescore( //! Double-tree scoring function. template -double KDERules:: +inline double KDERules:: Score(TreeType& queryNode, TreeType& referenceNode) { const double maxKernel = @@ -162,13 +162,19 @@ Rescore(TreeType& /*queryNode*/, } template -double KDERules:: +inline force_inline double KDERules:: EvaluateKernel(const size_t queryIndex, const size_t referenceIndex) const { - return kernel.Evaluate(metric.Evaluate(querySet.unsafe_col(queryIndex), - referenceSet.unsafe_col(referenceIndex) - )); + return EvaluateKernel(querySet.unsafe_col(queryIndex), + referenceSet.unsafe_col(referenceIndex)); +} + +template +inline force_inline double KDERules:: +EvaluateKernel(const arma::vec& query, const arma::vec& reference) const +{ + return kernel.Evaluate(metric.Evaluate(query, reference)); } } // namespace kde From fb0972e229a6840ec5463c5edfdc4d9a2e3ab232 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 27 Jul 2018 20:17:45 +0200 Subject: [PATCH 063/150] Store centroids in KDEStat --- src/mlpack/methods/kde/kde_stat.hpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/mlpack/methods/kde/kde_stat.hpp b/src/mlpack/methods/kde/kde_stat.hpp index 91dbed243ad..ed9825aff86 100644 --- a/src/mlpack/methods/kde/kde_stat.hpp +++ b/src/mlpack/methods/kde/kde_stat.hpp @@ -24,30 +24,28 @@ class KDEStat { public: //! Initialize the statistic. - KDEStat() : - lastKernelValue(0.0) { } + KDEStat() { } //! 
Initialization for a fully initialized node. template - KDEStat(TreeType& /* node */) : - lastKernelValue(0.0) { } + KDEStat(TreeType& /* node */) { } - //! Get the last kernel value calculation. - double LastKernelValue() const { return lastKernelValue; } + //! Get the centroid calculation. + const arma::vec& Centroid() const { return centroid; } - //! Modify the last kernel value calculation. - double& LastKernelValue() { return lastKernelValue; } + //! Modify the centroid calculation. + arma::vec& Centroid() { return centroid; } //! Serialize the statistic to/from an archive. template void serialize(Archive& ar, const unsigned int /* version */) { - ar & BOOST_SERIALIZATION_NVP(lastKernelValue); + ar & BOOST_SERIALIZATION_NVP(centroid); } private: - //! Last kernel value evaluation. - double lastKernelValue; + //! Node centroid. + arma::vec centroid; }; } // namespace kde From d39f121d57b95a9a4a17cb8894139bf4b1ad3eed Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 27 Jul 2018 20:18:43 +0200 Subject: [PATCH 064/150] Fix HasSelfChildren KDE and improve style --- src/mlpack/methods/kde/kde_rules_impl.hpp | 42 ++++++++++++++--------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 1e74a23e636..50f3564b067 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -98,36 +98,46 @@ Score(TreeType& queryNode, TreeType& referenceNode) if (bound <= (absError + relError * minKernel) / referenceSet.n_cols) { + // Auxiliary variables. double kernelValue; + arma::vec& referenceCenter = referenceNode.Stat().Centroid(); + arma::vec& queryCenter = queryNode.Stat().Centroid(); + // If calculating a center is not required. if (tree::TreeTraits::FirstPointIsCentroid) { - // If a child center is the same as a parent center. 
- if (tree::TreeTraits::HasSelfChildren) + kernelValue = EvaluateKernel(queryNode.Point(0), referenceNode.Point(0)); + } + // If a child center is the same as its parent center. + else if (tree::TreeTraits::HasSelfChildren) + { + // Reference node. + if (referenceNode.Parent() != NULL && + referenceNode.Point(0) == referenceNode.Parent()->Point(0)) + referenceCenter = referenceNode.Parent()->Stat().Centroid(); + else { - if ((referenceNode.Parent() != NULL) && - (referenceNode.Point(0) == referenceNode.Parent()->Point(0))) - kernelValue = referenceNode.Parent()->Stat().LastKernelValue(); - else - kernelValue = EvaluateKernel(queryNode.Point(0), - referenceNode.Point(0)); + referenceNode.Center(referenceCenter); } + // Query node. + if (queryNode.Parent() != NULL && + queryNode.Point(0) == queryNode.Parent()->Point(0)) + queryCenter = queryNode.Parent()->Stat().Centroid(); else - kernelValue = EvaluateKernel(queryNode.Point(0), - referenceNode.Point(0)); + { + queryNode.Center(queryCenter); + } + // Compute kernel value. + kernelValue = EvaluateKernel(queryCenter, referenceCenter); } + // Regular case. 
else { - arma::vec queryCenter, referenceCenter; referenceNode.Center(referenceCenter); queryNode.Center(queryCenter); - kernelValue = kernel.Evaluate(metric.Evaluate(referenceCenter, - queryCenter)); + kernelValue = EvaluateKernel(queryCenter, referenceCenter); } - // Update lastKernelValue - referenceNode.Stat().LastKernelValue() = kernelValue; - for (size_t i = 0; i < queryNode.NumDescendants(); ++i) { if (tree::TreeTraits::RearrangesDataset) From 071767d3542417405647a68e8d6c5fdd16f598ec Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 29 Jul 2018 02:41:39 +0200 Subject: [PATCH 065/150] Add openmp KDE optimization Just a simple for loop --- src/mlpack/methods/kde/kde_rules_impl.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 50f3564b067..ca4015c4d32 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -138,6 +138,7 @@ Score(TreeType& queryNode, TreeType& referenceNode) kernelValue = EvaluateKernel(queryCenter, referenceCenter); } + #pragma omp for for (size_t i = 0; i < queryNode.NumDescendants(); ++i) { if (tree::TreeTraits::RearrangesDataset) From 8d5729dace7701248169d3767696b8c06c37354e Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 17 Sep 2018 01:50:43 +0200 Subject: [PATCH 066/150] Improve KDE SerializationTest Use estimations obtained prior to serialization --- src/mlpack/tests/kde_test.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 5d7db576e89..1571e983d71 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -466,6 +466,11 @@ BOOST_AUTO_TEST_CASE(SerializationTest) kde(0.25, relError, absError, bf); kde.Train(reference); + // Get estimations to compare. 
+ arma::mat query = arma::randu(4, 100);; + arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + kde.Evaluate(query, estimations); + // Initialize serialized objects. KDE Date: Tue, 18 Sep 2018 00:19:52 +0200 Subject: [PATCH 067/150] Reuse KDE evaluate --- src/mlpack/methods/kde/kde_impl.hpp | 75 +---------------------------- 1 file changed, 1 insertion(+), 74 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index e1ef2bf7943..a68ab1310d9 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -251,83 +251,10 @@ template:: Evaluate(const MatType& querySet, arma::vec& estimations) { - // Check querySet has at least 1 element to evaluate. - if (querySet.n_cols == 0) - { - Log::Warn << "querySet is empty" << std::endl; - return; - } - // Check whether dimensions match. - if (querySet.n_rows != referenceTree->Dataset().n_rows) - throw std::invalid_argument("cannot train KDE model: querySet and " - "referenceSet dimensions don't match"); - - // Get estimations vector ready. - estimations.clear(); - estimations.resize(querySet.n_cols); - estimations.fill(arma::fill::zeros); - - // Evaluate std::vector oldFromNewQueries; Tree* queryTree = BuildTree(querySet, oldFromNewQueries); - typedef KDERules RuleType; - RuleType rules = RuleType(referenceTree->Dataset(), - queryTree->Dataset(), - estimations, - relError, - absError, - oldFromNewQueries, - *metric, - *kernel); - if (breadthFirst) - { - // DualTreeTraverser Breadth-First - typename Tree::template BreadthFirstDualTreeTraverser - traverser(rules); - traverser.Traverse(*queryTree, *referenceTree); - } - else - { - // DualTreeTraverser Depth-First - typename Tree::template DualTreeTraverser traverser(rules); - traverser.Traverse(*queryTree, *referenceTree); - } - estimations /= referenceTree->Dataset().n_cols; - - // Normalize if required. 
- if (kernel::KernelTraits::IsNormalized) - estimations /= kernel->Normalizer(querySet.n_rows); - + this->Evaluate(queryTree, oldFromNewQueries, estimations); delete queryTree; - - // Ideas for the future... - // SingleTreeTraverser - /* - typename Tree::template SingleTreeTraverser traverser(rules); - for(size_t i = 0; i < query.n_cols; ++i) - traverser.Traverse(i, *referenceTree); - */ - // Brute force - /* - arma::vec result = arma::vec(query.n_cols); - result = arma::zeros(query.n_cols); - - for(size_t i = 0; i < query.n_cols; ++i) - { - arma::vec density = arma::zeros(referenceSet.n_cols); - - for(size_t j = 0; j < this->referenceSet.n_cols; ++j) - { - density(j) = this->kernel.Evaluate(query.col(i), - this->referenceSet.col(j)); - } - result(i) = arma::trunc_log(arma::sum(density)) - - std::log(referenceSet.n_cols); - //this->kernel.Normalizer(query.n_rows); - //result(i) = (1/referenceSet.n_cols)*(accumulated); - } - return result; - */ } template Date: Tue, 18 Sep 2018 00:27:15 +0200 Subject: [PATCH 068/150] Fix style issue --- src/mlpack/methods/kde/kde_model_impl.hpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index 4cd74cc4946..e4880e5c101 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -94,20 +94,25 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) boost::apply_visitor(DeleteVisitor(), kdeModel); if (kernelType == GAUSSIAN_KERNEL && treeType == KD_TREE) + { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); - + (bandwidth, relError, absError, breadthFirst); + } else if (kernelType == GAUSSIAN_KERNEL && treeType == BALL_TREE) + { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); - + (bandwidth, relError, absError, breadthFirst); + } else if (kernelType == EPANECHNIKOV_KERNEL && treeType == KD_TREE) + { kdeModel = new 
KDEType - (bandwidth, relError, absError, breadthFirst); - + (bandwidth, relError, absError, breadthFirst); + } else if (kernelType == EPANECHNIKOV_KERNEL && treeType == BALL_TREE) + { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); + (bandwidth, relError, absError, breadthFirst); + } TrainVisitor train(std::move(referenceSet)); boost::apply_visitor(train, kdeModel); From 3635fb35b324537c43f8b254e86b24aa2181aaeb Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 18 Sep 2018 00:48:39 +0200 Subject: [PATCH 069/150] Delete unnecessary warning KDE docs are already clear about this --- src/mlpack/methods/kde/kde_impl.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index a68ab1310d9..469cde42a00 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -392,9 +392,6 @@ CheckErrorValues(const double relError, const double absError) const if (absError < 0) throw std::invalid_argument("Absolute error tolerance must be a value " "greater or equal to 0"); - if (relError > 0 && absError > 0) - Log::Warn << "Absolute and relative error tolerances will be sumed up" - << std::endl; } } // namespace kde From 4343d097b329ae9393299847b85033e6faf4dcb1 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 18 Sep 2018 01:21:22 +0200 Subject: [PATCH 070/150] Avoid copy reference matrix in KDE training --- src/mlpack/methods/kde/kde_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 469cde42a00..c2c956de0e5 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -218,7 +218,7 @@ Train(MatType referenceSet) throw std::invalid_argument("cannot train KDE model with an empty " "reference set"); this->ownsReferenceTree = true; - this->referenceTree = new Tree(referenceSet); + this->referenceTree 
= new Tree(std::move(referenceSet)); this->trained = true; } From 15e0127255c2110ef659f80f838bd198c8f499f7 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 18 Sep 2018 21:34:32 +0200 Subject: [PATCH 071/150] Improve KDE api to fit #1021 Avoid using 2 overloads in KDE evaluate --- src/mlpack/methods/kde/kde.hpp | 2 +- src/mlpack/methods/kde/kde_impl.hpp | 4 ++-- src/mlpack/methods/kde/kde_model.hpp | 3 +-- src/mlpack/methods/kde/kde_model_impl.hpp | 9 ++++----- src/mlpack/tests/kde_test.cpp | 2 +- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 6bb9ce0ebbb..957fbed8a68 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -144,7 +144,7 @@ class KDE * @param querySet Set of query points to get the density of. * @param estimations Object which will hold the density of each query point. */ - void Evaluate(const MatType& querySet, arma::vec& estimations); + void Evaluate(MatType querySet, arma::vec& estimations); /** * Estimate density of each point in the query set given the data of an diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index c2c956de0e5..36544e3641c 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -249,10 +249,10 @@ template class TreeType> void KDE:: -Evaluate(const MatType& querySet, arma::vec& estimations) +Evaluate(MatType querySet, arma::vec& estimations) { std::vector oldFromNewQueries; - Tree* queryTree = BuildTree(querySet, oldFromNewQueries); + Tree* queryTree = BuildTree(std::move(querySet), oldFromNewQueries); this->Evaluate(queryTree, oldFromNewQueries, estimations); delete queryTree; } diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 9052b942977..d2c8d8f1763 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -57,8 +57,7 @@ class DualTreeVisitor 
: public boost::static_visitor // TODO Implement specific cases where a leaf size can be selected. - DualTreeVisitor(const arma::mat& querySet, - arma::vec& estimations); + DualTreeVisitor(arma::mat&& querySet, arma::vec& estimations); }; class TrainVisitor : public boost::static_visitor diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index e4880e5c101..7c511eb999a 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -121,7 +121,7 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) // Perform evaluation inline void KDEModel::Evaluate(arma::mat&& querySet, arma::vec& estimations) { - DualTreeVisitor eval(querySet, estimations); + DualTreeVisitor eval(std::move(querySet), estimations); boost::apply_visitor(eval, kdeModel); } @@ -132,9 +132,8 @@ inline void KDEModel::CleanMemory() } // Parameters for KDE evaluation -DualTreeVisitor::DualTreeVisitor(const arma::mat& querySet, - arma::vec& estimations): - querySet(querySet), +DualTreeVisitor::DualTreeVisitor(arma::mat&& querySet, arma::vec& estimations): + querySet(std::move(querySet)), estimations(estimations) {} @@ -146,7 +145,7 @@ template* kde) const { if (kde) - kde->Evaluate(querySet, estimations); + kde->Evaluate(std::move(querySet), estimations); else throw std::runtime_error("no KDE model initialized"); } diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 1571e983d71..f35081f478b 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -157,7 +157,7 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) tree::KDTree> kde(metric, kernel, relError, 0.0, false); kde.Train(reference); - kde.Evaluate(query, treeEstimations); + kde.Evaluate(std::move(query), treeEstimations); // Check whether results are equal. 
for (size_t i = 0; i < query.n_cols; ++i) From e97d1bf8c313736948425fa56b3d8de24cc4b4ec Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Wed, 19 Sep 2018 01:42:45 +0200 Subject: [PATCH 072/150] Fix memory leak in KDE main Add requirements for input values --- src/mlpack/methods/kde/kde_main.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 9dc0a526d2e..fd66a4da70e 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -106,6 +106,16 @@ static void mlpackMain() ReportIgnoredParam({{ "input_model", true }}, "abs_error"); ReportIgnoredParam({{ "input_model", true }}, "breadth_first"); + // Requirements for parameter values. + RequireParamInSet("kernel", { "gaussian", "epanechnikov" }, true, + "unknown kernel type"); + RequireParamInSet("tree", { "kd-tree", "ball-tree" }, true, + "unknown tree type"); + RequireParamValue("rel_error", [](double x){return x >= 0 && x <= 1;}, + true, "relative error must be between 0 and 1"); + RequireParamValue("abs_error", [](double x){return x >= 0;}, + true, "absolute error must be equal or greater than 0"); + KDEModel* kde; if (CLI::HasParam("reference")) @@ -124,18 +134,12 @@ static void mlpackMain() kde->KernelType() = KDEModel::GAUSSIAN_KERNEL; else if (kernelStr == "epanechnikov") kde->KernelType() = KDEModel::EPANECHNIKOV_KERNEL; - else - Log::Fatal << "Input kernel is not valid or not supported yet." - << std::endl; // Set TreeType if (treeStr == "kd-tree") kde->TreeType() = KDEModel::KD_TREE; else if (treeStr == "ball-tree") kde->TreeType() = KDEModel::BALL_TREE; - else - Log::Fatal << "Input tree is not valid or not supported yet." 
- << std::endl; // Build model kde->BuildModel(std::move(reference)); From aa2e85bff66ab8e80fabf8347bf79a360b039b03 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 20 Sep 2018 01:33:28 +0200 Subject: [PATCH 073/150] Improve KDE model docs --- src/mlpack/methods/kde/kde_model.hpp | 73 ++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index d2c8d8f1763..210e13331ce 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -34,11 +34,16 @@ template class TreeType> using KDEType = KDE; +/** + * DualTreeVisitor computes a Kernel Density Estimation on the given KDEType. + */ class DualTreeVisitor : public boost::static_visitor { private: + //! The query set for the KDE. const arma::mat& querySet; + //! Vector to store the KDE results. arma::vec& estimations; public: @@ -49,6 +54,7 @@ class DualTreeVisitor : public boost::static_visitor typename TreeMatType> class TreeType> using KDETypeT = KDEType; + //! Default DualTreeVisitor on some KDEType. template // TODO Implement specific cases where a leaf size can be selected. + //! DualTreeVisitor constructor. Takes ownership of the given querySet. DualTreeVisitor(arma::mat&& querySet, arma::vec& estimations); }; +/** + * TrainVisitor trains a given KDEType using a reference set. + */ class TrainVisitor : public boost::static_visitor { private: + //! The reference set used for training. arma::mat&& referenceSet; public: @@ -73,6 +84,7 @@ class TrainVisitor : public boost::static_visitor typename TreeMatType> class TreeType> using KDETypeT = KDEType; + //! Default TrainVisitor on some KDEType. template // TODO Implement specific cases where a leaf size can be selected. + //! TrainVisitor constructor. Takes ownership of the given referenceSet. TrainVisitor(arma::mat&& referenceSet); }; class DeleteVisitor : public boost::static_visitor { public: + //! Delete KDEType instance. 
template void operator()(KDEType* kde) const; }; @@ -123,12 +137,31 @@ class KDEModel TreeTypes treeType; + /** + * kdeModel holds an instance of each possible combination of KernelType and + * TreeType. It is initialized using BuildModel. + */ boost::variant*, KDEType*, KDEType*, KDEType*> kdeModel; public: + /** + * Initialize KDEModel. + * + * @param bandwidth Bandwidth to use for the kernel. + * @param relError Maximum relative error tolerance for each point in the + * model. For example, 0.05 means that each value must be + * within 5% of the true KDE value. + * @param absError Maximum absolute error tolerance for each point in the + * model. For example, 0.1 means that for each point the + * value can have a maximum error of 0.1 units. + * @param breadthFirst Whether the tree should be traversed using a + * breadth-first approach. + * @param kernelType Type of kernel to use. + * @param treeType Type of tree to use. + */ KDEModel(const double bandwidth = 1.0, const double relError = 1e-6, const double absError = 0, @@ -136,27 +169,44 @@ class KDEModel const KernelTypes kernelType = KernelTypes::GAUSSIAN_KERNEL, const TreeTypes treeType = TreeTypes::KD_TREE); + //! Copy constructor of the given model. KDEModel(const KDEModel& other); + //! Move constructor of the given model. Takes ownership of the model. KDEModel(KDEModel&& other); + /** + * Copy the given model. + * + * Use std::move if the object to copy is no longer needed. + * + * @param other KDEModel to copy. + */ KDEModel& operator=(KDEModel other); + //! Destroy the KDEModel object. ~KDEModel(); + //! Serialize the KDE model. template void serialize(Archive& ar, const unsigned int /* version */); + //! Get the bandwidth of the kernel. double Bandwidth() const { return bandwidth; } + //! Modify the bandwidth of the kernel. double& Bandwidth() { return bandwidth; } + //! Get the relative error tolerance. double RelativeError() const { return relError; } + //! Modify the relative error tolerance. 
double& RelativeError() { return relError; } + //! Get the absolute error tolerance. double AbsoluteError() const { return absError; } + //! Modify the absolute error tolerance. double& AbsoluteError() { return absError; } //! Get whether breadth-first traversal is being used. @@ -165,19 +215,42 @@ class KDEModel //! Modify whether breadth-first traversal is being used. bool& BreadthFirst() { return breadthFirst; } + //! Get the tree type of the model. TreeTypes TreeType() const { return treeType; } + //! Modify the tree type of the model. TreeTypes& TreeType() { return treeType; } + //! Get the kernel type of the model. KernelTypes KernelType() const { return kernelType; } + //! Modify the kernel type of the model. KernelTypes& KernelType() { return kernelType; } + /** + * Build the KDE model with the given parameters and then trains it with the + * given reference data. + * Takes possession of the reference set to avoid a copy, so the reference set + * will not be usable after this. + * + * @param referenceSet Set of reference points. + */ void BuildModel(arma::mat&& referenceSet); + /** + * Perform kernel density estimation on the given query set. + * Takes possession of the query set to avoid a copy, so the query set + * will not be usable after this. + * + * @pre The model has to be previously created with BuildModel. + * @param querySet Set of query points. + * @param estimations Vector where the results will be stored in the same + * order as the query points. + */ void Evaluate(arma::mat&& querySet, arma::vec& estimations); private: + //! Clean memory. 
void CleanMemory(); }; From f0af9b4e94a9938aefc4714d29d97d6aa31fa58b Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 20 Sep 2018 17:14:56 +0200 Subject: [PATCH 074/150] Improve KDE main docs --- src/mlpack/methods/kde/kde_main.cpp | 49 ++++++++++++++++++----------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index fd66a4da70e..a3f9f2fc7d6 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -1,6 +1,6 @@ /** * @file kde_main.cpp - * @author Roberto Hueso (robertohueso96@gmail.com) + * @author Roberto Hueso * * Executable for running Kernel Density Estimation. * @@ -28,25 +28,36 @@ PROGRAM_INFO("Kernel Density Estimation", "This program performs a Kernel Density Estimation. KDE is a " "non-parametric way of estimating probability density function. " "For each query point the program will estimate its probability density " - "by applying a kernel function to each reference point. Computational " - " complexity is O(n^2) but it is optimized by making use of dual-trees. " + "by applying a kernel function to each reference point. The computational " + "complexity of this is O(N^2) where there are N query points and N " + "reference points, but this implementation will typically see better " + "performance as it uses an approximate dual-tree algorithm for " + "acceleration." "\n\n" - "For example, the following will run KDE using the points in " - "reference_set.csv and query_set.csv. It will apply an Epanechnikov kernel " - "with a 0.2 bandwidth to each reference point and use a KD-Tree for the " - "dual-tree optimization. The result will be stored in a densities.csv file " - "with a maximum error of 5%" + "Dual-tree optimization allows to avoid lots of barely relevant " + "calculations (as kernel function values decrease with distance), so it is " + "an approximate computation. 
You can specify the maximum relative error " + "tolerance for each query value with " + PRINT_PARAM_STRING("rel_error") + + " as well as the maximum absolute error tolerance with the parameter " + + PRINT_PARAM_STRING("abs_error") + ". This program runs using an Euclidean " + "metric. Kernel function can be selected using the " + + PRINT_PARAM_STRING("kernel") + " option. You can also choose what which " + "type of tree to use for the dual-tree algorithm with " + + PRINT_PARAM_STRING("tree") + "\n\n" - "$ kde --reference reference_set.csv --query query_set.csv --bandwidth 0.2 " - "--kernel epanechnikov --tree kd-tree --rel_error 0.05 --output " - "densities.csv" + "For example, the following will run KDE using the data in " + + PRINT_DATASET("ref_data") + " for training and the data in " + + PRINT_DATASET("qu_data") + " as query data. It will apply an Epanechnikov " + "kernel with a 0.2 bandwidth to each reference point and use a KD-Tree for " + "the dual-tree optimization. The returned results will be within 5% of the " + "real KDE value for each query point." + "\n\n" + + PRINT_CALL("kde", "reference", "ref_data", "query", "qu_data", "bandwidth", + 0.2, "kernel", "epanechnikov", "tree", "kd-tree", "rel_error", + 0.05, "output", "out_data") + "\n\n" - "Dual-tree optimization allows to avoid lots of barely relevant " - "calculations (as kernel function values decrease with distance) if you " - "can afford a little error (you can define how much is the maximum you are " - "willing to afford) over the final result. This program runs using an " - "Euclidean metric. If no output file is specified then it will output the " - "result to standard output."); + "the output density estimations will be stored in " + + PRINT_DATASET("out_data") + "."); // Required options. PARAM_MATRIX_IN("reference", "Input dataset to KDE on.", "r"); @@ -56,11 +67,11 @@ PARAM_DOUBLE_IN("bandwidth", "Bandwidth of the kernel", "b", 1.0); // Load or save models. 
PARAM_MODEL_IN(KDEModel, "input_model", - "File containing pre-trained KDE model.", + "Contains pre-trained KDE model.", "m"); PARAM_MODEL_OUT(KDEModel, "output_model", - "If specified, the KDE model will be saved to the given file.", + "If specified, the KDE model will be saved here.", "M"); // Configuration options From 018ff1301800655d493b14c57d15c357a1890722 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 20 Sep 2018 17:15:22 +0200 Subject: [PATCH 075/150] Delete KDE main stdout option --- src/mlpack/methods/kde/kde_main.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index a3f9f2fc7d6..9c3813e2256 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -97,7 +97,6 @@ PARAM_MATRIX_OUT("output", "Matrix to store output estimations.", static void mlpackMain() { - const size_t output_precision = 40; // Get some parameters. arma::mat query = std::move(CLI::GetParam("query")); const double bandwidth = CLI::GetParam("bandwidth"); @@ -163,16 +162,9 @@ static void mlpackMain() kde->Evaluate(std::move(query), estimations); - // Output estimations to file if defined. + // Output results if needed. if (CLI::HasParam("output")) - { CLI::GetParam("output") = std::move(estimations); - } - else - { - std::cout.precision(output_precision); - estimations.raw_print(std::cout); - } // Save model. 
if (CLI::HasParam("output_model")) From 8148562f784d7dbc510b3e91a91211e837bad6a1 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 21 Sep 2018 12:49:51 +0200 Subject: [PATCH 076/150] Delete normalization from KDE module --- src/mlpack/methods/kde/kde.hpp | 2 ++ src/mlpack/methods/kde/kde_impl.hpp | 4 ---- src/mlpack/tests/kde_test.cpp | 10 ++++------ 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 957fbed8a68..27db271ad57 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -134,6 +134,7 @@ class KDE /** * Estimate density of each point in the query set given the data of the * reference set. The result is stored in an estimations vector. + * Estimations might not be normalized. * * - Dimension of each point in the query set must match the dimension of each * point in the reference set. @@ -149,6 +150,7 @@ class KDE /** * Estimate density of each point in the query set given the data of an * already created query tree. The result is stored in an estimations vector. + * Estimations might not be normalized. * * - Dimension of each point in the queryTree dataset must match the dimension * of each point in the reference set. diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 36544e3641c..8b4e6dfa33e 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -308,10 +308,6 @@ Evaluate(Tree* queryTree, traverser.Traverse(*queryTree, *referenceTree); } estimations /= referenceTree->Dataset().n_cols; - - // Normalize if required. - if (kernel::KernelTraits::IsNormalized) - estimations /= kernel->Normalizer(queryTree->Dataset().n_rows); } template::IsNormalized) - densities /= kernel.Normalizer(query.n_rows); } /** @@ -68,10 +66,10 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) arma::inplace_trans(query); arma::vec estimations; // Manually calculated results. 
- arma::vec estimations_result = {0.02069926590929581, - 0.00041646387634996807, - 0.019046040026090477, - 0.002556725645852806}; + arma::vec estimations_result = {0.08323668699564207296148765, + 0.00167470061366603324010116, + 0.07658867126520703394465527, + 0.01028120384800740999553525}; KDE Date: Fri, 21 Sep 2018 12:51:33 +0200 Subject: [PATCH 077/150] Fix minor error KDE Kernel() method didn't work properly --- src/mlpack/methods/kde/kde.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 27db271ad57..09866ecac37 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -167,10 +167,10 @@ class KDE arma::vec& estimations); //! Get the kernel. - const KernelType& Kernel() const { return kernel; } + const KernelType& Kernel() const { return *kernel; } //! Modify the kernel. - KernelType& Kernel() { return kernel; } + KernelType& Kernel() { return *kernel; } //! Get the reference tree. Tree* ReferenceTree() { return referenceTree; } From 02955e16f18e517266bd595a6e3432e264cad075 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 21 Sep 2018 13:06:26 +0200 Subject: [PATCH 078/150] Add KDEModel visitor specialization For Gaussian and Epanechnikov kernels --- src/mlpack/methods/kde/kde_model.hpp | 18 ++++++++++++- src/mlpack/methods/kde/kde_model_impl.hpp | 33 +++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 210e13331ce..3d88d75284f 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -40,6 +40,9 @@ using KDEType = KDE; class DualTreeVisitor : public boost::static_visitor { private: + //! Query set dimensionality. + const size_t dimension; + //! The query set for the KDE. 
const arma::mat& querySet; @@ -61,6 +64,18 @@ class DualTreeVisitor : public boost::static_visitor typename TreeMatType> class TreeType> void operator()(KDETypeT* kde) const; + //! DualTreeVisitor specialized on Gaussian Kernel KDEType. + template class TreeType> + void operator()(KDETypeT* kde) const; + + //! DualTreeVisitor specialized on Epanechnikov Kernel KDEType. + template class TreeType> + void operator()(KDETypeT* kde) const; + // TODO Implement specific cases where a leaf size can be selected. //! DualTreeVisitor constructor. Takes ownership of the given querySet. @@ -240,7 +255,8 @@ class KDEModel /** * Perform kernel density estimation on the given query set. * Takes possession of the query set to avoid a copy, so the query set - * will not be usable after this. + * will not be usable after this. If possible, it returns normalized + * estimations. * * @pre The model has to be previously created with BuildModel. * @param querySet Set of query points. diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index 7c511eb999a..f5d9becedeb 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -133,6 +133,7 @@ inline void KDEModel::CleanMemory() // Parameters for KDE evaluation DualTreeVisitor::DualTreeVisitor(arma::mat&& querySet, arma::vec& estimations): + dimension(querySet.n_rows), querySet(std::move(querySet)), estimations(estimations) {} @@ -150,6 +151,38 @@ void DualTreeVisitor::operator()(KDETypeT* kde) const throw std::runtime_error("no KDE model initialized"); } +// Evaluation specialized for Gaussian Kernel +template class TreeType> +void DualTreeVisitor::operator()(KDETypeT* kde) const +{ + if (kde) + { + kde->Evaluate(std::move(querySet), estimations); + estimations /= kde->Kernel().Normalizer(dimension); + } + else + throw std::runtime_error("no KDE model initialized"); +} + +// Evaluation specialized for EpanechnikovKernel Kernel +template class 
TreeType> +void DualTreeVisitor::operator()(KDETypeT* kde) const +{ + if (kde) + { + kde->Evaluate(std::move(querySet), estimations); + estimations /= kde->Kernel().Normalizer(dimension); + } + else + throw std::runtime_error("no KDE model initialized"); +} + // Parameters for Train. TrainVisitor::TrainVisitor(arma::mat&& referenceSet) : referenceSet(std::move(referenceSet)) From 7dbaf03259eb169edb08ea9e299eef19b0ebe3d1 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 21 Sep 2018 13:07:21 +0200 Subject: [PATCH 079/150] Add KDE Laplacian Kernel support --- src/mlpack/methods/kde/kde_main.cpp | 8 +++++--- src/mlpack/methods/kde/kde_model.hpp | 7 +++++-- src/mlpack/methods/kde/kde_model_impl.hpp | 10 ++++++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 9c3813e2256..459a88c5e38 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -76,7 +76,7 @@ PARAM_MODEL_OUT(KDEModel, // Configuration options PARAM_STRING_IN("kernel", "Kernel to use for the estimation" - "('gaussian', 'epanechnikov').", "k", "gaussian"); + "('gaussian', 'epanechnikov', 'laplacian').", "k", "gaussian"); PARAM_STRING_IN("tree", "Tree to use for the estimation" "('kd-tree', 'ball-tree').", "t", "kd-tree"); PARAM_DOUBLE_IN("rel_error", @@ -117,8 +117,8 @@ static void mlpackMain() ReportIgnoredParam({{ "input_model", true }}, "breadth_first"); // Requirements for parameter values. 
- RequireParamInSet("kernel", { "gaussian", "epanechnikov" }, true, - "unknown kernel type"); + RequireParamInSet("kernel", { "gaussian", "epanechnikov", + "laplacian" }, true, "unknown kernel type"); RequireParamInSet("tree", { "kd-tree", "ball-tree" }, true, "unknown tree type"); RequireParamValue("rel_error", [](double x){return x >= 0 && x <= 1;}, @@ -144,6 +144,8 @@ static void mlpackMain() kde->KernelType() = KDEModel::GAUSSIAN_KERNEL; else if (kernelStr == "epanechnikov") kde->KernelType() = KDEModel::EPANECHNIKOV_KERNEL; + else if (kernelStr == "laplacian") + kde->KernelType() = KDEModel::LAPLACIAN_KERNEL; // Set TreeType if (treeStr == "kd-tree") diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 3d88d75284f..5caf7cc9a78 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -132,7 +132,8 @@ class KDEModel enum KernelTypes { GAUSSIAN_KERNEL, - EPANECHNIKOV_KERNEL + EPANECHNIKOV_KERNEL, + LAPLACIAN_KERNEL }; private: @@ -159,7 +160,9 @@ class KDEModel boost::variant*, KDEType*, KDEType*, - KDEType*> kdeModel; + KDEType*, + KDEType*, + KDEType*> kdeModel; public: /** diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index f5d9becedeb..b1e01a3fa13 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -113,6 +113,16 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) kdeModel = new KDEType (bandwidth, relError, absError, breadthFirst); } + else if (kernelType == LAPLACIAN_KERNEL && treeType == KD_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError, breadthFirst); + } + else if (kernelType == LAPLACIAN_KERNEL && treeType == BALL_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError, breadthFirst); + } TrainVisitor train(std::move(referenceSet)); boost::apply_visitor(train, kdeModel); From e15ef141398fb43bb05f1e02d736cf2e324db799 Mon Sep 17 
00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 21 Sep 2018 14:50:01 +0200 Subject: [PATCH 080/150] Add KDE Spherical Kernel support --- src/mlpack/methods/kde/kde_main.cpp | 6 ++++-- src/mlpack/methods/kde/kde_model.hpp | 13 ++++++++++-- src/mlpack/methods/kde/kde_model_impl.hpp | 26 +++++++++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 459a88c5e38..32531c673e4 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -76,7 +76,7 @@ PARAM_MODEL_OUT(KDEModel, // Configuration options PARAM_STRING_IN("kernel", "Kernel to use for the estimation" - "('gaussian', 'epanechnikov', 'laplacian').", "k", "gaussian"); + "('gaussian', 'epanechnikov', 'laplacian', 'spherical').", "k", "gaussian"); PARAM_STRING_IN("tree", "Tree to use for the estimation" "('kd-tree', 'ball-tree').", "t", "kd-tree"); PARAM_DOUBLE_IN("rel_error", @@ -118,7 +118,7 @@ static void mlpackMain() // Requirements for parameter values. 
RequireParamInSet("kernel", { "gaussian", "epanechnikov", - "laplacian" }, true, "unknown kernel type"); + "laplacian", "spherical" }, true, "unknown kernel type"); RequireParamInSet("tree", { "kd-tree", "ball-tree" }, true, "unknown tree type"); RequireParamValue("rel_error", [](double x){return x >= 0 && x <= 1;}, @@ -146,6 +146,8 @@ static void mlpackMain() kde->KernelType() = KDEModel::EPANECHNIKOV_KERNEL; else if (kernelStr == "laplacian") kde->KernelType() = KDEModel::LAPLACIAN_KERNEL; + else if (kernelStr == "spherical") + kde->KernelType() = KDEModel::SPHERICAL_KERNEL; // Set TreeType if (treeStr == "kd-tree") diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 5caf7cc9a78..f2a7740368e 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -76,6 +76,12 @@ class DualTreeVisitor : public boost::static_visitor typename TreeMatType> class TreeType> void operator()(KDETypeT* kde) const; + //! DualTreeVisitor specialized on Spherical Kernel KDEType. + template class TreeType> + void operator()(KDETypeT* kde) const; + // TODO Implement specific cases where a leaf size can be selected. //! DualTreeVisitor constructor. Takes ownership of the given querySet. 
@@ -133,7 +139,8 @@ class KDEModel { GAUSSIAN_KERNEL, EPANECHNIKOV_KERNEL, - LAPLACIAN_KERNEL + LAPLACIAN_KERNEL, + SPHERICAL_KERNEL }; private: @@ -162,7 +169,9 @@ class KDEModel KDEType*, KDEType*, KDEType*, - KDEType*> kdeModel; + KDEType*, + KDEType*, + KDEType*> kdeModel; public: /** diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index b1e01a3fa13..266518f29ed 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -123,6 +123,16 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) kdeModel = new KDEType (bandwidth, relError, absError, breadthFirst); } + else if (kernelType == SPHERICAL_KERNEL && treeType == KD_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError, breadthFirst); + } + else if (kernelType == SPHERICAL_KERNEL && treeType == BALL_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError, breadthFirst); + } TrainVisitor train(std::move(referenceSet)); boost::apply_visitor(train, kdeModel); @@ -193,6 +203,22 @@ void DualTreeVisitor::operator()(KDETypeT class TreeType> +void DualTreeVisitor::operator()(KDETypeT* kde) const +{ + if (kde) + { + kde->Evaluate(std::move(querySet), estimations); + estimations /= kde->Kernel().Normalizer(dimension); + } + else + throw std::runtime_error("no KDE model initialized"); +} + // Parameters for Train. 
TrainVisitor::TrainVisitor(arma::mat&& referenceSet) : referenceSet(std::move(referenceSet)) From e7b7b57e3db03600e605a73d5096ba03f4f4e9e0 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 21 Sep 2018 15:05:21 +0200 Subject: [PATCH 081/150] Add KDE Triangular Kernel support --- src/mlpack/methods/kde/kde_main.cpp | 7 +++++-- src/mlpack/methods/kde/kde_model.hpp | 7 +++++-- src/mlpack/methods/kde/kde_model_impl.hpp | 10 ++++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 32531c673e4..dae78c39ea7 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -76,7 +76,8 @@ PARAM_MODEL_OUT(KDEModel, // Configuration options PARAM_STRING_IN("kernel", "Kernel to use for the estimation" - "('gaussian', 'epanechnikov', 'laplacian', 'spherical').", "k", "gaussian"); + "('gaussian', 'epanechnikov', 'laplacian', 'spherical', 'triangular').", + "k", "gaussian"); PARAM_STRING_IN("tree", "Tree to use for the estimation" "('kd-tree', 'ball-tree').", "t", "kd-tree"); PARAM_DOUBLE_IN("rel_error", @@ -118,7 +119,7 @@ static void mlpackMain() // Requirements for parameter values. 
RequireParamInSet("kernel", { "gaussian", "epanechnikov", - "laplacian", "spherical" }, true, "unknown kernel type"); + "laplacian", "spherical", "triangular" }, true, "unknown kernel type"); RequireParamInSet("tree", { "kd-tree", "ball-tree" }, true, "unknown tree type"); RequireParamValue("rel_error", [](double x){return x >= 0 && x <= 1;}, @@ -148,6 +149,8 @@ static void mlpackMain() kde->KernelType() = KDEModel::LAPLACIAN_KERNEL; else if (kernelStr == "spherical") kde->KernelType() = KDEModel::SPHERICAL_KERNEL; + else if (kernelStr == "triangular") + kde->KernelType() = KDEModel::TRIANGULAR_KERNEL; // Set TreeType if (treeStr == "kd-tree") diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index f2a7740368e..c1cca0e4d85 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -140,7 +140,8 @@ class KDEModel GAUSSIAN_KERNEL, EPANECHNIKOV_KERNEL, LAPLACIAN_KERNEL, - SPHERICAL_KERNEL + SPHERICAL_KERNEL, + TRIANGULAR_KERNEL }; private: @@ -171,7 +172,9 @@ class KDEModel KDEType*, KDEType*, KDEType*, - KDEType*> kdeModel; + KDEType*, + KDEType*, + KDEType*> kdeModel; public: /** diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index 266518f29ed..5ee1d6637b3 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -133,6 +133,16 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) kdeModel = new KDEType (bandwidth, relError, absError, breadthFirst); } + else if (kernelType == TRIANGULAR_KERNEL && treeType == KD_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError, breadthFirst); + } + else if (kernelType == TRIANGULAR_KERNEL && treeType == BALL_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError, breadthFirst); + } TrainVisitor train(std::move(referenceSet)); boost::apply_visitor(train, kdeModel); From 1d1b34a02adb74f48025bd065298409eae4d6234 Mon Sep 17 
00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 23 Sep 2018 16:33:31 +0200 Subject: [PATCH 082/150] Add KDE same set support --- src/mlpack/methods/kde/kde.hpp | 22 +++++- src/mlpack/methods/kde/kde_impl.hpp | 82 +++++++++++++++++++++-- src/mlpack/methods/kde/kde_rules.hpp | 6 +- src/mlpack/methods/kde/kde_rules_impl.hpp | 9 ++- src/mlpack/tests/kde_test.cpp | 34 +++++----- 5 files changed, 129 insertions(+), 24 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 09866ecac37..34fb1df1e72 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -127,9 +127,14 @@ class KDE /** * Trains the KDE model. Sets the reference tree to an already created tree. * + * - If TreeTraits::RearrangesDataset is False then it is possible + * to use an empty oldFromNewReferences vector. + * * @param referenceTree New already created reference tree. + * @param oldFromNewReferences Permutations of reference points obtained + * during tree generation. */ - void Train(Tree* referenceTree); + void Train(Tree* referenceTree, std::vector* oldFromNewReferences); /** * Estimate density of each point in the query set given the data of the @@ -166,6 +171,18 @@ class KDE const std::vector& oldFromNewQueries, arma::vec& estimations); + /** + * Estimate density of each point in the reference set given the data of the + * reference set. It does not compute the estimation of a point with itself. + * The result is stored in an estimations vector. Estimations might not be + * normalized. + * + * @pre The model has to be previously trained. + * @param estimations Object which will hold the density of each reference + * point. + */ + void Evaluate(arma::vec& estimations); + //! Get the kernel. const KernelType& Kernel() const { return *kernel; } @@ -213,6 +230,9 @@ class KDE //! Reference tree. Tree* referenceTree; + //! Permutations of reference points. + std::vector* oldFromNewReferences; + //! Relative error tolerance. 
double relError; diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 8b4e6dfa33e..7038af01c68 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -125,9 +125,15 @@ KDE::KDE(const KDE& other) : if (trained) { if (ownsReferenceTree) - referenceTree = new Tree(other.referenceTree); + { + oldFromNewReferences = new std::vector; + referenceTree = new Tree(other.referenceTree, *oldFromNewReferences); + } else + { + oldFromNewReferences = other.oldFromNewReferences; referenceTree = other.referenceTree; + } } } @@ -141,6 +147,7 @@ KDE::KDE(KDE&& other) : kernel(other.kernel), metric(other.metric), referenceTree(other.referenceTree), + oldFromNewReferences(other.oldFromNewReferences), relError(other.relError), absError(other.absError), breadthFirst(other.breadthFirst), @@ -152,6 +159,7 @@ KDE::KDE(KDE&& other) : other.kernel = new KernelType(); other.metric = new MetricType(); other.referenceTree = nullptr; + other.oldFromNewReferences = nullptr; other.ownsReferenceTree = false; other.trained = false; } @@ -171,12 +179,16 @@ KDE::operator=(KDE other) if (ownsMetric) delete metric; if (ownsReferenceTree) + { delete referenceTree; + delete oldFromNewReferences; + } // Move this->kernel = std::move(other.kernel); this->metric = std::move(other.metric); this->referenceTree = std::move(other.referenceTree); + this->oldFromNewReferences = std::move(other.oldFromNewReferences); this->relError = other.relError; this->absError = other.absError; this->breadthFirst = other.breadthFirst; @@ -201,7 +213,10 @@ KDE::~KDE() if (ownsMetric) delete metric; if (ownsReferenceTree) + { delete referenceTree; + delete oldFromNewReferences; + } } templateownsReferenceTree = true; - this->referenceTree = new Tree(std::move(referenceSet)); + this->oldFromNewReferences = new std::vector; + this->referenceTree = BuildTree(std::move(referenceSet), + *oldFromNewReferences); this->trained = true; } @@ -229,16 +251,20 @@ 
template class TreeType> void KDE:: -Train(Tree* referenceTree) +Train(Tree* referenceTree, std::vector* oldFromNewReferences) { // Check if referenceTree dataset is not an empty set. if (referenceTree->Dataset().n_cols == 0) throw std::invalid_argument("cannot train KDE model with an empty " "reference set"); - if (this->ownsReferenceTree == true) + if (ownsReferenceTree == true) + { delete this->referenceTree; + delete this->oldFromNewReferences; + } this->ownsReferenceTree = false; this->referenceTree = referenceTree; + this->oldFromNewReferences = oldFromNewReferences; this->trained = true; } @@ -293,7 +319,8 @@ Evaluate(Tree* queryTree, absError, oldFromNewQueries, *metric, - *kernel); + *kernel, + false); if (breadthFirst) { // DualTreeTraverser Breadth-First @@ -310,6 +337,47 @@ Evaluate(Tree* queryTree, estimations /= referenceTree->Dataset().n_cols; } +template class TreeType> +void KDE:: +Evaluate(arma::vec& estimations) +{ + // Get estimations vector ready. + estimations.clear(); + estimations.resize(referenceTree->Dataset().n_cols); + estimations.fill(arma::fill::zeros); + + // Evaluate + typedef KDERules RuleType; + RuleType rules = RuleType(referenceTree->Dataset(), + referenceTree->Dataset(), + estimations, + relError, + absError, + *oldFromNewReferences, + *metric, + *kernel, + true); + if (breadthFirst) + { + // DualTreeTraverser Breadth-First + typename Tree::template BreadthFirstDualTreeTraverser + traverser(rules); + traverser.Traverse(*referenceTree, *referenceTree); + } + else + { + // DualTreeTraverser Depth-First + typename Tree::template DualTreeTraverser traverser(rules); + traverser.Traverse(*referenceTree, *referenceTree); + } + estimations /= referenceTree->Dataset().n_cols; +} + template& oldFromNewQueries, MetricType& metric, - KernelType& kernel); + KernelType& kernel, + const bool sameSet); //! Base Case double BaseCase(const size_t queryIndex, const size_t referenceIndex); @@ -96,6 +97,9 @@ class KDERules //! 
Instantiated kernel KernelType& kernel; + //! Whether reference and query sets are the same. + const bool sameSet; + //! The last query index. size_t lastQueryIndex; diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index ca4015c4d32..424e52c435f 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -28,7 +28,8 @@ KDERules::KDERules( const double absError, const std::vector& oldFromNewQueries, MetricType& metric, - KernelType& kernel) : + KernelType& kernel, + const bool sameSet) : referenceSet(referenceSet), querySet(querySet), densities(densities), @@ -37,6 +38,7 @@ KDERules::KDERules( oldFromNewQueries(oldFromNewQueries), metric(metric), kernel(kernel), + sameSet(sameSet), lastQueryIndex(querySet.n_cols), lastReferenceIndex(referenceSet.n_cols), baseCases(0), @@ -52,6 +54,11 @@ double KDERules::BaseCase( const size_t queryIndex, const size_t referenceIndex) { + // If reference and query sets are the same we don't want to compute the + // estimation of a point with itself. + if (sameSet && queryIndex == referenceIndex) + return 0.0; + double distance = metric.Evaluate(querySet.col(queryIndex), referenceSet.col(referenceIndex)); if (tree::TreeTraits::RearrangesDataset) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index e2dc714d268..6aa22f0212b 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -112,16 +112,16 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) // Get dual-tree results. 
typedef KDTree Tree; - std::vector oldFromNewQueries; + std::vector oldFromNewQueries, oldFromNewReferences; Tree* queryTree = new Tree(query, oldFromNewQueries, 2); - Tree* referenceTree = new Tree(reference, 2); + Tree* referenceTree = new Tree(reference, oldFromNewReferences, 2); KDE kde(kernelBandwidth, 0.0, 1e-8, false); - kde.Train(referenceTree); - kde.Evaluate(queryTree, oldFromNewQueries, estimations); + kde.Train(referenceTree, &oldFromNewReferences); + kde.Evaluate(queryTree, std::move(oldFromNewQueries), estimations); for (size_t i = 0; i < query.n_cols; ++i) BOOST_REQUIRE_CLOSE(estimations[i], estimationsResult[i], 1e-8); delete queryTree; @@ -183,16 +183,16 @@ BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) // BallTree KDE typedef BallTree Tree; - std::vector oldFromNewQueries; + std::vector oldFromNewQueries, oldFromNewReferences; Tree* queryTree = new Tree(query, oldFromNewQueries, 2); - Tree* referenceTree = new Tree(reference, 2); + Tree* referenceTree = new Tree(reference, oldFromNewReferences, 2); KDE kde(kernelBandwidth, relError, 0.0, false); - kde.Train(referenceTree); - kde.Evaluate(queryTree, oldFromNewQueries, treeEstimations); + kde.Train(referenceTree, &oldFromNewReferences); + kde.Evaluate(queryTree, std::move(oldFromNewQueries), treeEstimations); // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) @@ -226,15 +226,15 @@ BOOST_AUTO_TEST_CASE(DuplicatedReferenceSampleKDETest) // Dual-tree KDE typedef KDTree Tree; - std::vector oldFromNewQueries; + std::vector oldFromNewQueries, oldFromNewReferences; Tree* queryTree = new Tree(query, oldFromNewQueries, 2); - Tree* referenceTree = new Tree(reference, 2); + Tree* referenceTree = new Tree(reference, oldFromNewReferences, 2); KDE kde(kernelBandwidth, relError, 0.0, false); - kde.Train(referenceTree); + kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, oldFromNewQueries, treeEstimations); // Check whether results are equal. 
@@ -261,15 +261,15 @@ BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) // Dual-tree KDE typedef KDTree Tree; - std::vector oldFromNewQueries; + std::vector oldFromNewQueries, oldFromNewReferences; Tree* queryTree = new Tree(query, oldFromNewQueries, 2); - Tree* referenceTree = new Tree(reference, 2); + Tree* referenceTree = new Tree(reference, oldFromNewReferences, 2); KDE kde(kernelBandwidth, relError, 0.0, false); - kde.Train(referenceTree); + kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, oldFromNewQueries, estimations); // Check whether results are equal. @@ -372,9 +372,11 @@ BOOST_AUTO_TEST_CASE(EmptyReferenceTest) BOOST_REQUIRE_THROW(kde.Train(reference), std::invalid_argument); // When training using a tree + std::vector oldFromNewReferences; typedef KDTree Tree; - Tree* referenceTree = new Tree(reference, 2); - BOOST_REQUIRE_THROW(kde.Train(referenceTree), std::invalid_argument); + Tree* referenceTree = new Tree(reference, oldFromNewReferences, 2); + BOOST_REQUIRE_THROW( + kde.Train(referenceTree, &oldFromNewReferences), std::invalid_argument); delete referenceTree; } From 89f11f89d48567ec952b2ffacf6bd4979e80a443 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 23 Sep 2018 20:33:10 +0200 Subject: [PATCH 083/150] Add monochromatic KDE main support --- src/mlpack/methods/kde/kde_main.cpp | 16 +++- src/mlpack/methods/kde/kde_model.hpp | 79 ++++++++++++++++--- src/mlpack/methods/kde/kde_model_impl.hpp | 96 ++++++++++++++++++++--- src/mlpack/methods/kde/kde_rules_impl.hpp | 2 +- 4 files changed, 169 insertions(+), 24 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index dae78c39ea7..808c2ac69f4 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -57,11 +57,14 @@ PROGRAM_INFO("Kernel Density Estimation", 0.05, "output", "out_data") + "\n\n" "the output density estimations will be stored in " + - PRINT_DATASET("out_data") + "."); + 
PRINT_DATASET("out_data") + "." + "\n" + "If no " + PRINT_PARAM_STRING("query") + " is provided, then KDE will be " + "computed on the " + PRINT_PARAM_STRING("reference") + " dataset."); // Required options. PARAM_MATRIX_IN("reference", "Input dataset to KDE on.", "r"); -PARAM_MATRIX_IN_REQ("query", "Query dataset to KDE on.", "q"); +PARAM_MATRIX_IN("query", "Query dataset to KDE on.", "q"); PARAM_DOUBLE_IN("bandwidth", "Bandwidth of the kernel", "b", 1.0); // Load or save models. @@ -99,7 +102,6 @@ PARAM_MATRIX_OUT("output", "Matrix to store output estimations.", static void mlpackMain() { // Get some parameters. - arma::mat query = std::move(CLI::GetParam("query")); const double bandwidth = CLI::GetParam("bandwidth"); const std::string kernelStr = CLI::GetParam("kernel"); const std::string treeStr = CLI::GetParam("tree"); @@ -167,7 +169,13 @@ static void mlpackMain() kde = CLI::GetParam("input_model"); } - kde->Evaluate(std::move(query), estimations); + if (CLI::HasParam("query")) + { + arma::mat query = std::move(CLI::GetParam("query")); + kde->Evaluate(std::move(query), estimations); + } + else + kde->Evaluate(estimations); // Output results if needed. if (CLI::HasParam("output")) diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index c1cca0e4d85..dac2b82fb73 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -35,12 +35,62 @@ template; /** - * DualTreeVisitor computes a Kernel Density Estimation on the given KDEType. + * DualMonoKDE computes a Kernel Density Estimation on the given KDEType. + * It performs a monochromatic KDE. */ -class DualTreeVisitor : public boost::static_visitor +class DualMonoKDE : public boost::static_visitor { private: - //! Query set dimensionality. + //! Vector to store the KDE results. + arma::vec& estimations; + + public: + //! Alias template necessary for visual C++ compiler. + template class TreeType> + using KDETypeT = KDEType; + + //! 
Default DualMonoKDE on some KDEType. + template class TreeType> + void operator()(KDETypeT* kde) const; + + //! DualMonoKDE specialized on Gaussian Kernel KDEType. + template class TreeType> + void operator()(KDETypeT* kde) const; + + //! DualMonoKDE specialized on Epanechnikov Kernel KDEType. + template class TreeType> + void operator()(KDETypeT* kde) const; + + //! DualMonoKDE specialized on Spherical Kernel KDEType. + template class TreeType> + void operator()(KDETypeT* kde) const; + + // TODO Implement specific cases where a leaf size can be selected. + + //! DualMonoKDE constructor. + DualMonoKDE(arma::vec& estimations); +}; + +/** + * DualBiKDE computes a Kernel Density Estimation on the given KDEType. + * It performs a bichromatic KDE. + */ +class DualBiKDE : public boost::static_visitor +{ + private: + //! Query set dimensionality. const size_t dimension; //! The query set for the KDE. @@ -57,26 +107,26 @@ class DualTreeVisitor : public boost::static_visitor typename TreeMatType> class TreeType> using KDETypeT = KDEType; - //! Default DualTreeVisitor on some KDEType. + //! Default DualBiKDE on some KDEType. template class TreeType> void operator()(KDETypeT* kde) const; - //! DualTreeVisitor specialized on Gaussian Kernel KDEType. + //! DualBiKDE specialized on Gaussian Kernel KDEType. template class TreeType> void operator()(KDETypeT* kde) const; - //! DualTreeVisitor specialized on Epanechnikov Kernel KDEType. + //! DualBiKDE specialized on Epanechnikov Kernel KDEType. template class TreeType> void operator()(KDETypeT* kde) const; - //! DualTreeVisitor specialized on Spherical Kernel KDEType. + //! DualBiKDE specialized on Spherical Kernel KDEType. template class TreeType> @@ -84,8 +134,8 @@ class DualTreeVisitor : public boost::static_visitor // TODO Implement specific cases where a leaf size can be selected. - //! DualTreeVisitor constructor. Takes ownership of the given querySet. - DualTreeVisitor(arma::mat&& querySet, arma::vec& estimations); + //! 
DualBiKDE constructor. Takes ownership of the given querySet. + DualBiKDE(arma::mat&& querySet, arma::vec& estimations); }; /** @@ -280,6 +330,17 @@ class KDEModel */ void Evaluate(arma::mat&& querySet, arma::vec& estimations); + /** + * Perform kernel density estimation on the reference set. + * If possible, it returns normalized estimations. + * + * @pre The model has to be previously created with BuildModel. + * @param estimations Vector where the results will be stored in the same + * order as the query points. + */ + void Evaluate(arma::vec& estimations); + + private: //! Clean memory. void CleanMemory(); diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index 5ee1d6637b3..534bc34e8ed 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -148,10 +148,17 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) boost::apply_visitor(train, kdeModel); } -// Perform evaluation +// Perform bichromatic evaluation inline void KDEModel::Evaluate(arma::mat&& querySet, arma::vec& estimations) { - DualTreeVisitor eval(std::move(querySet), estimations); + DualBiKDE eval(std::move(querySet), estimations); + boost::apply_visitor(eval, kdeModel); +} + +// Perform monochromatic evaluation +inline void KDEModel::Evaluate(arma::vec& estimations) +{ + DualMonoKDE eval(estimations); boost::apply_visitor(eval, kdeModel); } @@ -162,7 +169,76 @@ inline void KDEModel::CleanMemory() } // Parameters for KDE evaluation -DualTreeVisitor::DualTreeVisitor(arma::mat&& querySet, arma::vec& estimations): +DualMonoKDE::DualMonoKDE(arma::vec& estimations): + estimations(estimations) +{} + +// Default KDE evaluation +template class TreeType> +void DualMonoKDE::operator()(KDETypeT* kde) const +{ + if (kde) + kde->Evaluate(estimations); + else + throw std::runtime_error("no KDE model initialized"); +} + +// Evaluation specialized for Gaussian Kernel +template class TreeType> +void 
DualMonoKDE::operator()(KDETypeT* kde) const +{ + if (kde) + { + const size_t dimension = (kde->ReferenceTree())->Dataset().n_rows; + kde->Evaluate(estimations); + estimations /= kde->Kernel().Normalizer(dimension); + } + else + throw std::runtime_error("no KDE model initialized"); +} + +// Evaluation specialized for EpanechnikovKernel Kernel +template class TreeType> +void DualMonoKDE::operator()(KDETypeT* kde) const +{ + if (kde) + { + const size_t dimension = (kde->ReferenceTree())->Dataset().n_rows; + kde->Evaluate(estimations); + estimations /= kde->Kernel().Normalizer(dimension); + } + else + throw std::runtime_error("no KDE model initialized"); +} + +// Evaluation specialized for SphericalKernel Kernel +template class TreeType> +void DualMonoKDE::operator()(KDETypeT* kde) const +{ + if (kde) + { + const size_t dimension = (kde->ReferenceTree())->Dataset().n_rows; + kde->Evaluate(estimations); + estimations /= kde->Kernel().Normalizer(dimension); + } + else + throw std::runtime_error("no KDE model initialized"); +} + +// Parameters for KDE evaluation +DualBiKDE::DualBiKDE(arma::mat&& querySet, arma::vec& estimations): dimension(querySet.n_rows), querySet(std::move(querySet)), estimations(estimations) @@ -173,7 +249,7 @@ template class TreeType> -void DualTreeVisitor::operator()(KDETypeT* kde) const +void DualBiKDE::operator()(KDETypeT* kde) const { if (kde) kde->Evaluate(std::move(querySet), estimations); @@ -185,8 +261,8 @@ void DualTreeVisitor::operator()(KDETypeT* kde) const template class TreeType> -void DualTreeVisitor::operator()(KDETypeT* kde) const +void DualBiKDE::operator()(KDETypeT* kde) const { if (kde) { @@ -201,8 +277,8 @@ void DualTreeVisitor::operator()(KDETypeT class TreeType> -void DualTreeVisitor::operator()(KDETypeT* kde) const +void DualBiKDE::operator()(KDETypeT* kde) const { if (kde) { @@ -217,8 +293,8 @@ void DualTreeVisitor::operator()(KDETypeT class TreeType> -void DualTreeVisitor::operator()(KDETypeT* kde) const +void 
DualBiKDE::operator()(KDETypeT* kde) const { if (kde) { diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 424e52c435f..5832962c01a 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -56,7 +56,7 @@ double KDERules::BaseCase( { // If reference and query sets are the same we don't want to compute the // estimation of a point with itself. - if (sameSet && queryIndex == referenceIndex) + if (sameSet && (queryIndex == referenceIndex)) return 0.0; double distance = metric.Evaluate(querySet.col(queryIndex), From ab3e1f0e3b4d9f667235ed859e10a271e99bfd44 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 24 Sep 2018 02:20:30 +0200 Subject: [PATCH 084/150] Change default relative KDE error New relative error tolerance is 0.05 (5%) which is more reasonable --- src/mlpack/methods/kde/kde.hpp | 8 +++---- src/mlpack/methods/kde/kde_impl.hpp | 2 +- src/mlpack/methods/kde/kde_main.cpp | 2 +- src/mlpack/methods/kde/kde_model.hpp | 2 +- src/mlpack/methods/kde/kde_model_impl.hpp | 2 +- src/mlpack/tests/kde_test.cpp | 26 +++++++++++------------ 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 34fb1df1e72..12e27c7dca5 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -48,8 +48,8 @@ class KDE /** * Initialize KDE object with the default Kernel and Metric parameters. - * Relative error tolernce is initialized to 1e-6, absolute error tolerance - * is 0.0 and uses a depth-first approach. + * Relative error tolernce is initialized to 0.05 (5%), absolute error + * tolerance is 0.0 and uses a depth-first approach. */ KDE(); @@ -65,7 +65,7 @@ class KDE * breadth-first approach. 
*/ KDE(const double bandwidth, - const double relError = 1e-6, + const double relError = 0.05, const double absError = 0, const bool breadthFirst = false); @@ -81,7 +81,7 @@ class KDE */ KDE(MetricType& metric, KernelType& kernel, - const double relError = 1e-6, + const double relError = 0.05, const double absError = 0, const bool breadthFirst = false); diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 7038af01c68..6c22e4fc7cc 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -48,7 +48,7 @@ template::KDE() : kernel(new KernelType()), metric(new MetricType()), - relError(1e-6), + relError(0.05), absError(0.0), breadthFirst(false), ownsKernel(true), diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 808c2ac69f4..0e81e69433e 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -86,7 +86,7 @@ PARAM_STRING_IN("tree", "Tree to use for the estimation" PARAM_DOUBLE_IN("rel_error", "Relative error tolerance for the result", "e", - 1e-8); + 0.05); PARAM_DOUBLE_IN("abs_error", "Relative error tolerance for the result", "E", diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index dac2b82fb73..9653dcdb288 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -243,7 +243,7 @@ class KDEModel * @param treeType Type of tree to use. 
*/ KDEModel(const double bandwidth = 1.0, - const double relError = 1e-6, + const double relError = 0.05, const double absError = 0, const bool breadthFirst = false, const KernelTypes kernelType = KernelTypes::GAUSSIAN_KERNEL, diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index 534bc34e8ed..db561799794 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -61,7 +61,7 @@ inline KDEModel::KDEModel(KDEModel&& other) : { // Reset other model other.bandwidth = 1.0; - other.relError = 1e-6; + other.relError = 0.05; other.absError = 0; other.breadthFirst = false; other.kernelType = KernelTypes::GAUSSIAN_KERNEL; diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 6aa22f0212b..9f8c1139e10 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -74,11 +74,11 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) arma::mat, GaussianKernel, KDTree> - kde(0.8, 0.0, 1e-8, false); + kde(0.8, 0.0, 0.01, false); kde.Train(reference); kde.Evaluate(query, estimations); for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(estimations[i], estimations_result[i], 1e-8); + BOOST_REQUIRE_CLOSE(estimations[i], estimations_result[i], 0.01); } /** @@ -119,11 +119,11 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) arma::mat, GaussianKernel, KDTree> - kde(kernelBandwidth, 0.0, 1e-8, false); + kde(kernelBandwidth, 0.0, 1e-6, false); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, std::move(oldFromNewQueries), estimations); for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(estimations[i], estimationsResult[i], 1e-8); + BOOST_REQUIRE_CLOSE(estimations[i], estimationsResult[i], 0.01); delete queryTree; delete referenceTree; } @@ -138,7 +138,7 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); arma::vec treeEstimations = arma::vec(query.n_cols, 
arma::fill::zeros); const double kernelBandwidth = 0.3; - const double relError = 1e-8; + const double relError = 0.01; // Brute force KDE GaussianKernel kernel(kernelBandwidth); @@ -172,7 +172,7 @@ BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); const double kernelBandwidth = 0.4; - const double relError = 1e-5; + const double relError = 0.05; // Brute force KDE GaussianKernel kernel(kernelBandwidth); @@ -212,7 +212,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedReferenceSampleKDETest) arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); const double kernelBandwidth = 0.4; - const double relError = 1e-5; + const double relError = 0.05; // Duplicate value reference.col(2) = reference.col(3); @@ -254,7 +254,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) arma::mat query = arma::randu(2, 10); arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); const double kernelBandwidth = 0.4; - const double relError = 1e-5; + const double relError = 0.05; // Duplicate value query.col(2) = query.col(3); @@ -290,7 +290,7 @@ BOOST_AUTO_TEST_CASE(BreadthFirstKDETest) arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); const double kernelBandwidth = 0.8; - const double relError = 1e-8; + const double relError = 0.01; // Brute force KDE GaussianKernel kernel(kernelBandwidth); @@ -324,7 +324,7 @@ BOOST_AUTO_TEST_CASE(OneDimensionalTest) arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); const double kernelBandwidth = 0.7; - const double relError = 1e-8; + const double relError = 0.01; // Brute force KDE GaussianKernel kernel(kernelBandwidth); @@ -357,7 +357,7 @@ 
BOOST_AUTO_TEST_CASE(EmptyReferenceTest) arma::mat query = arma::randu(1, 10); arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); const double kernelBandwidth = 0.7; - const double relError = 1e-8; + const double relError = 0.01; // KDE metric::EuclideanDistance metric; @@ -390,7 +390,7 @@ BOOST_AUTO_TEST_CASE(EvaluationMatchDimensionsTest) arma::mat query = arma::randu(1, 10); arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); const double kernelBandwidth = 0.7; - const double relError = 1e-8; + const double relError = 0.01; // KDE metric::EuclideanDistance metric; @@ -424,7 +424,7 @@ BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) arma::mat query; arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); const double kernelBandwidth = 0.7; - const double relError = 1e-8; + const double relError = 0.01; // KDE metric::EuclideanDistance metric; From f6396dabe3b549111d7f0fc25d535e8e6c0e7814 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 28 Sep 2018 18:32:17 +0200 Subject: [PATCH 085/150] Use custom traversal for KDE --- src/mlpack/methods/kde/kde.hpp | 25 ++-- src/mlpack/methods/kde/kde_impl.hpp | 132 ++++++++++------------ src/mlpack/methods/kde/kde_main.cpp | 5 - src/mlpack/methods/kde/kde_model.hpp | 20 ++-- src/mlpack/methods/kde/kde_model_impl.hpp | 27 ++--- src/mlpack/tests/kde_test.cpp | 35 +++--- 6 files changed, 102 insertions(+), 142 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 12e27c7dca5..4fba5eb6539 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -39,7 +39,11 @@ template class TreeType = tree::KDTree> + typename TreeMatType> class TreeType = tree::KDTree, + template class DualTreeTraversalType = + TreeType::template DualTreeTraverser> class KDE { public: @@ -61,13 +65,10 @@ class KDE * @param bandwidth Bandwidth of the kernel. * @param relError Relative error tolerance of the model. 
* @param absError Absolute error tolerance of the model. - * @param breadthFirst Whether the tree should be traversed using a - * breadth-first approach. */ KDE(const double bandwidth, const double relError = 0.05, - const double absError = 0, - const bool breadthFirst = false); + const double absError = 0); /** * Initialize KDE object using custom instantiated Metric and Kernel objects. @@ -76,14 +77,11 @@ class KDE * @param kernel Instantiated kernel object. * @param relError Relative error tolerance of the model. * @param absError Absolute error tolerance of the model. - * @param breadthFirst Whether the tree should be traversed using a - * breadth-first approach. */ KDE(MetricType& metric, KernelType& kernel, const double relError = 0.05, - const double absError = 0, - const bool breadthFirst = false); + const double absError = 0); /** * Construct KDE object as a copy of the given model. This may be @@ -204,12 +202,6 @@ class KDE //! Modify absolute error tolerance (0 <= newError). void AbsoluteError(const double newError); - //! Get whether breadth-first traversal is being used. - bool BreadthFirst() const { return breadthFirst; } - - //! Modify whether breadth-first traversal is being used. - bool& BreadthFirst() { return breadthFirst; } - //! Check whether reference tree is owned by the KDE model. bool OwnsReferenceTree() const { return ownsReferenceTree; } @@ -239,9 +231,6 @@ class KDE //! Absolute error tolerance. double absError; - //! If true, a breadth-first approach is used when evaluating. - bool breadthFirst; - //! If true, the KDE object is responsible for deleting the kernel. 
bool ownsKernel; diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 6c22e4fc7cc..2a39eabea68 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -44,13 +44,13 @@ template class TreeType> -KDE::KDE() : + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +KDE::KDE() : kernel(new KernelType()), metric(new MetricType()), relError(0.05), absError(0.0), - breadthFirst(false), ownsKernel(true), ownsMetric(true), ownsReferenceTree(false), @@ -61,17 +61,16 @@ template class TreeType> -KDE:: + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +KDE:: KDE(const double bandwidth, const double relError, - const double absError, - const bool breadthFirst) : + const double absError) : kernel(new KernelType(bandwidth)), metric(new MetricType()), relError(relError), absError(absError), - breadthFirst(breadthFirst), ownsKernel(true), ownsMetric(true), ownsReferenceTree(false), @@ -85,18 +84,17 @@ template class TreeType> -KDE:: + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +KDE:: KDE(MetricType& metric, KernelType& kernel, const double relError, - const double absError, - const bool breadthFirst) : + const double absError) : kernel(&kernel), metric(&metric), relError(relError), absError(absError), - breadthFirst(breadthFirst), ownsKernel(false), ownsMetric(false), ownsReferenceTree(false), @@ -110,13 +108,14 @@ template class TreeType> -KDE::KDE(const KDE& other) : + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +KDE:: +KDE(const KDE& other) : kernel(new KernelType(other.kernel)), metric(new MetricType(other.metric)), relError(other.relError), absError(other.absError), - breadthFirst(other.breadthFirst), ownsKernel(other.ownsKernel), ownsMetric(other.ownsMetric), ownsReferenceTree(other.ownsReferenceTree), @@ -142,15 +141,16 @@ template class TreeType> -KDE::KDE(KDE&& other) : + 
typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +KDE:: +KDE(KDE&& other) : kernel(other.kernel), metric(other.metric), referenceTree(other.referenceTree), oldFromNewReferences(other.oldFromNewReferences), relError(other.relError), absError(other.absError), - breadthFirst(other.breadthFirst), ownsKernel(other.ownsKernel), ownsMetric(other.ownsMetric), ownsReferenceTree(other.ownsReferenceTree), @@ -169,9 +169,11 @@ template class TreeType> -KDE& -KDE::operator=(KDE other) + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +KDE& +KDE:: +operator=(KDE other) { // Clean memory if (ownsKernel) @@ -191,7 +193,6 @@ KDE::operator=(KDE other) this->oldFromNewReferences = std::move(other.oldFromNewReferences); this->relError = other.relError; this->absError = other.absError; - this->breadthFirst = other.breadthFirst; this->ownsKernel = other.ownsKernel; this->ownsMetric = other.ownsMetric; this->ownsReferenceTree = other.ownsReferenceTree; @@ -205,8 +206,9 @@ template class TreeType> -KDE::~KDE() + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +KDE::~KDE() { if (ownsKernel) delete kernel; @@ -224,8 +226,9 @@ template class TreeType> -void KDE:: + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +void KDE:: Train(MatType referenceSet) { // Check if referenceSet is not an empty set. @@ -249,8 +252,9 @@ template class TreeType> -void KDE:: + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +void KDE:: Train(Tree* referenceTree, std::vector* oldFromNewReferences) { // Check if referenceTree dataset is not an empty set. 
@@ -273,8 +277,9 @@ template class TreeType> -void KDE:: + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +void KDE:: Evaluate(MatType querySet, arma::vec& estimations) { std::vector oldFromNewQueries; @@ -288,8 +293,9 @@ template class TreeType> -void KDE:: + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +void KDE:: Evaluate(Tree* queryTree, const std::vector& oldFromNewQueries, arma::vec& estimations) @@ -321,19 +327,10 @@ Evaluate(Tree* queryTree, *metric, *kernel, false); - if (breadthFirst) - { - // DualTreeTraverser Breadth-First - typename Tree::template BreadthFirstDualTreeTraverser - traverser(rules); - traverser.Traverse(*queryTree, *referenceTree); - } - else - { - // DualTreeTraverser Depth-First - typename Tree::template DualTreeTraverser traverser(rules); - traverser.Traverse(*queryTree, *referenceTree); - } + + // Create traverser. + DualTreeTraversalType traverser(rules); + traverser.Traverse(*queryTree, *referenceTree); estimations /= referenceTree->Dataset().n_cols; } @@ -342,8 +339,9 @@ template class TreeType> -void KDE:: + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +void KDE:: Evaluate(arma::vec& estimations) { // Get estimations vector ready. @@ -362,19 +360,10 @@ Evaluate(arma::vec& estimations) *metric, *kernel, true); - if (breadthFirst) - { - // DualTreeTraverser Breadth-First - typename Tree::template BreadthFirstDualTreeTraverser - traverser(rules); - traverser.Traverse(*referenceTree, *referenceTree); - } - else - { - // DualTreeTraverser Depth-First - typename Tree::template DualTreeTraverser traverser(rules); - traverser.Traverse(*referenceTree, *referenceTree); - } + + // Create traverser. 
+ DualTreeTraversalType traverser(rules); + traverser.Traverse(*referenceTree, *referenceTree); estimations /= referenceTree->Dataset().n_cols; } @@ -383,8 +372,9 @@ template class TreeType> -void KDE:: + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +void KDE:: RelativeError(const double newError) { CheckErrorValues(newError, absError); @@ -396,8 +386,9 @@ template class TreeType> -void KDE:: + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +void KDE:: AbsoluteError(const double newError) { CheckErrorValues(relError, newError); @@ -409,15 +400,15 @@ template class TreeType> + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> template -void KDE:: +void KDE:: serialize(Archive& ar, const unsigned int /* version */) { // Serialize preferences. ar & BOOST_SERIALIZATION_NVP(relError); ar & BOOST_SERIALIZATION_NVP(absError); - ar & BOOST_SERIALIZATION_NVP(breadthFirst); ar & BOOST_SERIALIZATION_NVP(trained); // If we are loading, clean up memory if necessary. @@ -450,8 +441,9 @@ template class TreeType> -void KDE:: + typename TreeMatType> class TreeType, + template class DualTreeTraversalType> +void KDE:: CheckErrorValues(const double relError, const double absError) const { if (relError < 0 || relError > 1) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 0e81e69433e..2ec6d49d0bd 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -91,8 +91,6 @@ PARAM_DOUBLE_IN("abs_error", "Relative error tolerance for the result", "E", 0.0); -PARAM_FLAG("breadth_first", "Use breadth-first traversal instead of depth" - "first.", "w"); // Maybe in the future it could be interesting to implement different metrics. // Output options. 
@@ -107,7 +105,6 @@ static void mlpackMain() const std::string treeStr = CLI::GetParam("tree"); const double relError = CLI::GetParam("rel_error"); const double absError = CLI::GetParam("abs_error"); - const bool breadthFirst = CLI::GetParam("breadth_first"); // Initialize results vector. arma::vec estimations; @@ -117,7 +114,6 @@ static void mlpackMain() ReportIgnoredParam({{ "input_model", true }}, "kernel"); ReportIgnoredParam({{ "input_model", true }}, "rel_error"); ReportIgnoredParam({{ "input_model", true }}, "abs_error"); - ReportIgnoredParam({{ "input_model", true }}, "breadth_first"); // Requirements for parameter values. RequireParamInSet("kernel", { "gaussian", "epanechnikov", @@ -140,7 +136,6 @@ static void mlpackMain() kde->Bandwidth() = bandwidth; kde->RelativeError() = relError; kde->AbsoluteError() = absError; - kde->BreadthFirst() = breadthFirst; // Set KernelType if (kernelStr == "gaussian") diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 9653dcdb288..b142665441c 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -32,7 +32,13 @@ template class TreeType> -using KDEType = KDE; +using KDEType = KDE::template DualTreeTraverser>; /** * DualMonoKDE computes a Kernel Density Estimation on the given KDEType. @@ -204,9 +210,6 @@ class KDEModel //! Absolute error tolerance. double absError; - //! If true, a breadth-first approach is used when evaluating. - bool breadthFirst; - KernelTypes kernelType; TreeTypes treeType; @@ -237,15 +240,12 @@ class KDEModel * @param absError Maximum absolute error tolerance for each point in the * model. For example, 0.1 means that for each point the * value can have a maximum error of 0.1 units. - * @param breadthFirst Whether the tree should be traversed using a - * breadth-first approach. * @param kernelType Type of kernel to use. * @param treeType Type of tree to use. 
*/ KDEModel(const double bandwidth = 1.0, const double relError = 0.05, const double absError = 0, - const bool breadthFirst = false, const KernelTypes kernelType = KernelTypes::GAUSSIAN_KERNEL, const TreeTypes treeType = TreeTypes::KD_TREE); @@ -289,12 +289,6 @@ class KDEModel //! Modify the absolute error tolerance. double& AbsoluteError() { return absError; } - //! Get whether breadth-first traversal is being used. - bool BreadthFirst() const { return breadthFirst; } - - //! Modify whether breadth-first traversal is being used. - bool& BreadthFirst() { return breadthFirst; } - //! Get the tree type of the model. TreeTypes TreeType() const { return treeType; } diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index db561799794..73520aa69c1 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -24,13 +24,11 @@ namespace kde { inline KDEModel::KDEModel(const double bandwidth, const double relError, const double absError, - const bool breadthFirst, const KernelTypes kernelType, const TreeTypes treeType) : bandwidth(bandwidth), relError(relError), absError(absError), - breadthFirst(breadthFirst), kernelType(kernelType), treeType(treeType) { @@ -42,7 +40,6 @@ inline KDEModel::KDEModel(const KDEModel& other) : bandwidth(other.bandwidth), relError(other.relError), absError(other.absError), - breadthFirst(other.breadthFirst), kernelType(other.kernelType), treeType(other.treeType) { @@ -54,7 +51,6 @@ inline KDEModel::KDEModel(KDEModel&& other) : bandwidth(other.bandwidth), relError(other.relError), absError(other.absError), - breadthFirst(other.breadthFirst), kernelType(other.kernelType), treeType(other.treeType), kdeModel(std::move(other.kdeModel)) @@ -63,7 +59,6 @@ inline KDEModel::KDEModel(KDEModel&& other) : other.bandwidth = 1.0; other.relError = 0.05; other.absError = 0; - other.breadthFirst = false; other.kernelType = KernelTypes::GAUSSIAN_KERNEL; other.treeType = 
TreeTypes::KD_TREE; other.kdeModel = decltype(other.kdeModel)(); @@ -75,7 +70,6 @@ inline KDEModel& KDEModel::operator=(KDEModel other) bandwidth = other.bandwidth; relError = other.relError; absError = other.absError; - breadthFirst = other.breadthFirst; kernelType = other.kernelType; treeType = other.treeType; kdeModel = std::move(other.kdeModel); @@ -96,52 +90,52 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) if (kernelType == GAUSSIAN_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); + (bandwidth, relError, absError); } else if (kernelType == GAUSSIAN_KERNEL && treeType == BALL_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); + (bandwidth, relError, absError); } else if (kernelType == EPANECHNIKOV_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); + (bandwidth, relError, absError); } else if (kernelType == EPANECHNIKOV_KERNEL && treeType == BALL_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); + (bandwidth, relError, absError); } else if (kernelType == LAPLACIAN_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); + (bandwidth, relError, absError); } else if (kernelType == LAPLACIAN_KERNEL && treeType == BALL_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); + (bandwidth, relError, absError); } else if (kernelType == SPHERICAL_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); + (bandwidth, relError, absError); } else if (kernelType == SPHERICAL_KERNEL && treeType == BALL_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); + (bandwidth, relError, absError); } else if (kernelType == TRIANGULAR_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); + (bandwidth, 
relError, absError); } else if (kernelType == TRIANGULAR_KERNEL && treeType == BALL_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError, breadthFirst); + (bandwidth, relError, absError); } TrainVisitor train(std::move(referenceSet)); @@ -338,7 +332,6 @@ void KDEModel::serialize(Archive& ar, const unsigned int /* version */) ar & BOOST_SERIALIZATION_NVP(bandwidth); ar & BOOST_SERIALIZATION_NVP(relError); ar & BOOST_SERIALIZATION_NVP(absError); - ar & BOOST_SERIALIZATION_NVP(breadthFirst); ar & BOOST_SERIALIZATION_NVP(kernelType); ar & BOOST_SERIALIZATION_NVP(treeType); diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 9f8c1139e10..4f89591e33d 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -74,7 +74,7 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) arma::mat, GaussianKernel, KDTree> - kde(0.8, 0.0, 0.01, false); + kde(0.8, 0.0, 0.01); kde.Train(reference); kde.Evaluate(query, estimations); for (size_t i = 0; i < query.n_cols; ++i) @@ -119,7 +119,7 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) arma::mat, GaussianKernel, KDTree> - kde(kernelBandwidth, 0.0, 1e-6, false); + kde(kernelBandwidth, 0.0, 1e-6); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, std::move(oldFromNewQueries), estimations); for (size_t i = 0; i < query.n_cols; ++i) @@ -153,7 +153,7 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) arma::mat, kernel::GaussianKernel, tree::KDTree> - kde(metric, kernel, relError, 0.0, false); + kde(metric, kernel, relError, 0.0); kde.Train(reference); kde.Evaluate(std::move(query), treeEstimations); @@ -190,7 +190,7 @@ BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) arma::mat, GaussianKernel, BallTree> - kde(kernelBandwidth, relError, 0.0, false); + kde(kernelBandwidth, relError, 0.0); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, std::move(oldFromNewQueries), treeEstimations); @@ -233,7 +233,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedReferenceSampleKDETest) 
arma::mat, GaussianKernel, KDTree> - kde(kernelBandwidth, relError, 0.0, false); + kde(kernelBandwidth, relError, 0.0); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, oldFromNewQueries, treeEstimations); @@ -268,7 +268,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) arma::mat, GaussianKernel, KDTree> - kde(kernelBandwidth, relError, 0.0, false); + kde(kernelBandwidth, relError, 0.0); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, oldFromNewQueries, estimations); @@ -304,8 +304,11 @@ BOOST_AUTO_TEST_CASE(BreadthFirstKDETest) KDE - kde(metric, kernel, relError, 0.0, true); + tree::KDTree, + tree::KDTree::template BreadthFirstDualTreeTraverser> + kde(metric, kernel, relError, 0.0); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -339,7 +342,7 @@ BOOST_AUTO_TEST_CASE(OneDimensionalTest) arma::mat, kernel::GaussianKernel, tree::KDTree> - kde(metric, kernel, relError, 0.0, false); + kde(metric, kernel, relError, 0.0); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -366,7 +369,7 @@ BOOST_AUTO_TEST_CASE(EmptyReferenceTest) arma::mat, kernel::GaussianKernel, tree::KDTree> - kde(metric, kernel, relError, 0.0, false); + kde(metric, kernel, relError, 0.0); // When training using the dataset matrix BOOST_REQUIRE_THROW(kde.Train(reference), std::invalid_argument); @@ -399,7 +402,7 @@ BOOST_AUTO_TEST_CASE(EvaluationMatchDimensionsTest) arma::mat, kernel::GaussianKernel, tree::KDTree> - kde(metric, kernel, relError, 0.0, false); + kde(metric, kernel, relError, 0.0); kde.Train(reference); // When evaluating using the query dataset matrix @@ -433,7 +436,7 @@ BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) arma::mat, kernel::GaussianKernel, tree::KDTree> - kde(metric, kernel, relError, 0.0, false); + kde(metric, kernel, relError, 0.0); kde.Train(reference); // When evaluating using the query dataset matrix @@ -457,13 +460,12 @@ BOOST_AUTO_TEST_CASE(SerializationTest) // Initial KDE model to me 
serialized. const double relError = 0.25; const double absError = 0.0; - const bool bf = false; arma::mat reference = arma::randu(4, 800); KDE - kde(0.25, relError, absError, bf); + kde(0.25, relError, absError); kde.Train(reference); // Get estimations to compare. @@ -489,11 +491,6 @@ BOOST_AUTO_TEST_CASE(SerializationTest) BOOST_REQUIRE_CLOSE(kdeText.AbsoluteError(), absError, 1e-8); BOOST_REQUIRE_CLOSE(kdeBinary.AbsoluteError(), absError, 1e-8); - BOOST_REQUIRE_EQUAL(kde.BreadthFirst(), bf); - BOOST_REQUIRE_EQUAL(kdeXml.BreadthFirst(), bf); - BOOST_REQUIRE_EQUAL(kdeText.BreadthFirst(), bf); - BOOST_REQUIRE_EQUAL(kdeBinary.BreadthFirst(), bf); - BOOST_REQUIRE_EQUAL(kde.IsTrained(), true); BOOST_REQUIRE_EQUAL(kdeXml.IsTrained(), true); BOOST_REQUIRE_EQUAL(kdeText.IsTrained(), true); From 43849de74b565b0798bd511867a020f62a6d5007 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 28 Sep 2018 18:37:41 +0200 Subject: [PATCH 086/150] Add KDE Octree gaussian test --- src/mlpack/tests/kde_test.cpp | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 4f89591e33d..f7725a480c9 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -11,6 +11,8 @@ #include #include +#include +#include #include #include @@ -202,6 +204,40 @@ BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) delete referenceTree; } +/** + * Test Octree dual-tree implementation results against brute force results. 
+ */ +BOOST_AUTO_TEST_CASE(OctreeGaussianKDETest) +{ + arma::mat reference = arma::randu(2, 500); + arma::mat query = arma::randu(2, 200); + arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.3; + const double relError = 0.01; + + // Brute force KDE + GaussianKernel kernel(kernelBandwidth); + BruteForceKDE(reference, + query, + bfEstimations, + kernel); + + // Optimized KDE + metric::EuclideanDistance metric; + KDE + kde(metric, kernel, relError, 0.0); + kde.Train(reference); + kde.Evaluate(std::move(query), treeEstimations); + + // Check whether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); +} + /** * Test duplicated value in reference matrix. */ From a26dedd9796ad0a9430a12a6c76fd04e0e0a44ec Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 28 Sep 2018 18:38:25 +0200 Subject: [PATCH 087/150] Add KDE RTree gaussian test --- src/mlpack/tests/kde_test.cpp | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index f7725a480c9..92a373113d8 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -238,6 +238,40 @@ BOOST_AUTO_TEST_CASE(OctreeGaussianKDETest) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } +/** + * Test RTree dual-tree implementation results against brute force results. 
+ */ +BOOST_AUTO_TEST_CASE(RTreeGaussianKDETest) +{ + arma::mat reference = arma::randu(2, 500); + arma::mat query = arma::randu(2, 200); + arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.3; + const double relError = 0.01; + + // Brute force KDE + GaussianKernel kernel(kernelBandwidth); + BruteForceKDE(reference, + query, + bfEstimations, + kernel); + + // Optimized KDE + metric::EuclideanDistance metric; + KDE + kde(metric, kernel, relError, 0.0); + kde.Train(reference); + kde.Evaluate(std::move(query), treeEstimations); + + // Check whether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); +} + /** * Test duplicated value in reference matrix. */ From 2415b110ea9db44f60469dda2939b25189d0da68 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 14 Oct 2018 18:23:10 +0200 Subject: [PATCH 088/150] Add KDE rules Cover tree support --- src/mlpack/methods/kde/kde_rules.hpp | 6 ----- src/mlpack/methods/kde/kde_rules_impl.hpp | 31 ++++++++++++++++++++--- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index f5affa55ba2..2f5c1f75db8 100644 --- a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -113,12 +113,6 @@ class KDERules //! The number of scores. size_t scores; - - // Check TreeType is supported. 
- static_assert(!tree::TreeTraits::HasDuplicatedPoints, - "TreeType must not have duplicated points."); - static_assert(tree::TreeTraits::UniqueNumDescendants, - "TreeType must provide a number of unique descendants."); }; } // namespace kde diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 5832962c01a..f8f951d42cf 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -59,12 +59,18 @@ double KDERules::BaseCase( if (sameSet && (queryIndex == referenceIndex)) return 0.0; + // Avoid duplicated calculations. + if ((lastQueryIndex == queryIndex) && (lastReferenceIndex == referenceIndex)) + return 0.0; + + // Calculations. double distance = metric.Evaluate(querySet.col(queryIndex), referenceSet.col(referenceIndex)); if (tree::TreeTraits::RearrangesDataset) densities(oldFromNewQueries.at(queryIndex)) += kernel.Evaluate(distance); else densities(queryIndex) += kernel.Evaluate(distance); + ++baseCases; lastQueryIndex = queryIndex; lastReferenceIndex = referenceIndex; @@ -96,14 +102,31 @@ template inline double KDERules:: Score(TreeType& queryNode, TreeType& referenceNode) { + double score; + // Calculations are not duplicated. + bool newCalculations = true; const double maxKernel = kernel.Evaluate(queryNode.MinDistance(referenceNode)); const double minKernel = kernel.Evaluate(queryNode.MaxDistance(referenceNode)); const double bound = maxKernel - minKernel; - double score; - if (bound <= (absError + relError * minKernel) / referenceSet.n_cols) + if (tree::TreeTraits::FirstPointIsCentroid) + { + if ((traversalInfo.LastQueryNode() != NULL) && + (traversalInfo.LastReferenceNode() != NULL) && + (traversalInfo.LastQueryNode()->Point(0) == queryNode.Point(0)) && + (traversalInfo.LastReferenceNode()->Point(0) == referenceNode.Point(0))) + { + // Don't duplicate calculations. 
+ newCalculations = false; + lastQueryIndex = queryNode.Point(0); + lastReferenceIndex = referenceNode.Point(0); + } + } + + if (bound <= (absError + relError * minKernel) / referenceSet.n_cols && + newCalculations) { // Auxiliary variables. double kernelValue; @@ -145,7 +168,9 @@ Score(TreeType& queryNode, TreeType& referenceNode) kernelValue = EvaluateKernel(queryCenter, referenceCenter); } - #pragma omp for + // Can be paralellized but we avoid it for now because of a compilation + // error in visual C++ compiler. + // #pragma omp for for (size_t i = 0; i < queryNode.NumDescendants(); ++i) { if (tree::TreeTraits::RearrangesDataset) From 13de1a340d85766c9c4d94d0f06590488a231845 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 14 Oct 2018 18:24:52 +0200 Subject: [PATCH 089/150] Add KDE StandardCoverTree gaussian test --- src/mlpack/tests/kde_test.cpp | 41 ++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 92a373113d8..f562b33526a 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -157,7 +157,7 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) tree::KDTree> kde(metric, kernel, relError, 0.0); kde.Train(reference); - kde.Evaluate(std::move(query), treeEstimations); + kde.Evaluate(query, treeEstimations); // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) @@ -231,7 +231,7 @@ BOOST_AUTO_TEST_CASE(OctreeGaussianKDETest) tree::Octree> kde(metric, kernel, relError, 0.0); kde.Train(reference); - kde.Evaluate(std::move(query), treeEstimations); + kde.Evaluate(query, treeEstimations); // Check whether results are equal. 
for (size_t i = 0; i < query.n_cols; ++i) @@ -265,7 +265,42 @@ BOOST_AUTO_TEST_CASE(RTreeGaussianKDETest) tree::RTree> kde(metric, kernel, relError, 0.0); kde.Train(reference); - kde.Evaluate(std::move(query), treeEstimations); + kde.Evaluate(query, treeEstimations); + + // Check whether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); +} + +/** + * Test Standard Cover Tree dual-tree implementation results against brute + * force results. + */ +BOOST_AUTO_TEST_CASE(StandardCoverTreeGaussianKDETest) +{ + arma::mat reference = arma::randu(2, 500); + arma::mat query = arma::randu(2, 200); + arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.3; + const double relError = 0.01; + + // Brute force KDE + GaussianKernel kernel(kernelBandwidth); + BruteForceKDE(reference, + query, + bfEstimations, + kernel); + + // Optimized KDE + metric::EuclideanDistance metric; + KDE + kde(metric, kernel, relError, 0.0); + kde.Train(reference); + kde.Evaluate(query, treeEstimations); // Check whether results are equal. 
for (size_t i = 0; i < query.n_cols; ++i) From 9cff9c5b6d7ffc44d8b764652a808d4bda798bfc Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 15 Oct 2018 16:14:41 +0200 Subject: [PATCH 090/150] Add KDE main support for Cover-tree, Octree and RTree --- src/mlpack/methods/kde/kde_main.cpp | 13 +++- src/mlpack/methods/kde/kde_model.hpp | 25 +++++++- src/mlpack/methods/kde/kde_model_impl.hpp | 75 +++++++++++++++++++++++ 3 files changed, 108 insertions(+), 5 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 2ec6d49d0bd..9c659af9382 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -82,7 +82,8 @@ PARAM_STRING_IN("kernel", "Kernel to use for the estimation" "('gaussian', 'epanechnikov', 'laplacian', 'spherical', 'triangular').", "k", "gaussian"); PARAM_STRING_IN("tree", "Tree to use for the estimation" - "('kd-tree', 'ball-tree').", "t", "kd-tree"); + "('kd-tree', 'ball-tree', 'cover-tree', 'octree', 'r-tree').", + "t", "kd-tree"); PARAM_DOUBLE_IN("rel_error", "Relative error tolerance for the result", "e", @@ -118,8 +119,8 @@ static void mlpackMain() // Requirements for parameter values. 
RequireParamInSet("kernel", { "gaussian", "epanechnikov", "laplacian", "spherical", "triangular" }, true, "unknown kernel type"); - RequireParamInSet("tree", { "kd-tree", "ball-tree" }, true, - "unknown tree type"); + RequireParamInSet("tree", { "kd-tree", "ball-tree", "cover-tree", + "octree", "r-tree"}, true, "unknown tree type"); RequireParamValue("rel_error", [](double x){return x >= 0 && x <= 1;}, true, "relative error must be between 0 and 1"); RequireParamValue("abs_error", [](double x){return x >= 0;}, @@ -154,6 +155,12 @@ static void mlpackMain() kde->TreeType() = KDEModel::KD_TREE; else if (treeStr == "ball-tree") kde->TreeType() = KDEModel::BALL_TREE; + else if (treeStr == "cover-tree") + kde->TreeType() = KDEModel::COVER_TREE; + else if (treeStr == "octree") + kde->TreeType() = KDEModel::OCTREE; + else if (treeStr == "r-tree") + kde->TreeType() = KDEModel::R_TREE; // Build model kde->BuildModel(std::move(reference)); diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index b142665441c..68d891413c9 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -14,6 +14,9 @@ // Include trees #include +#include +#include +#include // Include kernels #include @@ -188,7 +191,10 @@ class KDEModel enum TreeTypes { KD_TREE, - BALL_TREE + BALL_TREE, + COVER_TREE, + OCTREE, + R_TREE }; enum KernelTypes @@ -220,14 +226,29 @@ class KDEModel */ boost::variant*, KDEType*, + KDEType*, + KDEType*, + KDEType*, KDEType*, KDEType*, + KDEType*, + KDEType*, + KDEType*, KDEType*, KDEType*, + KDEType*, + KDEType*, + KDEType*, KDEType*, KDEType*, + KDEType*, + KDEType*, + KDEType*, KDEType*, - KDEType*> kdeModel; + KDEType*, + KDEType*, + KDEType*, + KDEType*> kdeModel; public: /** diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index 73520aa69c1..1a9f0431ffe 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ 
b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -97,6 +97,21 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) kdeModel = new KDEType (bandwidth, relError, absError); } + else if (kernelType == GAUSSIAN_KERNEL && treeType == COVER_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } + else if (kernelType == GAUSSIAN_KERNEL && treeType == OCTREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } + else if (kernelType == GAUSSIAN_KERNEL && treeType == R_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } else if (kernelType == EPANECHNIKOV_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType @@ -107,6 +122,21 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) kdeModel = new KDEType (bandwidth, relError, absError); } + else if (kernelType == EPANECHNIKOV_KERNEL && treeType == COVER_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } + else if (kernelType == EPANECHNIKOV_KERNEL && treeType == OCTREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } + else if (kernelType == EPANECHNIKOV_KERNEL && treeType == R_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } else if (kernelType == LAPLACIAN_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType @@ -117,6 +147,21 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) kdeModel = new KDEType (bandwidth, relError, absError); } + else if (kernelType == LAPLACIAN_KERNEL && treeType == COVER_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } + else if (kernelType == LAPLACIAN_KERNEL && treeType == OCTREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } + else if (kernelType == LAPLACIAN_KERNEL && treeType == R_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } else if (kernelType == SPHERICAL_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType @@ -127,6 +172,21 @@ inline void 
KDEModel::BuildModel(arma::mat&& referenceSet) kdeModel = new KDEType (bandwidth, relError, absError); } + else if (kernelType == SPHERICAL_KERNEL && treeType == COVER_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } + else if (kernelType == SPHERICAL_KERNEL && treeType == OCTREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } + else if (kernelType == SPHERICAL_KERNEL && treeType == R_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } else if (kernelType == TRIANGULAR_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType @@ -137,6 +197,21 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) kdeModel = new KDEType (bandwidth, relError, absError); } + else if (kernelType == TRIANGULAR_KERNEL && treeType == COVER_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } + else if (kernelType == TRIANGULAR_KERNEL && treeType == OCTREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } + else if (kernelType == TRIANGULAR_KERNEL && treeType == R_TREE) + { + kdeModel = new KDEType + (bandwidth, relError, absError); + } TrainVisitor train(std::move(referenceSet)); boost::apply_visitor(train, kdeModel); From 843968adfcddcfd8caf17a28a81b8d6a1d7e9368 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 16 Oct 2018 20:52:31 +0200 Subject: [PATCH 091/150] Rewrite KDE dual-tree Score --- src/mlpack/methods/kde/kde_rules_impl.hpp | 40 +++++++++-------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index f8f951d42cf..1442f1bc35c 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -105,8 +105,9 @@ Score(TreeType& queryNode, TreeType& referenceNode) double score; // Calculations are not duplicated. 
bool newCalculations = true; + const double minDistance = queryNode.MinDistance(referenceNode); const double maxKernel = - kernel.Evaluate(queryNode.MinDistance(referenceNode)); + kernel.Evaluate(minDistance); const double minKernel = kernel.Evaluate(queryNode.MaxDistance(referenceNode)); const double bound = maxKernel - minKernel; @@ -130,42 +131,33 @@ Score(TreeType& queryNode, TreeType& referenceNode) { // Auxiliary variables. double kernelValue; - arma::vec& referenceCenter = referenceNode.Stat().Centroid(); - arma::vec& queryCenter = queryNode.Stat().Centroid(); + kde::KDEStat& referenceStat = referenceNode.Stat(); + kde::KDEStat& queryStat = queryNode.Stat(); // If calculating a center is not required. if (tree::TreeTraits::FirstPointIsCentroid) { kernelValue = EvaluateKernel(queryNode.Point(0), referenceNode.Point(0)); } - // If a child center is the same as its parent center. - else if (tree::TreeTraits::HasSelfChildren) + // Sadly, we have no choice but to calculate the center. + else { - // Reference node. - if (referenceNode.Parent() != NULL && - referenceNode.Point(0) == referenceNode.Parent()->Point(0)) - referenceCenter = referenceNode.Parent()->Stat().Centroid(); - else + // Calculate center for each node if it has not been calculated yet. + if (!referenceStat.ValidCentroid()) { + arma::vec referenceCenter; referenceNode.Center(referenceCenter); + referenceStat.SetCentroid(std::move(referenceCenter)); } - // Query node. - if (queryNode.Parent() != NULL && - queryNode.Point(0) == queryNode.Parent()->Point(0)) - queryCenter = queryNode.Parent()->Stat().Centroid(); - else + if (!queryStat.ValidCentroid()) { + arma::vec queryCenter; queryNode.Center(queryCenter); + queryStat.SetCentroid(std::move(queryCenter)); } // Compute kernel value. - kernelValue = EvaluateKernel(queryCenter, referenceCenter); - } - // Regular case. 
- else - { - referenceNode.Center(referenceCenter); - queryNode.Center(queryCenter); - kernelValue = EvaluateKernel(queryCenter, referenceCenter); + kernelValue = EvaluateKernel(queryStat.Centroid(), + referenceStat.Centroid()); } // Can be paralellized but we avoid it for now because of a compilation @@ -184,7 +176,7 @@ Score(TreeType& queryNode, TreeType& referenceNode) } else { - score = queryNode.MinDistance(referenceNode); + score = minDistance; } ++scores; From 9b95d01493e0001e409c81d57f5b9a9a57a348e4 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 16 Oct 2018 20:53:33 +0200 Subject: [PATCH 092/150] Improve centroid handling in KDEStat --- src/mlpack/methods/kde/kde_stat.hpp | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/mlpack/methods/kde/kde_stat.hpp b/src/mlpack/methods/kde/kde_stat.hpp index ed9825aff86..e7d0bbc760a 100644 --- a/src/mlpack/methods/kde/kde_stat.hpp +++ b/src/mlpack/methods/kde/kde_stat.hpp @@ -24,28 +24,45 @@ class KDEStat { public: //! Initialize the statistic. - KDEStat() { } + KDEStat() : validCentroid(false) { } //! Initialization for a fully initialized node. template - KDEStat(TreeType& /* node */) { } + KDEStat(TreeType& /* node */) : validCentroid(false) { } - //! Get the centroid calculation. - const arma::vec& Centroid() const { return centroid; } + //! Get the centroid of the node. + inline const arma::vec& Centroid() const + { + if (validCentroid) + return centroid; + throw std::logic_error("Centroid must be assigned before requesting its " + "value"); + } + + //! Modify the centroid of the node. + void SetCentroid(arma::vec newCentroid) + { + validCentroid = true; + centroid = std::move(newCentroid); + } - //! Modify the centroid calculation. - arma::vec& Centroid() { return centroid; } + //! Get whether the centroid is valid. + inline bool ValidCentroid() const { return validCentroid; } //! Serialize the statistic to/from an archive. 
template void serialize(Archive& ar, const unsigned int /* version */) { ar & BOOST_SERIALIZATION_NVP(centroid); + ar & BOOST_SERIALIZATION_NVP(validCentroid); } private: //! Node centroid. arma::vec centroid; + + //! Whether the centroid is updated or is junk. + bool validCentroid; }; } // namespace kde From 4654def652c91b8473cf0c43fc1847cf67c19f46 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Wed, 17 Oct 2018 16:16:11 +0200 Subject: [PATCH 093/150] Add KDE main tests Test no input data and compare main and kde estimations --- src/mlpack/tests/CMakeLists.txt | 1 + src/mlpack/tests/main_tests/kde_test.cpp | 105 +++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 src/mlpack/tests/main_tests/kde_test.cpp diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index f7fd3d18e09..f265c1b24f2 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -175,6 +175,7 @@ add_executable(mlpack_test main_tests/hmm_generate_test.cpp main_tests/radical_test.cpp main_tests/hmm_test_utils.hpp + main_tests/kde_test.cpp ) # Link dependencies of test executable. diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp new file mode 100644 index 00000000000..6bb4cebd2bd --- /dev/null +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -0,0 +1,105 @@ +/** + * @file kde_test.cpp + * @author Roberto Hueso + * + * Test mlpackMain() of kde_main.cpp + * + * mlpack is free software; you may redistribute it and/or modify it under the + * terms of the 3-clause BSD license. You should have received a copy of the + * 3-clause BSD license along with mlpack. If not, see + * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
+ */ +#include + +#define BINDING_TYPE BINDING_TYPE_TEST + +static const std::string testName = "KDE"; + +#include +#include +#include "test_helper.hpp" +#include + +#include +#include "../test_tools.hpp" + +using namespace mlpack; + +struct KDETestFixture +{ + public: + KDETestFixture() + { + // Cache in the options for this program. + CLI::RestoreSettings(testName); + } + + ~KDETestFixture() + { + // Clear the settings. + CLI::ClearSettings(); + } +}; + +void ResetKDESettings() +{ + CLI::ClearSettings(); + CLI::RestoreSettings(testName); +} + +BOOST_FIXTURE_TEST_SUITE(KDEMainTest, KDETestFixture); + +/** + * Ensure that the estimations we get for KDEMain, are the same as the ones we + * get from the KDE class without any wrappers. + **/ +BOOST_AUTO_TEST_CASE(KDEEqualResultsForMain) +{ + // Datasets + arma::mat reference = arma::randu(3, 500); + arma::mat query = arma::randu(3, 100); + arma::vec kdeEstimations, mainEstimations; + double kernelBandwidth = 1.5; + double relError = 0.05; + + kernel::GaussianKernel kernel(kernelBandwidth); + metric::EuclideanDistance metric; + KDE + kde(metric, kernel, relError, 0.0); + kde.Train(reference); + kde.Evaluate(query, kdeEstimations); + // Normalize estimations + kdeEstimations /= kernel.Normalizer(reference.n_rows); + + // Main estimations + SetInputParam("reference", reference); + SetInputParam("query", query); + SetInputParam("kernel", std::string("gaussian")); + SetInputParam("tree", std::string("r-tree")); + SetInputParam("rel_error", relError); + SetInputParam("bandwidth", kernelBandwidth); + + mlpackMain(); + + mainEstimations = std::move(CLI::GetParam("output")); + + // Check whether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(kdeEstimations[i], mainEstimations[i], relError); +} + +/** + * Ensuring that absence of input data is checked. + **/ +BOOST_AUTO_TEST_CASE(KDENoInputData) +{ + // No input data is not provided. Should throw a runtime error. 
+ Log::Fatal.ignoreInput = true; + BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error); + Log::Fatal.ignoreInput = false; +} + +BOOST_AUTO_TEST_SUITE_END(); From 68bf18caa4e769a75ce575ac0b20ed9642af04bb Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 6 Nov 2018 19:07:45 +0100 Subject: [PATCH 094/150] Add KDE main output size test --- src/mlpack/tests/main_tests/kde_test.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 6bb4cebd2bd..97e42a311dd 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -102,4 +102,23 @@ BOOST_AUTO_TEST_CASE(KDENoInputData) Log::Fatal.ignoreInput = false; } +/** + * Check that there're as many densities in the result as query points. + **/ +BOOST_AUTO_TEST_CASE(KDEOutputSize) +{ + const size_t dim = 3; + const size_t samples = 110; + arma::mat reference = arma::randu(dim, 325); + arma::mat query = arma::randu(dim, samples); + + // Main params + SetInputParam("reference", reference); + SetInputParam("query", query); + + mlpackMain(); + // Check number of output elements + BOOST_REQUIRE_EQUAL(CLI::GetParam("output").size(), samples); +} + BOOST_AUTO_TEST_SUITE_END(); From 3b1fe74f5aabca43b8c4c777307992f69166ce88 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Wed, 7 Nov 2018 15:00:51 +0100 Subject: [PATCH 095/150] Add KDE main model reuse test --- src/mlpack/tests/main_tests/kde_test.cpp | 37 ++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 97e42a311dd..8f56d71362e 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -121,4 +121,41 @@ BOOST_AUTO_TEST_CASE(KDEOutputSize) BOOST_REQUIRE_EQUAL(CLI::GetParam("output").size(), samples); } +/** + * Check that saved model can be reused. 
+ **/ +BOOST_AUTO_TEST_CASE(KDEModelReuse) +{ + const size_t dim = 3; + const size_t samples = 100; + const double relError = 0.05; + arma::mat reference = arma::randu(dim, 300); + arma::mat query = arma::randu(dim, samples); + + // Main params + SetInputParam("reference", reference); + SetInputParam("query", query); + SetInputParam("bandwidth", 2.4); + SetInputParam("rel_error", 0.05); + + mlpackMain(); + + arma::vec oldEstimations = std::move(CLI::GetParam("output")); + + // Change parameters and load model + CLI::GetSingleton().Parameters()["reference"].wasPassed = false; + SetInputParam("bandwidth", 0.5); + SetInputParam("query", query); + SetInputParam("input_model", + std::move(CLI::GetParam("output_model"))); + + mlpackMain(); + + arma::vec newEstimations = std::move(CLI::GetParam("output")); + + // Check estimations are the same + for (size_t i = 0; i < samples; ++i) + BOOST_REQUIRE_CLOSE(oldEstimations[i], newEstimations[i], relError); +} + BOOST_AUTO_TEST_SUITE_END(); From d023b8266be0d9e43d64dfd1f8d0e2b00ab0db4b Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 8 Nov 2018 16:03:11 +0100 Subject: [PATCH 096/150] Implement KDE single tree score --- src/mlpack/methods/kde/kde_rules_impl.hpp | 71 +++++++++++++++++++++-- 1 file changed, 66 insertions(+), 5 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 1442f1bc35c..f5688e93117 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -80,11 +80,72 @@ double KDERules::BaseCase( //! Single-tree scoring function. 
template double KDERules:: -Score(const size_t /* queryIndex */, TreeType& /* referenceNode */) +Score(const size_t queryIndex, TreeType& referenceNode) { + double score; + bool newCalculations = true; + const arma::vec& queryPoint = querySet.unsafe_col(queryIndex); + const double minDistance = referenceNode.MinDistance(queryPoint); + const double maxKernel = kernel.Evaluate(minDistance); + const double minKernel = + kernel.Evaluate(referenceNode.MaxDistance(queryPoint)); + const double bound = maxKernel - minKernel; + + if (tree::TreeTraits::FirstPointIsCentroid && + lastQueryIndex == queryIndex && + traversalInfo.LastReferenceNode() != NULL && + traversalInfo.LastReferenceNode()->Point(0) == referenceNode.Point(0)) + { + // Don't duplicate calculations. + newCalculations = false; + lastQueryIndex = queryIndex; + lastReferenceIndex = referenceNode.Point(0); + } + + if (bound <= (absError + relError * minKernel) / referenceSet.n_cols && + newCalculations) + { + double kernelValue; + + // Calculate kernel value based on reference node centroid. 
+ if (tree::TreeTraits::FirstPointIsCentroid) + { + kernelValue = EvaluateKernel(queryIndex, referenceNode.Point(0)); + } + else + { + kde::KDEStat& referenceStat = referenceNode.Stat(); + if (!referenceStat.ValidCentroid()) + { + arma::vec referenceCenter; + referenceNode.Center(referenceCenter); + referenceStat.SetCentroid(std::move(referenceCenter)); + } + kernelValue = EvaluateKernel(queryPoint, referenceStat.Centroid()); + } + + // Add kernel value to density estimations + if (tree::TreeTraits::RearrangesDataset) + { + densities(oldFromNewQueries.at(queryIndex)) += + referenceNode.NumDescendants() * kernelValue; + } + else + { + densities(queryIndex) += referenceNode.NumDescendants() * kernelValue; + } + // Don't explore this tree branch + score = DBL_MAX; + } + else + { + score = minDistance; + } + ++scores; - traversalInfo.LastScore() = 0.0; - return 0.0; + traversalInfo.LastReferenceNode() = &referenceNode; + traversalInfo.LastScore() = score; + return score; } template @@ -106,8 +167,7 @@ Score(TreeType& queryNode, TreeType& referenceNode) // Calculations are not duplicated. 
bool newCalculations = true; const double minDistance = queryNode.MinDistance(referenceNode); - const double maxKernel = - kernel.Evaluate(minDistance); + const double maxKernel = kernel.Evaluate(minDistance); const double minKernel = kernel.Evaluate(queryNode.MaxDistance(referenceNode)); const double bound = maxKernel - minKernel; @@ -126,6 +186,7 @@ Score(TreeType& queryNode, TreeType& referenceNode) } } + // If possible, avoid some calculations because of the error tolerance if (bound <= (absError + relError * minKernel) / referenceSet.n_cols && newCalculations) { From 93c8191f5020acae898325e24bba4838671d0a05 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 8 Nov 2018 16:08:47 +0100 Subject: [PATCH 097/150] Fix KDE serialization test evaluation --- src/mlpack/tests/kde_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index f562b33526a..2d230b91f80 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -606,9 +606,9 @@ BOOST_AUTO_TEST_CASE(SerializationTest) arma::vec textEstimations = arma::vec(query.n_cols, arma::fill::zeros); arma::vec binEstimations = arma::vec(query.n_cols, arma::fill::zeros); - kde.Evaluate(query, xmlEstimations); - kde.Evaluate(query, textEstimations); - kde.Evaluate(query, binEstimations); + kdeXml.Evaluate(query, xmlEstimations); + kdeText.Evaluate(query, textEstimations); + kdeBinary.Evaluate(query, binEstimations); for (size_t i = 0; i < query.n_cols; ++i) { From 170039423f97a2b065998ac330422ffa1fd11962 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Wed, 14 Nov 2018 16:33:40 +0100 Subject: [PATCH 098/150] Handle KDE kernel normalization using explicit specialization Viusal C++ compiler can't handle partial template specialization, in order to avoid that, this makes use of only explicit template specialization in KDEModel --- src/mlpack/methods/kde/kde_model.hpp | 76 ++++++------- 
src/mlpack/methods/kde/kde_model_impl.hpp | 129 ++++++---------------- 2 files changed, 66 insertions(+), 139 deletions(-) diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 68d891413c9..a769f220ca7 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -42,6 +42,37 @@ using KDEType = KDE::template DualTreeTraverser>; +/** + * KernerlNormalizer holds a set of methods to normalize estimations applying + * in each case the appropiate kernel normalizer function. + */ +class KernelNormalizer +{ + public: + //! Normalization not needed. + template + static void ApplyNormalizer(KernelType& /* kernel */, + const size_t /* dimension */, + arma::vec& /* estimations */) { return; } + + //! Normalize Gaussian Kernel. + template + static void ApplyNormalizer(kernel::GaussianKernel& kernel, + const size_t dimension, + arma::vec& estimations); + + //! Normalize Epanechnikov Kernel. + template + static void ApplyNormalizer(kernel::EpanechnikovKernel& kernel, + const size_t dimension, + arma::vec& estimations); + + //! Normalize SphericalKernel Kernel. + template + static void ApplyNormalizer(kernel::SphericalKernel& kernel, + const size_t dimension, + arma::vec& estimations); +}; /** * DualMonoKDE computes a Kernel Density Estimation on the given KDEType. @@ -68,24 +99,6 @@ class DualMonoKDE : public boost::static_visitor typename TreeMatType> class TreeType> void operator()(KDETypeT* kde) const; - //! DualMonoKDE specialized on Gaussian Kernel KDEType. - template class TreeType> - void operator()(KDETypeT* kde) const; - - //! DualMonoKDE specialized on Epanechnikov Kernel KDEType. - template class TreeType> - void operator()(KDETypeT* kde) const; - - //! DualMonoKDE specialized on Spherical Kernel KDEType. - template class TreeType> - void operator()(KDETypeT* kde) const; - // TODO Implement specific cases where a leaf size can be selected. //! DualMonoKDE constructor. 
@@ -123,24 +136,6 @@ class DualBiKDE : public boost::static_visitor typename TreeMatType> class TreeType> void operator()(KDETypeT* kde) const; - //! DualBiKDE specialized on Gaussian Kernel KDEType. - template class TreeType> - void operator()(KDETypeT* kde) const; - - //! DualBiKDE specialized on Epanechnikov Kernel KDEType. - template class TreeType> - void operator()(KDETypeT* kde) const; - - //! DualBiKDE specialized on Spherical Kernel KDEType. - template class TreeType> - void operator()(KDETypeT* kde) const; - // TODO Implement specific cases where a leaf size can be selected. //! DualBiKDE constructor. Takes ownership of the given querySet. @@ -157,19 +152,12 @@ class TrainVisitor : public boost::static_visitor arma::mat&& referenceSet; public: - //! Alias template necessary for visual C++ compiler. - template class TreeType> - using KDETypeT = KDEType; - //! Default TrainVisitor on some KDEType. template class TreeType> - void operator()(KDETypeT* kde) const; + void operator()(KDEType* kde) const; // TODO Implement specific cases where a leaf size can be selected. 
diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index 1a9f0431ffe..e1d1ead2eab 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -237,6 +237,33 @@ inline void KDEModel::CleanMemory() boost::apply_visitor(DeleteVisitor(), kdeModel); } +// Gaussian KDE normalization +template +void KernelNormalizer::ApplyNormalizer(kernel::GaussianKernel& kernel, + const size_t dimension, + arma::vec& estimations) +{ + estimations /= kernel.Normalizer(dimension); +} + +// Epanechnikov KDE normalization +template +void KernelNormalizer::ApplyNormalizer(kernel::EpanechnikovKernel& kernel, + const size_t dimension, + arma::vec& estimations) +{ + estimations /= kernel.Normalizer(dimension); +} + +// Spherical KDE normalization +template +void KernelNormalizer::ApplyNormalizer(kernel::SphericalKernel& kernel, + const size_t dimension, + arma::vec& estimations) +{ + estimations /= kernel.Normalizer(dimension); +} + // Parameters for KDE evaluation DualMonoKDE::DualMonoKDE(arma::vec& estimations): estimations(estimations) @@ -248,59 +275,14 @@ template class TreeType> void DualMonoKDE::operator()(KDETypeT* kde) const -{ - if (kde) - kde->Evaluate(estimations); - else - throw std::runtime_error("no KDE model initialized"); -} - -// Evaluation specialized for Gaussian Kernel -template class TreeType> -void DualMonoKDE::operator()(KDETypeT* kde) const { if (kde) { - const size_t dimension = (kde->ReferenceTree())->Dataset().n_rows; kde->Evaluate(estimations); - estimations /= kde->Kernel().Normalizer(dimension); - } - else - throw std::runtime_error("no KDE model initialized"); -} - -// Evaluation specialized for EpanechnikovKernel Kernel -template class TreeType> -void DualMonoKDE::operator()(KDETypeT* kde) const -{ - if (kde) - { const size_t dimension = (kde->ReferenceTree())->Dataset().n_rows; - kde->Evaluate(estimations); - estimations /= kde->Kernel().Normalizer(dimension); - } - 
else - throw std::runtime_error("no KDE model initialized"); -} - -// Evaluation specialized for SphericalKernel Kernel -template class TreeType> -void DualMonoKDE::operator()(KDETypeT* kde) const -{ - if (kde) - { - const size_t dimension = (kde->ReferenceTree())->Dataset().n_rows; - kde->Evaluate(estimations); - estimations /= kde->Kernel().Normalizer(dimension); + KernelNormalizer::ApplyNormalizer(kde->Kernel(), + dimension, + estimations); } else throw std::runtime_error("no KDE model initialized"); @@ -319,56 +301,13 @@ template class TreeType> void DualBiKDE::operator()(KDETypeT* kde) const -{ - if (kde) - kde->Evaluate(std::move(querySet), estimations); - else - throw std::runtime_error("no KDE model initialized"); -} - -// Evaluation specialized for Gaussian Kernel -template class TreeType> -void DualBiKDE::operator()(KDETypeT* kde) const -{ - if (kde) - { - kde->Evaluate(std::move(querySet), estimations); - estimations /= kde->Kernel().Normalizer(dimension); - } - else - throw std::runtime_error("no KDE model initialized"); -} - -// Evaluation specialized for EpanechnikovKernel Kernel -template class TreeType> -void DualBiKDE::operator()(KDETypeT* kde) const -{ - if (kde) - { - kde->Evaluate(std::move(querySet), estimations); - estimations /= kde->Kernel().Normalizer(dimension); - } - else - throw std::runtime_error("no KDE model initialized"); -} - -// Evaluation specialized for SphericalKernel Kernel -template class TreeType> -void DualBiKDE::operator()(KDETypeT* kde) const { if (kde) { kde->Evaluate(std::move(querySet), estimations); - estimations /= kde->Kernel().Normalizer(dimension); + KernelNormalizer::ApplyNormalizer(kde->Kernel(), + dimension, + estimations); } else throw std::runtime_error("no KDE model initialized"); @@ -384,7 +323,7 @@ template class TreeType> -void TrainVisitor::operator()(KDETypeT* kde) const +void TrainVisitor::operator()(KDEType* kde) const { if (kde) kde->Train(std::move(referenceSet)); From 
cc515f68c4819619a89621d96c9f53b15c2de10c Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 19 Nov 2018 17:12:33 +0100 Subject: [PATCH 099/150] Add KDE main results without normalzation test --- src/mlpack/tests/main_tests/kde_test.cpp | 44 ++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 8f56d71362e..24642ac1bdb 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -51,9 +51,9 @@ BOOST_FIXTURE_TEST_SUITE(KDEMainTest, KDETestFixture); /** * Ensure that the estimations we get for KDEMain, are the same as the ones we - * get from the KDE class without any wrappers. + * get from the KDE class without any wrappers. Requires normalization. **/ -BOOST_AUTO_TEST_CASE(KDEEqualResultsForMain) +BOOST_AUTO_TEST_CASE(KDEGaussianRTreeResultsMain) { // Datasets arma::mat reference = arma::randu(3, 500); @@ -91,6 +91,46 @@ BOOST_AUTO_TEST_CASE(KDEEqualResultsForMain) BOOST_REQUIRE_CLOSE(kdeEstimations[i], mainEstimations[i], relError); } +/** + * Ensure that the estimations we get for KDEMain, are the same as the ones we + * get from the KDE class without any wrappers. Doesn't require normalization. 
+ **/ +BOOST_AUTO_TEST_CASE(KDETriangularBallTreeResultsMain) +{ + // Datasets + arma::mat reference = arma::randu(3, 300); + arma::mat query = arma::randu(3, 100); + arma::vec kdeEstimations, mainEstimations; + double kernelBandwidth = 3.0; + double relError = 0.06; + + kernel::TriangularKernel kernel(kernelBandwidth); + metric::EuclideanDistance metric; + KDE + kde(metric, kernel, relError, 0.0); + kde.Train(reference); + kde.Evaluate(query, kdeEstimations); + + // Main estimations + SetInputParam("reference", reference); + SetInputParam("query", query); + SetInputParam("kernel", std::string("triangular")); + SetInputParam("tree", std::string("ball-tree")); + SetInputParam("rel_error", relError); + SetInputParam("bandwidth", kernelBandwidth); + + mlpackMain(); + + mainEstimations = std::move(CLI::GetParam("output")); + + // Check whether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(kdeEstimations[i], mainEstimations[i], relError); +} + /** * Ensuring that absence of input data is checked. **/ From d9a4dc6f03c7ec0c385f6e55729e29fade1e5c0e Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 20 Nov 2018 03:34:29 +0100 Subject: [PATCH 100/150] Add KDE main results mono test --- src/mlpack/tests/main_tests/kde_test.cpp | 41 ++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 24642ac1bdb..36a84cfc202 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -131,6 +131,47 @@ BOOST_AUTO_TEST_CASE(KDETriangularBallTreeResultsMain) BOOST_REQUIRE_CLOSE(kdeEstimations[i], mainEstimations[i], relError); } +/** + * Ensure that the estimations we get for KDEMain, are the same as the ones we + * get from the KDE class without any wrappers in the monochromatic case. 
+ **/ +BOOST_AUTO_TEST_CASE(KDEMonoResultsMain) +{ + // Datasets + arma::mat reference = arma::randu(2, 300); + arma::vec kdeEstimations, mainEstimations; + double kernelBandwidth = 2.3; + double relError = 0.05; + + kernel::EpanechnikovKernel kernel(kernelBandwidth); + metric::EuclideanDistance metric; + KDE + kde(metric, kernel, relError, 0.0); + kde.Train(reference); + // Perform monochromatic KDE. + kde.Evaluate(kdeEstimations); + // Normalize + kdeEstimations /= kernel.Normalizer(reference.n_rows); + + // Main estimations + SetInputParam("reference", reference); + SetInputParam("kernel", std::string("epanechnikov")); + SetInputParam("tree", std::string("cover-tree")); + SetInputParam("rel_error", relError); + SetInputParam("bandwidth", kernelBandwidth); + + mlpackMain(); + + mainEstimations = std::move(CLI::GetParam("output")); + + // Check whether results are equal. + for (size_t i = 0; i < reference.n_cols; ++i) + BOOST_REQUIRE_CLOSE(kdeEstimations[i], mainEstimations[i], relError); +} + /** * Ensuring that absence of input data is checked. 
**/ From 56bfaa5b0942e20a42044b74a484019a4c60cd2c Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 20 Nov 2018 13:12:39 +0100 Subject: [PATCH 101/150] Add KDE timers --- src/mlpack/methods/kde/kde_impl.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 2a39eabea68..e9c12cc2df8 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -282,8 +282,10 @@ template:: Evaluate(MatType querySet, arma::vec& estimations) { + Timer::Start("building_tree"); std::vector oldFromNewQueries; Tree* queryTree = BuildTree(std::move(querySet), oldFromNewQueries); + Timer::Stop("building_tree"); this->Evaluate(queryTree, oldFromNewQueries, estimations); delete queryTree; } @@ -311,6 +313,7 @@ Evaluate(Tree* queryTree, throw std::invalid_argument("cannot train KDE model: querySet and " "referenceSet dimensions don't match"); + Timer::Start("computing_kde"); // Get estimations vector ready. estimations.clear(); estimations.resize(queryTree->Dataset().n_cols); @@ -332,6 +335,7 @@ Evaluate(Tree* queryTree, DualTreeTraversalType traverser(rules); traverser.Traverse(*queryTree, *referenceTree); estimations /= referenceTree->Dataset().n_cols; + Timer::Stop("computing_kde"); } template:: Evaluate(arma::vec& estimations) { + Timer::Start("computing_kde"); // Get estimations vector ready. 
estimations.clear(); estimations.resize(referenceTree->Dataset().n_cols); @@ -365,6 +370,7 @@ Evaluate(arma::vec& estimations) DualTreeTraversalType traverser(rules); traverser.Traverse(*referenceTree, *referenceTree); estimations /= referenceTree->Dataset().n_cols; + Timer::Stop("computing_kde"); } template Date: Tue, 20 Nov 2018 21:36:31 +0100 Subject: [PATCH 102/150] Add some KDE log information --- src/mlpack/methods/kde/kde_impl.hpp | 12 ++++++++++-- src/mlpack/methods/kde/kde_model_impl.hpp | 3 +++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index e9c12cc2df8..4773ab6ab65 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -241,9 +241,11 @@ Train(MatType referenceSet) delete oldFromNewReferences; } this->ownsReferenceTree = true; + Timer::Start("building_reference_tree"); this->oldFromNewReferences = new std::vector; this->referenceTree = BuildTree(std::move(referenceSet), *oldFromNewReferences); + Timer::Stop("building_reference_tree"); this->trained = true; } @@ -282,10 +284,10 @@ template:: Evaluate(MatType querySet, arma::vec& estimations) { - Timer::Start("building_tree"); + Timer::Start("building_query_tree"); std::vector oldFromNewQueries; Tree* queryTree = BuildTree(std::move(querySet), oldFromNewQueries); - Timer::Stop("building_tree"); + Timer::Stop("building_query_tree"); this->Evaluate(queryTree, oldFromNewQueries, estimations); delete queryTree; } @@ -336,6 +338,9 @@ Evaluate(Tree* queryTree, traverser.Traverse(*queryTree, *referenceTree); estimations /= referenceTree->Dataset().n_cols; Timer::Stop("computing_kde"); + + Log::Info << rules.Scores() << " node combinations were scored." << std::endl; + Log::Info << rules.BaseCases() << " base cases were calculated." << std::endl; } templateDataset().n_cols; Timer::Stop("computing_kde"); + + Log::Info << rules.Scores() << " node combinations were scored." 
<< std::endl; + Log::Info << rules.BaseCases() << " base cases were calculated." << std::endl; } template class TreeType> void TrainVisitor::operator()(KDEType* kde) const { + Log::Info << "Training KDE model..." << std::endl; if (kde) kde->Train(std::move(referenceSet)); else From 01a80439772ea325458dc48a507acd83f3a583e5 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 25 Dec 2018 15:52:06 +0100 Subject: [PATCH 103/150] Fix KDE includes Remove unnecessary includes --- src/mlpack/methods/kde/kde.hpp | 1 - src/mlpack/methods/kde/kde_impl.hpp | 1 - src/mlpack/methods/kde/kde_main.cpp | 3 --- src/mlpack/methods/kde/kde_model.hpp | 6 ++---- 4 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 4fba5eb6539..3791d0d38d1 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -14,7 +14,6 @@ #define MLPACK_METHODS_KDE_KDE_HPP #include -#include #include #include "kde_stat.hpp" diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 4773ab6ab65..8dfd6053b85 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -12,7 +12,6 @@ #include "kde.hpp" #include "kde_rules.hpp" -#include namespace mlpack { namespace kde { diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 9c659af9382..4b7c39c4f52 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -10,10 +10,7 @@ * http://www.opensource.org/licenses/BSD-3-Clause for more information. 
*/ -#include -#include #include -#include #include "kde.hpp" #include "kde_model.hpp" diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index a769f220ca7..05482de9d1f 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -18,10 +18,8 @@ #include #include -// Include kernels -#include -#include -#include +// Include core +#include // Remaining includes #include From 5ab62347b0b747415be18244f674d100055e5b70 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 25 Dec 2018 16:55:31 +0100 Subject: [PATCH 104/150] Fix style issues --- src/mlpack/methods/kde/kde_rules_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index f5688e93117..5c1fe97282b 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -88,7 +88,7 @@ Score(const size_t queryIndex, TreeType& referenceNode) const double minDistance = referenceNode.MinDistance(queryPoint); const double maxKernel = kernel.Evaluate(minDistance); const double minKernel = - kernel.Evaluate(referenceNode.MaxDistance(queryPoint)); + kernel.Evaluate(referenceNode.MaxDistance(queryPoint)); const double bound = maxKernel - minKernel; if (tree::TreeTraits::FirstPointIsCentroid && @@ -169,7 +169,7 @@ Score(TreeType& queryNode, TreeType& referenceNode) const double minDistance = queryNode.MinDistance(referenceNode); const double maxKernel = kernel.Evaluate(minDistance); const double minKernel = - kernel.Evaluate(queryNode.MaxDistance(referenceNode)); + kernel.Evaluate(queryNode.MaxDistance(referenceNode)); const double bound = maxKernel - minKernel; if (tree::TreeTraits::FirstPointIsCentroid) From 1d38bf4e1f900d1ac2d969cf58a8046157b73de0 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Tue, 25 Dec 2018 17:27:27 +0100 Subject: [PATCH 105/150] Improve KDE log messages --- 
src/mlpack/methods/kde/kde_impl.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 8dfd6053b85..63d4285df66 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -306,12 +306,13 @@ Evaluate(Tree* queryTree, // Check querySet has at least 1 element to evaluate. if (queryTree->Dataset().n_cols == 0) { - Log::Warn << "querySet is empty" << std::endl; + Log::Warn << "KDE::Evaluate(): querySet is empty, no predictions will " + << "be returned" << std::endl; return; } // Check whether dimensions match. if (queryTree->Dataset().n_rows != referenceTree->Dataset().n_rows) - throw std::invalid_argument("cannot train KDE model: querySet and " + throw std::invalid_argument("cannot evaluate KDE model: querySet and " "referenceSet dimensions don't match"); Timer::Start("computing_kde"); From 0bb023aba67d0550db02450e9dd59ec700072d5c Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 29 Dec 2018 11:40:02 +0100 Subject: [PATCH 106/150] Refactor KDE main predictions output - Rename output for predictions - Write predictions as a column vector --- src/mlpack/methods/kde/kde_main.cpp | 16 ++++++++-------- src/mlpack/tests/main_tests/kde_test.cpp | 12 ++++++------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 4b7c39c4f52..d2cb9d2bd1b 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -51,9 +51,9 @@ PROGRAM_INFO("Kernel Density Estimation", "\n\n" + PRINT_CALL("kde", "reference", "ref_data", "query", "qu_data", "bandwidth", 0.2, "kernel", "epanechnikov", "tree", "kd-tree", "rel_error", - 0.05, "output", "out_data") + + 0.05, "predictions", "out_data") + "\n\n" - "the output density estimations will be stored in " + + "the predicted density estimations will be stored in " + 
PRINT_DATASET("out_data") + "." "\n" "If no " + PRINT_PARAM_STRING("query") + " is provided, then KDE will be " @@ -91,9 +91,9 @@ PARAM_DOUBLE_IN("abs_error", 0.0); // Maybe in the future it could be interesting to implement different metrics. -// Output options. -PARAM_MATRIX_OUT("output", "Matrix to store output estimations.", - "o"); +// Output predictions options. +PARAM_COL_OUT("predictions", "Vector to store density predictions.", + "p"); static void mlpackMain() { @@ -176,9 +176,9 @@ static void mlpackMain() else kde->Evaluate(estimations); - // Output results if needed. - if (CLI::HasParam("output")) - CLI::GetParam("output") = std::move(estimations); + // Output predictions if needed. + if (CLI::HasParam("predictions")) + CLI::GetParam("predictions") = std::move(estimations); // Save model. if (CLI::HasParam("output_model")) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 36a84cfc202..13a5914c2cc 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -84,7 +84,7 @@ BOOST_AUTO_TEST_CASE(KDEGaussianRTreeResultsMain) mlpackMain(); - mainEstimations = std::move(CLI::GetParam("output")); + mainEstimations = std::move(CLI::GetParam("predictions")); // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) @@ -124,7 +124,7 @@ BOOST_AUTO_TEST_CASE(KDETriangularBallTreeResultsMain) mlpackMain(); - mainEstimations = std::move(CLI::GetParam("output")); + mainEstimations = std::move(CLI::GetParam("predictions")); // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) @@ -165,7 +165,7 @@ BOOST_AUTO_TEST_CASE(KDEMonoResultsMain) mlpackMain(); - mainEstimations = std::move(CLI::GetParam("output")); + mainEstimations = std::move(CLI::GetParam("predictions")); // Check whether results are equal. 
for (size_t i = 0; i < reference.n_cols; ++i) @@ -199,7 +199,7 @@ BOOST_AUTO_TEST_CASE(KDEOutputSize) mlpackMain(); // Check number of output elements - BOOST_REQUIRE_EQUAL(CLI::GetParam("output").size(), samples); + BOOST_REQUIRE_EQUAL(CLI::GetParam("predictions").size(), samples); } /** @@ -221,7 +221,7 @@ BOOST_AUTO_TEST_CASE(KDEModelReuse) mlpackMain(); - arma::vec oldEstimations = std::move(CLI::GetParam("output")); + arma::vec oldEstimations = std::move(CLI::GetParam("predictions")); // Change parameters and load model CLI::GetSingleton().Parameters()["reference"].wasPassed = false; @@ -232,7 +232,7 @@ BOOST_AUTO_TEST_CASE(KDEModelReuse) mlpackMain(); - arma::vec newEstimations = std::move(CLI::GetParam("output")); + arma::vec newEstimations = std::move(CLI::GetParam("predictions")); // Check estimations are the same for (size_t i = 0; i < samples; ++i) From dee44680ca51d70d1201d23b6d91acf412b86c56 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 30 Dec 2018 01:04:53 +0100 Subject: [PATCH 107/150] Improve KDE predictions vector preparation --- src/mlpack/methods/kde/kde_impl.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 63d4285df66..1fb1ee11ebe 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -303,6 +303,11 @@ Evaluate(Tree* queryTree, const std::vector& oldFromNewQueries, arma::vec& estimations) { + // Get estimations vector ready. + estimations.clear(); + estimations.set_size(queryTree->Dataset().n_cols); + estimations.fill(arma::fill::zeros); + // Check querySet has at least 1 element to evaluate. if (queryTree->Dataset().n_cols == 0) { @@ -316,10 +321,6 @@ Evaluate(Tree* queryTree, "referenceSet dimensions don't match"); Timer::Start("computing_kde"); - // Get estimations vector ready. 
- estimations.clear(); - estimations.resize(queryTree->Dataset().n_cols); - estimations.fill(arma::fill::zeros); // Evaluate typedef KDERules RuleType; @@ -353,12 +354,12 @@ template:: Evaluate(arma::vec& estimations) { - Timer::Start("computing_kde"); // Get estimations vector ready. estimations.clear(); - estimations.resize(referenceTree->Dataset().n_cols); + estimations.set_size(referenceTree->Dataset().n_cols); estimations.fill(arma::fill::zeros); + Timer::Start("computing_kde"); // Evaluate typedef KDERules RuleType; RuleType rules = RuleType(referenceTree->Dataset(), From e57fb1c384edfabc0e7bff16dc695def9fa734d1 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 30 Dec 2018 01:06:09 +0100 Subject: [PATCH 108/150] Improve KDE EmptyQuerySetTest --- src/mlpack/tests/kde_test.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 2d230b91f80..092c8dd12f8 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -530,7 +530,8 @@ BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) { arma::mat reference = arma::randu(1, 10); arma::mat query; - arma::vec estimations = arma::vec(query.n_cols, arma::fill::zeros); + // Set estimations to the wrong size + arma::vec estimations(33, arma::fill::zeros); const double kernelBandwidth = 0.7; const double relError = 0.01; @@ -544,6 +545,8 @@ BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) kde(metric, kernel, relError, 0.0); kde.Train(reference); + // The query set must be empty + BOOST_REQUIRE_EQUAL(query.n_cols, 0); // When evaluating using the query dataset matrix BOOST_REQUIRE_NO_THROW(kde.Evaluate(query, estimations)); @@ -553,8 +556,10 @@ BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) Tree* queryTree = new Tree(query, oldFromNewQueries, 3); BOOST_REQUIRE_NO_THROW( kde.Evaluate(queryTree, oldFromNewQueries, estimations)); - delete queryTree; + + // Estimations must be empty + BOOST_REQUIRE_EQUAL(estimations.size(), 0); } /** 
From c4f501bb0dd1218bc769edc5b7f4d44a2ac609f1 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 30 Dec 2018 19:26:57 +0100 Subject: [PATCH 109/150] Manage KDE normalizers using SFINAE --- src/mlpack/methods/kde/kde_model.hpp | 43 ++++++++++++++--------- src/mlpack/methods/kde/kde_model_impl.hpp | 27 -------------- 2 files changed, 27 insertions(+), 43 deletions(-) diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 05482de9d1f..789784f9a2a 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -46,30 +46,41 @@ using KDEType = KDE + class HasNormalizer + { + private: + typedef char YesType[1]; + typedef char NoType[2]; + + template static YesType& test( decltype(&X::Normalizer) ) ; + template static NoType& test(...); + public: + enum { value = sizeof(test(0)) == sizeof(YesType) }; + }; + public: //! Normalization not needed. template static void ApplyNormalizer(KernelType& /* kernel */, const size_t /* dimension */, - arma::vec& /* estimations */) { return; } + arma::vec& /* estimations */, + const typename std::enable_if< + !HasNormalizer::value>::type* = 0) + { return; } - //! Normalize Gaussian Kernel. + //! Normalize kernels that have normalizer. template - static void ApplyNormalizer(kernel::GaussianKernel& kernel, + static void ApplyNormalizer(KernelType& kernel, const size_t dimension, - arma::vec& estimations); - - //! Normalize Epanechnikov Kernel. - template - static void ApplyNormalizer(kernel::EpanechnikovKernel& kernel, - const size_t dimension, - arma::vec& estimations); - - //! Normalize SphericalKernel Kernel. 
- template - static void ApplyNormalizer(kernel::SphericalKernel& kernel, - const size_t dimension, - arma::vec& estimations); + arma::vec& estimations, + const typename std::enable_if< + HasNormalizer::value>::type* = 0) + { + estimations /= kernel.Normalizer(dimension); + } }; /** diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index c5e52d1bfd2..5eda1bca415 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -239,33 +239,6 @@ inline void KDEModel::CleanMemory() boost::apply_visitor(DeleteVisitor(), kdeModel); } -// Gaussian KDE normalization -template -void KernelNormalizer::ApplyNormalizer(kernel::GaussianKernel& kernel, - const size_t dimension, - arma::vec& estimations) -{ - estimations /= kernel.Normalizer(dimension); -} - -// Epanechnikov KDE normalization -template -void KernelNormalizer::ApplyNormalizer(kernel::EpanechnikovKernel& kernel, - const size_t dimension, - arma::vec& estimations) -{ - estimations /= kernel.Normalizer(dimension); -} - -// Spherical KDE normalization -template -void KernelNormalizer::ApplyNormalizer(kernel::SphericalKernel& kernel, - const size_t dimension, - arma::vec& estimations) -{ - estimations /= kernel.Normalizer(dimension); -} - // Parameters for KDE evaluation DualMonoKDE::DualMonoKDE(arma::vec& estimations): estimations(estimations) From 972580f35902b6d6d59f9d0006c3742dff240694 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 3 Jan 2019 19:27:34 +0100 Subject: [PATCH 110/150] Compute centroids in KDEStat constructor --- src/mlpack/methods/kde/kde_rules_impl.hpp | 20 -------------------- src/mlpack/methods/kde/kde_stat.hpp | 14 +++++++++++++- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 5c1fe97282b..2139e7dd371 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ 
b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -115,12 +115,6 @@ Score(const size_t queryIndex, TreeType& referenceNode) else { kde::KDEStat& referenceStat = referenceNode.Stat(); - if (!referenceStat.ValidCentroid()) - { - arma::vec referenceCenter; - referenceNode.Center(referenceCenter); - referenceStat.SetCentroid(std::move(referenceCenter)); - } kernelValue = EvaluateKernel(queryPoint, referenceStat.Centroid()); } @@ -203,20 +197,6 @@ Score(TreeType& queryNode, TreeType& referenceNode) // Sadly, we have no choice but to calculate the center. else { - // Calculate center for each node if it has not been calculated yet. - if (!referenceStat.ValidCentroid()) - { - arma::vec referenceCenter; - referenceNode.Center(referenceCenter); - referenceStat.SetCentroid(std::move(referenceCenter)); - } - if (!queryStat.ValidCentroid()) - { - arma::vec queryCenter; - queryNode.Center(queryCenter); - queryStat.SetCentroid(std::move(queryCenter)); - } - // Compute kernel value. kernelValue = EvaluateKernel(queryStat.Centroid(), referenceStat.Centroid()); } diff --git a/src/mlpack/methods/kde/kde_stat.hpp b/src/mlpack/methods/kde/kde_stat.hpp index e7d0bbc760a..92d6a118156 100644 --- a/src/mlpack/methods/kde/kde_stat.hpp +++ b/src/mlpack/methods/kde/kde_stat.hpp @@ -28,7 +28,19 @@ class KDEStat //! Initialization for a fully initialized node. template - KDEStat(TreeType& /* node */) : validCentroid(false) { } + KDEStat(TreeType& node) + { + // Calculate centroid if necessary. + if (!tree::TreeTraits::FirstPointIsCentroid) + { + node.Center(centroid); + validCentroid = true; + } + else + { + validCentroid = false; + } + } //! Get the centroid of the node. 
inline const arma::vec& Centroid() const From a3f101287c5c6ed5be50565a87467f63c6c16e19 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Thu, 3 Jan 2019 20:51:18 +0100 Subject: [PATCH 111/150] Save unnecessary calculations in KDE rules When calculations are duplicated don't calculate minKernel, maxKernel or bound --- src/mlpack/methods/kde/kde_rules_impl.hpp | 58 ++++++++++++----------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 2139e7dd371..f30bb49ba05 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -82,14 +82,10 @@ template double KDERules:: Score(const size_t queryIndex, TreeType& referenceNode) { - double score; - bool newCalculations = true; + double score, maxKernel, minKernel, bound; const arma::vec& queryPoint = querySet.unsafe_col(queryIndex); const double minDistance = referenceNode.MinDistance(queryPoint); - const double maxKernel = kernel.Evaluate(minDistance); - const double minKernel = - kernel.Evaluate(referenceNode.MaxDistance(queryPoint)); - const double bound = maxKernel - minKernel; + bool newCalculations = true; if (tree::TreeTraits::FirstPointIsCentroid && lastQueryIndex == queryIndex && @@ -101,9 +97,16 @@ Score(const size_t queryIndex, TreeType& referenceNode) lastQueryIndex = queryIndex; lastReferenceIndex = referenceNode.Point(0); } + else + { + // Calculations are new. 
+ maxKernel = kernel.Evaluate(minDistance); + minKernel = kernel.Evaluate(referenceNode.MaxDistance(queryPoint)); + bound = maxKernel - minKernel; + } - if (bound <= (absError + relError * minKernel) / referenceSet.n_cols && - newCalculations) + if (newCalculations && + bound <= (absError + relError * minKernel) / referenceSet.n_cols) { double kernelValue; @@ -157,32 +160,33 @@ template inline double KDERules:: Score(TreeType& queryNode, TreeType& referenceNode) { - double score; + double score, maxKernel, minKernel, bound; + const double minDistance = queryNode.MinDistance(referenceNode); // Calculations are not duplicated. bool newCalculations = true; - const double minDistance = queryNode.MinDistance(referenceNode); - const double maxKernel = kernel.Evaluate(minDistance); - const double minKernel = - kernel.Evaluate(queryNode.MaxDistance(referenceNode)); - const double bound = maxKernel - minKernel; - if (tree::TreeTraits::FirstPointIsCentroid) + if (tree::TreeTraits::FirstPointIsCentroid && + (traversalInfo.LastQueryNode() != NULL) && + (traversalInfo.LastReferenceNode() != NULL) && + (traversalInfo.LastQueryNode()->Point(0) == queryNode.Point(0)) && + (traversalInfo.LastReferenceNode()->Point(0) == referenceNode.Point(0))) { - if ((traversalInfo.LastQueryNode() != NULL) && - (traversalInfo.LastReferenceNode() != NULL) && - (traversalInfo.LastQueryNode()->Point(0) == queryNode.Point(0)) && - (traversalInfo.LastReferenceNode()->Point(0) == referenceNode.Point(0))) - { - // Don't duplicate calculations. - newCalculations = false; - lastQueryIndex = queryNode.Point(0); - lastReferenceIndex = referenceNode.Point(0); - } + // Don't duplicate calculations. + newCalculations = false; + lastQueryIndex = queryNode.Point(0); + lastReferenceIndex = referenceNode.Point(0); + } + else + { + // Calculations are new. 
+ maxKernel = kernel.Evaluate(minDistance); + minKernel = kernel.Evaluate(queryNode.MaxDistance(referenceNode)); + bound = maxKernel - minKernel; } // If possible, avoid some calculations because of the error tolerance - if (bound <= (absError + relError * minKernel) / referenceSet.n_cols && - newCalculations) + if (newCalculations && + bound <= (absError + relError * minKernel) / referenceSet.n_cols) { // Auxiliary variables. double kernelValue; From bd2e970568452930cb2390a83581e4d80710383d Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 Jan 2019 02:10:19 +0100 Subject: [PATCH 112/150] Rearrange KDE predictions on evaluation It was previously done in KDE rules --- src/mlpack/methods/kde/kde.hpp | 6 ++++- src/mlpack/methods/kde/kde_impl.hpp | 31 ++++++++++++++++++++--- src/mlpack/methods/kde/kde_rules.hpp | 4 --- src/mlpack/methods/kde/kde_rules_impl.hpp | 25 +++--------------- 4 files changed, 37 insertions(+), 29 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 3791d0d38d1..a62ad405df2 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -243,7 +243,11 @@ class KDE bool trained; //! Check whether absolute and relative error values are compatible. - void CheckErrorValues(const double relError, const double absError) const; + static void CheckErrorValues(const double relError, const double absError); + + //! Rearrange estimations vector if required. 
+ static void RearrangeEstimations(const std::vector& oldFromNew, + arma::vec& estimations); }; } // namespace kde diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 1fb1ee11ebe..888c3835252 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -329,7 +329,6 @@ Evaluate(Tree* queryTree, estimations, relError, absError, - oldFromNewQueries, *metric, *kernel, false); @@ -340,6 +339,9 @@ Evaluate(Tree* queryTree, estimations /= referenceTree->Dataset().n_cols; Timer::Stop("computing_kde"); + // Rearrange if necessary. + RearrangeEstimations(oldFromNewQueries, estimations); + Log::Info << rules.Scores() << " node combinations were scored." << std::endl; Log::Info << rules.BaseCases() << " base cases were calculated." << std::endl; } @@ -367,7 +369,6 @@ Evaluate(arma::vec& estimations) estimations, relError, absError, - *oldFromNewReferences, *metric, *kernel, true); @@ -378,6 +379,9 @@ Evaluate(arma::vec& estimations) estimations /= referenceTree->Dataset().n_cols; Timer::Stop("computing_kde"); + // Rearrange if necessary. + RearrangeEstimations(*oldFromNewReferences, estimations); + Log::Info << rules.Scores() << " node combinations were scored." << std::endl; Log::Info << rules.BaseCases() << " base cases were calculated." 
<< std::endl; } @@ -459,7 +463,7 @@ template class TreeType, template class DualTreeTraversalType> void KDE:: -CheckErrorValues(const double relError, const double absError) const +CheckErrorValues(const double relError, const double absError) { if (relError < 0 || relError > 1) throw std::invalid_argument("Relative error tolerance must be a value " @@ -469,5 +473,26 @@ CheckErrorValues(const double relError, const double absError) const "greater or equal to 0"); } +template class TreeType, + template class DualTreeTraversalType> +void KDE:: +RearrangeEstimations(const std::vector& oldFromNew, + arma::vec& estimations) +{ + if (tree::TreeTraits::RearrangesDataset) + { + const size_t n_queries = oldFromNew.size(); + arma::vec rearranged_estimations(n_queries); + for (size_t i = 0; i < n_queries; ++i) + rearranged_estimations(oldFromNew.at(i)) = estimations(i); + estimations = std::move(rearranged_estimations); + } +} + } // namespace kde } // namespace mlpack diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index 2f5c1f75db8..a93c9956226 100644 --- a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -28,7 +28,6 @@ class KDERules arma::vec& densities, const double relError, const double absError, - const std::vector& oldFromNewQueries, MetricType& metric, KernelType& kernel, const bool sameSet); @@ -88,9 +87,6 @@ class KDERules //! Relatve error tolerance. const double relError; - //! New query dataset order. - const std::vector& oldFromNewQueries; - //! Instantiated metric. 
MetricType& metric; diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index f30bb49ba05..fb8b7f94b68 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -26,7 +26,6 @@ KDERules::KDERules( arma::vec& densities, const double relError, const double absError, - const std::vector& oldFromNewQueries, MetricType& metric, KernelType& kernel, const bool sameSet) : @@ -35,7 +34,6 @@ KDERules::KDERules( densities(densities), absError(absError), relError(relError), - oldFromNewQueries(oldFromNewQueries), metric(metric), kernel(kernel), sameSet(sameSet), @@ -66,10 +64,7 @@ double KDERules::BaseCase( // Calculations. double distance = metric.Evaluate(querySet.col(queryIndex), referenceSet.col(referenceIndex)); - if (tree::TreeTraits::RearrangesDataset) - densities(oldFromNewQueries.at(queryIndex)) += kernel.Evaluate(distance); - else - densities(queryIndex) += kernel.Evaluate(distance); + densities(queryIndex) += kernel.Evaluate(distance); ++baseCases; lastQueryIndex = queryIndex; @@ -121,16 +116,8 @@ Score(const size_t queryIndex, TreeType& referenceNode) kernelValue = EvaluateKernel(queryPoint, referenceStat.Centroid()); } - // Add kernel value to density estimations - if (tree::TreeTraits::RearrangesDataset) - { - densities(oldFromNewQueries.at(queryIndex)) += - referenceNode.NumDescendants() * kernelValue; - } - else - { - densities(queryIndex) += referenceNode.NumDescendants() * kernelValue; - } + densities(queryIndex) += referenceNode.NumDescendants() * kernelValue; + // Don't explore this tree branch score = DBL_MAX; } @@ -210,11 +197,7 @@ Score(TreeType& queryNode, TreeType& referenceNode) // #pragma omp for for (size_t i = 0; i < queryNode.NumDescendants(); ++i) { - if (tree::TreeTraits::RearrangesDataset) - densities(oldFromNewQueries.at(queryNode.Descendant(i))) += - referenceNode.NumDescendants() * kernelValue; - else - densities(queryNode.Descendant(i)) += + 
densities(queryNode.Descendant(i)) += referenceNode.NumDescendants() * kernelValue; } score = DBL_MAX; From 24653322f72f3ae9dc1288b83d5fa2fa6d6e7b24 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 Jan 2019 14:04:45 +0100 Subject: [PATCH 113/150] Improve KDE KernelNormalizer SFINAE --- src/mlpack/methods/kde/kde_model.hpp | 39 ++++++++++++---------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 789784f9a2a..3cf4ffa91a3 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -47,37 +47,30 @@ using KDEType = KDE - class HasNormalizer - { - private: - typedef char YesType[1]; - typedef char NoType[2]; - - template static YesType& test( decltype(&X::Normalizer) ) ; - template static NoType& test(...); - public: - enum { value = sizeof(test(0)) == sizeof(YesType) }; - }; + // SFINAE helper to check if has a Normalizer function. + HAS_MEM_FUNC(Normalizer, HasNormalizer); public: //! Normalization not needed. template - static void ApplyNormalizer(KernelType& /* kernel */, - const size_t /* dimension */, - arma::vec& /* estimations */, - const typename std::enable_if< - !HasNormalizer::value>::type* = 0) + static void ApplyNormalizer( + KernelType& /* kernel */, + const size_t /* dimension */, + arma::vec& /* estimations */, + const typename std::enable_if< + !HasNormalizer::value>:: + type* = 0) { return; } //! Normalize kernels that have normalizer. 
template - static void ApplyNormalizer(KernelType& kernel, - const size_t dimension, - arma::vec& estimations, - const typename std::enable_if< - HasNormalizer::value>::type* = 0) + static void ApplyNormalizer( + KernelType& kernel, + const size_t dimension, + arma::vec& estimations, + const typename std::enable_if< + HasNormalizer::value>:: + type* = 0) { estimations /= kernel.Normalizer(dimension); } From 323fd1c32720b5c796df7d18882e46c65a160273 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 Jan 2019 18:06:56 +0100 Subject: [PATCH 114/150] Add KDE class single-tree support --- src/mlpack/methods/kde/kde.hpp | 28 ++- src/mlpack/methods/kde/kde_impl.hpp | 286 ++++++++++++++++++++++------ 2 files changed, 256 insertions(+), 58 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index a62ad405df2..29b112c764f 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -21,6 +21,13 @@ namespace mlpack { namespace kde /** Kernel Density Estimation. */ { +//! KDEMode represents the ways in which KDE algorithm can be executed. +enum KDEMode +{ + DUAL_TREE_MODE, + SINGLE_TREE_MODE +}; + /** * The KDE class is a template class for performing Kernel Density Estimations. * In statistics, kernel density estimation, is a way to estimate the @@ -42,7 +49,11 @@ template class DualTreeTraversalType = TreeType::template DualTreeTraverser> + MatType>::template DualTreeTraverser, + template class SingleTreeTraversalType = + TreeType::template SingleTreeTraverser> class KDE { public: @@ -52,7 +63,7 @@ class KDE /** * Initialize KDE object with the default Kernel and Metric parameters. * Relative error tolernce is initialized to 0.05 (5%), absolute error - * tolerance is 0.0 and uses a depth-first approach. + * tolerance is 0.0 and uses a depth-first approach. Mode is dual-tree. */ KDE(); @@ -64,10 +75,12 @@ class KDE * @param bandwidth Bandwidth of the kernel. 
* @param relError Relative error tolerance of the model. * @param absError Absolute error tolerance of the model. + * @param mode Mode for the algorithm. */ KDE(const double bandwidth, const double relError = 0.05, - const double absError = 0); + const double absError = 0, + const KDEMode mode = DUAL_TREE_MODE); /** * Initialize KDE object using custom instantiated Metric and Kernel objects. @@ -76,11 +89,13 @@ class KDE * @param kernel Instantiated kernel object. * @param relError Relative error tolerance of the model. * @param absError Absolute error tolerance of the model. + * @param mode Mode for the algorithm. */ KDE(MetricType& metric, KernelType& kernel, const double relError = 0.05, - const double absError = 0); + const double absError = 0, + const KDEMode mode = DUAL_TREE_MODE); /** * Construct KDE object as a copy of the given model. This may be @@ -159,7 +174,7 @@ class KDE * * - Use std::move if the query tree is no longer needed. * - * @pre The model has to be previously trained. + * @pre The model has to be previously trained and mode has to be dual-tree. * @param queryTree Tree of query points to get the density of. * @param oldFromNewQueries Mappings of query points to the tree dataset. * @param estimations Object which will hold the density of each query point. @@ -242,6 +257,9 @@ class KDE //! If true, the KDE object is trained. bool trained; + //! Mode of the KDE algorithm. + KDEMode mode; + //! Check whether absolute and relative error values are compatible. 
static void CheckErrorValues(const double relError, const double absError); diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 888c3835252..4e6a3fb995f 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -44,8 +44,15 @@ template class TreeType, - template class DualTreeTraversalType> -KDE::KDE() : + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +KDE:: +KDE() : kernel(new KernelType()), metric(new MetricType()), relError(0.05), @@ -53,7 +60,8 @@ KDE::KDE() : ownsKernel(true), ownsMetric(true), ownsReferenceTree(false), - trained(false) { } + trained(false), + mode(DUAL_TREE_MODE) { } template class TreeType, - template class DualTreeTraversalType> -KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +KDE:: KDE(const double bandwidth, const double relError, - const double absError) : + const double absError, + const KDEMode mode) : kernel(new KernelType(bandwidth)), metric(new MetricType()), relError(relError), @@ -73,7 +88,8 @@ KDE(const double bandwidth, ownsKernel(true), ownsMetric(true), ownsReferenceTree(false), - trained(false) + trained(false), + mode(mode) { CheckErrorValues(relError, absError); } @@ -84,12 +100,19 @@ template class TreeType, - template class DualTreeTraversalType> -KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +KDE:: KDE(MetricType& metric, KernelType& kernel, const double relError, - const double absError) : + const double absError, + const KDEMode mode) : kernel(&kernel), metric(&metric), relError(relError), @@ -97,7 +120,8 @@ KDE(MetricType& metric, ownsKernel(false), ownsMetric(false), ownsReferenceTree(false), - trained(false) + trained(false), + mode(mode) { CheckErrorValues(relError, absError); } @@ -108,8 +132,14 @@ template class TreeType, - template class DualTreeTraversalType> -KDE:: + template class DualTreeTraversalType, + template 
class SingleTreeTraversalType> +KDE:: KDE(const KDE& other) : kernel(new KernelType(other.kernel)), metric(new MetricType(other.metric)), @@ -118,7 +148,8 @@ KDE(const KDE& other) : ownsKernel(other.ownsKernel), ownsMetric(other.ownsMetric), ownsReferenceTree(other.ownsReferenceTree), - trained(other.trained) + trained(other.trained), + mode(other.mode) { if (trained) { @@ -141,8 +172,14 @@ template class TreeType, - template class DualTreeTraversalType> -KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +KDE:: KDE(KDE&& other) : kernel(other.kernel), metric(other.metric), @@ -153,7 +190,8 @@ KDE(KDE&& other) : ownsKernel(other.ownsKernel), ownsMetric(other.ownsMetric), ownsReferenceTree(other.ownsReferenceTree), - trained(other.trained) + trained(other.trained), + mode(other.mode) { other.kernel = new KernelType(); other.metric = new MetricType(); @@ -169,9 +207,20 @@ template class TreeType, - template class DualTreeTraversalType> -KDE& -KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +KDE& +KDE:: operator=(KDE other) { // Clean memory @@ -196,6 +245,7 @@ operator=(KDE other) this->ownsMetric = other.ownsMetric; this->ownsReferenceTree = other.ownsReferenceTree; this->trained = other.trained; + this->mode = other.mode; return *this; } @@ -206,8 +256,15 @@ template class TreeType, - template class DualTreeTraversalType> -KDE::~KDE() + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +KDE:: +~KDE() { if (ownsKernel) delete kernel; @@ -226,8 +283,14 @@ template class TreeType, - template class DualTreeTraversalType> -void KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +void KDE:: Train(MatType referenceSet) { // Check if referenceSet is not an empty set. 
@@ -254,8 +317,14 @@ template class TreeType, - template class DualTreeTraversalType> -void KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +void KDE:: Train(Tree* referenceTree, std::vector* oldFromNewReferences) { // Check if referenceTree dataset is not an empty set. @@ -279,16 +348,70 @@ template class TreeType, - template class DualTreeTraversalType> -void KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +void KDE:: Evaluate(MatType querySet, arma::vec& estimations) { - Timer::Start("building_query_tree"); - std::vector oldFromNewQueries; - Tree* queryTree = BuildTree(std::move(querySet), oldFromNewQueries); - Timer::Stop("building_query_tree"); - this->Evaluate(queryTree, oldFromNewQueries, estimations); - delete queryTree; + if (mode == DUAL_TREE_MODE) + { + Timer::Start("building_query_tree"); + std::vector oldFromNewQueries; + Tree* queryTree = BuildTree(std::move(querySet), oldFromNewQueries); + Timer::Stop("building_query_tree"); + this->Evaluate(queryTree, oldFromNewQueries, estimations); + delete queryTree; + } + else if (mode == SINGLE_TREE_MODE) + { + // Get estimations vector ready. + estimations.clear(); + estimations.set_size(querySet.n_cols); + estimations.fill(arma::fill::zeros); + + // Check querySet has at least 1 element to evaluate. + if (querySet.n_cols == 0) + { + Log::Warn << "KDE::Evaluate(): querySet is empty, no predictions will " + << "be returned" << std::endl; + return; + } + // Check whether dimensions match. + if (querySet.n_rows != referenceTree->Dataset().n_rows) + throw std::invalid_argument("cannot evaluate KDE model: querySet and " + "referenceSet dimensions don't match"); + + // Evaluate + typedef KDERules RuleType; + RuleType rules = RuleType(referenceTree->Dataset(), + querySet, + estimations, + relError, + absError, + *metric, + *kernel, + false); + + // Create traverser. 
+ SingleTreeTraversalType traverser(rules); + + // Traverse for each point. + for (size_t i = 0; i < querySet.n_cols; ++i) + traverser.Traverse(i, *referenceTree); + + estimations /= referenceTree->Dataset().n_cols; + Timer::Stop("computing_kde"); + + Log::Info << rules.Scores() << " node combinations were scored." + << std::endl; + Log::Info << rules.BaseCases() << " base cases were calculated." + << std::endl; + } } template class TreeType, - template class DualTreeTraversalType> -void KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +void KDE:: Evaluate(Tree* queryTree, const std::vector& oldFromNewQueries, arma::vec& estimations) @@ -319,6 +448,11 @@ Evaluate(Tree* queryTree, if (queryTree->Dataset().n_rows != referenceTree->Dataset().n_rows) throw std::invalid_argument("cannot evaluate KDE model: querySet and " "referenceSet dimensions don't match"); + // Check the mode is correct. + if (mode != DUAL_TREE_MODE) + throw std::invalid_argument("cannot evaluate KDE model: cannot use " + "a query tree when mode is different from " + "dual-tree"); Timer::Start("computing_kde"); @@ -352,8 +486,14 @@ template class TreeType, - template class DualTreeTraversalType> -void KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +void KDE:: Evaluate(arma::vec& estimations) { // Get estimations vector ready. @@ -373,14 +513,23 @@ Evaluate(arma::vec& estimations) *kernel, true); - // Create traverser. - DualTreeTraversalType traverser(rules); - traverser.Traverse(*referenceTree, *referenceTree); - estimations /= referenceTree->Dataset().n_cols; - Timer::Stop("computing_kde"); + if (mode == DUAL_TREE_MODE) + { + // Create traverser. 
+ DualTreeTraversalType traverser(rules); + traverser.Traverse(*referenceTree, *referenceTree); + } + else if (mode == SINGLE_TREE_MODE) + { + SingleTreeTraversalType traverser(rules); + for (size_t i = 0; i < referenceTree->Dataset().n_cols; ++i) + traverser.Traverse(i, *referenceTree); + } + estimations /= referenceTree->Dataset().n_cols; // Rearrange if necessary. RearrangeEstimations(*oldFromNewReferences, estimations); + Timer::Stop("computing_kde"); Log::Info << rules.Scores() << " node combinations were scored." << std::endl; Log::Info << rules.BaseCases() << " base cases were calculated." << std::endl; @@ -392,8 +541,14 @@ template class TreeType, - template class DualTreeTraversalType> -void KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +void KDE:: RelativeError(const double newError) { CheckErrorValues(newError, absError); @@ -406,8 +561,14 @@ template class TreeType, - template class DualTreeTraversalType> -void KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +void KDE:: AbsoluteError(const double newError) { CheckErrorValues(relError, newError); @@ -420,15 +581,22 @@ template class TreeType, - template class DualTreeTraversalType> + template class DualTreeTraversalType, + template class SingleTreeTraversalType> template -void KDE:: +void KDE:: serialize(Archive& ar, const unsigned int /* version */) { // Serialize preferences. ar & BOOST_SERIALIZATION_NVP(relError); ar & BOOST_SERIALIZATION_NVP(absError); ar & BOOST_SERIALIZATION_NVP(trained); + ar & BOOST_SERIALIZATION_NVP(mode); // If we are loading, clean up memory if necessary. 
if (Archive::is_loading::value) @@ -461,8 +629,14 @@ template class TreeType, - template class DualTreeTraversalType> -void KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +void KDE:: CheckErrorValues(const double relError, const double absError) { if (relError < 0 || relError > 1) @@ -479,8 +653,14 @@ template class TreeType, - template class DualTreeTraversalType> -void KDE:: + template class DualTreeTraversalType, + template class SingleTreeTraversalType> +void KDE:: RearrangeEstimations(const std::vector& oldFromNew, arma::vec& estimations) { From 036a3a4fb49e1de901527ef930a804a290019799 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 Jan 2019 19:35:06 +0100 Subject: [PATCH 115/150] Unify all KDE constructors --- src/mlpack/methods/kde/kde.hpp | 54 ++++---------- src/mlpack/methods/kde/kde_impl.hpp | 112 +++++----------------------- 2 files changed, 33 insertions(+), 133 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 29b112c764f..f2ba2930270 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -60,42 +60,20 @@ class KDE //! Convenience typedef. typedef TreeType Tree; - /** - * Initialize KDE object with the default Kernel and Metric parameters. - * Relative error tolernce is initialized to 0.05 (5%), absolute error - * tolerance is 0.0 and uses a depth-first approach. Mode is dual-tree. - */ - KDE(); - - /** - * Initialize KDE object using the default Metric parameters and a given - * Kernel bandwidth (only for kernels that require a bandwidth and are - * constructed like kernel(bandwidth)). - * - * @param bandwidth Bandwidth of the kernel. - * @param relError Relative error tolerance of the model. - * @param absError Absolute error tolerance of the model. - * @param mode Mode for the algorithm. 
- */ - KDE(const double bandwidth, - const double relError = 0.05, - const double absError = 0, - const KDEMode mode = DUAL_TREE_MODE); - /** * Initialize KDE object using custom instantiated Metric and Kernel objects. * - * @param metric Instantiated metric object. - * @param kernel Instantiated kernel object. * @param relError Relative error tolerance of the model. * @param absError Absolute error tolerance of the model. + * @param kernel Instantiated kernel object. * @param mode Mode for the algorithm. + * @param metric Instantiated metric object. */ - KDE(MetricType& metric, - KernelType& kernel, - const double relError = 0.05, + KDE(const double relError = 0.05, const double absError = 0, - const KDEMode mode = DUAL_TREE_MODE); + KernelType kernel = KernelType(), + const KDEMode mode = DUAL_TREE_MODE, + MetricType metric = MetricType()); /** * Construct KDE object as a copy of the given model. This may be @@ -196,10 +174,10 @@ class KDE void Evaluate(arma::vec& estimations); //! Get the kernel. - const KernelType& Kernel() const { return *kernel; } + const KernelType& Kernel() const { return kernel; } //! Modify the kernel. - KernelType& Kernel() { return *kernel; } + KernelType& Kernel() { return kernel; } //! Get the reference tree. Tree* ReferenceTree() { return referenceTree; } @@ -222,16 +200,22 @@ class KDE //! Check whether KDE model is trained or not. bool IsTrained() const { return trained; } + //! Get the mode of KDE. + KDEMode Mode() const { return mode; } + + //! Modify the mode of KDE. + KDEMode& Mode() { return mode; } + //! Serialize the model. template void serialize(Archive& ar, const unsigned int /* version */); private: //! Kernel. - KernelType* kernel; + KernelType kernel; //! Metric. - MetricType* metric; + MetricType metric; //! Reference tree. Tree* referenceTree; @@ -245,12 +229,6 @@ class KDE //! Absolute error tolerance. double absError; - //! If true, the KDE object is responsible for deleting the kernel. - bool ownsKernel; - - //! 
If true, the KDE object is responsible for deleting the metric. - bool ownsMetric; - //! If true, the KDE object is responsible for deleting the reference tree. bool ownsReferenceTree; diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 4e6a3fb995f..b7906341003 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -52,73 +52,15 @@ KDE:: -KDE() : - kernel(new KernelType()), - metric(new MetricType()), - relError(0.05), - absError(0.0), - ownsKernel(true), - ownsMetric(true), - ownsReferenceTree(false), - trained(false), - mode(DUAL_TREE_MODE) { } - -template class TreeType, - template class DualTreeTraversalType, - template class SingleTreeTraversalType> -KDE:: -KDE(const double bandwidth, - const double relError, - const double absError, - const KDEMode mode) : - kernel(new KernelType(bandwidth)), - metric(new MetricType()), - relError(relError), - absError(absError), - ownsKernel(true), - ownsMetric(true), - ownsReferenceTree(false), - trained(false), - mode(mode) -{ - CheckErrorValues(relError, absError); -} - -template class TreeType, - template class DualTreeTraversalType, - template class SingleTreeTraversalType> -KDE:: -KDE(MetricType& metric, - KernelType& kernel, - const double relError, +KDE(const double relError, const double absError, - const KDEMode mode) : - kernel(&kernel), - metric(&metric), + KernelType kernel, + const KDEMode mode, + MetricType metric) : + kernel(kernel), + metric(metric), relError(relError), absError(absError), - ownsKernel(false), - ownsMetric(false), ownsReferenceTree(false), trained(false), mode(mode) @@ -141,12 +83,10 @@ KDE:: KDE(const KDE& other) : - kernel(new KernelType(other.kernel)), - metric(new MetricType(other.metric)), + kernel(KernelType(other.kernel)), + metric(MetricType(other.metric)), relError(other.relError), absError(other.absError), - ownsKernel(other.ownsKernel), - ownsMetric(other.ownsMetric), 
ownsReferenceTree(other.ownsReferenceTree), trained(other.trained), mode(other.mode) @@ -187,14 +127,12 @@ KDE(KDE&& other) : oldFromNewReferences(other.oldFromNewReferences), relError(other.relError), absError(other.absError), - ownsKernel(other.ownsKernel), - ownsMetric(other.ownsMetric), ownsReferenceTree(other.ownsReferenceTree), trained(other.trained), mode(other.mode) { - other.kernel = new KernelType(); - other.metric = new MetricType(); + other.kernel = KernelType(); + other.metric = MetricType(); other.referenceTree = nullptr; other.oldFromNewReferences = nullptr; other.ownsReferenceTree = false; @@ -224,10 +162,6 @@ KDEoldFromNewReferences = std::move(other.oldFromNewReferences); this->relError = other.relError; this->absError = other.absError; - this->ownsKernel = other.ownsKernel; - this->ownsMetric = other.ownsMetric; this->ownsReferenceTree = other.ownsReferenceTree; this->trained = other.trained; this->mode = other.mode; @@ -266,10 +198,6 @@ KDE:: ~KDE() { - if (ownsKernel) - delete kernel; - if (ownsMetric) - delete metric; if (ownsReferenceTree) { delete referenceTree; @@ -393,8 +321,8 @@ Evaluate(MatType querySet, arma::vec& estimations) estimations, relError, absError, - *metric, - *kernel, + metric, + kernel, false); // Create traverser. @@ -463,8 +391,8 @@ Evaluate(Tree* queryTree, estimations, relError, absError, - *metric, - *kernel, + metric, + kernel, false); // Create traverser. @@ -509,8 +437,8 @@ Evaluate(arma::vec& estimations) estimations, relError, absError, - *metric, - *kernel, + metric, + kernel, true); if (mode == DUAL_TREE_MODE) @@ -601,18 +529,12 @@ serialize(Archive& ar, const unsigned int /* version */) // If we are loading, clean up memory if necessary. if (Archive::is_loading::value) { - if (ownsKernel && kernel) - delete kernel; - if (ownsMetric && metric) - delete metric; if (ownsReferenceTree && referenceTree) { delete referenceTree; delete oldFromNewReferences; } - // After loading kernel, metric and tree, we own it. 
- ownsKernel = true; - ownsMetric = true; + // After loading tree, we own it. ownsReferenceTree = true; } From fc5bf64192bd121893ed621492d7748700cdd1e5 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 Jan 2019 19:36:46 +0100 Subject: [PATCH 116/150] Adapt KDE tests to the new constructor --- src/mlpack/tests/kde_test.cpp | 36 ++++++++++++++---------- src/mlpack/tests/main_tests/kde_test.cpp | 6 ++-- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 092c8dd12f8..cab4b147d65 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -76,7 +76,7 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) arma::mat, GaussianKernel, KDTree> - kde(0.8, 0.0, 0.01); + kde(0.0, 0.01, GaussianKernel(0.8)); kde.Train(reference); kde.Evaluate(query, estimations); for (size_t i = 0; i < query.n_cols; ++i) @@ -121,7 +121,7 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) arma::mat, GaussianKernel, KDTree> - kde(kernelBandwidth, 0.0, 1e-6); + kde(0.0, 1e-6, GaussianKernel(kernelBandwidth)); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, std::move(oldFromNewQueries), estimations); for (size_t i = 0; i < query.n_cols; ++i) @@ -155,7 +155,7 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) arma::mat, kernel::GaussianKernel, tree::KDTree> - kde(metric, kernel, relError, 0.0); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -192,7 +192,7 @@ BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) arma::mat, GaussianKernel, BallTree> - kde(kernelBandwidth, relError, 0.0); + kde(relError, 0.0, GaussianKernel(kernelBandwidth)); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, std::move(oldFromNewQueries), treeEstimations); @@ -229,7 +229,7 @@ BOOST_AUTO_TEST_CASE(OctreeGaussianKDETest) arma::mat, kernel::GaussianKernel, tree::Octree> - kde(metric, kernel, relError, 0.0); + 
kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -263,7 +263,7 @@ BOOST_AUTO_TEST_CASE(RTreeGaussianKDETest) arma::mat, kernel::GaussianKernel, tree::RTree> - kde(metric, kernel, relError, 0.0); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -298,7 +298,7 @@ BOOST_AUTO_TEST_CASE(StandardCoverTreeGaussianKDETest) arma::mat, kernel::GaussianKernel, tree::StandardCoverTree> - kde(metric, kernel, relError, 0.0); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -338,7 +338,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedReferenceSampleKDETest) arma::mat, GaussianKernel, KDTree> - kde(kernelBandwidth, relError, 0.0); + kde(relError, 0.0, GaussianKernel(kernelBandwidth)); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, oldFromNewQueries, treeEstimations); @@ -373,7 +373,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) arma::mat, GaussianKernel, KDTree> - kde(kernelBandwidth, relError, 0.0); + kde(relError, 0.0, GaussianKernel(kernelBandwidth)); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, oldFromNewQueries, estimations); @@ -413,7 +413,7 @@ BOOST_AUTO_TEST_CASE(BreadthFirstKDETest) tree::KDTree::template BreadthFirstDualTreeTraverser> - kde(metric, kernel, relError, 0.0); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -447,7 +447,7 @@ BOOST_AUTO_TEST_CASE(OneDimensionalTest) arma::mat, kernel::GaussianKernel, tree::KDTree> - kde(metric, kernel, relError, 0.0); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -474,7 +474,7 @@ BOOST_AUTO_TEST_CASE(EmptyReferenceTest) arma::mat, kernel::GaussianKernel, tree::KDTree> - kde(metric, kernel, relError, 
0.0); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); // When training using the dataset matrix BOOST_REQUIRE_THROW(kde.Train(reference), std::invalid_argument); @@ -507,7 +507,7 @@ BOOST_AUTO_TEST_CASE(EvaluationMatchDimensionsTest) arma::mat, kernel::GaussianKernel, tree::KDTree> - kde(metric, kernel, relError, 0.0); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); // When evaluating using the query dataset matrix @@ -542,7 +542,7 @@ BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) arma::mat, kernel::GaussianKernel, tree::KDTree> - kde(metric, kernel, relError, 0.0); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); // The query set must be empty @@ -575,7 +575,7 @@ BOOST_AUTO_TEST_CASE(SerializationTest) arma::mat, kernel::GaussianKernel, tree::KDTree> - kde(0.25, relError, absError); + kde(relError, absError, GaussianKernel(0.25)); kde.Train(reference); // Get estimations to compare. @@ -606,6 +606,12 @@ BOOST_AUTO_TEST_CASE(SerializationTest) BOOST_REQUIRE_EQUAL(kdeText.IsTrained(), true); BOOST_REQUIRE_EQUAL(kdeBinary.IsTrained(), true); + const KDEMode mode = KDEMode::DUAL_TREE_MODE; + BOOST_REQUIRE_EQUAL(kde.Mode(), mode); + BOOST_REQUIRE_EQUAL(kdeXml.Mode(), mode); + BOOST_REQUIRE_EQUAL(kdeText.Mode(), mode); + BOOST_REQUIRE_EQUAL(kdeBinary.Mode(), mode); + // Test if execution gives the same result. 
arma::vec xmlEstimations = arma::vec(query.n_cols, arma::fill::zeros); arma::vec textEstimations = arma::vec(query.n_cols, arma::fill::zeros); diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 13a5914c2cc..0c04652b31c 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(KDEGaussianRTreeResultsMain) arma::mat, kernel::GaussianKernel, tree::RTree> - kde(metric, kernel, relError, 0.0); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, kdeEstimations); // Normalize estimations @@ -110,7 +110,7 @@ BOOST_AUTO_TEST_CASE(KDETriangularBallTreeResultsMain) arma::mat, kernel::TriangularKernel, tree::BallTree> - kde(metric, kernel, relError, 0.0); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, kdeEstimations); @@ -149,7 +149,7 @@ BOOST_AUTO_TEST_CASE(KDEMonoResultsMain) arma::mat, kernel::EpanechnikovKernel, tree::StandardCoverTree> - kde(metric, kernel, relError, 0.0); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); // Perform monochromatic KDE. 
kde.Evaluate(kdeEstimations); From 21e4b89c2ceac1df833f9cbf4a6e39a7624770dd Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 Jan 2019 19:38:55 +0100 Subject: [PATCH 117/150] Adapt KDEModel to the new constructor --- src/mlpack/methods/kde/kde_model.hpp | 5 ++- src/mlpack/methods/kde/kde_model_impl.hpp | 50 +++++++++++------------ 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 3cf4ffa91a3..3055ed57a2e 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -39,7 +39,10 @@ using KDEType = KDE::template DualTreeTraverser>; + arma::mat>::template DualTreeTraverser, + TreeType::template SingleTreeTraverser>; /** * KernerlNormalizer holds a set of methods to normalize estimations applying * in each case the appropiate kernel normalizer function. diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index 5eda1bca415..51d8e1ba37b 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -90,127 +90,127 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) if (kernelType == GAUSSIAN_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::GaussianKernel(bandwidth)); } else if (kernelType == GAUSSIAN_KERNEL && treeType == BALL_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::GaussianKernel(bandwidth)); } else if (kernelType == GAUSSIAN_KERNEL && treeType == COVER_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::GaussianKernel(bandwidth)); } else if (kernelType == GAUSSIAN_KERNEL && treeType == OCTREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::GaussianKernel(bandwidth)); } else if (kernelType == GAUSSIAN_KERNEL && 
treeType == R_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::GaussianKernel(bandwidth)); } else if (kernelType == EPANECHNIKOV_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::EpanechnikovKernel(bandwidth)); } else if (kernelType == EPANECHNIKOV_KERNEL && treeType == BALL_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::EpanechnikovKernel(bandwidth)); } else if (kernelType == EPANECHNIKOV_KERNEL && treeType == COVER_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::EpanechnikovKernel(bandwidth)); } else if (kernelType == EPANECHNIKOV_KERNEL && treeType == OCTREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::EpanechnikovKernel(bandwidth)); } else if (kernelType == EPANECHNIKOV_KERNEL && treeType == R_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::EpanechnikovKernel(bandwidth)); } else if (kernelType == LAPLACIAN_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::LaplacianKernel(bandwidth)); } else if (kernelType == LAPLACIAN_KERNEL && treeType == BALL_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::LaplacianKernel(bandwidth)); } else if (kernelType == LAPLACIAN_KERNEL && treeType == COVER_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::LaplacianKernel(bandwidth)); } else if (kernelType == LAPLACIAN_KERNEL && treeType == OCTREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::LaplacianKernel(bandwidth)); } else if (kernelType == LAPLACIAN_KERNEL && treeType == R_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, 
kernel::LaplacianKernel(bandwidth)); } else if (kernelType == SPHERICAL_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::SphericalKernel(bandwidth)); } else if (kernelType == SPHERICAL_KERNEL && treeType == BALL_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::SphericalKernel(bandwidth)); } else if (kernelType == SPHERICAL_KERNEL && treeType == COVER_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::SphericalKernel(bandwidth)); } else if (kernelType == SPHERICAL_KERNEL && treeType == OCTREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::SphericalKernel(bandwidth)); } else if (kernelType == SPHERICAL_KERNEL && treeType == R_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::SphericalKernel(bandwidth)); } else if (kernelType == TRIANGULAR_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::TriangularKernel(bandwidth)); } else if (kernelType == TRIANGULAR_KERNEL && treeType == BALL_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::TriangularKernel(bandwidth)); } else if (kernelType == TRIANGULAR_KERNEL && treeType == COVER_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::TriangularKernel(bandwidth)); } else if (kernelType == TRIANGULAR_KERNEL && treeType == OCTREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::TriangularKernel(bandwidth)); } else if (kernelType == TRIANGULAR_KERNEL && treeType == R_TREE) { kdeModel = new KDEType - (bandwidth, relError, absError); + (relError, absError, kernel::TriangularKernel(bandwidth)); } TrainVisitor train(std::move(referenceSet)); From 
83f3b11c580da2ea5c0f46d191fc67498ee54e23 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 Jan 2019 20:34:48 +0100 Subject: [PATCH 118/150] Add KDEModel single-tree support --- src/mlpack/methods/kde/kde_model.hpp | 17 +++++++++++++++++ src/mlpack/methods/kde/kde_model_impl.hpp | 22 ++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 3055ed57a2e..5cef72b7279 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -170,6 +170,17 @@ class TrainVisitor : public boost::static_visitor TrainVisitor(arma::mat&& referenceSet); }; +/** + * ModeVisitor exposes the Mode() method of the KDEType. + */ +class ModeVisitor : public boost::static_visitor +{ + public: + //! Return mode of KDEType instance. + template + KDEMode& operator()(KDEType* kde) const; +}; + class DeleteVisitor : public boost::static_visitor { public: @@ -315,6 +326,12 @@ class KDEModel //! Modify the kernel type of the model. KernelTypes& KernelType() { return kernelType; } + //! Get the mode of the model. + KDEMode Mode() const; + + //! Modify de mode of the model. + KDEMode& Mode(); + /** * Build the KDE model with the given parameters and then trains it with the * given reference data. 
diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index 51d8e1ba37b..1017241f50c 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -315,6 +315,28 @@ void DeleteVisitor::operator()(KDEType* kde) const delete kde; } +// Mode of model +template +KDEMode& ModeVisitor::operator()(KDEType* kde) const +{ + if (kde) + return kde->Mode(); + else + throw std::runtime_error("no KDE model initialized"); +} + +// Get mode of model +KDEMode KDEModel::Mode() const +{ + return boost::apply_visitor(ModeVisitor(), kdeModel); +} + +// Modify mode of model +KDEMode& KDEModel::Mode() +{ + return boost::apply_visitor(ModeVisitor(), kdeModel); +} + // Serialize the model. template void KDEModel::serialize(Archive& ar, const unsigned int /* version */) From 213c2078c3da340d30e2bb4f914ea5cc662565fe Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 Jan 2019 20:35:34 +0100 Subject: [PATCH 119/150] Add KDEMain single-tree support --- src/mlpack/methods/kde/kde_main.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index d2cb9d2bd1b..16df42d1e47 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -81,6 +81,9 @@ PARAM_STRING_IN("kernel", "Kernel to use for the estimation" PARAM_STRING_IN("tree", "Tree to use for the estimation" "('kd-tree', 'ball-tree', 'cover-tree', 'octree', 'r-tree').", "t", "kd-tree"); +PARAM_STRING_IN("algorithm", "Algorithm to use for the estimation" + "('dual-tree', 'single-tree').", + "a", "dual-tree"); PARAM_DOUBLE_IN("rel_error", "Relative error tolerance for the result", "e", @@ -101,6 +104,7 @@ static void mlpackMain() const double bandwidth = CLI::GetParam("bandwidth"); const std::string kernelStr = CLI::GetParam("kernel"); const std::string treeStr = CLI::GetParam("tree"); + const std::string modeStr = 
CLI::GetParam("algorithm"); const double relError = CLI::GetParam("rel_error"); const double absError = CLI::GetParam("abs_error"); // Initialize results vector. @@ -118,6 +122,8 @@ static void mlpackMain() "laplacian", "spherical", "triangular" }, true, "unknown kernel type"); RequireParamInSet("tree", { "kd-tree", "ball-tree", "cover-tree", "octree", "r-tree"}, true, "unknown tree type"); + RequireParamInSet("algorithm", { "dual-tree", "single-tree"}, + true, "unknown algorithm"); RequireParamValue("rel_error", [](double x){return x >= 0 && x <= 1;}, true, "relative error must be between 0 and 1"); RequireParamValue("abs_error", [](double x){return x >= 0;}, @@ -161,6 +167,12 @@ static void mlpackMain() // Build model kde->BuildModel(std::move(reference)); + + // Set Mode + if (modeStr == "dual-tree") + kde->Mode() = KDEMode::DUAL_TREE_MODE; + else if (modeStr == "single-tree") + kde->Mode() = KDEMode::SINGLE_TREE_MODE; } else { @@ -168,6 +180,7 @@ static void mlpackMain() kde = CLI::GetParam("input_model"); } + // Evaluation if (CLI::HasParam("query")) { arma::mat query = std::move(CLI::GetParam("query")); From 4082f6ffc99af5ddb2918b7b11bfba981a6d69f4 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 Jan 2019 20:37:26 +0100 Subject: [PATCH 120/150] Add GaussianSingleKDEBruteForceTest --- src/mlpack/tests/kde_test.cpp | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index cab4b147d65..843f5417e28 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -164,6 +164,40 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } +/** + * Test single-tree implementation results against brute force results. 
+ */ +BOOST_AUTO_TEST_CASE(GaussianSingleKDEBruteForceTest) +{ + arma::mat reference = arma::randu(2, 300); + arma::mat query = arma::randu(2, 100); + arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 0.3; + const double relError = 0.01; + + // Brute force KDE + GaussianKernel kernel(kernelBandwidth); + BruteForceKDE(reference, + query, + bfEstimations, + kernel); + + // Optimized KDE + metric::EuclideanDistance metric; + KDE + kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); + kde.Train(reference); + kde.Evaluate(query, treeEstimations); + + // Check whether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); +} + /** * Test BallTree dual-tree implementation results against brute force results. */ From 565c8ec52782b5abbc173fcb976b7932352fefd0 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 Jan 2019 20:39:11 +0100 Subject: [PATCH 121/150] Add KDEGaussianSingleKDTreeResultsMain --- src/mlpack/tests/main_tests/kde_test.cpp | 42 ++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 0c04652b31c..c4613facc93 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -239,4 +239,46 @@ BOOST_AUTO_TEST_CASE(KDEModelReuse) BOOST_REQUIRE_CLOSE(oldEstimations[i], newEstimations[i], relError); } +/** + * Ensure that the estimations we get for KDEMain, are the same as the ones we + * get from the KDE class without any wrappers using single-tree mode. 
+ **/ +BOOST_AUTO_TEST_CASE(KDEGaussianSingleKDTreeResultsMain) +{ + // Datasets + arma::mat reference = arma::randu(3, 400); + arma::mat query = arma::randu(3, 400); + arma::vec kdeEstimations, mainEstimations; + double kernelBandwidth = 3.0; + double relError = 0.06; + + kernel::GaussianKernel kernel(kernelBandwidth); + metric::EuclideanDistance metric; + KDE + kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); + kde.Train(reference); + kde.Evaluate(query, kdeEstimations); + kdeEstimations /= kernel.Normalizer(reference.n_rows); + + // Main estimations + SetInputParam("reference", reference); + SetInputParam("query", query); + SetInputParam("kernel", std::string("gaussian")); + SetInputParam("tree", std::string("kd-tree")); + SetInputParam("algorithm", std::string("single-tree")); + SetInputParam("rel_error", relError); + SetInputParam("bandwidth", kernelBandwidth); + + mlpackMain(); + + mainEstimations = std::move(CLI::GetParam("predictions")); + + // Check whether results are equal. 
+ for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(kdeEstimations[i], mainEstimations[i], relError); +} + BOOST_AUTO_TEST_SUITE_END(); From bb4b1754b402c6723956b8e7ec59f4c3ffe67f33 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 4 Jan 2019 20:50:13 +0100 Subject: [PATCH 122/150] Fix computing_kde timer --- src/mlpack/methods/kde/kde_impl.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index b7906341003..300c1b54175 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -314,6 +314,7 @@ Evaluate(MatType querySet, arma::vec& estimations) throw std::invalid_argument("cannot evaluate KDE model: querySet and " "referenceSet dimensions don't match"); + Timer::Start("computing_kde"); // Evaluate typedef KDERules RuleType; RuleType rules = RuleType(referenceTree->Dataset(), From d9cb3baeefae21d0ab07237be70f0489231b66a7 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 5 Jan 2019 15:23:12 +0100 Subject: [PATCH 123/150] Add KDEMainInvalidKernel test --- src/mlpack/tests/main_tests/kde_test.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index c4613facc93..f970ef62197 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -281,4 +281,22 @@ BOOST_AUTO_TEST_CASE(KDEGaussianSingleKDTreeResultsMain) BOOST_REQUIRE_CLOSE(kdeEstimations[i], mainEstimations[i], relError); } +/** + * Ensure we get an exception when an invalid kernel is specified. 
+ **/ +BOOST_AUTO_TEST_CASE(KDEMainInvalidKernel) +{ + arma::mat reference = arma::randu(2, 10); + arma::mat query = arma::randu(2, 5); + + // Main params + SetInputParam("reference", reference); + SetInputParam("query", query); + SetInputParam("kernel", std::string("linux")); + + Log::Fatal.ignoreInput = true; + BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error); + Log::Fatal.ignoreInput = false; +} + BOOST_AUTO_TEST_SUITE_END(); From 84e81ae8ba81ec0641afc4ccb4c5bc6e7e4368d1 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 5 Jan 2019 15:27:54 +0100 Subject: [PATCH 124/150] Add KDEMainInvalidTree test --- src/mlpack/tests/main_tests/kde_test.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index f970ef62197..1c3d884e3ff 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -299,4 +299,22 @@ BOOST_AUTO_TEST_CASE(KDEMainInvalidKernel) Log::Fatal.ignoreInput = false; } +/** + * Ensure we get an exception when an invalid tree is specified. 
+ **/ +BOOST_AUTO_TEST_CASE(KDEMainInvalidTree) +{ + arma::mat reference = arma::randu(2, 10); + arma::mat query = arma::randu(2, 5); + + // Main params + SetInputParam("reference", reference); + SetInputParam("query", query); + SetInputParam("tree", std::string("olive")); + + Log::Fatal.ignoreInput = true; + BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error); + Log::Fatal.ignoreInput = false; +} + BOOST_AUTO_TEST_SUITE_END(); From 82c3fb5085ae44db00b97762be4e4666a30f1f0e Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 5 Jan 2019 15:34:39 +0100 Subject: [PATCH 125/150] Add KDEMainInvalidAlgorithm test --- src/mlpack/tests/main_tests/kde_test.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 1c3d884e3ff..27bf445e16f 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -317,4 +317,22 @@ BOOST_AUTO_TEST_CASE(KDEMainInvalidTree) Log::Fatal.ignoreInput = false; } +/** + * Ensure we get an exception when an invalid algorithm is specified. 
+ **/ +BOOST_AUTO_TEST_CASE(KDEMainInvalidAlgorithm) +{ + arma::mat reference = arma::randu(2, 10); + arma::mat query = arma::randu(2, 5); + + // Main params + SetInputParam("reference", reference); + SetInputParam("query", query); + SetInputParam("algorithm", std::string("bogosort")); + + Log::Fatal.ignoreInput = true; + BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error); + Log::Fatal.ignoreInput = false; +} + BOOST_AUTO_TEST_SUITE_END(); From 27d6d5e11b4ca3471c2d2a3f10c5691b612ea1e5 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 5 Jan 2019 15:45:16 +0100 Subject: [PATCH 126/150] Add KDEMainReferenceAndModel test --- src/mlpack/tests/main_tests/kde_test.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 27bf445e16f..cbd3cd41d63 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -335,4 +335,24 @@ BOOST_AUTO_TEST_CASE(KDEMainInvalidAlgorithm) Log::Fatal.ignoreInput = false; } +/** + * Ensure we get an exception when both reference and input_model are + * specified. 
+ **/ +BOOST_AUTO_TEST_CASE(KDEMainReferenceAndModel) +{ + arma::mat reference = arma::randu(2, 10); + arma::mat query = arma::randu(2, 5); + KDEModel* model = new KDEModel(); + + // Main params + SetInputParam("reference", reference); + SetInputParam("query", query); + SetInputParam("input_model", model); + + Log::Fatal.ignoreInput = true; + BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error); + Log::Fatal.ignoreInput = false; +} + BOOST_AUTO_TEST_SUITE_END(); From 1d12b6e83ad5947dfaf14cc128925de29bacec95 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 5 Jan 2019 18:08:15 +0100 Subject: [PATCH 127/150] Improve KDE main docs --- src/mlpack/methods/kde/kde_main.cpp | 34 +++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index 16df42d1e47..d5900e75b61 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -28,10 +28,10 @@ PROGRAM_INFO("Kernel Density Estimation", "by applying a kernel function to each reference point. The computational " "complexity of this is O(N^2) where there are N query points and N " "reference points, but this implementation will typically see better " - "performance as it uses an approximate dual-tree algorithm for " + "performance as it uses an approximate dual or single tree algorithm for " "acceleration." "\n\n" - "Dual-tree optimization allows to avoid lots of barely relevant " + "Dual or single tree optimization allows to avoid lots of barely relevant " "calculations (as kernel function values decrease with distance), so it is " "an approximate computation. You can specify the maximum relative error " "tolerance for each query value with " + PRINT_PARAM_STRING("rel_error") + @@ -40,14 +40,16 @@ PROGRAM_INFO("Kernel Density Estimation", "metric. Kernel function can be selected using the " + PRINT_PARAM_STRING("kernel") + " option. 
You can also choose what which " "type of tree to use for the dual-tree algorithm with " + - PRINT_PARAM_STRING("tree") + + PRINT_PARAM_STRING("tree") + ". It is also possible to select whether to " + "use dual-tree algorithm or single-tree algorithm using the " + + PRINT_PARAM_STRING("algorithm") + " option." "\n\n" "For example, the following will run KDE using the data in " + PRINT_DATASET("ref_data") + " for training and the data in " + PRINT_DATASET("qu_data") + " as query data. It will apply an Epanechnikov " "kernel with a 0.2 bandwidth to each reference point and use a KD-Tree for " - "the dual-tree optimization. The returned results will be within 5% of the " - "real KDE value for each query point." + "the dual-tree optimization. The returned predictions will be within 5% of " + "the real KDE value for each query point." "\n\n" + PRINT_CALL("kde", "reference", "ref_data", "query", "qu_data", "bandwidth", 0.2, "kernel", "epanechnikov", "tree", "kd-tree", "rel_error", @@ -57,12 +59,15 @@ PROGRAM_INFO("Kernel Density Estimation", PRINT_DATASET("out_data") + "." "\n" "If no " + PRINT_PARAM_STRING("query") + " is provided, then KDE will be " - "computed on the " + PRINT_PARAM_STRING("reference") + " dataset."); + "computed on the " + PRINT_PARAM_STRING("reference") + " dataset." + "\n" + "It is possible to select either a reference dataset or an input model " + "but not both at the same time."); // Required options. -PARAM_MATRIX_IN("reference", "Input dataset to KDE on.", "r"); +PARAM_MATRIX_IN("reference", "Input reference dataset to use for KDE.", "r"); PARAM_MATRIX_IN("query", "Query dataset to KDE on.", "q"); -PARAM_DOUBLE_IN("bandwidth", "Bandwidth of the kernel", "b", 1.0); +PARAM_DOUBLE_IN("bandwidth", "Bandwidth of the kernel.", "b", 1.0); // Load or save models.
PARAM_MODEL_IN(KDEModel, @@ -75,29 +80,30 @@ PARAM_MODEL_OUT(KDEModel, "M"); // Configuration options -PARAM_STRING_IN("kernel", "Kernel to use for the estimation" +PARAM_STRING_IN("kernel", "Kernel to use for the prediction. " "('gaussian', 'epanechnikov', 'laplacian', 'spherical', 'triangular').", "k", "gaussian"); -PARAM_STRING_IN("tree", "Tree to use for the estimation" +PARAM_STRING_IN("tree", "Tree to use for the prediction. " "('kd-tree', 'ball-tree', 'cover-tree', 'octree', 'r-tree').", "t", "kd-tree"); -PARAM_STRING_IN("algorithm", "Algorithm to use for the estimation" +PARAM_STRING_IN("algorithm", "Algorithm to use for the prediction. " "('dual-tree', 'single-tree').", "a", "dual-tree"); PARAM_DOUBLE_IN("rel_error", - "Relative error tolerance for the result", + "Relative error tolerance for the prediction.", "e", 0.05); PARAM_DOUBLE_IN("abs_error", - "Relative error tolerance for the result", + "Absolute error tolerance for the prediction.", "E", 0.0); -// Maybe in the future it could be interesting to implement different metrics. // Output predictions options. PARAM_COL_OUT("predictions", "Vector to store density predictions.", "p"); +// Maybe, in the future, it could be interesting to implement different metrics. + static void mlpackMain() { // Get some parameters.
From cc74c27e4b9e6687516ac9e410632ce6b14995a6 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 5 Jan 2019 18:23:08 +0100 Subject: [PATCH 128/150] Add KDEMainInvalidAbsoluteError test --- src/mlpack/tests/main_tests/kde_test.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index cbd3cd41d63..20c0a3b1fb7 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -355,4 +355,27 @@ BOOST_AUTO_TEST_CASE(KDEMainReferenceAndModel) Log::Fatal.ignoreInput = false; } +/** + * Ensure we get an exception when an invalid absolute error is specified. + **/ +BOOST_AUTO_TEST_CASE(KDEMainInvalidAbsoluteError) +{ + arma::mat reference = arma::randu(1, 10); + arma::mat query = arma::randu(1, 5); + + // Main params + SetInputParam("reference", reference); + SetInputParam("query", query); + + Log::Fatal.ignoreInput = true; + // Invalid value + SetInputParam("abs_error", -0.1); + BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error); + + // Valid value + SetInputParam("abs_error", 5.8); + BOOST_REQUIRE_NO_THROW(mlpackMain()); + Log::Fatal.ignoreInput = false; +} + BOOST_AUTO_TEST_SUITE_END(); From dd3a1f9933dcde58cb5df73c6fc122d3ba998fa1 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 5 Jan 2019 18:23:43 +0100 Subject: [PATCH 129/150] Add KDEMainInvalidRelativeError test --- src/mlpack/tests/main_tests/kde_test.cpp | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 20c0a3b1fb7..26f37e5f472 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -378,4 +378,31 @@ BOOST_AUTO_TEST_CASE(KDEMainInvalidAbsoluteError) Log::Fatal.ignoreInput = false; } +/** + * Ensure we get an exception when an invalid relative error is specified. 
+ **/ +BOOST_AUTO_TEST_CASE(KDEMainInvalidRelativeError) +{ + arma::mat reference = arma::randu(1, 10); + arma::mat query = arma::randu(1, 5); + + // Main params + SetInputParam("reference", reference); + SetInputParam("query", query); + + Log::Fatal.ignoreInput = true; + // Invalid under 0. + SetInputParam("rel_error", -0.1); + BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error); + + // Invalid over 1. + SetInputParam("rel_error", 1.1); + BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error); + + // Valid value + SetInputParam("rel_error", 0.3); + BOOST_REQUIRE_NO_THROW(mlpackMain()); + Log::Fatal.ignoreInput = false; +} + BOOST_AUTO_TEST_SUITE_END(); From 83c5a4e923879be78610d12c59fcd492fbc7fcfa Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 5 Jan 2019 18:36:18 +0100 Subject: [PATCH 130/150] Add EpanechnikovCoverSingleKDETest test --- src/mlpack/tests/kde_test.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 843f5417e28..f6812c62dab 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -198,6 +198,41 @@ BOOST_AUTO_TEST_CASE(GaussianSingleKDEBruteForceTest) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } +/** + * Test single-tree implementation results against brute force results using + * a cover-tree and Epanechnikov kernel. 
+ */ +BOOST_AUTO_TEST_CASE(EpanechnikovCoverSingleKDETest) +{ + arma::mat reference = arma::randu(2, 300); + arma::mat query = arma::randu(2, 100); + arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 1.1; + const double relError = 0.08; + + // Brute force KDE + EpanechnikovKernel kernel(kernelBandwidth); + BruteForceKDE(reference, + query, + bfEstimations, + kernel); + + // Optimized KDE + metric::EuclideanDistance metric; + KDE + kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); + kde.Train(reference); + kde.Evaluate(query, treeEstimations); + + // Check whether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); +} + /** * Test BallTree dual-tree implementation results against brute force results. */ From b9e26e25d75f29e89c0ec7f9420c04ebd77097b4 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sat, 5 Jan 2019 18:40:47 +0100 Subject: [PATCH 131/150] Add EpanechnikovOctreeSingleKDETest test --- src/mlpack/tests/kde_test.cpp | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index f6812c62dab..f051726342d 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -233,6 +233,41 @@ BOOST_AUTO_TEST_CASE(EpanechnikovCoverSingleKDETest) BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); } +/** + * Test single-tree implementation results against brute force results using + * an octree and Epanechnikov kernel. 
+ */ +BOOST_AUTO_TEST_CASE(EpanechnikovOctreeSingleKDETest) +{ + arma::mat reference = arma::randu(2, 300); + arma::mat query = arma::randu(2, 100); + arma::vec bfEstimations = arma::vec(query.n_cols, arma::fill::zeros); + arma::vec treeEstimations = arma::vec(query.n_cols, arma::fill::zeros); + const double kernelBandwidth = 1.0; + const double relError = 0.05; + + // Brute force KDE + EpanechnikovKernel kernel(kernelBandwidth); + BruteForceKDE(reference, + query, + bfEstimations, + kernel); + + // Optimized KDE + metric::EuclideanDistance metric; + KDE + kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); + kde.Train(reference); + kde.Evaluate(query, treeEstimations); + + // Check whether results are equal. + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); +} + /** * Test BallTree dual-tree implementation results against brute force results. */ From cf94b96a3f7e4c9ac7fa52c6f041fc1ec3676f8e Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 6 Jan 2019 02:20:31 +0100 Subject: [PATCH 132/150] Fix KDE tests error tolerance Boost error tolerance argument is measured in % --- src/mlpack/tests/kde_test.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index f051726342d..0dd2f690869 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -161,7 +161,7 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError*100); } /** @@ -195,7 +195,7 @@ BOOST_AUTO_TEST_CASE(GaussianSingleKDEBruteForceTest) // Check whether results are equal. 
for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError*100); } /** @@ -230,7 +230,7 @@ BOOST_AUTO_TEST_CASE(EpanechnikovCoverSingleKDETest) // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError*100); } /** @@ -265,7 +265,7 @@ BOOST_AUTO_TEST_CASE(EpanechnikovOctreeSingleKDETest) // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError*100); } /** @@ -302,7 +302,7 @@ BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError*100); delete queryTree; delete referenceTree; @@ -339,7 +339,7 @@ BOOST_AUTO_TEST_CASE(OctreeGaussianKDETest) // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError*100); } /** @@ -373,7 +373,7 @@ BOOST_AUTO_TEST_CASE(RTreeGaussianKDETest) // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError*100); } /** @@ -408,7 +408,7 @@ BOOST_AUTO_TEST_CASE(StandardCoverTreeGaussianKDETest) // Check whether results are equal. 
for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError*100); } /** @@ -448,7 +448,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedReferenceSampleKDETest) // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError*100); delete queryTree; delete referenceTree; @@ -482,7 +482,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) kde.Evaluate(queryTree, oldFromNewQueries, estimations); // Check whether results are equal. - BOOST_REQUIRE_CLOSE(estimations[2], estimations[3], relError); + BOOST_REQUIRE_CLOSE(estimations[2], estimations[3], relError*100); delete queryTree; delete referenceTree; @@ -523,7 +523,7 @@ BOOST_AUTO_TEST_CASE(BreadthFirstKDETest) // Check whether results are equal. for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError*100); } /** @@ -557,7 +557,7 @@ BOOST_AUTO_TEST_CASE(OneDimensionalTest) // Check whether results are equal. 
for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError); + BOOST_REQUIRE_CLOSE(bfEstimations[i], treeEstimations[i], relError*100); } /** @@ -727,9 +727,9 @@ BOOST_AUTO_TEST_CASE(SerializationTest) for (size_t i = 0; i < query.n_cols; ++i) { - BOOST_REQUIRE_CLOSE(estimations[i], xmlEstimations[i], relError); - BOOST_REQUIRE_CLOSE(estimations[i], textEstimations[i], relError); - BOOST_REQUIRE_CLOSE(estimations[i], binEstimations[i], relError); + BOOST_REQUIRE_CLOSE(estimations[i], xmlEstimations[i], relError*100); + BOOST_REQUIRE_CLOSE(estimations[i], textEstimations[i], relError*100); + BOOST_REQUIRE_CLOSE(estimations[i], binEstimations[i], relError*100); } } From 7380f0650a308901a31739cdaccb9bdfcbaa99db Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 6 Jan 2019 16:01:48 +0100 Subject: [PATCH 133/150] Fix KDE copy constructor --- src/mlpack/methods/kde/kde_impl.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 300c1b54175..4b3298381ea 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -95,8 +95,9 @@ KDE(const KDE& other) : { if (ownsReferenceTree) { - oldFromNewReferences = new std::vector; - referenceTree = new Tree(other.referenceTree, *oldFromNewReferences); + oldFromNewReferences = + new std::vector(*other.oldFromNewReferences); + referenceTree = new Tree(*other.referenceTree); } else { From cb45c43cc673c1b9df50b540be9fc1546eebc254 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 6 Jan 2019 16:02:38 +0100 Subject: [PATCH 134/150] Add KDE CopyConstructor test --- src/mlpack/tests/kde_test.cpp | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 0dd2f690869..84ecbc9f797 100644 --- a/src/mlpack/tests/kde_test.cpp +++ 
b/src/mlpack/tests/kde_test.cpp @@ -733,4 +733,41 @@ BOOST_AUTO_TEST_CASE(SerializationTest) } } +/** + * Test if the copy constructor and copy operator works properly. + */ +BOOST_AUTO_TEST_CASE(CopyConstructor) +{ + arma::mat reference = arma::randu(2, 300); + arma::mat query = arma::randu(2, 100); + arma::vec estimations1, estimations2, estimations3; + const double kernelBandwidth = 1.5; + const double relError = 0.05; + + typedef KDE + KDEType; + + // KDE + KDEType kde(relError, 0, kernel::GaussianKernel(kernelBandwidth)); + kde.Train(std::move(reference)); + + // Copy constructor KDE + KDEType constructor(kde); + + // Copy operator KDE + KDEType oper = kde; + + // Evaluations + kde.Evaluate(query, estimations1); + constructor.Evaluate(query, estimations2); + oper.Evaluate(query, estimations3); + + // Check results + for (size_t i = 0; i < query.n_cols; ++i) + { + BOOST_REQUIRE_CLOSE(estimations1[i], estimations2[i], 1e-10); + BOOST_REQUIRE_CLOSE(estimations2[i], estimations3[i], 1e-10); + } +} + BOOST_AUTO_TEST_SUITE_END(); From 239899704bb73d6da19997624b05bd8f5008ff7e Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 6 Jan 2019 18:03:37 +0100 Subject: [PATCH 135/150] Change KDE template order Now KernelType is the first argument for the templates. This makes a more friendly interface. --- src/mlpack/methods/kde/kde.hpp | 4 +- src/mlpack/methods/kde/kde_impl.hpp | 124 ++++++++++++++-------------- 2 files changed, 64 insertions(+), 64 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index f2ba2930270..13aac4f579f 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -40,9 +40,9 @@ enum KDEMode * @tparam KernelType Kernel function to use for KDE calculations. * @tparam TreeType Type of tree to use; must satisfy the TreeType policy API. 
*/ -template class TreeType = tree::KDTree, diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 4b3298381ea..03319d371f3 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -38,17 +38,17 @@ TreeType* BuildTree( return new TreeType(std::forward(dataset)); } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -KDE:: @@ -68,17 +68,17 @@ KDE(const double relError, CheckErrorValues(relError, absError); } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -KDE:: @@ -107,17 +107,17 @@ KDE(const KDE& other) : } } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -KDE:: @@ -140,23 +140,23 @@ KDE(KDE&& other) : other.trained = false; } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -KDE& -KDE:: @@ -183,17 +183,17 @@ operator=(KDE other) return *this; } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -KDE:: @@ -206,17 +206,17 @@ KDE class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -void KDE:: @@ -240,17 +240,17 @@ Train(MatType referenceSet) this->trained = true; } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -void KDE:: @@ -271,17 +271,17 @@ Train(Tree* referenceTree, std::vector* oldFromNewReferences) this->trained = true; } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -void KDE:: @@ -344,17 +344,17 @@ Evaluate(MatType querySet, arma::vec& estimations) } } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -void KDE:: @@ -410,17 +410,17 @@ Evaluate(Tree* queryTree, Log::Info << rules.BaseCases() << " base cases 
were calculated." << std::endl; } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -void KDE:: @@ -465,17 +465,17 @@ Evaluate(arma::vec& estimations) Log::Info << rules.BaseCases() << " base cases were calculated." << std::endl; } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -void KDE:: @@ -485,17 +485,17 @@ RelativeError(const double newError) relError = newError; } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -void KDE:: @@ -505,18 +505,18 @@ AbsoluteError(const double newError) absError = newError; } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> template -void KDE:: @@ -547,17 +547,17 @@ serialize(Archive& ar, const unsigned int /* version */) ar & BOOST_SERIALIZATION_NVP(oldFromNewReferences); } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -void KDE:: @@ -571,17 +571,17 @@ CheckErrorValues(const double relError, const double absError) "greater or equal to 0"); } -template class TreeType, template class DualTreeTraversalType, template class SingleTreeTraversalType> -void KDE:: From 658a05a5148e4018ef27d8422194d487a039b387 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 6 Jan 2019 18:04:41 +0100 Subject: [PATCH 136/150] Adapt KDEModel to new KDE template order --- src/mlpack/methods/kde/kde_model.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 5cef72b7279..958f90f67ed 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -33,9 +33,9 @@ template class TreeType> -using KDEType = KDE Date: Sun, 6 Jan 2019 18:05:11 +0100 Subject: [PATCH 137/150] Adapt KDE tests to new KDE template order --- src/mlpack/tests/kde_test.cpp | 
78 +++++++++++++++++------------------ 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 84ecbc9f797..e3fe9d1fae7 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -72,9 +72,9 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) 0.00167470061366603324010116, 0.07658867126520703394465527, 0.01028120384800740999553525}; - KDE kde(0.0, 0.01, GaussianKernel(0.8)); kde.Train(reference); @@ -117,9 +117,9 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) std::vector oldFromNewQueries, oldFromNewReferences; Tree* queryTree = new Tree(query, oldFromNewQueries, 2); Tree* referenceTree = new Tree(reference, oldFromNewReferences, 2); - KDE kde(0.0, 1e-6, GaussianKernel(kernelBandwidth)); kde.Train(referenceTree, &oldFromNewReferences); @@ -151,9 +151,9 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) // Optimized KDE metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); @@ -185,9 +185,9 @@ BOOST_AUTO_TEST_CASE(GaussianSingleKDEBruteForceTest) // Optimized KDE metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); kde.Train(reference); @@ -220,9 +220,9 @@ BOOST_AUTO_TEST_CASE(EpanechnikovCoverSingleKDETest) // Optimized KDE metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); kde.Train(reference); @@ -255,9 +255,9 @@ BOOST_AUTO_TEST_CASE(EpanechnikovOctreeSingleKDETest) // Optimized KDE metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); kde.Train(reference); @@ -292,9 +292,9 @@ BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) std::vector oldFromNewQueries, oldFromNewReferences; Tree* queryTree = new Tree(query, oldFromNewQueries, 2); Tree* referenceTree = new Tree(reference, oldFromNewReferences, 2); - KDE kde(relError, 0.0, GaussianKernel(kernelBandwidth)); 
kde.Train(referenceTree, &oldFromNewReferences); @@ -329,9 +329,9 @@ BOOST_AUTO_TEST_CASE(OctreeGaussianKDETest) // Optimized KDE metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); @@ -363,9 +363,9 @@ BOOST_AUTO_TEST_CASE(RTreeGaussianKDETest) // Optimized KDE metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); @@ -398,9 +398,9 @@ BOOST_AUTO_TEST_CASE(StandardCoverTreeGaussianKDETest) // Optimized KDE metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); @@ -438,9 +438,9 @@ BOOST_AUTO_TEST_CASE(DuplicatedReferenceSampleKDETest) std::vector oldFromNewQueries, oldFromNewReferences; Tree* queryTree = new Tree(query, oldFromNewQueries, 2); Tree* referenceTree = new Tree(reference, oldFromNewReferences, 2); - KDE kde(relError, 0.0, GaussianKernel(kernelBandwidth)); kde.Train(referenceTree, &oldFromNewReferences); @@ -473,9 +473,9 @@ BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) std::vector oldFromNewQueries, oldFromNewReferences; Tree* queryTree = new Tree(query, oldFromNewQueries, 2); Tree* referenceTree = new Tree(reference, oldFromNewReferences, 2); - KDE kde(relError, 0.0, GaussianKernel(kernelBandwidth)); kde.Train(referenceTree, &oldFromNewReferences); @@ -510,9 +510,9 @@ BOOST_AUTO_TEST_CASE(BreadthFirstKDETest) // Breadth-First KDE metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); @@ -574,9 +574,9 @@ BOOST_AUTO_TEST_CASE(EmptyReferenceTest) // KDE metric::EuclideanDistance metric; GaussianKernel kernel(kernelBandwidth); - KDE kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); @@ -607,9 +607,9 @@ BOOST_AUTO_TEST_CASE(EvaluationMatchDimensionsTest) // KDE metric::EuclideanDistance metric; GaussianKernel kernel(kernelBandwidth); - KDE kde(relError, 0.0, kernel, 
KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); @@ -642,9 +642,9 @@ BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) // KDE metric::EuclideanDistance metric; GaussianKernel kernel(kernelBandwidth); - KDE kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); @@ -675,9 +675,9 @@ BOOST_AUTO_TEST_CASE(SerializationTest) const double relError = 0.25; const double absError = 0.0; arma::mat reference = arma::randu(4, 800); - KDE kde(relError, absError, GaussianKernel(0.25)); kde.Train(reference); @@ -688,9 +688,9 @@ BOOST_AUTO_TEST_CASE(SerializationTest) kde.Evaluate(query, estimations); // Initialize serialized objects. - KDE kdeXml, kdeText, kdeBinary; SerializeObjectAll(kde, kdeXml, kdeText, kdeBinary); @@ -744,7 +744,7 @@ BOOST_AUTO_TEST_CASE(CopyConstructor) const double kernelBandwidth = 1.5; const double relError = 0.05; - typedef KDE + typedef KDE KDEType; // KDE From 0a588e39f9dfacebda46dcebae59dd9ad1027256 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 6 Jan 2019 18:05:36 +0100 Subject: [PATCH 138/150] Adapt KDE main tests to new KDE template order --- src/mlpack/tests/main_tests/kde_test.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 26f37e5f472..5676f0eb3a2 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -64,9 +64,9 @@ BOOST_AUTO_TEST_CASE(KDEGaussianRTreeResultsMain) kernel::GaussianKernel kernel(kernelBandwidth); metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); @@ -106,9 +106,9 @@ BOOST_AUTO_TEST_CASE(KDETriangularBallTreeResultsMain) kernel::TriangularKernel kernel(kernelBandwidth); metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); @@ -145,9 +145,9 @@ 
BOOST_AUTO_TEST_CASE(KDEMonoResultsMain) kernel::EpanechnikovKernel kernel(kernelBandwidth); metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); @@ -254,9 +254,9 @@ BOOST_AUTO_TEST_CASE(KDEGaussianSingleKDTreeResultsMain) kernel::GaussianKernel kernel(kernelBandwidth); metric::EuclideanDistance metric; - KDE kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); kde.Train(reference); From 0ac2843fa1a95c80ae3a1662b22fd4388a69148d Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 7 Jan 2019 14:43:24 +0100 Subject: [PATCH 139/150] Add methods to get and modify KDE metric --- src/mlpack/methods/kde/kde.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 13aac4f579f..080d2c2e593 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -179,6 +179,12 @@ class KDE //! Modify the kernel. KernelType& Kernel() { return kernel; } + //! Get the metric. + const MetricType& Metric() const { return metric; } + + //! Modify the metric. + MetricType& Metric() { return metric; } + //! Get the reference tree. 
Tree* ReferenceTree() { return referenceTree; } From b6fee246f3de4a1963fb177fd1175f18fcaa69b2 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 7 Jan 2019 15:55:09 +0100 Subject: [PATCH 140/150] Fix KDE move constructor --- src/mlpack/methods/kde/kde_impl.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 03319d371f3..13484ad8b79 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -122,8 +122,8 @@ KDE:: KDE(KDE&& other) : - kernel(other.kernel), - metric(other.metric), + kernel(std::move(other.kernel)), + metric(std::move(other.metric)), referenceTree(other.referenceTree), oldFromNewReferences(other.oldFromNewReferences), relError(other.relError), @@ -132,8 +132,8 @@ KDE(KDE&& other) : trained(other.trained), mode(other.mode) { - other.kernel = KernelType(); - other.metric = MetricType(); + other.kernel = std::move(KernelType()); + other.metric = std::move(MetricType()); other.referenceTree = nullptr; other.oldFromNewReferences = nullptr; other.ownsReferenceTree = false; From 2381230d488ca17a26bc003544080c74370c765e Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 7 Jan 2019 15:56:42 +0100 Subject: [PATCH 141/150] Add MoveConstructor KDE test --- src/mlpack/tests/kde_test.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index e3fe9d1fae7..c3e2d6d50f1 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -770,4 +770,33 @@ BOOST_AUTO_TEST_CASE(CopyConstructor) } } +/** + * Test if the move constructor works properly. 
+ */ +BOOST_AUTO_TEST_CASE(MoveConstructor) +{ + arma::mat reference = arma::randu(2, 300); + arma::mat query = arma::randu(2, 100); + arma::vec estimations1, estimations2, estimations3; + const double kernelBandwidth = 1.2; + const double relError = 0.05; + + typedef KDE + KDEType; + + // KDE + KDEType kde(relError, 0, kernel::EpanechnikovKernel(kernelBandwidth)); + kde.Train(std::move(reference)); + kde.Evaluate(query, estimations1); + + // Move constructor KDE + KDEType constructor(std::move(kde)); + constructor.Evaluate(query, estimations2); + + // Check results + BOOST_REQUIRE_THROW(kde.Evaluate(query, estimations3), std::runtime_error); + for (size_t i = 0; i < query.n_cols; ++i) + BOOST_REQUIRE_CLOSE(estimations1[i], estimations2[i], 1e-10); +} + BOOST_AUTO_TEST_SUITE_END(); From 76a1398d10ce96bebf33a202f859402fc0866992 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 7 Jan 2019 15:57:12 +0100 Subject: [PATCH 142/150] Check KDE is trained before evaluation --- src/mlpack/methods/kde/kde_impl.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 13484ad8b79..7f67b672726 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -303,6 +303,10 @@ Evaluate(MatType querySet, arma::vec& estimations) estimations.set_size(querySet.n_cols); estimations.fill(arma::fill::zeros); + // Check whether has already been trained. + if (!trained) + throw std::runtime_error("cannot evaluate KDE model: model needs to be " + "trained before evaluation"); // Check querySet has at least 1 element to evaluate. if (querySet.n_cols == 0) { @@ -367,6 +371,10 @@ Evaluate(Tree* queryTree, estimations.set_size(queryTree->Dataset().n_cols); estimations.fill(arma::fill::zeros); + // Check whether has already been trained. 
+ if (!trained) + throw std::runtime_error("cannot evaluate KDE model: model needs to be " + "trained before evaluation"); // Check querySet has at least 1 element to evaluate. if (queryTree->Dataset().n_cols == 0) { @@ -426,6 +434,11 @@ void KDE:: Evaluate(arma::vec& estimations) { + // Check whether has already been trained. + if (!trained) + throw std::runtime_error("cannot evaluate KDE model: model needs to be " + "trained before evaluation"); + // Get estimations vector ready. estimations.clear(); estimations.set_size(referenceTree->Dataset().n_cols); From e119f6d30ce28226cac6cd1afb0b790c6dbcd838 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 7 Jan 2019 15:57:44 +0100 Subject: [PATCH 143/150] Add NotTrained KDE test --- src/mlpack/tests/kde_test.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index c3e2d6d50f1..35d40195313 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -799,4 +799,23 @@ BOOST_AUTO_TEST_CASE(MoveConstructor) BOOST_REQUIRE_CLOSE(estimations1[i], estimations2[i], 1e-10); } +/** + * Test if an untrained KDE works properly. 
+ */ +BOOST_AUTO_TEST_CASE(NotTrained) +{ + arma::mat query = arma::randu(1, 10); + std::vector oldFromNew; + arma::vec estimations; + + KDE<> kde; + KDE<>::Tree queryTree(query, oldFromNew); + + // Check results + BOOST_REQUIRE_THROW(kde.Evaluate(query, estimations), std::runtime_error); + BOOST_REQUIRE_THROW(kde.Evaluate(&queryTree, oldFromNew, estimations), + std::runtime_error); + BOOST_REQUIRE_THROW(kde.Evaluate(estimations), std::runtime_error); +} + BOOST_AUTO_TEST_SUITE_END(); From 8cc5a03552588db792c49f5b6407dd6922522b9a Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 7 Jan 2019 16:31:06 +0100 Subject: [PATCH 144/150] Small KDE coding style improvements --- src/mlpack/methods/kde/kde_impl.hpp | 2 ++ src/mlpack/methods/kde/kde_rules_impl.hpp | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 7f67b672726..cf2e5415c0f 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -59,6 +59,8 @@ KDE(const double relError, MetricType metric) : kernel(kernel), metric(metric), + referenceTree(nullptr), + oldFromNewReferences(nullptr), relError(relError), absError(absError), ownsReferenceTree(false), diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index fb8b7f94b68..bd5d5d10926 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -62,8 +62,8 @@ double KDERules::BaseCase( return 0.0; // Calculations. - double distance = metric.Evaluate(querySet.col(queryIndex), - referenceSet.col(referenceIndex)); + const double distance = metric.Evaluate(querySet.col(queryIndex), + referenceSet.col(referenceIndex)); densities(queryIndex) += kernel.Evaluate(distance); ++baseCases; @@ -74,7 +74,7 @@ double KDERules::BaseCase( //! Single-tree scoring function. 
template -double KDERules:: +inline double KDERules:: Score(const size_t queryIndex, TreeType& referenceNode) { double score, maxKernel, minKernel, bound; @@ -133,7 +133,7 @@ Score(const size_t queryIndex, TreeType& referenceNode) } template -double KDERules::Rescore( +inline double KDERules::Rescore( const size_t /* queryIndex */, TreeType& /* referenceNode */, const double oldScore) const @@ -216,7 +216,7 @@ Score(TreeType& queryNode, TreeType& referenceNode) //! Double-tree template -double KDERules:: +inline double KDERules:: Rescore(TreeType& /*queryNode*/, TreeType& /*referenceNode*/, const double oldScore) const From 7bf036b0e87942fbbd9df7ef077fd31ccc25a6a9 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 7 Jan 2019 19:47:17 +0100 Subject: [PATCH 145/150] KDE style improvements --- src/mlpack/methods/CMakeLists.txt | 2 +- src/mlpack/methods/kde/kde_impl.hpp | 44 +++++-- src/mlpack/methods/kde/kde_main.cpp | 19 +-- src/mlpack/methods/kde/kde_model.hpp | 8 +- src/mlpack/methods/kde/kde_model_impl.hpp | 38 +++--- src/mlpack/methods/kde/kde_rules.hpp | 15 ++- src/mlpack/methods/kde/kde_rules_impl.hpp | 8 +- src/mlpack/tests/CMakeLists.txt | 4 +- src/mlpack/tests/kde_test.cpp | 148 +++++++++++----------- src/mlpack/tests/main_tests/kde_test.cpp | 54 ++++---- 10 files changed, 192 insertions(+), 148 deletions(-) diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt index c5b159bfc0e..db569c3a37c 100644 --- a/src/mlpack/methods/CMakeLists.txt +++ b/src/mlpack/methods/CMakeLists.txt @@ -17,6 +17,7 @@ set(DIRS gmm hmm hoeffding_trees + kde kernel_pca kmeans lars @@ -48,7 +49,6 @@ set(DIRS sparse_coding sparse_svm svdplusplus - kde ) foreach(dir ${DIRS}) diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index cf2e5415c0f..8a679ae71f0 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -16,7 +16,7 @@ namespace mlpack { namespace kde { -//! 
Construct tree that rearranges the dataset +//! Construct tree that rearranges the dataset. template TreeType* BuildTree( MatType&& dataset, @@ -27,7 +27,7 @@ TreeType* BuildTree( return new TreeType(std::forward(dataset), oldFromNew); } -//! Construct tree that doesn't rearrange the dataset +//! Construct tree that doesn't rearrange the dataset. template TreeType* BuildTree( MatType&& dataset, @@ -228,11 +228,13 @@ Train(MatType referenceSet) if (referenceSet.n_cols == 0) throw std::invalid_argument("cannot train KDE model with an empty " "reference set"); + if (ownsReferenceTree) { delete referenceTree; delete oldFromNewReferences; } + this->ownsReferenceTree = true; Timer::Start("building_reference_tree"); this->oldFromNewReferences = new std::vector; @@ -262,11 +264,13 @@ Train(Tree* referenceTree, std::vector* oldFromNewReferences) if (referenceTree->Dataset().n_cols == 0) throw std::invalid_argument("cannot train KDE model with an empty " "reference set"); + if (ownsReferenceTree == true) { delete this->referenceTree; delete this->oldFromNewReferences; } + this->ownsReferenceTree = false; this->referenceTree = referenceTree; this->oldFromNewReferences = oldFromNewReferences; @@ -307,8 +311,11 @@ Evaluate(MatType querySet, arma::vec& estimations) // Check whether has already been trained. if (!trained) + { throw std::runtime_error("cannot evaluate KDE model: model needs to be " "trained before evaluation"); + } + // Check querySet has at least 1 element to evaluate. if (querySet.n_cols == 0) { @@ -316,10 +323,13 @@ Evaluate(MatType querySet, arma::vec& estimations) << "be returned" << std::endl; return; } + // Check whether dimensions match. if (querySet.n_rows != referenceTree->Dataset().n_rows) + { throw std::invalid_argument("cannot evaluate KDE model: querySet and " "referenceSet dimensions don't match"); + } Timer::Start("computing_kde"); // Evaluate @@ -375,8 +385,11 @@ Evaluate(Tree* queryTree, // Check whether has already been trained. 
if (!trained) + { throw std::runtime_error("cannot evaluate KDE model: model needs to be " "trained before evaluation"); + } + // Check querySet has at least 1 element to evaluate. if (queryTree->Dataset().n_cols == 0) { @@ -384,19 +397,25 @@ Evaluate(Tree* queryTree, << "be returned" << std::endl; return; } + // Check whether dimensions match. if (queryTree->Dataset().n_rows != referenceTree->Dataset().n_rows) + { throw std::invalid_argument("cannot evaluate KDE model: querySet and " "referenceSet dimensions don't match"); + } + // Check the mode is correct. if (mode != DUAL_TREE_MODE) + { throw std::invalid_argument("cannot evaluate KDE model: cannot use " "a query tree when mode is different from " "dual-tree"); + } Timer::Start("computing_kde"); - // Evaluate + // Evaluate. typedef KDERules RuleType; RuleType rules = RuleType(referenceTree->Dataset(), queryTree->Dataset(), @@ -438,8 +457,10 @@ Evaluate(arma::vec& estimations) { // Check whether has already been trained. if (!trained) + { throw std::runtime_error("cannot evaluate KDE model: model needs to be " "trained before evaluation"); + } // Get estimations vector ready. estimations.clear(); @@ -579,11 +600,15 @@ void KDE 1) + { throw std::invalid_argument("Relative error tolerance must be a value " "between 0 and 1"); + } if (absError < 0) + { throw std::invalid_argument("Absolute error tolerance must be a value " "greater or equal to 0"); + } } template& oldFromNew, { if (tree::TreeTraits::RearrangesDataset) { - const size_t n_queries = oldFromNew.size(); - arma::vec rearranged_estimations(n_queries); - for (size_t i = 0; i < n_queries; ++i) - rearranged_estimations(oldFromNew.at(i)) = estimations(i); - estimations = std::move(rearranged_estimations); + const size_t nQueries = oldFromNew.size(); + arma::vec rearrangedEstimations(nQueries); + + // Remap vector. 
+ for (size_t i = 0; i < nQueries; ++i) + rearrangedEstimations(oldFromNew.at(i)) = estimations(i); + + estimations = std::move(rearrangedEstimations); } } diff --git a/src/mlpack/methods/kde/kde_main.cpp b/src/mlpack/methods/kde/kde_main.cpp index d5900e75b61..6f9b09bf0ec 100644 --- a/src/mlpack/methods/kde/kde_main.cpp +++ b/src/mlpack/methods/kde/kde_main.cpp @@ -79,7 +79,7 @@ PARAM_MODEL_OUT(KDEModel, "If specified, the KDE model will be saved here.", "M"); -// Configuration options +// Configuration options. PARAM_STRING_IN("kernel", "Kernel to use for the prediction." "('gaussian', 'epanechnikov', 'laplacian', 'spherical', 'triangular').", "k", "gaussian"); @@ -113,6 +113,7 @@ static void mlpackMain() const std::string modeStr = CLI::GetParam("algorithm"); const double relError = CLI::GetParam("rel_error"); const double absError = CLI::GetParam("abs_error"); + // Initialize results vector. arma::vec estimations; @@ -142,12 +143,12 @@ static void mlpackMain() arma::mat reference = std::move(CLI::GetParam("reference")); kde = new KDEModel(); - // Set parameters + // Set parameters. kde->Bandwidth() = bandwidth; kde->RelativeError() = relError; kde->AbsoluteError() = absError; - // Set KernelType + // Set KernelType. if (kernelStr == "gaussian") kde->KernelType() = KDEModel::GAUSSIAN_KERNEL; else if (kernelStr == "epanechnikov") @@ -159,7 +160,7 @@ static void mlpackMain() else if (kernelStr == "triangular") kde->KernelType() = KDEModel::TRIANGULAR_KERNEL; - // Set TreeType + // Set TreeType. if (treeStr == "kd-tree") kde->TreeType() = KDEModel::KD_TREE; else if (treeStr == "ball-tree") @@ -171,10 +172,10 @@ static void mlpackMain() else if (treeStr == "r-tree") kde->TreeType() = KDEModel::R_TREE; - // Build model + // Build model. kde->BuildModel(std::move(reference)); - // Set Mode + // Set Mode. 
if (modeStr == "dual-tree") kde->Mode() = KDEMode::DUAL_TREE_MODE; else if (modeStr == "single-tree") @@ -182,18 +183,20 @@ static void mlpackMain() } else { - // Load model + // Load model. kde = CLI::GetParam("input_model"); } - // Evaluation + // Evaluation. if (CLI::HasParam("query")) { arma::mat query = std::move(CLI::GetParam("query")); kde->Evaluate(std::move(query), estimations); } else + { kde->Evaluate(estimations); + } // Output predictions if needed. if (CLI::HasParam("predictions")) diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 958f90f67ed..cf6aa2cf578 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -12,16 +12,16 @@ #ifndef MLPACK_METHODS_KDE_MODEL_HPP #define MLPACK_METHODS_KDE_MODEL_HPP -// Include trees +// Include trees. #include #include #include #include -// Include core +// Include core. #include -// Remaining includes +// Remaining includes. #include #include "kde.hpp" @@ -220,8 +220,10 @@ class KDEModel //! Absolute error tolerance. double absError; + //! Type of kernel. KernelTypes kernelType; + //! Type of tree. TreeTypes treeType; /** diff --git a/src/mlpack/methods/kde/kde_model_impl.hpp b/src/mlpack/methods/kde/kde_model_impl.hpp index 1017241f50c..a4ab7236284 100644 --- a/src/mlpack/methods/kde/kde_model_impl.hpp +++ b/src/mlpack/methods/kde/kde_model_impl.hpp @@ -32,7 +32,7 @@ inline KDEModel::KDEModel(const double bandwidth, kernelType(kernelType), treeType(treeType) { - // Nothing to do + // Nothing to do. } // Copy constructor. @@ -43,7 +43,7 @@ inline KDEModel::KDEModel(const KDEModel& other) : kernelType(other.kernelType), treeType(other.treeType) { - // Nothing to do + // Nothing to do. } // Move constructor. @@ -55,7 +55,7 @@ inline KDEModel::KDEModel(KDEModel&& other) : treeType(other.treeType), kdeModel(std::move(other.kdeModel)) { - // Reset other model + // Reset other model. 
other.bandwidth = 1.0; other.relError = 0.05; other.absError = 0; @@ -76,7 +76,7 @@ inline KDEModel& KDEModel::operator=(KDEModel other) return *this; } -// Clean memory +// Clean memory. inline KDEModel::~KDEModel() { boost::apply_visitor(DeleteVisitor(), kdeModel); @@ -87,6 +87,7 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) // Clean memory, if necessary. boost::apply_visitor(DeleteVisitor(), kdeModel); + // Build the actual model. if (kernelType == GAUSSIAN_KERNEL && treeType == KD_TREE) { kdeModel = new KDEType @@ -213,11 +214,12 @@ inline void KDEModel::BuildModel(arma::mat&& referenceSet) (relError, absError, kernel::TriangularKernel(bandwidth)); } + // Train the model. TrainVisitor train(std::move(referenceSet)); boost::apply_visitor(train, kdeModel); } -// Perform bichromatic evaluation +// Perform bichromatic evaluation. inline void KDEModel::Evaluate(arma::mat&& querySet, arma::vec& estimations) { Log::Info << "Evaluating KDE..." << std::endl; @@ -225,7 +227,7 @@ inline void KDEModel::Evaluate(arma::mat&& querySet, arma::vec& estimations) boost::apply_visitor(eval, kdeModel); } -// Perform monochromatic evaluation +// Perform monochromatic evaluation. inline void KDEModel::Evaluate(arma::vec& estimations) { Log::Info << "Evaluating KDE..." << std::endl; @@ -233,18 +235,18 @@ inline void KDEModel::Evaluate(arma::vec& estimations) boost::apply_visitor(eval, kdeModel); } -// Clean memory +// Clean memory. inline void KDEModel::CleanMemory() { boost::apply_visitor(DeleteVisitor(), kdeModel); } -// Parameters for KDE evaluation +// Parameters for KDE evaluation. DualMonoKDE::DualMonoKDE(arma::vec& estimations): estimations(estimations) {} -// Default KDE evaluation +// Default KDE evaluation. template* kde) const estimations); } else + { throw std::runtime_error("no KDE model initialized"); + } } -// Parameters for KDE evaluation +// Parameters for KDE evaluation. 
DualBiKDE::DualBiKDE(arma::mat&& querySet, arma::vec& estimations): dimension(querySet.n_rows), querySet(std::move(querySet)), estimations(estimations) {} -// Default KDE evaluation +// Default KDE evaluation. template* kde) const estimations); } else + { throw std::runtime_error("no KDE model initialized"); + } } // Parameters for Train. @@ -293,7 +299,7 @@ TrainVisitor::TrainVisitor(arma::mat&& referenceSet) : referenceSet(std::move(referenceSet)) {} -// Default Train +// Default Train. template* kde) const throw std::runtime_error("no KDE model initialized"); } -// Delete model +// Delete model. template void DeleteVisitor::operator()(KDEType* kde) const { @@ -315,7 +321,7 @@ void DeleteVisitor::operator()(KDEType* kde) const delete kde; } -// Mode of model +// Mode of model. template KDEMode& ModeVisitor::operator()(KDEType* kde) const { @@ -325,13 +331,13 @@ KDEMode& ModeVisitor::operator()(KDEType* kde) const throw std::runtime_error("no KDE model initialized"); } -// Get mode of model +// Get mode of model. KDEMode KDEModel::Mode() const { return boost::apply_visitor(ModeVisitor(), kdeModel); } -// Modify mode of model +// Modify mode of model. KDEMode& KDEModel::Mode() { return boost::apply_visitor(ModeVisitor(), kdeModel); diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index a93c9956226..e0f1d19f721 100644 --- a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -32,29 +32,31 @@ class KDERules KernelType& kernel, const bool sameSet); - //! Base Case + //! Base Case. double BaseCase(const size_t queryIndex, const size_t referenceIndex); - //! SingleTree Rescore + //! SingleTree Rescore. double Score(const size_t queryIndex, TreeType& referenceNode); - //! SingleTree Score + //! SingleTree Score. double Rescore(const size_t queryIndex, TreeType& referenceNode, const double oldScore) const; - //! DoubleTree Score + //! DoubleTree Score. 
double Score(TreeType& queryNode, TreeType& referenceNode); - //! DoubleTree Rescore + //! DoubleTree Rescore. double Rescore(TreeType& queryNode, TreeType& referenceNode, const double oldScore) const; typedef typename tree::TraversalInfo TraversalInfoType; + //! Get traversal information. const TraversalInfoType& TraversalInfo() const { return traversalInfo; } + //! Modify traversal information. TraversalInfoType& TraversalInfo() { return traversalInfo; } //! Get the number of base cases. @@ -90,7 +92,7 @@ class KDERules //! Instantiated metric. MetricType& metric; - //! Instantiated kernel + //! Instantiated kernel. KernelType& kernel; //! Whether reference and query sets are the same. @@ -102,6 +104,7 @@ class KDERules //! The last reference index. size_t lastReferenceIndex; + //! Traversal information. TraversalInfoType traversalInfo; //! The number of base cases. diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index bd5d5d10926..63a758e9848 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -103,6 +103,7 @@ Score(const size_t queryIndex, TreeType& referenceNode) if (newCalculations && bound <= (absError + relError * minKernel) / referenceSet.n_cols) { + // Estimate values. double kernelValue; // Calculate kernel value based on reference node centroid. @@ -118,7 +119,7 @@ Score(const size_t queryIndex, TreeType& referenceNode) densities(queryIndex) += referenceNode.NumDescendants() * kernelValue; - // Don't explore this tree branch + // Don't explore this tree branch. score = DBL_MAX; } else @@ -171,7 +172,7 @@ Score(TreeType& queryNode, TreeType& referenceNode) bound = maxKernel - minKernel; } - // If possible, avoid some calculations because of the error tolerance + // If possible, avoid some calculations because of the error tolerance. 
if (newCalculations && bound <= (absError + relError * minKernel) / referenceSet.n_cols) { @@ -214,13 +215,14 @@ Score(TreeType& queryNode, TreeType& referenceNode) return score; } -//! Double-tree +//! Double-tree rescore. template inline double KDERules:: Rescore(TreeType& /*queryNode*/, TreeType& /*referenceNode*/, const double oldScore) const { + // If a branch is pruned then it continues to be pruned. return oldScore; } diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt index d9ef11de00d..3178180b0fa 100644 --- a/src/mlpack/tests/CMakeLists.txt +++ b/src/mlpack/tests/CMakeLists.txt @@ -39,6 +39,7 @@ add_executable(mlpack_test hyperplane_test.cpp imputation_test.cpp init_rules_test.cpp + kde_test.cpp kernel_pca_test.cpp kernel_test.cpp kernel_traits_test.cpp @@ -109,7 +110,6 @@ add_executable(mlpack_test ub_tree_test.cpp union_find_test.cpp vantage_point_tree_test.cpp - kde_test.cpp wgan_test.cpp main_tests/test_helper.hpp main_tests/emst_test.cpp @@ -120,6 +120,7 @@ add_executable(mlpack_test main_tests/det_test.cpp main_tests/decision_tree_test.cpp main_tests/decision_stump_test.cpp + main_tests/kde_test.cpp main_tests/linear_regression_test.cpp main_tests/logistic_regression_test.cpp main_tests/lmnn_test.cpp @@ -145,7 +146,6 @@ add_executable(mlpack_test main_tests/radical_test.cpp main_tests/hmm_test_utils.hpp main_tests/kernel_pca_test.cpp - main_tests/kde_test.cpp ) # Link dependencies of test executable. diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 35d40195313..154edeb613c 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -29,12 +29,12 @@ using namespace boost::serialization; BOOST_AUTO_TEST_SUITE(KDETest); -// Brute force gaussian KDE -template +// Brute force gaussian KDE. 
+template void BruteForceKDE(const arma::mat& reference, const arma::mat& query, arma::vec& densities, - T& kernel) + KernelType& kernel) { metric::EuclideanDistance metric; for (size_t i = 0; i < query.n_cols; ++i) @@ -68,23 +68,23 @@ BOOST_AUTO_TEST_CASE(KDESimpleTest) arma::inplace_trans(query); arma::vec estimations; // Manually calculated results. - arma::vec estimations_result = {0.08323668699564207296148765, - 0.00167470061366603324010116, - 0.07658867126520703394465527, - 0.01028120384800740999553525}; + arma::vec estimationsResult = {0.08323668699564207296148765, + 0.00167470061366603324010116, + 0.07658867126520703394465527, + 0.01028120384800740999553525}; KDE - kde(0.0, 0.01, GaussianKernel(0.8)); + kde(0.0, 0.01, GaussianKernel(0.8)); kde.Train(reference); kde.Evaluate(query, estimations); for (size_t i = 0; i < query.n_cols; ++i) - BOOST_REQUIRE_CLOSE(estimations[i], estimations_result[i], 0.01); + BOOST_REQUIRE_CLOSE(estimations[i], estimationsResult[i], 0.01); } /** - * Test Train(Tree...) and Evaluate(Tree...) + * Test Train(Tree...) and Evaluate(Tree...). */ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) { @@ -121,7 +121,7 @@ BOOST_AUTO_TEST_CASE(KDETreeAsArguments) EuclideanDistance, arma::mat, KDTree> - kde(0.0, 1e-6, GaussianKernel(kernelBandwidth)); + kde(0.0, 1e-6, GaussianKernel(kernelBandwidth)); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, std::move(oldFromNewQueries), estimations); for (size_t i = 0; i < query.n_cols; ++i) @@ -142,20 +142,20 @@ BOOST_AUTO_TEST_CASE(GaussianKDEBruteForceTest) const double kernelBandwidth = 0.3; const double relError = 0.01; - // Brute force KDE + // Brute force KDE. GaussianKernel kernel(kernelBandwidth); BruteForceKDE(reference, query, bfEstimations, kernel); - // Optimized KDE + // Optimized KDE. 
metric::EuclideanDistance metric; KDE - kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -176,20 +176,20 @@ BOOST_AUTO_TEST_CASE(GaussianSingleKDEBruteForceTest) const double kernelBandwidth = 0.3; const double relError = 0.01; - // Brute force KDE + // Brute force KDE. GaussianKernel kernel(kernelBandwidth); BruteForceKDE(reference, query, bfEstimations, kernel); - // Optimized KDE + // Optimized KDE. metric::EuclideanDistance metric; KDE - kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -211,20 +211,20 @@ BOOST_AUTO_TEST_CASE(EpanechnikovCoverSingleKDETest) const double kernelBandwidth = 1.1; const double relError = 0.08; - // Brute force KDE + // Brute force KDE. EpanechnikovKernel kernel(kernelBandwidth); BruteForceKDE(reference, query, bfEstimations, kernel); - // Optimized KDE + // Optimized KDE. metric::EuclideanDistance metric; KDE - kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -246,20 +246,20 @@ BOOST_AUTO_TEST_CASE(EpanechnikovOctreeSingleKDETest) const double kernelBandwidth = 1.0; const double relError = 0.05; - // Brute force KDE + // Brute force KDE. EpanechnikovKernel kernel(kernelBandwidth); BruteForceKDE(reference, query, bfEstimations, kernel); - // Optimized KDE + // Optimized KDE. 
metric::EuclideanDistance metric; KDE - kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -280,14 +280,14 @@ BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) const double kernelBandwidth = 0.4; const double relError = 0.05; - // Brute force KDE + // Brute force KDE. GaussianKernel kernel(kernelBandwidth); BruteForceKDE(reference, query, bfEstimations, kernel); - // BallTree KDE + // BallTree KDE. typedef BallTree Tree; std::vector oldFromNewQueries, oldFromNewReferences; Tree* queryTree = new Tree(query, oldFromNewQueries, 2); @@ -296,7 +296,7 @@ BOOST_AUTO_TEST_CASE(BallTreeGaussianKDETest) EuclideanDistance, arma::mat, BallTree> - kde(relError, 0.0, GaussianKernel(kernelBandwidth)); + kde(relError, 0.0, GaussianKernel(kernelBandwidth)); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, std::move(oldFromNewQueries), treeEstimations); @@ -320,20 +320,20 @@ BOOST_AUTO_TEST_CASE(OctreeGaussianKDETest) const double kernelBandwidth = 0.3; const double relError = 0.01; - // Brute force KDE + // Brute force KDE. GaussianKernel kernel(kernelBandwidth); BruteForceKDE(reference, query, bfEstimations, kernel); - // Optimized KDE + // Optimized KDE. metric::EuclideanDistance metric; KDE - kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -354,20 +354,20 @@ BOOST_AUTO_TEST_CASE(RTreeGaussianKDETest) const double kernelBandwidth = 0.3; const double relError = 0.01; - // Brute force KDE + // Brute force KDE. GaussianKernel kernel(kernelBandwidth); BruteForceKDE(reference, query, bfEstimations, kernel); - // Optimized KDE + // Optimized KDE. 
metric::EuclideanDistance metric; KDE - kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -389,20 +389,20 @@ BOOST_AUTO_TEST_CASE(StandardCoverTreeGaussianKDETest) const double kernelBandwidth = 0.3; const double relError = 0.01; - // Brute force KDE + // Brute force KDE. GaussianKernel kernel(kernelBandwidth); BruteForceKDE(reference, query, bfEstimations, kernel); - // Optimized KDE + // Optimized KDE. metric::EuclideanDistance metric; KDE - kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -423,17 +423,17 @@ BOOST_AUTO_TEST_CASE(DuplicatedReferenceSampleKDETest) const double kernelBandwidth = 0.4; const double relError = 0.05; - // Duplicate value + // Duplicate value. reference.col(2) = reference.col(3); - // Brute force KDE + // Brute force KDE. GaussianKernel kernel(kernelBandwidth); BruteForceKDE(reference, query, bfEstimations, kernel); - // Dual-tree KDE + // Dual-tree KDE. typedef KDTree Tree; std::vector oldFromNewQueries, oldFromNewReferences; Tree* queryTree = new Tree(query, oldFromNewQueries, 2); @@ -442,7 +442,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedReferenceSampleKDETest) EuclideanDistance, arma::mat, KDTree> - kde(relError, 0.0, GaussianKernel(kernelBandwidth)); + kde(relError, 0.0, GaussianKernel(kernelBandwidth)); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, oldFromNewQueries, treeEstimations); @@ -465,10 +465,10 @@ BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) const double kernelBandwidth = 0.4; const double relError = 0.05; - // Duplicate value + // Duplicate value. query.col(2) = query.col(3); - // Dual-tree KDE + // Dual-tree KDE. 
typedef KDTree Tree; std::vector oldFromNewQueries, oldFromNewReferences; Tree* queryTree = new Tree(query, oldFromNewQueries, 2); @@ -477,7 +477,7 @@ BOOST_AUTO_TEST_CASE(DuplicatedQuerySampleKDETest) EuclideanDistance, arma::mat, KDTree> - kde(relError, 0.0, GaussianKernel(kernelBandwidth)); + kde(relError, 0.0, GaussianKernel(kernelBandwidth)); kde.Train(referenceTree, &oldFromNewReferences); kde.Evaluate(queryTree, oldFromNewQueries, estimations); @@ -501,14 +501,14 @@ BOOST_AUTO_TEST_CASE(BreadthFirstKDETest) const double kernelBandwidth = 0.8; const double relError = 0.01; - // Brute force KDE + // Brute force KDE. GaussianKernel kernel(kernelBandwidth); BruteForceKDE(reference, query, bfEstimations, kernel); - // Breadth-First KDE + // Breadth-First KDE. metric::EuclideanDistance metric; KDE::template BreadthFirstDualTreeTraverser> - kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -538,20 +538,20 @@ BOOST_AUTO_TEST_CASE(OneDimensionalTest) const double kernelBandwidth = 0.7; const double relError = 0.01; - // Brute force KDE + // Brute force KDE. GaussianKernel kernel(kernelBandwidth); BruteForceKDE(reference, query, bfEstimations, kernel); - // Optimized KDE + // Optimized KDE. metric::EuclideanDistance metric; KDE - kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, treeEstimations); @@ -571,19 +571,19 @@ BOOST_AUTO_TEST_CASE(EmptyReferenceTest) const double kernelBandwidth = 0.7; const double relError = 0.01; - // KDE + // KDE. 
metric::EuclideanDistance metric; GaussianKernel kernel(kernelBandwidth); KDE - kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); - // When training using the dataset matrix + // When training using the dataset matrix. BOOST_REQUIRE_THROW(kde.Train(reference), std::invalid_argument); - // When training using a tree + // When training using a tree. std::vector oldFromNewReferences; typedef KDTree Tree; Tree* referenceTree = new Tree(reference, oldFromNewReferences, 2); @@ -604,21 +604,21 @@ BOOST_AUTO_TEST_CASE(EvaluationMatchDimensionsTest) const double kernelBandwidth = 0.7; const double relError = 0.01; - // KDE + // KDE. metric::EuclideanDistance metric; GaussianKernel kernel(kernelBandwidth); KDE - kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); - // When evaluating using the query dataset matrix + // When evaluating using the query dataset matrix. BOOST_REQUIRE_THROW(kde.Evaluate(query, estimations), std::invalid_argument); - // When evaluating using a query tree + // When evaluating using a query tree. typedef KDTree Tree; std::vector oldFromNewQueries; Tree* queryTree = new Tree(query, oldFromNewQueries, 3); @@ -634,35 +634,35 @@ BOOST_AUTO_TEST_CASE(EmptyQuerySetTest) { arma::mat reference = arma::randu(1, 10); arma::mat query; - // Set estimations to the wrong size + // Set estimations to the wrong size. arma::vec estimations(33, arma::fill::zeros); const double kernelBandwidth = 0.7; const double relError = 0.01; - // KDE + // KDE. metric::EuclideanDistance metric; GaussianKernel kernel(kernelBandwidth); KDE - kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); - // The query set must be empty + // The query set must be empty. 
BOOST_REQUIRE_EQUAL(query.n_cols, 0); - // When evaluating using the query dataset matrix + // When evaluating using the query dataset matrix. BOOST_REQUIRE_NO_THROW(kde.Evaluate(query, estimations)); - // When evaluating using a query tree + // When evaluating using a query tree. typedef KDTree Tree; std::vector oldFromNewQueries; Tree* queryTree = new Tree(query, oldFromNewQueries, 3); BOOST_REQUIRE_NO_THROW( - kde.Evaluate(queryTree, oldFromNewQueries, estimations)); + kde.Evaluate(queryTree, oldFromNewQueries, estimations)); delete queryTree; - // Estimations must be empty + // Estimations must be empty. BOOST_REQUIRE_EQUAL(estimations.size(), 0); } @@ -679,7 +679,7 @@ BOOST_AUTO_TEST_CASE(SerializationTest) metric::EuclideanDistance, arma::mat, tree::KDTree> - kde(relError, absError, GaussianKernel(0.25)); + kde(relError, absError, GaussianKernel(0.25)); kde.Train(reference); // Get estimations to compare. @@ -747,22 +747,22 @@ BOOST_AUTO_TEST_CASE(CopyConstructor) typedef KDE KDEType; - // KDE + // KDE. KDEType kde(relError, 0, kernel::GaussianKernel(kernelBandwidth)); kde.Train(std::move(reference)); - // Copy constructor KDE + // Copy constructor KDE. KDEType constructor(kde); - // Copy operator KDE + // Copy operator KDE. KDEType oper = kde; - // Evaluations + // Evaluations. kde.Evaluate(query, estimations1); constructor.Evaluate(query, estimations2); oper.Evaluate(query, estimations3); - // Check results + // Check results. for (size_t i = 0; i < query.n_cols; ++i) { BOOST_REQUIRE_CLOSE(estimations1[i], estimations2[i], 1e-10); @@ -784,16 +784,16 @@ BOOST_AUTO_TEST_CASE(MoveConstructor) typedef KDE KDEType; - // KDE + // KDE. KDEType kde(relError, 0, kernel::EpanechnikovKernel(kernelBandwidth)); kde.Train(std::move(reference)); kde.Evaluate(query, estimations1); - // Move constructor KDE + // Move constructor KDE. KDEType constructor(std::move(kde)); constructor.Evaluate(query, estimations2); - // Check results + // Check results. 
BOOST_REQUIRE_THROW(kde.Evaluate(query, estimations3), std::runtime_error); for (size_t i = 0; i < query.n_cols; ++i) BOOST_REQUIRE_CLOSE(estimations1[i], estimations2[i], 1e-10); @@ -811,7 +811,7 @@ BOOST_AUTO_TEST_CASE(NotTrained) KDE<> kde; KDE<>::Tree queryTree(query, oldFromNew); - // Check results + // Check results. BOOST_REQUIRE_THROW(kde.Evaluate(query, estimations), std::runtime_error); BOOST_REQUIRE_THROW(kde.Evaluate(&queryTree, oldFromNew, estimations), std::runtime_error); diff --git a/src/mlpack/tests/main_tests/kde_test.cpp b/src/mlpack/tests/main_tests/kde_test.cpp index 5676f0eb3a2..5517ff47006 100644 --- a/src/mlpack/tests/main_tests/kde_test.cpp +++ b/src/mlpack/tests/main_tests/kde_test.cpp @@ -55,7 +55,7 @@ BOOST_FIXTURE_TEST_SUITE(KDEMainTest, KDETestFixture); **/ BOOST_AUTO_TEST_CASE(KDEGaussianRTreeResultsMain) { - // Datasets + // Datasets. arma::mat reference = arma::randu(3, 500); arma::mat query = arma::randu(3, 100); arma::vec kdeEstimations, mainEstimations; @@ -68,13 +68,13 @@ BOOST_AUTO_TEST_CASE(KDEGaussianRTreeResultsMain) metric::EuclideanDistance, arma::mat, tree::RTree> - kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, kdeEstimations); - // Normalize estimations + // Normalize estimations. kdeEstimations /= kernel.Normalizer(reference.n_rows); - // Main estimations + // Main estimations. SetInputParam("reference", reference); SetInputParam("query", query); SetInputParam("kernel", std::string("gaussian")); @@ -97,7 +97,7 @@ BOOST_AUTO_TEST_CASE(KDEGaussianRTreeResultsMain) **/ BOOST_AUTO_TEST_CASE(KDETriangularBallTreeResultsMain) { - // Datasets + // Datasets. 
arma::mat reference = arma::randu(3, 300); arma::mat query = arma::randu(3, 100); arma::vec kdeEstimations, mainEstimations; @@ -110,11 +110,11 @@ BOOST_AUTO_TEST_CASE(KDETriangularBallTreeResultsMain) metric::EuclideanDistance, arma::mat, tree::BallTree> - kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::DUAL_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, kdeEstimations); - // Main estimations + // Main estimations. SetInputParam("reference", reference); SetInputParam("query", query); SetInputParam("kernel", std::string("triangular")); @@ -137,7 +137,7 @@ BOOST_AUTO_TEST_CASE(KDETriangularBallTreeResultsMain) **/ BOOST_AUTO_TEST_CASE(KDEMonoResultsMain) { - // Datasets + // Datasets. arma::mat reference = arma::randu(2, 300); arma::vec kdeEstimations, mainEstimations; double kernelBandwidth = 2.3; @@ -153,10 +153,10 @@ BOOST_AUTO_TEST_CASE(KDEMonoResultsMain) kde.Train(reference); // Perform monochromatic KDE. kde.Evaluate(kdeEstimations); - // Normalize + // Normalize. kdeEstimations /= kernel.Normalizer(reference.n_rows); - // Main estimations + // Main estimations. SetInputParam("reference", reference); SetInputParam("kernel", std::string("epanechnikov")); SetInputParam("tree", std::string("cover-tree")); @@ -193,12 +193,12 @@ BOOST_AUTO_TEST_CASE(KDEOutputSize) arma::mat reference = arma::randu(dim, 325); arma::mat query = arma::randu(dim, samples); - // Main params + // Main params. SetInputParam("reference", reference); SetInputParam("query", query); mlpackMain(); - // Check number of output elements + // Check number of output elements. BOOST_REQUIRE_EQUAL(CLI::GetParam("predictions").size(), samples); } @@ -213,7 +213,7 @@ BOOST_AUTO_TEST_CASE(KDEModelReuse) arma::mat reference = arma::randu(dim, 300); arma::mat query = arma::randu(dim, samples); - // Main params + // Main params. 
SetInputParam("reference", reference); SetInputParam("query", query); SetInputParam("bandwidth", 2.4); @@ -223,7 +223,7 @@ BOOST_AUTO_TEST_CASE(KDEModelReuse) arma::vec oldEstimations = std::move(CLI::GetParam("predictions")); - // Change parameters and load model + // Change parameters and load model. CLI::GetSingleton().Parameters()["reference"].wasPassed = false; SetInputParam("bandwidth", 0.5); SetInputParam("query", query); @@ -234,7 +234,7 @@ BOOST_AUTO_TEST_CASE(KDEModelReuse) arma::vec newEstimations = std::move(CLI::GetParam("predictions")); - // Check estimations are the same + // Check estimations are the same. for (size_t i = 0; i < samples; ++i) BOOST_REQUIRE_CLOSE(oldEstimations[i], newEstimations[i], relError); } @@ -245,7 +245,7 @@ BOOST_AUTO_TEST_CASE(KDEModelReuse) **/ BOOST_AUTO_TEST_CASE(KDEGaussianSingleKDTreeResultsMain) { - // Datasets + // Datasets. arma::mat reference = arma::randu(3, 400); arma::mat query = arma::randu(3, 400); arma::vec kdeEstimations, mainEstimations; @@ -258,12 +258,12 @@ BOOST_AUTO_TEST_CASE(KDEGaussianSingleKDTreeResultsMain) metric::EuclideanDistance, arma::mat, tree::BallTree> - kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); + kde(relError, 0.0, kernel, KDEMode::SINGLE_TREE_MODE, metric); kde.Train(reference); kde.Evaluate(query, kdeEstimations); kdeEstimations /= kernel.Normalizer(reference.n_rows); - // Main estimations + // Main estimations. SetInputParam("reference", reference); SetInputParam("query", query); SetInputParam("kernel", std::string("gaussian")); @@ -289,7 +289,7 @@ BOOST_AUTO_TEST_CASE(KDEMainInvalidKernel) arma::mat reference = arma::randu(2, 10); arma::mat query = arma::randu(2, 5); - // Main params + // Main params. 
SetInputParam("reference", reference); SetInputParam("query", query); SetInputParam("kernel", std::string("linux")); @@ -307,7 +307,7 @@ BOOST_AUTO_TEST_CASE(KDEMainInvalidTree) arma::mat reference = arma::randu(2, 10); arma::mat query = arma::randu(2, 5); - // Main params + // Main params. SetInputParam("reference", reference); SetInputParam("query", query); SetInputParam("tree", std::string("olive")); @@ -325,7 +325,7 @@ BOOST_AUTO_TEST_CASE(KDEMainInvalidAlgorithm) arma::mat reference = arma::randu(2, 10); arma::mat query = arma::randu(2, 5); - // Main params + // Main params. SetInputParam("reference", reference); SetInputParam("query", query); SetInputParam("algorithm", std::string("bogosort")); @@ -345,7 +345,7 @@ BOOST_AUTO_TEST_CASE(KDEMainReferenceAndModel) arma::mat query = arma::randu(2, 5); KDEModel* model = new KDEModel(); - // Main params + // Main params. SetInputParam("reference", reference); SetInputParam("query", query); SetInputParam("input_model", model); @@ -363,16 +363,16 @@ BOOST_AUTO_TEST_CASE(KDEMainInvalidAbsoluteError) arma::mat reference = arma::randu(1, 10); arma::mat query = arma::randu(1, 5); - // Main params + // Main params. SetInputParam("reference", reference); SetInputParam("query", query); Log::Fatal.ignoreInput = true; - // Invalid value + // Invalid value. SetInputParam("abs_error", -0.1); BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error); - // Valid value + // Valid value. SetInputParam("abs_error", 5.8); BOOST_REQUIRE_NO_THROW(mlpackMain()); Log::Fatal.ignoreInput = false; @@ -386,7 +386,7 @@ BOOST_AUTO_TEST_CASE(KDEMainInvalidRelativeError) arma::mat reference = arma::randu(1, 10); arma::mat query = arma::randu(1, 5); - // Main params + // Main params. SetInputParam("reference", reference); SetInputParam("query", query); @@ -399,7 +399,7 @@ BOOST_AUTO_TEST_CASE(KDEMainInvalidRelativeError) SetInputParam("rel_error", 1.1); BOOST_REQUIRE_THROW(mlpackMain(), std::runtime_error); - // Valid value + // Valid value. 
SetInputParam("rel_error", 0.3); BOOST_REQUIRE_NO_THROW(mlpackMain()); Log::Fatal.ignoreInput = false; From bc392323a83524153acf77d78e4325a730555e25 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Mon, 7 Jan 2019 20:32:42 +0100 Subject: [PATCH 146/150] Remove KDE comment --- src/mlpack/methods/kde/kde_rules_impl.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index 63a758e9848..a9fedd09e63 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -193,9 +193,7 @@ Score(TreeType& queryNode, TreeType& referenceNode) referenceStat.Centroid()); } - // Can be paralellized but we avoid it for now because of a compilation - // error in visual C++ compiler. - // #pragma omp for + // Sum up estimations. for (size_t i = 0; i < queryNode.NumDescendants(); ++i) { densities(queryNode.Descendant(i)) += From c191e9b2a38b51677a85a8269cca18157b9469ef Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 13 Jan 2019 14:32:08 +0100 Subject: [PATCH 147/150] Update KDE author --- COPYRIGHT.txt | 2 +- src/mlpack/methods/kde/kde.hpp | 2 +- src/mlpack/methods/kde/kde_impl.hpp | 2 +- src/mlpack/methods/kde/kde_rules.hpp | 2 +- src/mlpack/methods/kde/kde_rules_impl.hpp | 2 +- src/mlpack/tests/kde_test.cpp | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index a2f8cfcd25b..b41979c8f4d 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -93,7 +93,7 @@ Copyright: Copyright 2018, B Kartheek Reddy Copyright 2018, Atharva Khandait Copyright 2018, Wenhao Huang - Copyright 2018, Roberto Hueso + Copyright 2018-2019, Roberto Hueso Copyright 2018, Prabhat Sharma Copyright 2018, Tan Jun An Copyright 2018, Moksh Jain diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 080d2c2e593..4da56f28f5a 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ 
b/src/mlpack/methods/kde/kde.hpp @@ -1,6 +1,6 @@ /** * @file kde.hpp - * @author Roberto Hueso (robertohueso96@gmail.com) + * @author Roberto Hueso * * Kernel Density Estimation. * diff --git a/src/mlpack/methods/kde/kde_impl.hpp b/src/mlpack/methods/kde/kde_impl.hpp index 8a679ae71f0..5ffd8d2c500 100644 --- a/src/mlpack/methods/kde/kde_impl.hpp +++ b/src/mlpack/methods/kde/kde_impl.hpp @@ -1,6 +1,6 @@ /** * @file kde_impl.hpp - * @author Roberto Hueso (robertohueso96@gmail.com) + * @author Roberto Hueso * * Implementation of Kernel Density Estimation. * diff --git a/src/mlpack/methods/kde/kde_rules.hpp b/src/mlpack/methods/kde/kde_rules.hpp index e0f1d19f721..0e153a0726d 100644 --- a/src/mlpack/methods/kde/kde_rules.hpp +++ b/src/mlpack/methods/kde/kde_rules.hpp @@ -1,6 +1,6 @@ /** * @file kde_rules.hpp - * @author Roberto Hueso (robertohueso96@gmail.com) + * @author Roberto Hueso * * Rules Kernel Density estimation, so that it can be done with arbitrary tree * types. diff --git a/src/mlpack/methods/kde/kde_rules_impl.hpp b/src/mlpack/methods/kde/kde_rules_impl.hpp index a9fedd09e63..87273ebfc96 100644 --- a/src/mlpack/methods/kde/kde_rules_impl.hpp +++ b/src/mlpack/methods/kde/kde_rules_impl.hpp @@ -1,6 +1,6 @@ /** * @file kde_rules_impl.hpp - * @author Roberto Hueso (robertohueso96@gmail.com) + * @author Roberto Hueso * * Implementation of rules for Kernel Density Estimation with generic trees. * diff --git a/src/mlpack/tests/kde_test.cpp b/src/mlpack/tests/kde_test.cpp index 154edeb613c..3d1cecb7d9a 100644 --- a/src/mlpack/tests/kde_test.cpp +++ b/src/mlpack/tests/kde_test.cpp @@ -1,6 +1,6 @@ /** * @file kde_test.cpp - * @author Roberto Hueso (robertohueso96@gmail.com) + * @author Roberto Hueso * * mlpack is free software; you may redistribute it and/or modify it under the * terms of the 3-clause BSD license. 
You should have received a copy of the From e3a5eee8394fb19086e2633cf8ed5446fd30df44 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Sun, 13 Jan 2019 14:33:21 +0100 Subject: [PATCH 148/150] Update KDE docs --- src/mlpack/methods/kde/kde.hpp | 6 ++++-- src/mlpack/methods/kde/kde_rules.hpp | 13 +++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index 4da56f28f5a..f143920f8f5 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -30,15 +30,17 @@ enum KDEMode /** * The KDE class is a template class for performing Kernel Density Estimations. - * In statistics, kernel density estimation, is a way to estimate the + * In statistics, kernel density estimation is a way to estimate the * probability density function of a variable in a non parametric way. * This implementation performs this estimation using a tree-independent * dual-tree algorithm. Details about this algorithm are available in KDERules. * + * @tparam KernelType Kernel function to use for KDE calculations. * @tparam MetricType Metric to use for KDE calculations. * @tparam MatType Type of data to use. - * @tparam KernelType Kernel function to use for KDE calculations. * @tparam TreeType Type of tree to use; must satisfy the TreeType policy API. + * @tparam DualTreeTraversalType Type of dual-tree traversal to use. + * @tparam SingleTreeTraversalType Type of single-tree traversal to use. */ template class KDERules { public: + /** + * Construct KDERules. + * + * @param referenceSet Reference set data. + * @param querySet Query set data. + * @param densities Vector where estimations will be written. + * @param relError Relative error tolerance. + * @param absError Absolute error tolerance. + * @param metric Instantiated metric. + * @param kernel Instantiated kernel. + * @param sameSet True if query and reference sets are the same + * (monochromatic evaluation). 
+ */ KDERules(const arma::mat& referenceSet, const arma::mat& querySet, arma::vec& densities, From 0d5bb4641cc0e1795dc1a17669b4f480275b8854 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Wed, 16 Jan 2019 11:04:54 +0100 Subject: [PATCH 149/150] Improve KDE docs --- src/mlpack/methods/kde/kde_model.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index cf6aa2cf578..680b89ba44f 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -44,13 +44,13 @@ using KDEType = KDE::template SingleTreeTraverser>; /** - * KernerlNormalizer holds a set of methods to normalize estimations applying + * KernelNormalizer holds a set of methods to normalize estimations applying * in each case the appropiate kernel normalizer function. */ class KernelNormalizer { private: - // SFINAE helper to check if has a Normalizer function. + // SFINAE check if Normalizer function is present. HAS_MEM_FUNC(Normalizer, HasNormalizer); public: From 20230ae14408ae5655ad8f02be2d0a26b3b612b7 Mon Sep 17 00:00:00 2001 From: Roberto Hueso Gomez Date: Fri, 18 Jan 2019 01:46:07 +0100 Subject: [PATCH 150/150] Improve KDE docs - Fix typos. - Improve expressions. --- src/mlpack/methods/kde/kde.hpp | 2 +- src/mlpack/methods/kde/kde_model.hpp | 6 +++--- src/mlpack/tests/main_tests/kde_test.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mlpack/methods/kde/kde.hpp b/src/mlpack/methods/kde/kde.hpp index f143920f8f5..2691671aeab 100644 --- a/src/mlpack/methods/kde/kde.hpp +++ b/src/mlpack/methods/kde/kde.hpp @@ -122,7 +122,7 @@ class KDE * - If TreeTraits::RearrangesDataset is False then it is possible * to use an empty oldFromNewReferences vector. * - * @param referenceTree New already created reference tree. + * @param referenceTree Built reference tree. 
* @param oldFromNewReferences Permutations of reference points obtained * during tree generation. */ diff --git a/src/mlpack/methods/kde/kde_model.hpp b/src/mlpack/methods/kde/kde_model.hpp index 680b89ba44f..89d49e25782 100644 --- a/src/mlpack/methods/kde/kde_model.hpp +++ b/src/mlpack/methods/kde/kde_model.hpp @@ -90,7 +90,7 @@ class DualMonoKDE : public boost::static_visitor arma::vec& estimations; public: - //! Alias template necessary for visual C++ compiler. + //! Alias template necessary for Visual C++ compiler. template arma::vec& estimations; public: - //! Alias template necessary for visual C++ compiler. + //! Alias template necessary for Visual C++ compiler. template