From ec79c1bad889dd3715d69be9b41580039d3629d2 Mon Sep 17 00:00:00 2001
From: theJonan
Date: Mon, 17 Oct 2016 22:56:55 +0300
Subject: [PATCH 1/2] - Arma sorting work with SpMat.

---
Reviewer notes (text between "---" and the diffstat is not part of the commit):
- sort() puts every stored value of the matrix in ascending order, filling
  positions in column-major order; the implicit zeros end up between the
  negative and the positive values.
- The template argument lists (<typename ElemType> / <ElemType>) were missing
  from this copy of the patch and are restored below.  Hunk line counts are
  unchanged.

 src/mlpack/core/arma_extend/CMakeLists.txt    |  1 +
 src/mlpack/core/arma_extend/arma_extend.hpp   |  3 ++
 .../core/arma_extend/fn_sort_sparse.hpp       | 45 +++++++++++++++++++
 3 files changed, 49 insertions(+)
 create mode 100644 src/mlpack/core/arma_extend/fn_sort_sparse.hpp

diff --git a/src/mlpack/core/arma_extend/CMakeLists.txt b/src/mlpack/core/arma_extend/CMakeLists.txt
index 3eba5089025..e9120f0d94d 100644
--- a/src/mlpack/core/arma_extend/CMakeLists.txt
+++ b/src/mlpack/core/arma_extend/CMakeLists.txt
@@ -4,6 +4,7 @@ set(SOURCES
   arma_extend.hpp
   fn_ccov.hpp
   fn_ind2sub.hpp
+  fn_sort_sparse.hpp
   glue_ccov_meat.hpp
   glue_ccov_proto.hpp
   hdf5_misc.hpp
diff --git a/src/mlpack/core/arma_extend/arma_extend.hpp b/src/mlpack/core/arma_extend/arma_extend.hpp
index 088b8d8b52c..ea646aada77 100644
--- a/src/mlpack/core/arma_extend/arma_extend.hpp
+++ b/src/mlpack/core/arma_extend/arma_extend.hpp
@@ -69,6 +69,9 @@ namespace arma {
 
   // unary minus for sparse matrices
   #include "operator_minus.hpp"
+
+  // sorting of sparse matrices
+  #include "fn_sort_sparse.hpp"
 };
 
 #endif
diff --git a/src/mlpack/core/arma_extend/fn_sort_sparse.hpp b/src/mlpack/core/arma_extend/fn_sort_sparse.hpp
new file mode 100644
index 00000000000..83349dee005
--- /dev/null
+++ b/src/mlpack/core/arma_extend/fn_sort_sparse.hpp
@@ -0,0 +1,45 @@
+//! \addtogroup fn_sort_sparse
+//! @{
+
+/**
+ * @file fn_sort_sparse.hpp
+ * @author Ivan Georgiev (ivan@jonan.info)
+ *
+ * Sorting of sparse matrices as extension to arma library.
+ */
+
+template<typename ElemType>
+SpMat<ElemType> sort(const SpMat<ElemType>& data)
+{
+  // Construct the vector of values.
+  std::vector<ElemType> valsVec(data.begin(), data.end());
+
+  // ... and sort it!
+  std::sort(valsVec.begin(), valsVec.end());
+
+  // Now prepare the structures for the batch construction of the
+  // sorted sparse matrix.
+  arma::umat locations(2, data.n_nonzero);
+  arma::Col<ElemType> vals(data.n_nonzero);
+  ElemType lastVal = -std::numeric_limits<ElemType>::max();
+  size_t padding = 0;
+
+  for (size_t ii = 0; ii < valsVec.size(); ++ii)
+  {
+    const ElemType newVal = valsVec[ii];
+    if (lastVal < ElemType(0) && newVal > ElemType(0))
+    {
+      assert(padding == 0); // we should arrive here once!
+      padding = data.n_elem - data.n_nonzero;
+    }
+
+    locations.at(0, ii) = (ii + padding) % data.n_rows;
+    locations.at(1, ii) = (ii + padding) / data.n_rows;
+    vals.at(ii) = lastVal = newVal;
+  }
+
+  return SpMat<ElemType>(locations, vals, data.n_rows, data.n_cols, false, false);
+};
+
+
+//! @}

From 687d73bf7d8a85db53e6b67f658b827565ec04d6 Mon Sep 17 00:00:00 2001
From: Ryan Curtin
Date: Mon, 5 Dec 2016 11:11:44 -0500
Subject: [PATCH 2/2] Add tests for sparse sort().

---
Reviewer notes (text between "---" and the diffstat is not part of the commit):
- The two angle-bracket #include targets were missing from this copy of the
  patch.  <mlpack/core.hpp> and <boost/test/unit_test.hpp> are assumed here
  because the tests use Armadillo types and Boost.Test macros -- TODO confirm
  against the upstream commit.

 src/mlpack/tests/CMakeLists.txt       |   1 +
 src/mlpack/tests/sparse_sort_test.cpp | 153 ++++++++++++++++++++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 src/mlpack/tests/sparse_sort_test.cpp

diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt
index 3b3ab0d30b1..c8613fbeda1 100644
--- a/src/mlpack/tests/CMakeLists.txt
+++ b/src/mlpack/tests/CMakeLists.txt
@@ -81,6 +81,7 @@ add_executable(mlpack_test
   sort_policy_test.cpp
   sparse_autoencoder_test.cpp
   sparse_coding_test.cpp
+  sparse_sort_test.cpp
   spill_tree_test.cpp
   split_data_test.cpp
   svd_batch_test.cpp
diff --git a/src/mlpack/tests/sparse_sort_test.cpp b/src/mlpack/tests/sparse_sort_test.cpp
new file mode 100644
index 00000000000..c8bdac27309
--- /dev/null
+++ b/src/mlpack/tests/sparse_sort_test.cpp
@@ -0,0 +1,153 @@
+/**
+ * @file sparse_sort_test.cpp
+ * @author Ryan Curtin
+ *
+ * Some tests for the Armadillo sparse sorting code.
+ */
+#include <mlpack/core.hpp>
+#include <boost/test/unit_test.hpp>
+#include "test_tools.hpp"
+
+using namespace arma;
+
+BOOST_AUTO_TEST_SUITE(SparseSortTest);
+
+BOOST_AUTO_TEST_CASE(SimpleSparseVectorSortTest)
+{
+  sp_vec sc(10);
+  sc[2] = 10.0;
+  sc[5] = 3.0;
+  sc[6] = -1.0;
+  sc[8] = 2.5;
+  sc[9] = 0.3;
+
+  // Sort the vector.
+  sp_vec out = sort(sc);
+
+  // Check that the output is in the right order.
+  BOOST_REQUIRE_CLOSE((double) out[0], -1.0, 1e-5);
+  BOOST_REQUIRE_SMALL((double) out[1], 1e-5);
+  BOOST_REQUIRE_SMALL((double) out[2], 1e-5);
+  BOOST_REQUIRE_SMALL((double) out[3], 1e-5);
+  BOOST_REQUIRE_SMALL((double) out[4], 1e-5);
+  BOOST_REQUIRE_SMALL((double) out[5], 1e-5);
+  BOOST_REQUIRE_CLOSE((double) out[6], 0.3, 1e-5);
+  BOOST_REQUIRE_CLOSE((double) out[7], 2.5, 1e-5);
+  BOOST_REQUIRE_CLOSE((double) out[8], 3.0, 1e-5);
+  BOOST_REQUIRE_CLOSE((double) out[9], 10.0, 1e-5);
+}
+
+BOOST_AUTO_TEST_CASE(RandomSparseVectorSortTest)
+{
+  sp_vec sc;
+  sc.sprandu(1000, 1, 0.6);
+
+  vec c(sc);
+
+  // Sort both.
+  sp_vec sout = sort(sc);
+  vec out = sort(c);
+
+  // Check that the results are equivalent.
+  for (size_t i = 0; i < 1000; ++i)
+  {
+    if (out[i] < 1e-5)
+      BOOST_REQUIRE_SMALL((double) sout[i], 1e-5);
+    else
+      BOOST_REQUIRE_CLOSE(out[i], (double) sout[i], 1e-5);
+  }
+}
+
+BOOST_AUTO_TEST_CASE(SimpleSparseRowSortTest)
+{
+  sp_rowvec sc(10);
+  sc[2] = 10.0;
+  sc[5] = 3.0;
+  sc[6] = -1.0;
+  sc[8] = 2.5;
+  sc[9] = 0.3;
+
+  // Sort the vector.
+  sp_rowvec out = sort(sc);
+
+  // Check that the output is in the right order.
+  BOOST_REQUIRE_CLOSE((double) out[0], -1.0, 1e-5);
+  BOOST_REQUIRE_SMALL((double) out[1], 1e-5);
+  BOOST_REQUIRE_SMALL((double) out[2], 1e-5);
+  BOOST_REQUIRE_SMALL((double) out[3], 1e-5);
+  BOOST_REQUIRE_SMALL((double) out[4], 1e-5);
+  BOOST_REQUIRE_SMALL((double) out[5], 1e-5);
+  BOOST_REQUIRE_CLOSE((double) out[6], 0.3, 1e-5);
+  BOOST_REQUIRE_CLOSE((double) out[7], 2.5, 1e-5);
+  BOOST_REQUIRE_CLOSE((double) out[8], 3.0, 1e-5);
+  BOOST_REQUIRE_CLOSE((double) out[9], 10.0, 1e-5);
+}
+
+BOOST_AUTO_TEST_CASE(RandomSparseRowSortTest)
+{
+  sp_rowvec sc;
+  sc.sprandu(1, 1000, 0.6);
+
+  rowvec c(sc);
+
+  // Sort both.
+  sp_rowvec sout = sort(sc);
+  rowvec out = sort(c);
+
+  // Check that the results are equivalent.
+  for (size_t i = 0; i < 1000; ++i)
+  {
+    if (out[i] < 1e-5)
+      BOOST_REQUIRE_SMALL((double) sout[i], 1e-5);
+    else
+      BOOST_REQUIRE_CLOSE(out[i], (double) sout[i], 1e-5);
+  }
+}
+
+// These two don't work: we need an overload of sort() for sparse matrices that
+// takes a dimension to sort in, just like the dense version.
+/*
+BOOST_AUTO_TEST_CASE(SparseRandomMatrixSortTest)
+{
+  sp_mat sc;
+  sc.sprandu(50, 50, 0.6);
+
+  mat c(sc);
+
+  // Sort both.
+  sp_mat sout = sort(sc, 0);
+  mat out = sort(c, 0);
+
+  // Check that both results are equivalent.
+  for (size_t i = 0; i < 1000; ++i)
+  {
+    if (out[i] < 1e-5)
+      BOOST_REQUIRE_SMALL((double) sout[i], 1e-5);
+    else
+      BOOST_REQUIRE_CLOSE(out[i], (double) sout[i], 1e-5);
+  }
+}
+
+BOOST_AUTO_TEST_CASE(SparseRandomMatrixSortRowTest)
+{
+  sp_mat sc;
+  sc.sprandu(50, 50, 0.6);
+
+  mat c(sc);
+
+  // Sort both.
+  sp_mat sout = sort(sc, 1);
+  mat out = sort(c, 1);
+
+  // Check that both results are equivalent.
+  for (size_t i = 0; i < 1000; ++i)
+  {
+    if (out[i] < 1e-5)
+      BOOST_REQUIRE_SMALL((double) sout[i], 1e-5);
+    else
+      BOOST_REQUIRE_CLOSE(out[i], (double) sout[i], 1e-5);
+  }
+}
+*/
+
+BOOST_AUTO_TEST_SUITE_END();