Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arma sorting work with SpMat. #808

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions src/mlpack/core/arma_extend/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ set(SOURCES
arma_extend.hpp
fn_ccov.hpp
fn_ind2sub.hpp
fn_sort_sparse.hpp
glue_ccov_meat.hpp
glue_ccov_proto.hpp
hdf5_misc.hpp
Expand Down
3 changes: 3 additions & 0 deletions src/mlpack/core/arma_extend/arma_extend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ namespace arma {

// unary minus for sparse matrices
#include "operator_minus.hpp"

// sorting of sparse matrices
#include "fn_sort_sparse.hpp"
};

#endif
45 changes: 45 additions & 0 deletions src/mlpack/core/arma_extend/fn_sort_sparse.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//! \addtogroup fn_sort_sparse
//! @{

/**
* @file fn_sort_sparse.hpp
* @author Ivan Georgiev (ivan@jonan.info)
*
* Sorting of sparse matrices as extension to arma library.
*/

/**
 * Sort every element of a sparse matrix in ascending order, treating the
 * matrix as a single column-major sequence of n_elem entries.
 *
 * Only the stored values are sorted explicitly.  The implicit zeros are
 * accounted for by leaving a gap of (n_elem - n_nonzero) positions between the
 * non-positive stored values and the strictly positive ones, so the result has
 * the layout: negatives, zeros, positives.
 *
 * @tparam ElemType Element type of the matrix; must be orderable with
 *     operator< (required by std::sort).
 * @param data Sparse matrix whose elements are to be sorted.
 * @return Sparse matrix with the same dimensions as `data` holding the sorted
 *     elements in column-major order.
 */
template <typename ElemType>
SpMat<ElemType> sort(const SpMat<ElemType>& data)
{
  // Collect the values the sparse iterators expose and order them.
  std::vector<ElemType> valsVec(data.begin(), data.end());
  std::sort(valsVec.begin(), valsVec.end());

  // Index of the first strictly positive value in the sorted sequence.
  // Everything before it keeps its index; everything from it onwards is pushed
  // past the block of implicit zeros.  Locating the boundary this way (instead
  // of comparing against a "-max()" sentinel) also works for unsigned element
  // types, where negating max() wraps around to a positive number, and for
  // inputs that happen to contain explicitly stored zeros.
  const size_t firstPositive = static_cast<size_t>(
      std::upper_bound(valsVec.begin(), valsVec.end(), ElemType(0)) -
      valsVec.begin());

  // Number of implicit zeros that must sit between the two groups.
  const size_t zeroGap = data.n_elem - data.n_nonzero;

  // Structures for the batch construction of the sorted sparse matrix:
  // row indices in row 0, column indices in row 1.
  arma::umat locations(2, data.n_nonzero);
  arma::Col<ElemType> vals(data.n_nonzero);

  for (size_t ii = 0; ii < valsVec.size(); ++ii)
  {
    // Linear (column-major) position of this value in the sorted output.
    const size_t pos = (ii < firstPositive) ? ii : ii + zeroGap;

    locations.at(0, ii) = pos % data.n_rows;
    locations.at(1, ii) = pos / data.n_rows;
    vals.at(ii) = valsVec[ii];
  }

  // The locations were generated in increasing column-major order, so the two
  // trailing flags (sort_locations / check_for_zeros in Armadillo's batch
  // constructor) are turned off, exactly as before.
  return SpMat<ElemType>(locations, vals, data.n_rows, data.n_cols, false, false);
}


//! @}
1 change: 1 addition & 0 deletions src/mlpack/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ add_executable(mlpack_test
sort_policy_test.cpp
sparse_autoencoder_test.cpp
sparse_coding_test.cpp
sparse_sort_test.cpp
spill_tree_test.cpp
split_data_test.cpp
svd_batch_test.cpp
Expand Down
153 changes: 153 additions & 0 deletions src/mlpack/tests/sparse_sort_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/**
* @file sparse_sort_test.cpp
* @author Ryan Curtin
*
* Some tests for the Armadillo sparse sorting code.
*/
#include <mlpack/core.hpp>
#include <boost/test/unit_test.hpp>
#include "test_tools.hpp"

using namespace arma;

BOOST_AUTO_TEST_SUITE(SparseSortTest);

/**
 * Sort a small hand-built sparse column vector and verify the exact layout of
 * the result: the single negative value, then the zeros, then the positive
 * values in ascending order.
 */
BOOST_AUTO_TEST_CASE(SimpleSparseVectorSortTest)
{
  // Five nonzeros (one of them negative) in a length-10 sparse column.
  sp_vec input(10);
  input[2] = 10.0;
  input[5] = 3.0;
  input[6] = -1.0;
  input[8] = 2.5;
  input[9] = 0.3;

  sp_vec sorted = sort(input);

  // The lone negative value comes first.
  BOOST_REQUIRE_CLOSE(static_cast<double>(sorted[0]), -1.0, 1e-5);

  // Positions 1 through 5 hold the five zeros.
  for (size_t idx = 1; idx <= 5; ++idx)
    BOOST_REQUIRE_SMALL(static_cast<double>(sorted[idx]), 1e-5);

  // The positive values follow in ascending order at positions 6 through 9.
  const double expectedTail[] = { 0.3, 2.5, 3.0, 10.0 };
  for (size_t k = 0; k < 4; ++k)
    BOOST_REQUIRE_CLOSE(static_cast<double>(sorted[6 + k]), expectedTail[k],
        1e-5);
}

/**
 * Sort a random sparse column vector and compare the result, element by
 * element, with the dense sort of the same data.
 */
BOOST_AUTO_TEST_CASE(RandomSparseVectorSortTest)
{
  // Random 1000x1 sparse vector (density argument 0.6) and its dense copy.
  sp_vec sparseInput;
  sparseInput.sprandu(1000, 1, 0.6);
  vec denseInput(sparseInput);

  // Sort the sparse vector first, then the dense one.
  sp_vec sparseSorted = sort(sparseInput);
  vec denseSorted = sort(denseInput);

  // Near-zero reference entries are checked with an absolute tolerance; all
  // other entries are checked with a relative one.
  for (size_t idx = 0; idx < 1000; ++idx)
  {
    if (!(denseSorted[idx] < 1e-5))
    {
      BOOST_REQUIRE_CLOSE(denseSorted[idx],
          static_cast<double>(sparseSorted[idx]), 1e-5);
    }
    else
    {
      BOOST_REQUIRE_SMALL(static_cast<double>(sparseSorted[idx]), 1e-5);
    }
  }
}

/**
 * Sort a small hand-built sparse row vector and verify the exact layout of the
 * result: the single negative value, then the zeros, then the positive values
 * in ascending order.
 */
BOOST_AUTO_TEST_CASE(SimpleSparseRowSortTest)
{
  // Five nonzeros (one of them negative) in a length-10 sparse row.
  sp_rowvec input(10);
  input[2] = 10.0;
  input[5] = 3.0;
  input[6] = -1.0;
  input[8] = 2.5;
  input[9] = 0.3;

  sp_rowvec sorted = sort(input);

  // The lone negative value comes first.
  BOOST_REQUIRE_CLOSE(static_cast<double>(sorted[0]), -1.0, 1e-5);

  // Positions 1 through 5 hold the five zeros.
  for (size_t idx = 1; idx <= 5; ++idx)
    BOOST_REQUIRE_SMALL(static_cast<double>(sorted[idx]), 1e-5);

  // The positive values follow in ascending order at positions 6 through 9.
  const double expectedTail[] = { 0.3, 2.5, 3.0, 10.0 };
  for (size_t k = 0; k < 4; ++k)
    BOOST_REQUIRE_CLOSE(static_cast<double>(sorted[6 + k]), expectedTail[k],
        1e-5);
}

/**
 * Sort a random sparse row vector and compare the result, element by element,
 * with the dense sort of the same data.
 */
BOOST_AUTO_TEST_CASE(RandomSparseRowSortTest)
{
  // Random 1x1000 sparse row (density argument 0.6) and its dense copy.
  sp_rowvec sparseInput;
  sparseInput.sprandu(1, 1000, 0.6);
  rowvec denseInput(sparseInput);

  // Sort the sparse row first, then the dense one.
  sp_rowvec sparseSorted = sort(sparseInput);
  rowvec denseSorted = sort(denseInput);

  // Near-zero reference entries are checked with an absolute tolerance; all
  // other entries are checked with a relative one.
  for (size_t idx = 0; idx < 1000; ++idx)
  {
    if (!(denseSorted[idx] < 1e-5))
    {
      BOOST_REQUIRE_CLOSE(denseSorted[idx],
          static_cast<double>(sparseSorted[idx]), 1e-5);
    }
    else
    {
      BOOST_REQUIRE_SMALL(static_cast<double>(sparseSorted[idx]), 1e-5);
    }
  }
}

// These two don't work: we need an overload of sort() for sparse matrices that
// takes a dimension to sort in, just like the dense version.
/*
BOOST_AUTO_TEST_CASE(SparseRandomMatrixSortTest)
{
sp_mat sc;
sc.sprandu(50, 50, 0.6);

mat c(sc);

// Sort both.
sp_mat sout = sort(sc, 0);
mat out = sort(c, 0);

// Check that both results are equivalent.
for (size_t i = 0; i < 1000; ++i)
{
if (out[i] < 1e-5)
BOOST_REQUIRE_SMALL((double) sout[i], 1e-5);
else
BOOST_REQUIRE_CLOSE(out[i], (double) sout[i], 1e-5);
}
}

BOOST_AUTO_TEST_CASE(SparseRandomMatrixSortRowTest)
{
sp_mat sc;
sc.sprandu(50, 50, 0.6);

mat c(sc);

// Sort both.
sp_mat sout = sort(sc, 1);
mat out = sort(c, 1);

// Check that both results are equivalent.
for (size_t i = 0; i < 1000; ++i)
{
if (out[i] < 1e-5)
BOOST_REQUIRE_SMALL((double) sout[i], 1e-5);
else
BOOST_REQUIRE_CLOSE(out[i], (double) sout[i], 1e-5);
}
}
*/

BOOST_AUTO_TEST_SUITE_END();