From 72999dd6730801f011e82d46656e5d33f0cbd20f Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Sun, 19 Jun 2016 14:17:56 +0300 Subject: [PATCH 01/23] Implements parallel query processing for LSH --- src/mlpack/methods/lsh/lsh_search.hpp | 14 ++++++++ src/mlpack/methods/lsh/lsh_search_impl.hpp | 33 +++++++++++++++--- src/mlpack/tests/lsh_test.cpp | 39 ++++++++++++++++++++++ 3 files changed, 82 insertions(+), 4 deletions(-) diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp index ad285eab48f..9898d844aca 100644 --- a/src/mlpack/methods/lsh/lsh_search.hpp +++ b/src/mlpack/methods/lsh/lsh_search.hpp @@ -189,6 +189,7 @@ class LSHSearch arma::mat& distances, const size_t numTablesToSearch = 0); + /** * Serialize the LSH model. * @@ -236,6 +237,12 @@ class LSHSearch //! removed in mlpack 2.1.0! const arma::mat& Projection(size_t i) { return projections.slice(i); } + //! Set the maximum number of threads the object is allowed to use + void MaxThreads(size_t numThreads) { maxThreads = numThreads;} + + //! Return the current maxumum threads the object is allowed to use + size_t MaxThreads(void) const { return maxThreads; } + private: /** * This function takes a query and hashes it into each of the hash tables to @@ -350,6 +357,13 @@ class LSHSearch //! The number of distance evaluations. size_t distanceEvaluations; + + //! The maximum number of threads allowed. + size_t maxThreads; + + //! The number of threads currently in use. + size_t numThreadsUsed; + }; // class LSHSearch } // namespace neighbor diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index b34d22c4880..bd98f6418e2 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -9,6 +9,8 @@ #include +using std::cout; using std::endl; //TODO: remove + namespace mlpack { namespace neighbor { @@ -28,7 +30,9 @@ LSHSearch(const arma::mat& referenceSet, hashWidth(hashWidthIn), secondHashSize(secondHashSize), bucketSize(bucketSize), - distanceEvaluations(0) + distanceEvaluations(0), + maxThreads(omp_get_max_threads()), + numThreadsUsed(0) { // Pass work to training function. Train(referenceSet, numProj, numTables, hashWidthIn, secondHashSize, @@ -50,7 +54,9 @@ LSHSearch(const arma::mat& referenceSet, hashWidth(hashWidthIn), secondHashSize(secondHashSize), bucketSize(bucketSize), - distanceEvaluations(0) + distanceEvaluations(0), + maxThreads(omp_get_max_threads()), + numThreadsUsed(0) { // Pass work to training function Train(referenceSet, numProj, numTables, hashWidthIn, secondHashSize, @@ -67,7 +73,9 @@ LSHSearch::LSHSearch() : hashWidth(0), secondHashSize(99901), bucketSize(500), - distanceEvaluations(0) + distanceEvaluations(0), + maxThreads(omp_get_max_threads()), + numThreadsUsed(0) { // Nothing to do. } @@ -522,13 +530,28 @@ Search(const size_t k, distances.fill(SortPolicy::WorstDistance()); resultingNeighbors.fill(referenceSet->n_cols); + size_t avgIndicesReturned = 0; Timer::Start("computing_neighbors"); - // Go through every query point sequentially. + // Parallelization allows us to process more than one query at a time. To + // control workload and thread access, we use numThreadsUsed and maxThreads to + // make sure we only use as many threads as the user specified. + #pragma omp parallel for \ + if (numThreadsUsed <= maxThreads) \ + num_threads (maxThreads-numThreadsUsed)\ + shared(avgIndicesReturned, resultingNeighbors, distances) \ + schedule(dynamic) + // Go through every query point. for (size_t i = 0; i < referenceSet->n_cols; i++) { + // Master thread updates the number of threads used + if (i == 0 && omp_get_thread_num() == 0) + { + numThreadsUsed+=omp_get_num_threads(); + cout<<"Using "< sequentialNeighbors; + arma::Mat parallelNeighbors; + arma::mat distances; + + // Construct an LSH object. By default, it uses the maximum number of threads + LSHSearch<> lshTest(rdata, numProj, numTables); //default parameters + lshTest.Search(qdata, k, parallelNeighbors, distances); + + // Now perform same search but with 1 thread + lshTest.MaxThreads(1); + lshTest.Search(qdata, k, sequentialNeighbors, distances); + + // Require both have same results + double recall = ComputeRecall(sequentialNeighbors, parallelNeighbors); + BOOST_REQUIRE_EQUAL(recall, 1); + +} + + BOOST_AUTO_TEST_CASE(LSHTrainTest) { // This is a not very good test that simply checks that the re-trained LSH From 95d417f59e411d629dc9b74fe5e5b0f0b5c01626 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Sun, 19 Jun 2016 14:21:17 +0300 Subject: [PATCH 02/23] Implements parallel query processing for LSH --- src/mlpack/methods/lsh/lsh_search_impl.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index bd98f6418e2..a7e3be7a9b3 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -550,7 +550,8 @@ Search(const size_t k, if (i == 0 && omp_get_thread_num() == 0) { numThreadsUsed+=omp_get_num_threads(); - cout<<"Using "< Date: Sun, 19 Jun 2016 14:44:57 +0300 Subject: [PATCH 03/23] Book-keeping of number of threads used --- src/mlpack/methods/lsh/lsh_search_impl.hpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index a7e3be7a9b3..8ac794c7fdb 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -32,7 +32,7 @@ LSHSearch(const arma::mat& referenceSet, bucketSize(bucketSize), distanceEvaluations(0), maxThreads(omp_get_max_threads()), - numThreadsUsed(0) + numThreadsUsed(1) { // Pass work to training function. Train(referenceSet, numProj, numTables, hashWidthIn, secondHashSize, @@ -56,7 +56,7 @@ LSHSearch(const arma::mat& referenceSet, bucketSize(bucketSize), distanceEvaluations(0), maxThreads(omp_get_max_threads()), - numThreadsUsed(0) + numThreadsUsed(1) { // Pass work to training function Train(referenceSet, numProj, numTables, hashWidthIn, secondHashSize, @@ -75,7 +75,7 @@ LSHSearch::LSHSearch() : bucketSize(500), distanceEvaluations(0), maxThreads(omp_get_max_threads()), - numThreadsUsed(0) + numThreadsUsed(1) { // Nothing to do. } @@ -549,7 +549,7 @@ Search(const size_t k, // Master thread updates the number of threads used if (i == 0 && omp_get_thread_num() == 0) { - numThreadsUsed+=omp_get_num_threads(); + numThreadsUsed+=omp_get_num_threads(); // Log::Info << "Using "<< numThreadsUsed << " threads to process queries." << endl; } @@ -568,8 +568,13 @@ Search(const size_t k, // candidates. for (size_t j = 0; j < refIndices.n_elem; j++) BaseCase(i, (size_t) refIndices[j], resultingNeighbors, distances); + } + // parallel region over, reset number of threads to 1 + numThreadsUsed = omp_get_num_threads(); + + Timer::Stop("computing_neighbors"); distanceEvaluations += avgIndicesReturned; From 5db5423ae584814d66b4d3da84d4031a205d1389 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Sun, 19 Jun 2016 14:56:09 +0300 Subject: [PATCH 04/23] Adds parallelization to bichromatic search --- src/mlpack/methods/lsh/lsh_search_impl.hpp | 25 ++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index 8ac794c7fdb..571e548e707 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -489,9 +489,26 @@ void LSHSearch::Search(const arma::mat& querySet, Timer::Start("computing_neighbors"); - // Go through every query point sequentially. + // Parallelization allows us to process more than one query at a time. To + // control workload and thread access, we use numThreadsUsed and maxThreads to + // make sure we only use as many threads as the user specified. + #pragma omp parallel for \ + if (numThreadsUsed <= maxThreads) \ + num_threads (maxThreads-numThreadsUsed)\ + shared(avgIndicesReturned, resultingNeighbors, distances) \ + schedule(dynamic) + + // Go through every query point. for (size_t i = 0; i < querySet.n_cols; i++) { + // Master thread updates the number of threads used + if (i == 0 && omp_get_thread_num() == 0) + { + numThreadsUsed+=omp_get_num_threads(); + Log::Info + << "Using "<< numThreadsUsed << " threads to process queries." << endl; + } + // Hash every query into every hash table and eventually into the // 'secondHashTable' to obtain the neighbor candidates. arma::uvec refIndices; @@ -499,6 +516,8 @@ void LSHSearch::Search(const arma::mat& querySet, // An informative book-keeping for the number of neighbor candidates // returned on average. + // Make atomic to avoid race conditions when multiple threads are running + #pragma omp atomic avgIndicesReturned += refIndices.n_elem; // Sequentially go through all the candidates and save the best 'k' @@ -507,6 +526,8 @@ void LSHSearch::Search(const arma::mat& querySet, BaseCase(i, (size_t) refIndices[j], querySet, resultingNeighbors, distances); } + // parallel region over, reset number of threads to 1 + numThreadsUsed = omp_get_num_threads(); Timer::Stop("computing_neighbors"); @@ -549,7 +570,7 @@ Search(const size_t k, // Master thread updates the number of threads used if (i == 0 && omp_get_thread_num() == 0) { - numThreadsUsed+=omp_get_num_threads(); // + numThreadsUsed+=omp_get_num_threads(); Log::Info << "Using "<< numThreadsUsed << " threads to process queries." << endl; } From abef5045c4ac6f352bbc33897b4130276d099800 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Sun, 19 Jun 2016 15:01:38 +0300 Subject: [PATCH 05/23] Adds Bichromatic Parallel Test --- src/mlpack/tests/lsh_test.cpp | 38 ++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp index 8ddb7d39a21..2307ec85222 100644 --- a/src/mlpack/tests/lsh_test.cpp +++ b/src/mlpack/tests/lsh_test.cpp @@ -481,9 +481,9 @@ BOOST_AUTO_TEST_CASE(DeterministicNoMerge) /** * Test: This test verifies that parallel query processing returns correct - * results. + * results for the bichromatic search. */ -BOOST_AUTO_TEST_CASE(ParallelQueryTest) +BOOST_AUTO_TEST_CASE(ParallelBichromatic) { // kNN and LSH parameters (use LSH default parameters). const int k = 4; @@ -514,9 +514,41 @@ BOOST_AUTO_TEST_CASE(ParallelQueryTest) // Require both have same results double recall = ComputeRecall(sequentialNeighbors, parallelNeighbors); BOOST_REQUIRE_EQUAL(recall, 1); - } +/** + * Test: This test verifies that parallel query processing returns correct + * results for the monochromatic search. + */ +BOOST_AUTO_TEST_CASE(ParallelMonochromatic) +{ + // kNN and LSH parameters (use LSH default parameters). + const int k = 4; + const int numTables = 16; + const int numProj = 3; + + // Read iris training data as reference and query set. + const string trainSet = "iris_train.csv"; + arma::mat rdata; + data::Load(trainSet, rdata, true); + + // Where to store neighbors and distances + arma::Mat sequentialNeighbors; + arma::Mat parallelNeighbors; + arma::mat distances; + + // Construct an LSH object. By default, it uses the maximum number of threads + LSHSearch<> lshTest(rdata, numProj, numTables); //default parameters + lshTest.Search(k, parallelNeighbors, distances); + + // Now perform same search but with 1 thread + lshTest.MaxThreads(1); + lshTest.Search(k, sequentialNeighbors, distances); + + // Require both have same results + double recall = ComputeRecall(sequentialNeighbors, parallelNeighbors); + BOOST_REQUIRE_EQUAL(recall, 1); +} BOOST_AUTO_TEST_CASE(LSHTrainTest) { From a60ff919012d6c76f5ed5155558f87b8c07b7e1b Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Mon, 27 Jun 2016 10:39:29 +0100 Subject: [PATCH 06/23] Adds parallelism option to CMakeFiles, removes most omp.h dependence --- CMakeLists.txt | 12 ++- src/mlpack/core.hpp | 5 ++ src/mlpack/methods/lsh/lsh_search.hpp | 2 +- src/mlpack/methods/lsh/lsh_search_impl.hpp | 97 ++++++++++++++-------- src/mlpack/tests/lsh_test.cpp | 4 +- 5 files changed, 79 insertions(+), 41 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b5ce3bca7c7..855212ef55a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,15 +18,16 @@ endif() # First, define all the compilation options. # We default to debugging mode for developers. -option(DEBUG "Compile with debugging information" ON) -option(PROFILE "Compile with profiling information" ON) +option(DEBUG "Compile with debugging information." ON) +option(PROFILE "Compile with profiling information." ON) option(ARMA_EXTRA_DEBUG "Compile with extra Armadillo debugging symbols." OFF) option(MATLAB_BINDINGS "Compile MATLAB bindings if MATLAB is found." OFF) option(TEST_VERBOSE "Run test cases with verbose output." OFF) option(BUILD_TESTS "Build tests." ON) option(BUILD_CLI_EXECUTABLES "Build command-line executables" ON) option(BUILD_SHARED_LIBS - "Compile shared libraries (if OFF, static libraries are compiled)" ON) + "Compile shared libraries (if OFF, static libraries are compiled)." ON) +option(HAS_OPENMP "Use OpenMP for parallel execution." OFF) enable_testing() @@ -117,6 +118,11 @@ if(ARMA_EXTRA_DEBUG) add_definitions(-DARMA_EXTRA_DEBUG) endif() +# If the user has an OpenMP-enabled compiler, turn OpenMP on +if (HAS_OPENMP) + add_definitions(-DHAS_OPENMP) +endif() + # Now, find the libraries we need to compile against. Several variables can be # set to manually specify the directory in which each of these libraries # resides. diff --git a/src/mlpack/core.hpp b/src/mlpack/core.hpp index 6f5c181c061..a3a6b0490e2 100644 --- a/src/mlpack/core.hpp +++ b/src/mlpack/core.hpp @@ -232,6 +232,11 @@ #include #include +// Use OpenMP if compiled with -DHAS_OPENMP. +#ifdef HAS_OPENMP + #include +#endif + // Use Armadillo's C++ version detection. #ifdef ARMA_USE_CXX11 #define MLPACK_USE_CX11 diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp index e1767cba88e..799ee2125ae 100644 --- a/src/mlpack/methods/lsh/lsh_search.hpp +++ b/src/mlpack/methods/lsh/lsh_search.hpp @@ -270,7 +270,7 @@ class LSHSearch template void ReturnIndicesFromTable(const VecType& queryPoint, arma::uvec& referenceIndices, - size_t numTablesToSearch) const; + size_t numTablesToSearch); /** * This is a helper function that computes the distance of the query to the diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index d780de336f7..a81fa552a0e 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -14,6 +14,16 @@ using std::cout; using std::endl; //TODO: remove namespace mlpack { namespace neighbor { +// Simple small function to set threads to 1 if OpenMP is not used +inline size_t DefineMaxThreads() +{ + #ifdef _OPENMP + return omp_get_max_threads(); + #else + return 1; + #endif +} + // Construct the object with random tables template LSHSearch:: @@ -31,7 +41,7 @@ LSHSearch(const arma::mat& referenceSet, secondHashSize(secondHashSize), bucketSize(bucketSize), distanceEvaluations(0), - maxThreads(omp_get_max_threads()), + maxThreads(DefineMaxThreads()), numThreadsUsed(1) { // Pass work to training function. @@ -344,7 +354,7 @@ template void LSHSearch::ReturnIndicesFromTable( const VecType& queryPoint, arma::uvec& referenceIndices, - size_t numTablesToSearch) const + size_t numTablesToSearch) { // Decide on the number of tables to look into. if (numTablesToSearch == 0) // If no user input is given, search all. @@ -406,18 +416,37 @@ void LSHSearch::ReturnIndicesFromTable( arma::Col refPointsConsidered; refPointsConsidered.zeros(referenceSet->n_cols); - for (size_t i = 0; i < hashVec.n_elem; ++i) + // Define the number of threads used to process this. + size_t numThreadsUsed = std::min(maxThreads, numTablesToSearch); + + // Parallelization: By default nested parallelism is off, so this won't be + // parallel. The user might turn nested parallelism on if (for example) they + // have a query-by-query processing scheme and so processing more than one + // query at the same time doesn't make sense for them. + + #pragma omp parallel for \ + num_threads (numThreadsUsed) \ + shared (hashVec, refPointsConsidered) \ + schedule(dynamic) + for (size_t i = 0; i < numTablesToSearch; ++i) { + const size_t hashInd = (size_t) hashVec[i]; const size_t tableRow = bucketRowInHashTable[hashInd]; // Pick the indices in the bucket corresponding to 'hashInd'. if (tableRow != secondHashSize) + { for (size_t j = 0; j < bucketContentSize[tableRow]; j++) + { + #pragma omp atomic refPointsConsidered[secondHashTable[tableRow](j)]++; + } + } } // Only keep reference points found in at least one bucket. + // TODO: maybe write parallel implementation of this? referenceIndices = arma::find(refPointsConsidered > 0); return; } @@ -431,6 +460,19 @@ void LSHSearch::ReturnIndicesFromTable( // Retrieve candidates. size_t start = 0; + + // Define the number of threads used to process this. + size_t numThreadsUsed = std::min(maxThreads, numTablesToSearch); + + // Parallelization: By default nested parallelism is off, so this won't be + // parallel. The user might turn nested parallelism on if (for example) they + // have a query-by-query processing scheme and so processing more than one + // query at the same time doesn't make sense for them. + + #pragma omp parallel for \ + num_threads (numThreadsUsed) \ + shared (hashVec, refPointsConsideredSmall, start) \ + schedule(dynamic) for (size_t i = 0; i < numTablesToSearch; ++i) // For all tables { const size_t hashInd = (size_t) hashVec[i]; // Find the query's bucket. @@ -438,11 +480,19 @@ void LSHSearch::ReturnIndicesFromTable( // Store all secondHashTable points in the candidates set. if (tableRow != secondHashSize) + { for (size_t j = 0; j < bucketContentSize[tableRow]; ++j) - refPointsConsideredSmall(start++) = secondHashTable[tableRow][j]; + { + #pragma omp critical + { + refPointsConsideredSmall(start++) = secondHashTable[tableRow][j]; + } + } + } } // Only keep unique candidates. + // TODO: again main bottleneck is here. Parallelize? referenceIndices = arma::unique(refPointsConsideredSmall); return; } @@ -489,25 +539,16 @@ void LSHSearch::Search(const arma::mat& querySet, Timer::Start("computing_neighbors"); - // Parallelization allows us to process more than one query at a time. To - // control workload and thread access, we use numThreadsUsed and maxThreads to - // make sure we only use as many threads as the user specified. + // Parallelization to process more than one query at a time. + // use as many threads possible but not more than allowed number + size_t numThreadsUsed = std::min( (arma::uword) maxThreads, querySet.n_cols ); #pragma omp parallel for \ - if (numThreadsUsed <= maxThreads) \ - num_threads (maxThreads-numThreadsUsed)\ + num_threads ( numThreadsUsed )\ shared(avgIndicesReturned, resultingNeighbors, distances) \ schedule(dynamic) - // Go through every query point. for (size_t i = 0; i < querySet.n_cols; i++) { - // Master thread updates the number of threads used - if (i == 0 && omp_get_thread_num() == 0) - { - numThreadsUsed+=omp_get_num_threads(); - Log::Info - << "Using "<< numThreadsUsed << " threads to process queries." << endl; - } // Hash every query into every hash table and eventually into the // 'secondHashTable' to obtain the neighbor candidates. @@ -526,8 +567,6 @@ void LSHSearch::Search(const arma::mat& querySet, BaseCase(i, (size_t) refIndices[j], querySet, resultingNeighbors, distances); } - // parallel region over, reset number of threads to 1 - numThreadsUsed = omp_get_num_threads(); Timer::Stop("computing_neighbors"); @@ -556,24 +595,16 @@ Search(const size_t k, Timer::Start("computing_neighbors"); - // Parallelization allows us to process more than one query at a time. To - // control workload and thread access, we use numThreadsUsed and maxThreads to - // make sure we only use as many threads as the user specified. + // Parallelization to process more than one query at a time. + // use as many threads possible but not more than allowed number + size_t numThreadsUsed = std::min( (arma::uword) maxThreads, referenceSet->n_cols ); #pragma omp parallel for \ - if (numThreadsUsed <= maxThreads) \ - num_threads (maxThreads-numThreadsUsed)\ + num_threads ( numThreadsUsed )\ shared(avgIndicesReturned, resultingNeighbors, distances) \ schedule(dynamic) // Go through every query point. for (size_t i = 0; i < referenceSet->n_cols; i++) { - // Master thread updates the number of threads used - if (i == 0 && omp_get_thread_num() == 0) - { - numThreadsUsed+=omp_get_num_threads(); - Log::Info - << "Using "<< numThreadsUsed << " threads to process queries." << endl; - } // Hash every query into every hash table and eventually into the // 'secondHashTable' to obtain the neighbor candidates. arma::uvec refIndices; @@ -592,10 +623,6 @@ Search(const size_t k, } - // parallel region over, reset number of threads to 1 - numThreadsUsed = omp_get_num_threads(); - - Timer::Stop("computing_neighbors"); distanceEvaluations += avgIndicesReturned; diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp index 52ef1c97708..ec2e394f5f4 100644 --- a/src/mlpack/tests/lsh_test.cpp +++ b/src/mlpack/tests/lsh_test.cpp @@ -499,7 +499,7 @@ BOOST_AUTO_TEST_CASE(ParallelBichromatic) lshTest.Search(qdata, k, sequentialNeighbors, distances); // Require both have same results - double recall = ComputeRecall(sequentialNeighbors, parallelNeighbors); + double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors); BOOST_REQUIRE_EQUAL(recall, 1); } @@ -533,7 +533,7 @@ BOOST_AUTO_TEST_CASE(ParallelMonochromatic) lshTest.Search(k, sequentialNeighbors, distances); // Require both have same results - double recall = ComputeRecall(sequentialNeighbors, parallelNeighbors); + double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors); BOOST_REQUIRE_EQUAL(recall, 1); } From 6152527d44f3d30ef529f8cfcbd6bf300e29ceb3 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Mon, 27 Jun 2016 11:01:30 +0100 Subject: [PATCH 07/23] Removes numThreadsUsed variable, changes how maxThreads is initialized --- src/mlpack/methods/lsh/lsh_search_impl.hpp | 70 ++++++++++++---------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index a81fa552a0e..6ec13d83e32 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -9,20 +9,9 @@ #include -using std::cout; using std::endl; //TODO: remove - namespace mlpack { namespace neighbor { -// Simple small function to set threads to 1 if OpenMP is not used -inline size_t DefineMaxThreads() -{ - #ifdef _OPENMP - return omp_get_max_threads(); - #else - return 1; - #endif -} // Construct the object with random tables template @@ -40,10 +29,14 @@ LSHSearch(const arma::mat& referenceSet, hashWidth(hashWidthIn), secondHashSize(secondHashSize), bucketSize(bucketSize), - distanceEvaluations(0), - maxThreads(DefineMaxThreads()), - numThreadsUsed(1) + distanceEvaluations(0) { + + #ifdef _OPENMP + maxThreads = omp_get_max_threads(); + #else + maxThreads = 1; + #endif // Pass work to training function. Train(referenceSet, numProj, numTables, hashWidthIn, secondHashSize, bucketSize); @@ -64,10 +57,13 @@ LSHSearch(const arma::mat& referenceSet, hashWidth(hashWidthIn), secondHashSize(secondHashSize), bucketSize(bucketSize), - distanceEvaluations(0), - maxThreads(omp_get_max_threads()), - numThreadsUsed(1) + distanceEvaluations(0) { + #ifdef _OPENMP + maxThreads = omp_get_max_threads(); + #else + maxThreads = 1; + #endif // Pass work to training function Train(referenceSet, numProj, numTables, hashWidthIn, secondHashSize, bucketSize, projections); @@ -83,11 +79,14 @@ LSHSearch::LSHSearch() : hashWidth(0), secondHashSize(99901), bucketSize(500), - distanceEvaluations(0), - maxThreads(omp_get_max_threads()), - numThreadsUsed(1) + distanceEvaluations(0) { - // Nothing to do. + // Only define maxThreads. Nothing else to do. + #ifdef _OPENMP + maxThreads = omp_get_max_threads(); + #else + maxThreads = 1; + #endif } // Destructor. @@ -445,10 +444,15 @@ void LSHSearch::ReturnIndicesFromTable( } } - // Only keep reference points found in at least one bucket. - // TODO: maybe write parallel implementation of this? - referenceIndices = arma::find(refPointsConsidered > 0); - return; + // Only keep reference points found in at least one bucket. If OpenMP is + // found, do it in parallel + #ifdef _OPENMP + referenceIndices = arma::find(refPointsConsidered > 0); + return; + #else + referenceIndices = OmpFind(refPointsConsideredSmall); + return; + #endif } else { @@ -491,10 +495,14 @@ void LSHSearch::ReturnIndicesFromTable( } } - // Only keep unique candidates. - // TODO: again main bottleneck is here. Parallelize? - referenceIndices = arma::unique(refPointsConsideredSmall); - return; + // Only keep unique candidates. If OpenMP is found, do it in parallel. + #ifdef _OPENMP + referenceIndices = arma::unique(refPointsConsideredSmall); + return; + #else + referenceIndices = OmpUnique(refPointsConsideredSmall); + return; + #endif } } @@ -611,8 +619,8 @@ Search(const size_t k, ReturnIndicesFromTable(referenceSet->col(i), refIndices, numTablesToSearch); // An informative book-keeping for the number of neighbor candidates - // returned on average. - // Make atomic to avoid race conditions when multiple threads are running + // returned on average. Make atomic to avoid race conditions when multiple + // threads are running. #pragma omp atomic avgIndicesReturned += refIndices.n_elem; From 7cf77cdffbf26f84cc34adbcae54848182a90778 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Mon, 27 Jun 2016 11:08:34 +0100 Subject: [PATCH 08/23] Changes placeholder code for OpenMP --- src/mlpack/methods/lsh/lsh_search_impl.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index 6ec13d83e32..0a20bb3197c 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -447,10 +447,11 @@ void LSHSearch::ReturnIndicesFromTable( // Only keep reference points found in at least one bucket. If OpenMP is // found, do it in parallel #ifdef _OPENMP + // TODO: change this to our own function? referenceIndices = arma::find(refPointsConsidered > 0); return; #else - referenceIndices = OmpFind(refPointsConsideredSmall); + referenceIndices = arma::find(refPointsConsidered > 0); return; #endif } @@ -497,10 +498,11 @@ void LSHSearch::ReturnIndicesFromTable( // Only keep unique candidates. If OpenMP is found, do it in parallel. #ifdef _OPENMP + // TODO: change this to our own function? referenceIndices = arma::unique(refPointsConsideredSmall); return; #else - referenceIndices = OmpUnique(refPointsConsideredSmall); + referenceIndices = arma::unique(refPointsConsideredSmall); return; #endif } From 2ca48c63f7aba8def3f08e96d155cd9be2b8f7d5 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Mon, 27 Jun 2016 13:15:37 +0100 Subject: [PATCH 09/23] Fixes CMakeFiles and code to make openMP actually transparent --- CMakeLists.txt | 18 ++++++++---------- src/mlpack/methods/lsh/lsh_search_impl.hpp | 10 +++++----- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 855212ef55a..2dba9e56f74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ option(BUILD_TESTS "Build tests." ON) option(BUILD_CLI_EXECUTABLES "Build command-line executables" ON) option(BUILD_SHARED_LIBS "Compile shared libraries (if OFF, static libraries are compiled)." ON) -option(HAS_OPENMP "Use OpenMP for parallel execution." OFF) +option(HAS_OPENMP "Use OpenMP for parallel execution, if available" ON) enable_testing() @@ -118,11 +118,6 @@ if(ARMA_EXTRA_DEBUG) add_definitions(-DARMA_EXTRA_DEBUG) endif() -# If the user has an OpenMP-enabled compiler, turn OpenMP on -if (HAS_OPENMP) - add_definitions(-DHAS_OPENMP) -endif() - # Now, find the libraries we need to compile against. Several variables can be # set to manually specify the directory in which each of these libraries # resides. @@ -239,10 +234,13 @@ add_definitions(-DBOOST_TEST_DYN_LINK) # Detect OpenMP support in a compiler. If the compiler supports OpenMP, the # flags to compile with OpenMP are returned and added. -find_package(OpenMP) -if (OPENMP_FOUND) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +if (HAS_OPENMP) + find_package(OpenMP) + if (OPENMP_FOUND) + add_definitions(-DOPENMP_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + endif () endif () # Create a 'distclean' target in case the user is using an in-source build for diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index 0a20bb3197c..17886dcfffd 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -32,7 +32,7 @@ LSHSearch(const arma::mat& referenceSet, distanceEvaluations(0) { - #ifdef _OPENMP + #ifdef OPENMP_FOUND maxThreads = omp_get_max_threads(); #else maxThreads = 1; @@ -59,7 +59,7 @@ LSHSearch(const arma::mat& referenceSet, bucketSize(bucketSize), distanceEvaluations(0) { - #ifdef _OPENMP + #ifdef OPENMP_FOUND maxThreads = omp_get_max_threads(); #else maxThreads = 1; @@ -82,7 +82,7 @@ LSHSearch::LSHSearch() : distanceEvaluations(0) { // Only define maxThreads. Nothing else to do. - #ifdef _OPENMP + #ifdef OPENMP_FOUND maxThreads = omp_get_max_threads(); #else maxThreads = 1; @@ -446,7 +446,7 @@ void LSHSearch::ReturnIndicesFromTable( // Only keep reference points found in at least one bucket. If OpenMP is // found, do it in parallel - #ifdef _OPENMP + #ifdef OPENMP_FOUND // TODO: change this to our own function? referenceIndices = arma::find(refPointsConsidered > 0); return; @@ -497,7 +497,7 @@ void LSHSearch::ReturnIndicesFromTable( } // Only keep unique candidates. If OpenMP is found, do it in parallel. - #ifdef _OPENMP + #ifdef OPENMP_FOUND // TODO: change this to our own function? referenceIndices = arma::unique(refPointsConsideredSmall); return; From a6aca410001a4161018de08d4ff4322f68bf1dce Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Mon, 27 Jun 2016 13:56:13 +0100 Subject: [PATCH 10/23] CMake trickery to maybe make travis not crash --- CMakeLists.txt | 18 ++++++++------ src/mlpack/methods/lsh/lsh_search_impl.hpp | 28 ++++++++++------------ 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2dba9e56f74..ad267713831 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -235,13 +235,17 @@ add_definitions(-DBOOST_TEST_DYN_LINK) # Detect OpenMP support in a compiler. If the compiler supports OpenMP, the # flags to compile with OpenMP are returned and added. if (HAS_OPENMP) - find_package(OpenMP) - if (OPENMP_FOUND) - add_definitions(-DOPENMP_FOUND) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - endif () -endif () + add_definitions(-DHAS_OPENMP) + find_package(OpenMP) + if (OPENMP_FOUND) + add_definitions(-DOPENMP_FOUND) + set(HAS_OPENMP "1") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + else () + set(HAS_OPENMP "0") + endif () +endif() # Create a 'distclean' target in case the user is using an in-source build for # some reason. diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index 17886dcfffd..0e8b11fbcbe 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -12,6 +12,15 @@ namespace mlpack { namespace neighbor { +inline size_t CalculateMaxThreads() +{ + #ifdef OPENMP_FOUND + if (HAS_OPENMP) + return omp_get_max_threads(); + return 1; + #endif + return 1; +} // Construct the object with random tables template @@ -31,12 +40,7 @@ LSHSearch(const arma::mat& referenceSet, bucketSize(bucketSize), distanceEvaluations(0) { - - #ifdef OPENMP_FOUND - maxThreads = omp_get_max_threads(); - #else - maxThreads = 1; - #endif + maxThreads = CalculateMaxThreads(); // Pass work to training function. Train(referenceSet, numProj, numTables, hashWidthIn, secondHashSize, bucketSize); @@ -59,11 +63,7 @@ LSHSearch(const arma::mat& referenceSet, bucketSize(bucketSize), distanceEvaluations(0) { - #ifdef OPENMP_FOUND - maxThreads = omp_get_max_threads(); - #else - maxThreads = 1; - #endif + maxThreads = CalculateMaxThreads(); // Pass work to training function Train(referenceSet, numProj, numTables, hashWidthIn, secondHashSize, bucketSize, projections); @@ -82,11 +82,7 @@ LSHSearch::LSHSearch() : distanceEvaluations(0) { // Only define maxThreads. Nothing else to do. - #ifdef OPENMP_FOUND - maxThreads = omp_get_max_threads(); - #else - maxThreads = 1; - #endif + maxThreads = CalculateMaxThreads(); } // Destructor. From 3d536c7ccae4b944d71137d030fa802877643d81 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Mon, 27 Jun 2016 15:03:43 +0100 Subject: [PATCH 11/23] Simplifies OpenMP transparency code --- CMakeLists.txt | 10 +++++++--- src/mlpack/methods/lsh/lsh_search_impl.hpp | 11 ++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ad267713831..1b0cec2ea0a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -232,13 +232,17 @@ endif () # library. add_definitions(-DBOOST_TEST_DYN_LINK) -# Detect OpenMP support in a compiler. If the compiler supports OpenMP, the -# flags to compile with OpenMP are returned and added. +# Detect OpenMP support in a compiler. If the compiler supports OpenMP, and the +# user has not specified that OpenMP should not be used, flags to compile with +# OpenMP are returned and added and the HAS_OPENMP will be set to 1. If OpenMP +# is found but the user asked for it not to be used, HAS_OPENMP will be set to +# 0. +# This way we can skip calls to functions defined in omp.h with code like: +# if(HAS_OPENMP) {omp related stuff} if (HAS_OPENMP) add_definitions(-DHAS_OPENMP) find_package(OpenMP) if (OPENMP_FOUND) - add_definitions(-DOPENMP_FOUND) set(HAS_OPENMP "1") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index 0e8b11fbcbe..2a769046023 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -12,13 +12,18 @@ namespace mlpack { namespace neighbor { +// If OpenMP was not found by the compiler and it was used in compiling mlpack, +// then we can get more than one thread. inline size_t CalculateMaxThreads() { - #ifdef OPENMP_FOUND - if (HAS_OPENMP) + // User asked for OpenMP to be used, check if we could find it. + #ifdef HAS_OPENMP + if (HAS_OPENMP) // We found it. Use all cores. return omp_get_max_threads(); - return 1; + return 1; // We didn't find it. Use 1 core. #endif + + // User asked for OpenMP to not be used. Use 1 core. return 1; } From c04b073de58fdb84aa437de2855f2b9ebf0e3032 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Mon, 27 Jun 2016 15:30:49 +0100 Subject: [PATCH 12/23] Adds -Wno-unknown-pragmas --- CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b0cec2ea0a..2a3b451ade6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -234,9 +234,8 @@ add_definitions(-DBOOST_TEST_DYN_LINK) # Detect OpenMP support in a compiler. If the compiler supports OpenMP, and the # user has not specified that OpenMP should not be used, flags to compile with -# OpenMP are returned and added and the HAS_OPENMP will be set to 1. If OpenMP -# is found but the user asked for it not to be used, HAS_OPENMP will be set to -# 0. +# OpenMP are returned and added and the HAS_OPENMP is set to 1. If OpenMP is +# found but the user asked for it not to be used, HAS_OPENMP will be set to 0. # This way we can skip calls to functions defined in omp.h with code like: # if(HAS_OPENMP) {omp related stuff} if (HAS_OPENMP) @@ -248,7 +247,10 @@ if (HAS_OPENMP) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") else () set(HAS_OPENMP "0") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") endif () +else () + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") endif() # Create a 'distclean' target in case the user is using an in-source build for From 65983d1a3999c10935fa93ac2d2800b77aabbe5f Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Tue, 28 Jun 2016 10:09:06 +0100 Subject: [PATCH 13/23] Commit to switch branch --- src/mlpack/methods/lsh/lsh_search.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp index 799ee2125ae..2e994628da0 100644 --- a/src/mlpack/methods/lsh/lsh_search.hpp +++ b/src/mlpack/methods/lsh/lsh_search.hpp @@ -373,9 +373,6 @@ class LSHSearch //! The maximum number of threads allowed. size_t maxThreads; - //! The number of threads currently in use. - size_t numThreadsUsed; - }; // class LSHSearch } // namespace neighbor From b95a3ce6f77fffad549e5558d879c2148c28d765 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Fri, 1 Jul 2016 14:44:09 +0100 Subject: [PATCH 14/23] Changes BaseCase to include loop over candidate set. Changes loop variable to signed for OpenMP loops. --- src/mlpack/methods/lsh/lsh_search.hpp | 40 ++++- src/mlpack/methods/lsh/lsh_search_impl.hpp | 177 +++++++++++++-------- src/mlpack/tests/lsh_test.cpp | 51 +++++- 3 files changed, 195 insertions(+), 73 deletions(-) diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp index 2e994628da0..6227ca4f60e 100644 --- a/src/mlpack/methods/lsh/lsh_search.hpp +++ b/src/mlpack/methods/lsh/lsh_search.hpp @@ -272,6 +272,40 @@ class LSHSearch arma::uvec& referenceIndices, size_t numTablesToSearch); +// /** +// * This is a helper function that computes the distance of the query to the +// * neighbor candidates and appropriately stores the best 'k' candidates. This +// * is specific to the monochromatic search case, where the query set is the +// * reference set. +// * +// * @param queryIndex The index of the query in question +// * @param referenceIndex The index of the neighbor candidate in question +// * @param neighbors Matrix holding output neighbors. +// * @param distances Matrix holding output distances. +// */ +// void BaseCase(const size_t queryIndex, +// const size_t referenceIndex, +// arma::Mat& neighbors, +// arma::mat& distances) const; +// +// /** +// * This is a helper function that computes the distance of the query to the +// * neighbor candidates and appropriately stores the best 'k' candidates. This +// * is specific to bichromatic search, where the query set is not the same as +// * the reference set. +// * +// * @param queryIndex The index of the query in question +// * @param referenceIndex The index of the neighbor candidate in question +// * @param querySet Set of query points. +// * @param neighbors Matrix holding output neighbors. +// * @param distances Matrix holding output distances. +// */ +// void BaseCase(const size_t queryIndex, +// const size_t referenceIndex, +// const arma::mat& querySet, +// arma::Mat& neighbors, +// arma::mat& distances) const; + /** * This is a helper function that computes the distance of the query to the * neighbor candidates and appropriately stores the best 'k' candidates. This @@ -283,8 +317,9 @@ class LSHSearch * @param neighbors Matrix holding output neighbors. * @param distances Matrix holding output distances. */ + // TODO: change documentation above void BaseCase(const size_t queryIndex, - const size_t referenceIndex, + const arma::uvec& referenceIndices, arma::Mat& neighbors, arma::mat& distances) const; @@ -300,8 +335,9 @@ class LSHSearch * @param neighbors Matrix holding output neighbors. * @param distances Matrix holding output distances. */ + //TODO: change documentation above. void BaseCase(const size_t queryIndex, - const size_t referenceIndex, + const arma::uvec& referenceIndices, const arma::mat& querySet, arma::Mat& neighbors, arma::mat& distances) const; diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index 2a769046023..dae15a62657 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -293,6 +293,7 @@ void LSHSearch::InsertNeighbor(arma::mat& distances, neighbors(pos, queryIndex) = neighbor; } +/* // Base case where the query set is the reference set. (So, we can't return // ourselves as the nearest neighbor.) template @@ -319,8 +320,13 @@ void LSHSearch::BaseCase(const size_t queryIndex, // SortDistance() returns (size_t() - 1) if we shouldn't add it. if (insertPosition != (size_t() - 1)) - InsertNeighbor(distances, neighbors, queryIndex, insertPosition, - referenceIndex, distance); + { + #pragma omp critical + { + InsertNeighbor(distances, neighbors, queryIndex, insertPosition, + referenceIndex, distance); + } + } } // Base case for bichromatic search. @@ -345,10 +351,79 @@ void LSHSearch::BaseCase(const size_t queryIndex, // SortDistance() returns (size_t() - 1) if we shouldn't add it. if (insertPosition != (size_t() - 1)) - InsertNeighbor(distances, neighbors, queryIndex, insertPosition, - referenceIndex, distance); + { + #pragma omp critical + { + InsertNeighbor(distances, neighbors, queryIndex, insertPosition, + referenceIndex, distance); + } + } +} +*/ + +// Base case where the query set is the reference set. (So, we can't return +// ourselves as the nearest neighbor.) +template +inline force_inline +void LSHSearch::BaseCase(const size_t queryIndex, + const arma::uvec& referenceIndices, + arma::Mat& neighbors, + arma::mat& distances) const +{ + for (size_t j = 0; j < referenceIndices.n_elem; ++j) + { + const size_t referenceIndex = referenceIndices[j]; + // If the points are the same, skip this point. + if (queryIndex == referenceIndex) + continue; + + const double distance = metric::EuclideanDistance::Evaluate( + referenceSet->unsafe_col(queryIndex), + referenceSet->unsafe_col(referenceIndex)); + + // If this distance is better than any of the current candidates, the + // SortDistance() function will give us the position to insert it into. + arma::vec queryDist = distances.unsafe_col(queryIndex); + arma::Col queryIndices = neighbors.unsafe_col(queryIndex); + size_t insertPosition = SortPolicy::SortDistance(queryDist, queryIndices, + distance); + + // SortDistance() returns (size_t() - 1) if we shouldn't add it. + if (insertPosition != (size_t() - 1)) + InsertNeighbor(distances, neighbors, queryIndex, insertPosition, + referenceIndex, distance); + } } +// Base case for bichromatic search. +template +inline force_inline +void LSHSearch::BaseCase(const size_t queryIndex, + const arma::uvec& referenceIndices, + const arma::mat& querySet, + arma::Mat& neighbors, + arma::mat& distances) const +{ + for (size_t j = 0; j < referenceIndices.n_elem; ++j) + { + const size_t referenceIndex = referenceIndices[j]; + const double distance = metric::EuclideanDistance::Evaluate( + querySet.unsafe_col(queryIndex), + referenceSet->unsafe_col(referenceIndex)); + + // If this distance is better than any of the current candidates, the + // SortDistance() function will give us the position to insert it into. + arma::vec queryDist = distances.unsafe_col(queryIndex); + arma::Col queryIndices = neighbors.unsafe_col(queryIndex); + size_t insertPosition = SortPolicy::SortDistance(queryDist, queryIndices, + distance); + + // SortDistance() returns (size_t() - 1) if we shouldn't add it. + if (insertPosition != (size_t() - 1)) + InsertNeighbor(distances, neighbors, queryIndex, insertPosition, + referenceIndex, distance); + } +} template template void LSHSearch::ReturnIndicesFromTable( @@ -416,19 +491,7 @@ void LSHSearch::ReturnIndicesFromTable( arma::Col refPointsConsidered; refPointsConsidered.zeros(referenceSet->n_cols); - // Define the number of threads used to process this. - size_t numThreadsUsed = std::min(maxThreads, numTablesToSearch); - - // Parallelization: By default nested parallelism is off, so this won't be - // parallel. The user might turn nested parallelism on if (for example) they - // have a query-by-query processing scheme and so processing more than one - // query at the same time doesn't make sense for them. - - #pragma omp parallel for \ - num_threads (numThreadsUsed) \ - shared (hashVec, refPointsConsidered) \ - schedule(dynamic) - for (size_t i = 0; i < numTablesToSearch; ++i) + for (long long int i = 0; i < numTablesToSearch; ++i) { const size_t hashInd = (size_t) hashVec[i]; @@ -436,25 +499,13 @@ void LSHSearch::ReturnIndicesFromTable( // Pick the indices in the bucket corresponding to 'hashInd'. if (tableRow != secondHashSize) - { for (size_t j = 0; j < bucketContentSize[tableRow]; j++) - { - #pragma omp atomic refPointsConsidered[secondHashTable[tableRow](j)]++; - } - } } - // Only keep reference points found in at least one bucket. If OpenMP is - // found, do it in parallel - #ifdef OPENMP_FOUND - // TODO: change this to our own function? - referenceIndices = arma::find(refPointsConsidered > 0); - return; - #else - referenceIndices = arma::find(refPointsConsidered > 0); - return; - #endif + // Only keep reference points found in at least one bucket. + referenceIndices = arma::find(refPointsConsidered > 0); + return; } else { @@ -467,45 +518,20 @@ void LSHSearch::ReturnIndicesFromTable( // Retrieve candidates. size_t start = 0; - // Define the number of threads used to process this. - size_t numThreadsUsed = std::min(maxThreads, numTablesToSearch); - - // Parallelization: By default nested parallelism is off, so this won't be - // parallel. The user might turn nested parallelism on if (for example) they - // have a query-by-query processing scheme and so processing more than one - // query at the same time doesn't make sense for them. - - #pragma omp parallel for \ - num_threads (numThreadsUsed) \ - shared (hashVec, refPointsConsideredSmall, start) \ - schedule(dynamic) - for (size_t i = 0; i < numTablesToSearch; ++i) // For all tables + for (long long int i = 0; i < numTablesToSearch; ++i) // For all tables { const size_t hashInd = (size_t) hashVec[i]; // Find the query's bucket. const size_t tableRow = bucketRowInHashTable[hashInd]; // Store all secondHashTable points in the candidates set. if (tableRow != secondHashSize) - { for (size_t j = 0; j < bucketContentSize[tableRow]; ++j) - { - #pragma omp critical - { - refPointsConsideredSmall(start++) = secondHashTable[tableRow][j]; - } - } - } + refPointsConsideredSmall(start++) = secondHashTable[tableRow][j]; } - // Only keep unique candidates. If OpenMP is found, do it in parallel. - #ifdef OPENMP_FOUND - // TODO: change this to our own function? - referenceIndices = arma::unique(refPointsConsideredSmall); - return; - #else - referenceIndices = arma::unique(refPointsConsideredSmall); - return; - #endif + // Keep only one copy of each candidate. + referenceIndices = arma::unique(refPointsConsideredSmall); + return; } } @@ -557,8 +583,9 @@ void LSHSearch::Search(const arma::mat& querySet, num_threads ( numThreadsUsed )\ shared(avgIndicesReturned, resultingNeighbors, distances) \ schedule(dynamic) - // Go through every query point. - for (size_t i = 0; i < querySet.n_cols; i++) + // Go through every query point. Use long int because some compilers complain + // for openMP unsigned index variables. + for (long long int i = 0; i < querySet.n_cols; i++) { // Hash every query into every hash table and eventually into the @@ -574,9 +601,17 @@ void LSHSearch::Search(const arma::mat& querySet, // Sequentially go through all the candidates and save the best 'k' // candidates. + /* + numTheadsUsed = std::min( (arma::uword) maxThreads, refIndices.n_elem); + #pragma omp parallel for\ + num_threads( numThreadsUsed )\ + shared(refIndices, resultingNeighbors, distances, querySet)\ + schedule(dynamic) for (size_t j = 0; j < refIndices.n_elem; j++) BaseCase(i, (size_t) refIndices[j], querySet, resultingNeighbors, distances); + */ + BaseCase(i, refIndices, querySet, resultingNeighbors, distances); } Timer::Stop("computing_neighbors"); @@ -613,8 +648,9 @@ Search(const size_t k, num_threads ( numThreadsUsed )\ shared(avgIndicesReturned, resultingNeighbors, distances) \ schedule(dynamic) - // Go through every query point. - for (size_t i = 0; i < referenceSet->n_cols; i++) + // Go through every query point. Use long int because some compilers complain + // for openMP unsigned index variables. + for (long long int i = 0; i < referenceSet->n_cols; i++) { // Hash every query into every hash table and eventually into the // 'secondHashTable' to obtain the neighbor candidates. @@ -629,8 +665,17 @@ Search(const size_t k, // Sequentially go through all the candidates and save the best 'k' // candidates. + + /* + numTheadsUsed = std::min( (arma::uword) maxThreads, refIndices.n_elem); + #pragma omp parallel for\ + num_threads( numThreadsUsed )\ + shared(refIndices, resultingNeighbors, distances)\ + schedule(dynamic) for (size_t j = 0; j < refIndices.n_elem; j++) BaseCase(i, (size_t) refIndices[j], resultingNeighbors, distances); + */ + BaseCase(i, refIndices, resultingNeighbors, distances); } diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp index ec2e394f5f4..c594991238f 100644 --- a/src/mlpack/tests/lsh_test.cpp +++ b/src/mlpack/tests/lsh_test.cpp @@ -509,7 +509,7 @@ BOOST_AUTO_TEST_CASE(ParallelBichromatic) */ BOOST_AUTO_TEST_CASE(ParallelMonochromatic) { - // kNN and LSH parameters (use LSH default parameters). + // kNN and LSH parameters. const int k = 4; const int numTables = 16; const int numProj = 3; @@ -524,15 +524,56 @@ BOOST_AUTO_TEST_CASE(ParallelMonochromatic) arma::Mat parallelNeighbors; arma::mat distances; - // Construct an LSH object. By default, it uses the maximum number of threads - LSHSearch<> lshTest(rdata, numProj, numTables); //default parameters + // Construct an LSH object, using maximum number of available threads. + LSHSearch<> lshTest(rdata, numProj, numTables); lshTest.Search(k, parallelNeighbors, distances); - // Now perform same search but with 1 thread + // Now perform same search but with 1 thread. lshTest.MaxThreads(1); lshTest.Search(k, sequentialNeighbors, distances); - // Require both have same results + // Require both have same results. + double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors); + BOOST_REQUIRE_EQUAL(recall, 1); +} + +/** + * Test: This test verifies that processing a query in parallel returns the same + * results with processing it sequentially. + * Requires OMP_NESTED environment variable to be set to TRUE. To set it, + * execute: + * ``` + * export OMP_NESTED=TRUE + * ``` + */ +BOOST_AUTO_TEST_CASE(ParallelSingleQuery) +{ + // kNN and LSH parameters. + const int k = 4; + const int numTables = 16; + const int numProj = 3; + + // Read iris training data as reference and query set. + const string trainSet = "iris_train.csv"; + arma::mat rdata; + data::Load(trainSet, rdata, true); + + arma::mat qdata = rdata.col(0); // Only 1 query. + + // Where to store neighbors and distances. + arma::Mat sequentialNeighbors; + arma::Mat parallelNeighbors; + arma::mat distances; + + // Construct an LSH object. By default, maximum number of threads are used. + LSHSearch<> lshTest(rdata, numProj, numTables); + lshTest.Search(qdata, k, parallelNeighbors, distances); + + // Now perform same search but with 1 thread. + lshTest.MaxThreads(1); + lshTest.Search(qdata, k, sequentialNeighbors, distances); + + // Require both have same results. double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors); BOOST_REQUIRE_EQUAL(recall, 1); } From 0d382719a29754bfa6e2ddab976b5ae7386811c0 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Fri, 1 Jul 2016 14:51:21 +0100 Subject: [PATCH 15/23] Removes commented-out old BaseCase code --- src/mlpack/methods/lsh/lsh_search.hpp | 41 +++------------------------ 1 file changed, 4 insertions(+), 37 deletions(-) diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp index 6227ca4f60e..088239338d2 100644 --- a/src/mlpack/methods/lsh/lsh_search.hpp +++ b/src/mlpack/methods/lsh/lsh_search.hpp @@ -272,39 +272,6 @@ class LSHSearch arma::uvec& referenceIndices, size_t numTablesToSearch); -// /** -// * This is a helper function that computes the distance of the query to the -// * neighbor candidates and appropriately stores the best 'k' candidates. This -// * is specific to the monochromatic search case, where the query set is the -// * reference set. -// * -// * @param queryIndex The index of the query in question -// * @param referenceIndex The index of the neighbor candidate in question -// * @param neighbors Matrix holding output neighbors. -// * @param distances Matrix holding output distances. -// */ -// void BaseCase(const size_t queryIndex, -// const size_t referenceIndex, -// arma::Mat& neighbors, -// arma::mat& distances) const; -// -// /** -// * This is a helper function that computes the distance of the query to the -// * neighbor candidates and appropriately stores the best 'k' candidates. This -// * is specific to bichromatic search, where the query set is not the same as -// * the reference set. -// * -// * @param queryIndex The index of the query in question -// * @param referenceIndex The index of the neighbor candidate in question -// * @param querySet Set of query points. -// * @param neighbors Matrix holding output neighbors. -// * @param distances Matrix holding output distances. -// */ -// void BaseCase(const size_t queryIndex, -// const size_t referenceIndex, -// const arma::mat& querySet, -// arma::Mat& neighbors, -// arma::mat& distances) const; /** * This is a helper function that computes the distance of the query to the @@ -313,11 +280,11 @@ class LSHSearch * reference set. * * @param queryIndex The index of the query in question - * @param referenceIndex The index of the neighbor candidate in question + * @param referenceIndices The vector of indices of candidate neighbors for + * the query. * @param neighbors Matrix holding output neighbors. * @param distances Matrix holding output distances. */ - // TODO: change documentation above void BaseCase(const size_t queryIndex, const arma::uvec& referenceIndices, arma::Mat& neighbors, @@ -330,12 +297,12 @@ class LSHSearch * the reference set. * * @param queryIndex The index of the query in question - * @param referenceIndex The index of the neighbor candidate in question + * @param referenceIndices The vector of indices of candidate neighbors for + * the query. * @param querySet Set of query points. * @param neighbors Matrix holding output neighbors. * @param distances Matrix holding output distances. */ - //TODO: change documentation above. void BaseCase(const size_t queryIndex, const arma::uvec& referenceIndices, const arma::mat& querySet, From b02e2f341aa800b3397bf5651f99af9b88645d88 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Wed, 6 Jul 2016 16:56:44 +0100 Subject: [PATCH 16/23] Changes BaseCase to include the loop over candidate set. Places parallelization tests into #ifdef block --- CMakeLists.txt | 4 +- src/mlpack/methods/lsh/lsh_search_impl.hpp | 104 ++---------- src/mlpack/tests/lsh_test.cpp | 189 ++++++++------------- 3 files changed, 87 insertions(+), 210 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a3b451ade6..df949eaae6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,10 +24,10 @@ option(ARMA_EXTRA_DEBUG "Compile with extra Armadillo debugging symbols." OFF) option(MATLAB_BINDINGS "Compile MATLAB bindings if MATLAB is found." OFF) option(TEST_VERBOSE "Run test cases with verbose output." OFF) option(BUILD_TESTS "Build tests." ON) -option(BUILD_CLI_EXECUTABLES "Build command-line executables" ON) +option(BUILD_CLI_EXECUTABLES "Build command-line executables." ON) option(BUILD_SHARED_LIBS "Compile shared libraries (if OFF, static libraries are compiled)." ON) -option(HAS_OPENMP "Use OpenMP for parallel execution, if available" ON) +option(HAS_OPENMP "Use OpenMP for parallel execution, if available." ON) enable_testing() diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index 8d3cdb1f31f..75a12015498 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -217,13 +217,14 @@ void LSHSearch::Train(const arma::mat& referenceSet, double shs = (double) secondHashSize; // Convenience cast. if (unmodVector[j] >= 0.0) { - secondHashVectors(i, j) = size_t(fmod(unmodVector[j], shs)); + const size_t key = size_t(fmod(unmodVector[j], shs)); + secondHashVectors(i, j) = key; } else { const double mod = fmod(-unmodVector[j], shs); - secondHashVectors(i, j) = (mod < 1.0) ? 0 : secondHashSize - - size_t(mod); + const size_t key = (mod < 1.0) ? 0 : secondHashSize - size_t(mod); + secondHashVectors(i, j) = key; } } } @@ -305,74 +306,6 @@ void LSHSearch::InsertNeighbor(arma::mat& distances, neighbors(pos, queryIndex) = neighbor; } -/* -// Base case where the query set is the reference set. (So, we can't return -// ourselves as the nearest neighbor.) -template -inline force_inline -void LSHSearch::BaseCase(const size_t queryIndex, - const size_t referenceIndex, - arma::Mat& neighbors, - arma::mat& distances) const -{ - // If the points are the same, we can't continue. - if (queryIndex == referenceIndex) - return; - - const double distance = metric::EuclideanDistance::Evaluate( - referenceSet->unsafe_col(queryIndex), - referenceSet->unsafe_col(referenceIndex)); - - // If this distance is better than any of the current candidates, the - // SortDistance() function will give us the position to insert it into. - arma::vec queryDist = distances.unsafe_col(queryIndex); - arma::Col queryIndices = neighbors.unsafe_col(queryIndex); - size_t insertPosition = SortPolicy::SortDistance(queryDist, queryIndices, - distance); - - // SortDistance() returns (size_t() - 1) if we shouldn't add it. - if (insertPosition != (size_t() - 1)) - { - #pragma omp critical - { - InsertNeighbor(distances, neighbors, queryIndex, insertPosition, - referenceIndex, distance); - } - } -} - -// Base case for bichromatic search. -template -inline force_inline -void LSHSearch::BaseCase(const size_t queryIndex, - const size_t referenceIndex, - const arma::mat& querySet, - arma::Mat& neighbors, - arma::mat& distances) const -{ - const double distance = metric::EuclideanDistance::Evaluate( - querySet.unsafe_col(queryIndex), - referenceSet->unsafe_col(referenceIndex)); - - // If this distance is better than any of the current candidates, the - // SortDistance() function will give us the position to insert it into. - arma::vec queryDist = distances.unsafe_col(queryIndex); - arma::Col queryIndices = neighbors.unsafe_col(queryIndex); - size_t insertPosition = SortPolicy::SortDistance(queryDist, queryIndices, - distance); - - // SortDistance() returns (size_t() - 1) if we shouldn't add it. - if (insertPosition != (size_t() - 1)) - { - #pragma omp critical - { - InsertNeighbor(distances, neighbors, queryIndex, insertPosition, - referenceIndex, distance); - } - } -} -*/ - // Base case where the query set is the reference set. (So, we can't return // ourselves as the nearest neighbor.) template @@ -382,6 +315,7 @@ void LSHSearch::BaseCase(const size_t queryIndex, arma::Mat& neighbors, arma::mat& distances) const { + for (size_t j = 0; j < referenceIndices.n_elem; ++j) { const size_t referenceIndex = referenceIndices[j]; @@ -432,8 +366,11 @@ void LSHSearch::BaseCase(const size_t queryIndex, // SortDistance() returns (size_t() - 1) if we shouldn't add it. if (insertPosition != (size_t() - 1)) + #pragma omp critical InsertNeighbor(distances, neighbors, queryIndex, insertPosition, referenceIndex, distance); + + #pragma omp flush(distances, neighbors) } } template @@ -907,7 +844,7 @@ void LSHSearch::Search(const arma::mat& querySet, // Parallelization to process more than one query at a time. // use as many threads possible but not more than allowed number - size_t numThreadsUsed = std::min( (arma::uword) maxThreads, querySet.n_cols ); + size_t numThreadsUsed = maxThreads; #pragma omp parallel for \ num_threads ( numThreadsUsed )\ shared(avgIndicesReturned, resultingNeighbors, distances) \ @@ -931,16 +868,6 @@ void LSHSearch::Search(const arma::mat& querySet, // Sequentially go through all the candidates and save the best 'k' // candidates. - /* - numTheadsUsed = std::min( (arma::uword) maxThreads, refIndices.n_elem); - #pragma omp parallel for\ - num_threads( numThreadsUsed )\ - shared(refIndices, resultingNeighbors, distances, querySet)\ - schedule(dynamic) - for (size_t j = 0; j < refIndices.n_elem; j++) - BaseCase(i, (size_t) refIndices[j], querySet, resultingNeighbors, - distances); - */ BaseCase(i, refIndices, querySet, resultingNeighbors, distances); } @@ -989,7 +916,7 @@ Search(const size_t k, // Parallelization to process more than one query at a time. // use as many threads possible but not more than allowed number - size_t numThreadsUsed = std::min( (arma::uword) maxThreads, referenceSet->n_cols ); + size_t numThreadsUsed = maxThreads; #pragma omp parallel for \ num_threads ( numThreadsUsed )\ shared(avgIndicesReturned, resultingNeighbors, distances) \ @@ -1012,18 +939,7 @@ Search(const size_t k, // Sequentially go through all the candidates and save the best 'k' // candidates. - - /* - numTheadsUsed = std::min( (arma::uword) maxThreads, refIndices.n_elem); - #pragma omp parallel for\ - num_threads( numThreadsUsed )\ - shared(refIndices, resultingNeighbors, distances)\ - schedule(dynamic) - for (size_t j = 0; j < refIndices.n_elem; j++) - BaseCase(i, (size_t) refIndices[j], resultingNeighbors, distances); - */ BaseCase(i, refIndices, resultingNeighbors, distances); - } Timer::Stop("computing_neighbors"); diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp index 8aac2381507..179b3b1d7e5 100644 --- a/src/mlpack/tests/lsh_test.cpp +++ b/src/mlpack/tests/lsh_test.cpp @@ -471,7 +471,6 @@ BOOST_AUTO_TEST_CASE(DeterministicNoMerge) } } - /** * Test: Create an LSHSearch object and use an increasing number of probes to * search for points. Require that recall for the same object doesn't decrease @@ -612,119 +611,6 @@ BOOST_AUTO_TEST_CASE(MultiprobeDeterministicTest) neighbors.col(0) >= N / 4 && neighbors.col(0) < N / 2)); } - -/** - * Test: This test verifies that parallel query processing returns correct - * results for the bichromatic search. - */ -BOOST_AUTO_TEST_CASE(ParallelBichromatic) -{ - // kNN and LSH parameters (use LSH default parameters). - const int k = 4; - const int numTables = 16; - const int numProj = 3; - - // Read iris training and testing data as reference and query sets. - const string trainSet = "iris_train.csv"; - const string testSet = "iris_test.csv"; - arma::mat rdata; - arma::mat qdata; - data::Load(trainSet, rdata, true); - data::Load(testSet, qdata, true); - - // Where to store neighbors and distances - arma::Mat sequentialNeighbors; - arma::Mat parallelNeighbors; - arma::mat distances; - - // Construct an LSH object. By default, it uses the maximum number of threads - LSHSearch<> lshTest(rdata, numProj, numTables); //default parameters - lshTest.Search(qdata, k, parallelNeighbors, distances); - - // Now perform same search but with 1 thread - lshTest.MaxThreads(1); - lshTest.Search(qdata, k, sequentialNeighbors, distances); - - // Require both have same results - double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors); - BOOST_REQUIRE_EQUAL(recall, 1); -} - -/** - * Test: This test verifies that parallel query processing returns correct - * results for the monochromatic search. - */ -BOOST_AUTO_TEST_CASE(ParallelMonochromatic) -{ - // kNN and LSH parameters. - const int k = 4; - const int numTables = 16; - const int numProj = 3; - - // Read iris training data as reference and query set. - const string trainSet = "iris_train.csv"; - arma::mat rdata; - data::Load(trainSet, rdata, true); - - // Where to store neighbors and distances - arma::Mat sequentialNeighbors; - arma::Mat parallelNeighbors; - arma::mat distances; - - // Construct an LSH object, using maximum number of available threads. - LSHSearch<> lshTest(rdata, numProj, numTables); - lshTest.Search(k, parallelNeighbors, distances); - - // Now perform same search but with 1 thread. - lshTest.MaxThreads(1); - lshTest.Search(k, sequentialNeighbors, distances); - - // Require both have same results. - double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors); - BOOST_REQUIRE_EQUAL(recall, 1); -} - -/** - * Test: This test verifies that processing a query in parallel returns the same - * results with processing it sequentially. - * Requires OMP_NESTED environment variable to be set to TRUE. To set it, - * execute: - * ``` - * export OMP_NESTED=TRUE - * ``` - */ -BOOST_AUTO_TEST_CASE(ParallelSingleQuery) -{ - // kNN and LSH parameters. - const int k = 4; - const int numTables = 16; - const int numProj = 3; - - // Read iris training data as reference and query set. - const string trainSet = "iris_train.csv"; - arma::mat rdata; - data::Load(trainSet, rdata, true); - - arma::mat qdata = rdata.col(0); // Only 1 query. - - // Where to store neighbors and distances. - arma::Mat sequentialNeighbors; - arma::Mat parallelNeighbors; - arma::mat distances; - - // Construct an LSH object. By default, maximum number of threads are used. - LSHSearch<> lshTest(rdata, numProj, numTables); - lshTest.Search(qdata, k, parallelNeighbors, distances); - - // Now perform same search but with 1 thread. - lshTest.MaxThreads(1); - lshTest.Search(qdata, k, sequentialNeighbors, distances); - - // Require both have same results. - double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors); - BOOST_REQUIRE_EQUAL(recall, 1); -} - BOOST_AUTO_TEST_CASE(LSHTrainTest) { // This is a not very good test that simply checks that the re-trained LSH @@ -866,4 +752,79 @@ BOOST_AUTO_TEST_CASE(EmptyConstructorTest) BOOST_REQUIRE_EQUAL(distances.n_rows, 3); } +// These two tests are only compiled if the user has specified OpenMP to be +// used. +#ifdef HAS_OPENMP +/** + * Test: This test verifies that parallel query processing returns correct + * results for the bichromatic search. + */ +BOOST_AUTO_TEST_CASE(ParallelBichromatic) +{ + // kNN and LSH parameters (use LSH default parameters). + const int k = 4; + const int numTables = 16; + const int numProj = 3; + + // Read iris training and testing data as reference and query sets. + const string trainSet = "iris_train.csv"; + const string testSet = "iris_test.csv"; + arma::mat rdata; + arma::mat qdata; + data::Load(trainSet, rdata, true); + data::Load(testSet, qdata, true); + + // Where to store neighbors and distances + arma::Mat sequentialNeighbors; + arma::Mat parallelNeighbors; + arma::mat distances; + + // Construct an LSH object. By default, it uses the maximum number of threads + LSHSearch<> lshTest(rdata, numProj, numTables); //default parameters + lshTest.Search(qdata, k, parallelNeighbors, distances); + + // Now perform same search but with 1 thread + lshTest.MaxThreads(1); + lshTest.Search(qdata, k, sequentialNeighbors, distances); + + // Require both have same results + double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors); + BOOST_REQUIRE_EQUAL(recall, 1); +} + +/** + * Test: This test verifies that parallel query processing returns correct + * results for the monochromatic search. + */ +BOOST_AUTO_TEST_CASE(ParallelMonochromatic) +{ + // kNN and LSH parameters. + const int k = 4; + const int numTables = 16; + const int numProj = 3; + + // Read iris training data as reference and query set. + const string trainSet = "iris_train.csv"; + arma::mat rdata; + data::Load(trainSet, rdata, true); + + // Where to store neighbors and distances + arma::Mat sequentialNeighbors; + arma::Mat parallelNeighbors; + arma::mat distances; + + // Construct an LSH object, using maximum number of available threads. + LSHSearch<> lshTest(rdata, numProj, numTables); + lshTest.Search(k, parallelNeighbors, distances); + + // Now perform same search but with 1 thread. + lshTest.MaxThreads(1); + lshTest.Search(k, sequentialNeighbors, distances); + + // Require both have same results. + double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors); + BOOST_REQUIRE_EQUAL(recall, 1); +} +#endif + BOOST_AUTO_TEST_SUITE_END(); From ad8e6d3cfd9094e7c7200b5ccd50845e7c146801 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Thu, 7 Jul 2016 10:50:20 +0100 Subject: [PATCH 17/23] Modifies CMakeLists to remove -DHAS_OPENMP --- CMakeLists.txt | 40 +++++++++++++++------- src/mlpack/methods/lsh/lsh_search_impl.hpp | 13 ++++--- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index df949eaae6b..1a41553a372 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ option(BUILD_TESTS "Build tests." ON) option(BUILD_CLI_EXECUTABLES "Build command-line executables." ON) option(BUILD_SHARED_LIBS "Compile shared libraries (if OFF, static libraries are compiled)." ON) -option(HAS_OPENMP "Use OpenMP for parallel execution, if available." ON) +#option(HAS_OPENMP "Use OpenMP for parallel execution, if available." ON) enable_testing() @@ -232,26 +232,40 @@ endif () # library. add_definitions(-DBOOST_TEST_DYN_LINK) +# TODO: Remove these lines. # Detect OpenMP support in a compiler. If the compiler supports OpenMP, and the # user has not specified that OpenMP should not be used, flags to compile with # OpenMP are returned and added and the HAS_OPENMP is set to 1. If OpenMP is # found but the user asked for it not to be used, HAS_OPENMP will be set to 0. # This way we can skip calls to functions defined in omp.h with code like: # if(HAS_OPENMP) {omp related stuff} -if (HAS_OPENMP) +#if (HAS_OPENMP) +# add_definitions(-DHAS_OPENMP) +# find_package(OpenMP) +# if (OPENMP_FOUND) +# set(HAS_OPENMP "1") +# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +# else () +# set(HAS_OPENMP "0") +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") +# endif () +#else () +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") +#endif() +# Simpler OpenMP activation: If OpenMP is found, define HAS_OPENMP to be 1. +# Otherwise define it to be 0. +find_package(OpenMP) +if (OPENMP_FOUND) add_definitions(-DHAS_OPENMP) - find_package(OpenMP) - if (OPENMP_FOUND) - set(HAS_OPENMP "1") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - else () - set(HAS_OPENMP "0") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") - endif () + set(HAS_OPENMP "1") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") else () - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") -endif() + add_definitions(-DHAS_OPENMP) + set(HAS_OPENMP "0") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") +endif () # Create a 'distclean' target in case the user is using an in-source build for # some reason. diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index 75a12015498..6e125a7bc76 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -12,18 +12,19 @@ namespace mlpack { namespace neighbor { -// If OpenMP was not found by the compiler and it was used in compiling mlpack, +// If OpenMP was found by the compiler and was used in compiling mlpack, // then we can get more than one thread. inline size_t CalculateMaxThreads() { - // User asked for OpenMP to be used, check if we could find it. + // HAS_OPENMP should be defined by CMakeLists after the check for OpenMP has + // been performed. #ifdef HAS_OPENMP - if (HAS_OPENMP) // We found it. Use all cores. + if (HAS_OPENMP) // If compiler has OpenMP support, use all available threads. return omp_get_max_threads(); - return 1; // We didn't find it. Use 1 core. + return 1; // Compiler doesn't support OpenMP. Hard-wire maxThreads to 1. #endif - // User asked for OpenMP to not be used. Use 1 core. + // In case HAS_OPENMP wasn't properly defined by CMakeLists, use 1 thread. return 1; } @@ -366,11 +367,9 @@ void LSHSearch::BaseCase(const size_t queryIndex, // SortDistance() returns (size_t() - 1) if we shouldn't add it. if (insertPosition != (size_t() - 1)) - #pragma omp critical InsertNeighbor(distances, neighbors, queryIndex, insertPosition, referenceIndex, distance); - #pragma omp flush(distances, neighbors) } } template From c4c8ff950be8a06e06084764f188095c650b7a60 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Thu, 7 Jul 2016 11:45:17 +0100 Subject: [PATCH 18/23] Removes old code and comments from CMakeLists.txt --- CMakeLists.txt | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a41553a372..63407e6b1ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -232,29 +232,11 @@ endif () # library. add_definitions(-DBOOST_TEST_DYN_LINK) -# TODO: Remove these lines. -# Detect OpenMP support in a compiler. If the compiler supports OpenMP, and the -# user has not specified that OpenMP should not be used, flags to compile with -# OpenMP are returned and added and the HAS_OPENMP is set to 1. If OpenMP is -# found but the user asked for it not to be used, HAS_OPENMP will be set to 0. +# Detect OpenMP support in a compiler. If the compiler supports OpenMP, flags +# to compile with OpenMP are returned and added and the HAS_OPENMP is set to 1. # This way we can skip calls to functions defined in omp.h with code like: -# if(HAS_OPENMP) {omp related stuff} -#if (HAS_OPENMP) -# add_definitions(-DHAS_OPENMP) -# find_package(OpenMP) -# if (OPENMP_FOUND) -# set(HAS_OPENMP "1") -# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -# else () -# set(HAS_OPENMP "0") -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") -# endif () -#else () -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") -#endif() -# Simpler OpenMP activation: If OpenMP is found, define HAS_OPENMP to be 1. -# Otherwise define it to be 0. +# if (HAS_OPENMP == 1) { openMP code here } +# If OpenMP is found, define HAS_OPENMP to be 1. Otherwise define it to be 0. find_package(OpenMP) if (OPENMP_FOUND) add_definitions(-DHAS_OPENMP) From 074d7267a1926c5af9732dc47142af9a5a179f48 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Fri, 8 Jul 2016 10:35:59 +0100 Subject: [PATCH 19/23] Restores size_t for openMP loop counters, changes CmakeLists to require OpenMP version 3 --- CMakeLists.txt | 10 +++++----- src/mlpack/methods/lsh/lsh_search_impl.hpp | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 63407e6b1ec..84b3245f9b6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,8 +18,8 @@ endif() # First, define all the compilation options. # We default to debugging mode for developers. -option(DEBUG "Compile with debugging information." ON) -option(PROFILE "Compile with profiling information." ON) +option(DEBUG "Compile with debugging information." OFF) +option(PROFILE "Compile with profiling information." OFF) option(ARMA_EXTRA_DEBUG "Compile with extra Armadillo debugging symbols." OFF) option(MATLAB_BINDINGS "Compile MATLAB bindings if MATLAB is found." OFF) option(TEST_VERBOSE "Run test cases with verbose output." OFF) @@ -232,12 +232,12 @@ endif () # library. add_definitions(-DBOOST_TEST_DYN_LINK) -# Detect OpenMP support in a compiler. If the compiler supports OpenMP, flags -# to compile with OpenMP are returned and added and the HAS_OPENMP is set to 1. +# Detect OpenMP support in a compiler. If the compiler supports OpenMP, flags +# to compile with OpenMP are returned and added and the HAS_OPENMP is set to 1. # This way we can skip calls to functions defined in omp.h with code like: # if (HAS_OPENMP == 1) { openMP code here } # If OpenMP is found, define HAS_OPENMP to be 1. Otherwise define it to be 0. -find_package(OpenMP) +find_package(OpenMP 3) if (OPENMP_FOUND) add_definitions(-DHAS_OPENMP) set(HAS_OPENMP "1") diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index 6e125a7bc76..cbbcb816830 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -850,7 +850,7 @@ void LSHSearch::Search(const arma::mat& querySet, schedule(dynamic) // Go through every query point. Use long int because some compilers complain // for openMP unsigned index variables. - for (long long int i = 0; i < querySet.n_cols; i++) + for (size_t i = 0; i < querySet.n_cols; i++) { // Hash every query into every hash table and eventually into the @@ -922,7 +922,7 @@ Search(const size_t k, schedule(dynamic) // Go through every query point. Use long int because some compilers complain // for openMP unsigned index variables. - for (long long int i = 0; i < referenceSet->n_cols; i++) + for (size_t i = 0; i < referenceSet->n_cols; i++) { // Hash every query into every hash table and eventually into the // 'secondHashTable' to obtain the neighbor candidates. From f982ca50f0a9d3b464852af9e9d3055be4a7c46f Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Fri, 8 Jul 2016 11:43:25 +0100 Subject: [PATCH 20/23] Removes maxThreads functionality from LSHSearch class --- CMakeLists.txt | 3 +-- src/mlpack/methods/lsh/lsh_search.hpp | 9 ------- src/mlpack/methods/lsh/lsh_search_impl.hpp | 31 ++-------------------- src/mlpack/tests/lsh_test.cpp | 8 ++++-- 4 files changed, 9 insertions(+), 42 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 84b3245f9b6..e75af7200fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,6 @@ option(BUILD_TESTS "Build tests." ON) option(BUILD_CLI_EXECUTABLES "Build command-line executables." ON) option(BUILD_SHARED_LIBS "Compile shared libraries (if OFF, static libraries are compiled)." ON) -#option(HAS_OPENMP "Use OpenMP for parallel execution, if available." ON) enable_testing() @@ -237,7 +236,7 @@ add_definitions(-DBOOST_TEST_DYN_LINK) # This way we can skip calls to functions defined in omp.h with code like: # if (HAS_OPENMP == 1) { openMP code here } # If OpenMP is found, define HAS_OPENMP to be 1. Otherwise define it to be 0. -find_package(OpenMP 3) +find_package(OpenMP 3.0.0 ) if (OPENMP_FOUND) add_definitions(-DHAS_OPENMP) set(HAS_OPENMP "1") diff --git a/src/mlpack/methods/lsh/lsh_search.hpp b/src/mlpack/methods/lsh/lsh_search.hpp index 811a73d7917..f3fff9ab431 100644 --- a/src/mlpack/methods/lsh/lsh_search.hpp +++ b/src/mlpack/methods/lsh/lsh_search.hpp @@ -268,12 +268,6 @@ class LSHSearch //! removed in mlpack 2.1.0! const arma::mat& Projection(size_t i) { return projections.slice(i); } - //! Set the maximum number of threads the object is allowed to use - void MaxThreads(size_t numThreads) { maxThreads = numThreads;} - - //! Return the current maxumum threads the object is allowed to use - size_t MaxThreads(void) const { return maxThreads; } - private: /** * This function takes a query and hashes it into each of the hash tables to @@ -451,9 +445,6 @@ class LSHSearch //! The number of distance evaluations. size_t distanceEvaluations; - //! The maximum number of threads allowed. - size_t maxThreads; - }; // class LSHSearch } // namespace neighbor diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index cbbcb816830..c37aed9586a 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -12,22 +12,6 @@ namespace mlpack { namespace neighbor { -// If OpenMP was found by the compiler and was used in compiling mlpack, -// then we can get more than one thread. -inline size_t CalculateMaxThreads() -{ - // HAS_OPENMP should be defined by CMakeLists after the check for OpenMP has - // been performed. - #ifdef HAS_OPENMP - if (HAS_OPENMP) // If compiler has OpenMP support, use all available threads. - return omp_get_max_threads(); - return 1; // Compiler doesn't support OpenMP. Hard-wire maxThreads to 1. - #endif - - // In case HAS_OPENMP wasn't properly defined by CMakeLists, use 1 thread. - return 1; -} - // Construct the object with random tables template LSHSearch:: @@ -46,7 +30,6 @@ LSHSearch(const arma::mat& referenceSet, bucketSize(bucketSize), distanceEvaluations(0) { - maxThreads = CalculateMaxThreads(); // Pass work to training function. Train(referenceSet, numProj, numTables, hashWidthIn, secondHashSize, bucketSize); @@ -69,7 +52,6 @@ LSHSearch(const arma::mat& referenceSet, bucketSize(bucketSize), distanceEvaluations(0) { - maxThreads = CalculateMaxThreads(); // Pass work to training function Train(referenceSet, numProj, numTables, hashWidthIn, secondHashSize, bucketSize, projections); @@ -87,8 +69,6 @@ LSHSearch::LSHSearch() : bucketSize(500), distanceEvaluations(0) { - // Only define maxThreads. Nothing else to do. - maxThreads = CalculateMaxThreads(); } // Destructor. @@ -763,7 +743,7 @@ void LSHSearch::ReturnIndicesFromTable( // Retrieve candidates. size_t start = 0; - for (long long int i = 0; i < numTablesToSearch; ++i) // For all tables + for (size_t i = 0; i < numTablesToSearch; ++i) // For all tables { for (size_t p = 0; p < T + 1; ++p) { @@ -842,14 +822,10 @@ void LSHSearch::Search(const arma::mat& querySet, Timer::Start("computing_neighbors"); // Parallelization to process more than one query at a time. - // use as many threads possible but not more than allowed number - size_t numThreadsUsed = maxThreads; #pragma omp parallel for \ - num_threads ( numThreadsUsed )\ shared(avgIndicesReturned, resultingNeighbors, distances) \ schedule(dynamic) - // Go through every query point. Use long int because some compilers complain - // for openMP unsigned index variables. + // Go through every query point. for (size_t i = 0; i < querySet.n_cols; i++) { @@ -914,10 +890,7 @@ Search(const size_t k, Timer::Start("computing_neighbors"); // Parallelization to process more than one query at a time. - // use as many threads possible but not more than allowed number - size_t numThreadsUsed = maxThreads; #pragma omp parallel for \ - num_threads ( numThreadsUsed )\ shared(avgIndicesReturned, resultingNeighbors, distances) \ schedule(dynamic) // Go through every query point. Use long int because some compilers complain diff --git a/src/mlpack/tests/lsh_test.cpp b/src/mlpack/tests/lsh_test.cpp index 179b3b1d7e5..33485fce76d 100644 --- a/src/mlpack/tests/lsh_test.cpp +++ b/src/mlpack/tests/lsh_test.cpp @@ -784,8 +784,10 @@ BOOST_AUTO_TEST_CASE(ParallelBichromatic) lshTest.Search(qdata, k, parallelNeighbors, distances); // Now perform same search but with 1 thread - lshTest.MaxThreads(1); + size_t prevNumThreads = omp_get_max_threads(); // Store number of threads used. + omp_set_num_threads(1); lshTest.Search(qdata, k, sequentialNeighbors, distances); + omp_set_num_threads(prevNumThreads); // Require both have same results double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors); @@ -818,8 +820,10 @@ BOOST_AUTO_TEST_CASE(ParallelMonochromatic) lshTest.Search(k, parallelNeighbors, distances); // Now perform same search but with 1 thread. - lshTest.MaxThreads(1); + size_t prevNumThreads = omp_get_max_threads(); // Store number of threads used. + omp_set_num_threads(1); lshTest.Search(k, sequentialNeighbors, distances); + omp_set_num_threads(prevNumThreads); // Require both have same results. double recall = LSHSearch<>::ComputeRecall(sequentialNeighbors, parallelNeighbors); From 1fb998fbd1eace400122fcebdbdaac63801c5185 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Fri, 8 Jul 2016 12:38:09 +0100 Subject: [PATCH 21/23] Removes HAS_OPENMP definition in CMakeFiles --- CMakeLists.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e75af7200fc..2f91fb64c3e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,15 +236,12 @@ add_definitions(-DBOOST_TEST_DYN_LINK) # This way we can skip calls to functions defined in omp.h with code like: # if (HAS_OPENMP == 1) { openMP code here } # If OpenMP is found, define HAS_OPENMP to be 1. Otherwise define it to be 0. -find_package(OpenMP 3.0.0 ) +find_package(OpenMP 3.0.0) if (OPENMP_FOUND) - add_definitions(-DHAS_OPENMP) - set(HAS_OPENMP "1") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") else () add_definitions(-DHAS_OPENMP) - set(HAS_OPENMP "0") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") endif () From b92d46527c5d8b1487e4aa4810359292292107f1 Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Fri, 8 Jul 2016 13:14:54 +0100 Subject: [PATCH 22/23] Workaround for OpenMP 2.0 (based on dt_utils.cpp) --- CMakeLists.txt | 2 +- src/mlpack/methods/lsh/lsh_search_impl.hpp | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f91fb64c3e..02e98030af4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,7 +236,7 @@ add_definitions(-DBOOST_TEST_DYN_LINK) # This way we can skip calls to functions defined in omp.h with code like: # if (HAS_OPENMP == 1) { openMP code here } # If OpenMP is found, define HAS_OPENMP to be 1. Otherwise define it to be 0. -find_package(OpenMP 3.0.0) +find_package(OpenMP) if (OPENMP_FOUND) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index c37aed9586a..066d5c01c57 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -825,10 +825,17 @@ void LSHSearch::Search(const arma::mat& querySet, #pragma omp parallel for \ shared(avgIndicesReturned, resultingNeighbors, distances) \ schedule(dynamic) - // Go through every query point. - for (size_t i = 0; i < querySet.n_cols; i++) +#ifdef _WIN32 + // Tiny workaround: Visual Studio only implements OpenMP 2.0, which doesn't + // support unsigned loop variables. If we're building for Visual Studio, use + // the intmax_t type instead. + intmax_t querySetSize = (intmax_t) querySet.n_cols; + for (intmax_t i = 0; i < querySetSize; ++i) +#else + for (size_t i = 0; i < querySet.n_cols; ++i) +#endif { - + // Go through every query point. // Hash every query into every hash table and eventually into the // 'secondHashTable' to obtain the neighbor candidates. arma::uvec refIndices; @@ -893,6 +900,15 @@ Search(const size_t k, #pragma omp parallel for \ shared(avgIndicesReturned, resultingNeighbors, distances) \ schedule(dynamic) +#ifdef _WIN32 + // Tiny workaround: Visual Studio only implements OpenMP 2.0, which doesn't + // support unsigned loop variables. If we're building for Visual Studio, use + // the intmax_t type instead. + intmax_t referenceSetSize = (intmax_t) referenceSet->n_cols; + for (intmax_t i = 0; i < referenceSetSize; ++i) +#else + for (size_t i = 0; i < referenceSet->n_cols; ++i) +#endif // Go through every query point. Use long int because some compilers complain // for openMP unsigned index variables. for (size_t i = 0; i < referenceSet->n_cols; i++) From 2fee61e28ceb1f1cd07396e6683158debe5c8bff Mon Sep 17 00:00:00 2001 From: Yannis Mentekidis Date: Fri, 8 Jul 2016 14:23:24 +0100 Subject: [PATCH 23/23] Transforms omp loop to reduction --- CMakeLists.txt | 3 + src/mlpack/methods/lsh/lsh_search_impl.hpp | 72 ++++++++++++---------- 2 files changed, 42 insertions(+), 33 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 02e98030af4..2ca0ace8808 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -238,10 +238,13 @@ add_definitions(-DBOOST_TEST_DYN_LINK) # If OpenMP is found, define HAS_OPENMP to be 1. Otherwise define it to be 0. find_package(OpenMP) if (OPENMP_FOUND) + add_definitions(-DHAS_OPENMP) + set(HAS_OPENMP 1) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") else () add_definitions(-DHAS_OPENMP) + set(HAS_OPENMP 0) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") endif () diff --git a/src/mlpack/methods/lsh/lsh_search_impl.hpp b/src/mlpack/methods/lsh/lsh_search_impl.hpp index 066d5c01c57..77d4794123e 100644 --- a/src/mlpack/methods/lsh/lsh_search_impl.hpp +++ b/src/mlpack/methods/lsh/lsh_search_impl.hpp @@ -373,8 +373,8 @@ bool LSHSearch::PerturbationShift(std::vector& A) const for (size_t i = 0; i < A.size(); ++i) if (A[i] == 1) // marked true maxPos=i; - - if ( maxPos + 1 < A.size()) // otherwise, this is an invalid vector + + if ( maxPos + 1 < A.size()) // otherwise, this is an invalid vector { A[maxPos] = 0; A[maxPos + 1] = 1; @@ -445,8 +445,8 @@ void LSHSearch::GetAdditionalProbingBins( // Each column of additionalProbingBins is the code of a bin. additionalProbingBins.set_size(numProj, T); - - // Copy the query's code, then in the end we will add/subtract according + + // Copy the query's code, then in the end we will add/subtract according // to perturbations we calculated. for (size_t c = 0; c < T; ++c) additionalProbingBins.col(c) = queryCode; @@ -497,7 +497,7 @@ void LSHSearch::GetAdditionalProbingBins( minloc = s; } } - + // Add or subtract 1 to dimension corresponding to minimum score. additionalProbingBins(positions[minloc], 0) += actions[minloc]; if (T == 1) @@ -512,7 +512,7 @@ void LSHSearch::GetAdditionalProbingBins( // smallest and the second smallest, it's obvious that score(Ae) > // score(As). Therefore the second perturbation vector is ALWAYS the vector // containing only the second-lowest scoring perturbation. - + double minscore2 = scores[0]; size_t minloc2 = 0; for (size_t s = 0; s < (2 * numProj); ++s) // here we can't start from 1 @@ -590,13 +590,13 @@ void LSHSearch::GetAdditionalProbingBins( { perturbationSets.push_back(As); // add shifted set to sets minHeap.push( - std::make_pair(PerturbationScore(As, scores), + std::make_pair(PerturbationScore(As, scores), perturbationSets.size() - 1)); } // Expand operation on Ai (add max+1 to set). std::vector Ae = Ai; - if (PerturbationExpand(Ae) && PerturbationValid(Ae)) + if (PerturbationExpand(Ae) && PerturbationValid(Ae)) // Don't add invalid sets. { perturbationSets.push_back(Ae); // add expanded set to sets @@ -610,7 +610,7 @@ void LSHSearch::GetAdditionalProbingBins( // Found valid perturbation set Ai. Construct perturbation vector from set. for (size_t pos = 0; pos < Ai.size(); ++pos) // If Ai[pos] is marked, add action to probing vector. - additionalProbingBins(positions(pos), pvec) + additionalProbingBins(positions(pos), pvec) += Ai[pos] ? actions(pos) : 0; } } @@ -808,13 +808,13 @@ void LSHSearch::Search(const arma::mat& querySet, { Teffective = (1 << numProj) - 1; Log::Warn << "Requested " << T << " additional bins are more than " - << "theoretical maximum. Using " << Teffective << " instead." + << "theoretical maximum. Using " << Teffective << " instead." << std::endl; } // If the user set multiprobe, log it if (Teffective > 0) - Log::Info << "Running multiprobe LSH with " << Teffective + Log::Info << "Running multiprobe LSH with " << Teffective <<" additional probing bins per table per query." << std::endl; size_t avgIndicesReturned = 0; @@ -822,16 +822,20 @@ void LSHSearch::Search(const arma::mat& querySet, Timer::Start("computing_neighbors"); // Parallelization to process more than one query at a time. - #pragma omp parallel for \ - shared(avgIndicesReturned, resultingNeighbors, distances) \ - schedule(dynamic) #ifdef _WIN32 // Tiny workaround: Visual Studio only implements OpenMP 2.0, which doesn't // support unsigned loop variables. If we're building for Visual Studio, use // the intmax_t type instead. - intmax_t querySetSize = (intmax_t) querySet.n_cols; - for (intmax_t i = 0; i < querySetSize; ++i) + #pragma omp parallel for \ + shared(resultingNeighbors, distances) \ + schedule(dynamic)\ + reduction(+:avgIndicesReturned) + for (intmax_t i = 0; i < (intmax_t) querySet.n_cols; ++i) #else + #pragma omp parallel for \ + shared(resultingNeighbors, distances) \ + schedule(dynamic)\ + reduction(+:avgIndicesReturned) for (size_t i = 0; i < querySet.n_cols; ++i) #endif { @@ -839,14 +843,14 @@ void LSHSearch::Search(const arma::mat& querySet, // Hash every query into every hash table and eventually into the // 'secondHashTable' to obtain the neighbor candidates. arma::uvec refIndices; - ReturnIndicesFromTable(querySet.col(i), refIndices, numTablesToSearch, + ReturnIndicesFromTable(querySet.col(i), refIndices, numTablesToSearch, Teffective); // An informative book-keeping for the number of neighbor candidates // returned on average. // Make atomic to avoid race conditions when multiple threads are running - #pragma omp atomic - avgIndicesReturned += refIndices.n_elem; + // #pragma omp atomic + avgIndicesReturned = avgIndicesReturned + refIndices.n_elem; // Sequentially go through all the candidates and save the best 'k' // candidates. @@ -883,7 +887,7 @@ Search(const size_t k, { Teffective = (1 << numProj) - 1; Log::Warn << "Requested " << T << " additional bins are more than " - << "theoretical maximum. Using " << Teffective << " instead." + << "theoretical maximum. Using " << Teffective << " instead." << std::endl; } @@ -891,28 +895,30 @@ Search(const size_t k, if (T > 0) Log::Info << "Running multiprobe LSH with " << Teffective << " additional probing bins per table per query."<< std::endl; - + size_t avgIndicesReturned = 0; Timer::Start("computing_neighbors"); // Parallelization to process more than one query at a time. - #pragma omp parallel for \ - shared(avgIndicesReturned, resultingNeighbors, distances) \ - schedule(dynamic) #ifdef _WIN32 // Tiny workaround: Visual Studio only implements OpenMP 2.0, which doesn't // support unsigned loop variables. If we're building for Visual Studio, use // the intmax_t type instead. - intmax_t referenceSetSize = (intmax_t) referenceSet->n_cols; - for (intmax_t i = 0; i < referenceSetSize; ++i) + #pragma omp parallel for \ + shared(resultingNeighbors, distances) \ + schedule(dynamic)\ + reduction(+:avgIndicesReturned) + for (intmax_t i = 0; i < (intmax_t) referenceSet->n_cols; ++i) #else + #pragma omp parallel for \ + shared(resultingNeighbors, distances) \ + schedule(dynamic)\ + reduction(+:avgIndicesReturned) for (size_t i = 0; i < referenceSet->n_cols; ++i) #endif - // Go through every query point. Use long int because some compilers complain - // for openMP unsigned index variables. - for (size_t i = 0; i < referenceSet->n_cols; i++) { + // Go through every query point. // Hash every query into every hash table and eventually into the // 'secondHashTable' to obtain the neighbor candidates. arma::uvec refIndices; @@ -920,10 +926,10 @@ Search(const size_t k, Teffective); // An informative book-keeping for the number of neighbor candidates - // returned on average. Make atomic to avoid race conditions when multiple - // threads are running. - #pragma omp atomic - avgIndicesReturned += refIndices.n_elem; + // returned on average. + // Make atomic to avoid race conditions when multiple threads are running. + // #pragma omp atomic + avgIndicesReturned = avgIndicesReturned + refIndices.n_elem; // Sequentially go through all the candidates and save the best 'k' // candidates.