From fecccd38d96f24a3c774586a078b4e8e2749b8bd Mon Sep 17 00:00:00 2001 From: Janzen Brewer Date: Thu, 14 May 2015 07:53:25 -0700 Subject: [PATCH 1/3] Add OpenMP support to density estimation tree code There are two parts to the OpenMP support. First, cmake was instructed to compile with compiler-appropriate OpenMP flags. Second, three OpenMP pragmas were added to dt_utils.cpp to parallelize the cross-validation loop. No non-pragma code changes were necessary. --- CMakeLists.txt | 10 +++++----- src/mlpack/methods/det/dt_utils.cpp | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c47aa1467a..2949dc8e997 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -250,13 +250,13 @@ endif (MSVC) add_definitions(-DBOOST_TEST_DYN_LINK) # We require OpenMP now. -#find_package(OpenMP REQUIRED) -#if (OPENMP_FOUND) -# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +find_package(OpenMP REQUIRED) +if (OPENMP_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") # set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} # ${OpenMP_EXE_LINKER_FLAGS}") -#endif (OPENMP_FOUND) +endif (OPENMP_FOUND) # Create a 'distclean' target in case the user is using an in-source build for # some reason. diff --git a/src/mlpack/methods/det/dt_utils.cpp b/src/mlpack/methods/det/dt_utils.cpp index 273ed9828c0..e1aab34f9b9 100644 --- a/src/mlpack/methods/det/dt_utils.cpp +++ b/src/mlpack/methods/det/dt_utils.cpp @@ -177,6 +177,9 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, regularizationConstants.resize(prunedSequence.size(), 0); // Go through each fold. + #pragma omp parallel for default(none) \ + shared(testSize,cvData,prunedSequence,regularizationConstants,dataset) \ + private(alpha,oldAlpha) for (size_t fold = 0; fold < folds; fold++) { // Break up data into train and test sets. @@ -228,6 +231,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, } // Update the cv regularization constant. + #pragma omp atomic regularizationConstants[i] += 2.0 * cvVal / (double) dataset.n_cols; // Determine the new alpha value and prune accordingly. @@ -245,6 +249,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, } if (prunedSequence.size() > 2) + #pragma omp atomic regularizationConstants[prunedSequence.size() - 2] += 2.0 * cvVal / (double) dataset.n_cols; From 340b34b091fc00976206a54d467536df27a88788 Mon Sep 17 00:00:00 2001 From: Janzen Brewer Date: Fri, 15 May 2015 07:48:54 -0700 Subject: [PATCH 2/3] Optimize small things in density estimation trees While doing other work, I noticed a few things that could use improvement: * An std::vector that is used as an array * Unnecessary dynamic memory allocation I fixed these things and measured a marginal speedup. --- src/mlpack/methods/det/dt_utils.cpp | 46 +++++++++++++---------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/src/mlpack/methods/det/dt_utils.cpp b/src/mlpack/methods/det/dt_utils.cpp index e1aab34f9b9..f46714d4d7a 100644 --- a/src/mlpack/methods/det/dt_utils.cpp +++ b/src/mlpack/methods/det/dt_utils.cpp @@ -104,7 +104,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, const std::string unprunedTreeOutput) { // Initialize the tree. - DTree* dtree = new DTree(dataset); + DTree dtree(dataset); // Prepare to grow the tree... arma::Col oldFromNew(dataset.n_cols); @@ -116,10 +116,10 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, // Growing the tree double oldAlpha = 0.0; - double alpha = dtree->Grow(newDataset, oldFromNew, useVolumeReg, maxLeafSize, + double alpha = dtree.Grow(newDataset, oldFromNew, useVolumeReg, maxLeafSize, minLeafSize); - Log::Info << dtree->SubtreeLeaves() << " leaf nodes in the tree using full " + Log::Info << dtree.SubtreeLeaves() << " leaf nodes in the tree using full " << "dataset; minimum alpha: " << alpha << "." << std::endl; // Compute densities for the training points in the full tree, if we were @@ -132,7 +132,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, for (size_t i = 0; i < dataset.n_cols; ++i) { arma::vec testPoint = dataset.unsafe_col(i); - outfile << dtree->ComputeValue(testPoint) << std::endl; + outfile << dtree.ComputeValue(testPoint) << std::endl; } } else @@ -146,40 +146,37 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, // Sequentially prune and save the alpha values and the values of c_t^2 * r_t. std::vector > prunedSequence; - while (dtree->SubtreeLeaves() > 1) + while (dtree.SubtreeLeaves() > 1) { std::pair treeSeq(oldAlpha, - dtree->SubtreeLeavesLogNegError()); + dtree.SubtreeLeavesLogNegError()); prunedSequence.push_back(treeSeq); oldAlpha = alpha; - alpha = dtree->PruneAndUpdate(oldAlpha, dataset.n_cols, useVolumeReg); + alpha = dtree.PruneAndUpdate(oldAlpha, dataset.n_cols, useVolumeReg); // Some sanity checks. Log::Assert((alpha < std::numeric_limits::max()) || - (dtree->SubtreeLeaves() == 1)); + (dtree.SubtreeLeaves() == 1)); Log::Assert(alpha > oldAlpha); - Log::Assert(dtree->SubtreeLeavesLogNegError() < treeSeq.second); + Log::Assert(dtree.SubtreeLeavesLogNegError() < treeSeq.second); } std::pair treeSeq(oldAlpha, - dtree->SubtreeLeavesLogNegError()); + dtree.SubtreeLeavesLogNegError()); prunedSequence.push_back(treeSeq); Log::Info << prunedSequence.size() << " trees in the sequence; maximum alpha:" << " " << oldAlpha << "." << std::endl; - delete dtree; - arma::mat cvData(dataset); size_t testSize = dataset.n_cols / folds; - std::vector regularizationConstants; - regularizationConstants.resize(prunedSequence.size(), 0); + double regularizationConstants[prunedSequence.size()] = {0}; + Timer::Start("cross_validation"); // Go through each fold. #pragma omp parallel for default(none) \ - shared(testSize,cvData,prunedSequence,regularizationConstants,dataset) \ - private(alpha,oldAlpha) + shared(testSize,cvData,prunedSequence,regularizationConstants,dataset) for (size_t fold = 0; fold < folds; fold++) { // Break up data into train and test sets. @@ -204,7 +201,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, } // Initialize the tree. - DTree* cvDTree = new DTree(train); + DTree cvDTree(train); // Getting ready to grow the tree... arma::Col cvOldFromNew(train.n_cols); @@ -212,8 +209,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, cvOldFromNew[i] = i; // Grow the tree. - oldAlpha = 0.0; - alpha = cvDTree->Grow(train, cvOldFromNew, useVolumeReg, maxLeafSize, + cvDTree.Grow(train, cvOldFromNew, useVolumeReg, maxLeafSize, minLeafSize); // Sequentially prune with all the values of available alphas and adding @@ -227,7 +223,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, for (size_t j = 0; j < test.n_cols; j++) { arma::vec testPoint = test.unsafe_col(j); - cvVal += cvDTree->ComputeValue(testPoint); + cvVal += cvDTree.ComputeValue(testPoint); } // Update the cv regularization constant. @@ -235,9 +231,9 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, regularizationConstants[i] += 2.0 * cvVal / (double) dataset.n_cols; // Determine the new alpha value and prune accordingly. - oldAlpha = 0.5 * (prunedSequence[i + 1].first + + double cvOldAlpha = 0.5 * (prunedSequence[i + 1].first + prunedSequence[i + 2].first); - alpha = cvDTree->PruneAndUpdate(oldAlpha, train.n_cols, useVolumeReg); + cvDTree.PruneAndUpdate(cvOldAlpha, train.n_cols, useVolumeReg); } // Compute test values for this state of the tree. @@ -245,17 +241,15 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, for (size_t i = 0; i < test.n_cols; ++i) { arma::vec testPoint = test.unsafe_col(i); - cvVal += cvDTree->ComputeValue(testPoint); + cvVal += cvDTree.ComputeValue(testPoint); } if (prunedSequence.size() > 2) #pragma omp atomic regularizationConstants[prunedSequence.size() - 2] += 2.0 * cvVal / (double) dataset.n_cols; - - test.reset(); - delete cvDTree; } + Timer::Stop("cross_validation"); double optimalAlpha = -1.0; long double cvBestError = -std::numeric_limits::max(); From a63ddcdd77182244e19d9f11bcb0f6b43e403c32 Mon Sep 17 00:00:00 2001 From: Janzen Brewer Date: Fri, 15 May 2015 22:44:25 +0000 Subject: [PATCH 3/3] Replace unsupported array operation with arma::vec Dynamic length arrays are a g++ extension and their initialization is only supported from g++-4.9. --- src/mlpack/methods/det/dt_utils.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mlpack/methods/det/dt_utils.cpp b/src/mlpack/methods/det/dt_utils.cpp index f46714d4d7a..a976415d72d 100644 --- a/src/mlpack/methods/det/dt_utils.cpp +++ b/src/mlpack/methods/det/dt_utils.cpp @@ -171,7 +171,8 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, arma::mat cvData(dataset); size_t testSize = dataset.n_cols / folds; - double regularizationConstants[prunedSequence.size()] = {0}; + arma::vec regularizationConstants(prunedSequence.size()); + regularizationConstants.fill(0.0); Timer::Start("cross_validation"); // Go through each fold. @@ -215,6 +216,8 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, // Sequentially prune with all the values of available alphas and adding // values for test values. Don't enter this loop if there are less than two // trees in the pruned sequence. + arma::vec cvRegularizationConstants(prunedSequence.size()); + cvRegularizationConstants.fill(0.0); for (size_t i = 0; i < ((prunedSequence.size() < 2) ? 0 : prunedSequence.size() - 2); ++i) { @@ -227,8 +230,7 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, } // Update the cv regularization constant. - #pragma omp atomic - regularizationConstants[i] += 2.0 * cvVal / (double) dataset.n_cols; + cvRegularizationConstants[i] += 2.0 * cvVal / (double) dataset.n_cols; // Determine the new alpha value and prune accordingly. double cvOldAlpha = 0.5 * (prunedSequence[i + 1].first + @@ -245,9 +247,11 @@ DTree* mlpack::det::Trainer(arma::mat& dataset, } if (prunedSequence.size() > 2) - #pragma omp atomic - regularizationConstants[prunedSequence.size() - 2] += 2.0 * cvVal / + cvRegularizationConstants[prunedSequence.size() - 2] += 2.0 * cvVal / (double) dataset.n_cols; + + #pragma omp critical + regularizationConstants += cvRegularizationConstants; } Timer::Stop("cross_validation");