diff --git a/dmlc-core b/dmlc-core index f35f14f30835..78b78be34ac2 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit f35f14f30835af238257b979cc1fac3e41ff3291 +Subproject commit 78b78be34ac27d30f2193f3d51848c62887669c4 diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f9dc84cc1fab..0f535b250319 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -282,11 +282,7 @@ XGB_DLL int XGDMatrixCreateFromCSCEx(const size_t* col_ptr, std::unique_ptr source(new data::SimpleCSRSource()); API_BEGIN(); - int nthread; - #pragma omp parallel - { - nthread = omp_get_num_threads(); - } + const int nthread = omp_get_max_threads(); data::SimpleCSRSource& mat = *source; common::ParallelGroupBuilder builder(&mat.row_ptr_, &mat.row_data_); builder.InitBudget(0, nthread); diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index ae78e3864b15..c7c0b3d1b855 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -83,13 +83,8 @@ void SimpleDMatrix::MakeOneBatch(const std::vector& enabled, // clear rowset buffered_rowset_.clear(); // bit map - int nthread; + const int nthread = omp_get_max_threads(); std::vector bmap; - #pragma omp parallel - { - nthread = omp_get_num_threads(); - } - pcol->Clear(); common::ParallelGroupBuilder builder(&pcol->offset, &pcol->data); @@ -204,15 +199,7 @@ void SimpleDMatrix::MakeColPage(const RowBatch& batch, size_t buffer_begin, const std::vector& enabled, SparsePage* pcol) { - int nthread; - #pragma omp parallel - { - nthread = omp_get_num_threads(); - int max_nthread = std::max(omp_get_num_procs() / 2 - 2, 1); - if (nthread > max_nthread) { - nthread = max_nthread; - } - } + const int nthread = std::min(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 2, 1)); pcol->Clear(); common::ParallelGroupBuilder builder(&pcol->offset, &pcol->data); diff --git a/src/data/sparse_page_dmatrix.cc b/src/data/sparse_page_dmatrix.cc index 74a85e9caa10..61fde3e50e10 100644 --- a/src/data/sparse_page_dmatrix.cc +++ b/src/data/sparse_page_dmatrix.cc @@ -169,12 +169,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector& enabled, SparsePage *pcol) { pcol->Clear(); pcol->min_index = buffered_rowset_[begin]; - int nthread; - #pragma omp parallel - { - nthread = omp_get_num_threads(); - nthread = std::max(nthread, std::max(omp_get_num_procs() / 2 - 1, 1)); - } + const int nthread = std::max(omp_get_max_threads(), std::max(omp_get_num_procs() / 2 - 1, 1)); common::ParallelGroupBuilder builder(&pcol->offset, &pcol->data); builder.InitBudget(info.num_col, nthread); diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index 3a8767249459..18d1cea34af3 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -301,11 +301,7 @@ class GBTree : public GradientBooster { void PredictLeaf(DMatrix* p_fmat, std::vector* out_preds, unsigned ntree_limit) override { - int nthread; - #pragma omp parallel - { - nthread = omp_get_num_threads(); - } + const int nthread = omp_get_max_threads(); InitThreadTemp(nthread); this->PredPath(p_fmat, out_preds, ntree_limit); } @@ -365,11 +361,7 @@ class GBTree : public GradientBooster { unsigned tree_begin, unsigned tree_end) { const MetaInfo& info = p_fmat->info(); - int nthread; - #pragma omp parallel - { - nthread = omp_get_num_threads(); - } + const int nthread = omp_get_max_threads(); CHECK_EQ(num_group, mparam.num_output_group); InitThreadTemp(nthread); std::vector &preds = *out_preds; diff --git a/src/tree/updater_basemaker-inl.h b/src/tree/updater_basemaker-inl.h index 6d043b46795e..f70f63ad09ee 100644 --- a/src/tree/updater_basemaker-inl.h +++ b/src/tree/updater_basemaker-inl.h @@ -118,15 +118,6 @@ class BaseMaker: public TreeUpdater { } return n.cdefault(); } - /*! \brief get number of omp thread in current context */ - inline static int get_nthread() { - int nthread; - #pragma omp parallel - { - nthread = omp_get_num_threads(); - } - return nthread; - } // ------class member helpers--------- /*! \brief initialize temp data structure */ inline void InitData(const std::vector &gpair, @@ -350,7 +341,7 @@ class BaseMaker: public TreeUpdater { std::vector *p_node_stats) { std::vector< std::vector > &thread_temp = *p_thread_temp; const MetaInfo &info = fmat.info(); - thread_temp.resize(this->get_nthread()); + thread_temp.resize(omp_get_max_threads()); p_node_stats->resize(tree.param.num_nodes); #pragma omp parallel { diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index 2d63d9d74d44..5c41267022dd 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -81,7 +81,7 @@ class ColMaker: public TreeUpdater { struct Builder { public: // constructor - explicit Builder(const TrainParam& param) : param(param) {} + explicit Builder(const TrainParam& param) : param(param), nthread(omp_get_max_threads()) {} // update one tree, growing virtual void Update(const std::vector& gpair, DMatrix* p_fmat, @@ -166,10 +166,6 @@ class ColMaker: public TreeUpdater { } { // setup temp space for each thread - #pragma omp parallel - { - this->nthread = omp_get_num_threads(); - } // reserve a small space stemp.clear(); stemp.resize(this->nthread, std::vector()); @@ -277,8 +273,7 @@ class ColMaker: public TreeUpdater { for (size_t j = 0; j < qexpand.size(); ++j) { temp[qexpand[j]].stats.Clear(); } - nthread = omp_get_num_threads(); - bst_uint step = (col.length + nthread - 1) / nthread; + bst_uint step = (col.length + this->nthread - 1) / this->nthread; bst_uint end = std::min(col.length, step * (tid + 1)); for (bst_uint i = tid * step; i < end; ++i) { const bst_uint ridx = col[i].index; @@ -298,7 +293,7 @@ class ColMaker: public TreeUpdater { for (bst_omp_uint j = 0; j < nnode; ++j) { const int nid = qexpand[j]; TStats sum(param), tmp(param), c(param); - for (int tid = 0; tid < nthread; ++tid) { + for (int tid = 0; tid < this->nthread; ++tid) { tmp = stemp[tid][nid].stats; stemp[tid][nid].stats = sum; sum.Add(tmp); @@ -306,7 +301,7 @@ class ColMaker: public TreeUpdater { std::swap(stemp[tid - 1][nid].last_fvalue, stemp[tid][nid].first_fvalue); } } - for (int tid = 0; tid < nthread; ++tid) { + for (int tid = 0; tid < this->nthread; ++tid) { stemp[tid][nid].stats_extra = sum; ThreadEntry &e = stemp[tid][nid]; bst_float fsplit; @@ -341,7 +336,7 @@ class ColMaker: public TreeUpdater { } if (need_backward) { tmp = sum; - ThreadEntry &e = stemp[nthread-1][nid]; + ThreadEntry &e = stemp[this->nthread-1][nid]; c.SetSubstract(snode[nid].stats, tmp); if (c.sum_hess >= param.min_child_weight && tmp.sum_hess >= param.min_child_weight) { @@ -357,8 +352,7 @@ class ColMaker: public TreeUpdater { TStats c(param), cright(param); const int tid = omp_get_thread_num(); std::vector &temp = stemp[tid]; - nthread = static_cast(omp_get_num_threads()); - bst_uint step = (col.length + nthread - 1) / nthread; + bst_uint step = (col.length + this->nthread - 1) / this->nthread; bst_uint end = std::min(col.length, step * (tid + 1)); for (bst_uint i = tid * step; i < end; ++i) { const bst_uint ridx = col[i].index; @@ -599,7 +593,7 @@ class ColMaker: public TreeUpdater { #endif int poption = param.parallel_option; if (poption == 2) { - poption = static_cast(nsize) * 2 < nthread ? 1 : 0; + poption = static_cast(nsize) * 2 < this->nthread ? 1 : 0; } if (poption == 0) { #pragma omp parallel for schedule(dynamic, batch_size) @@ -760,7 +754,7 @@ class ColMaker: public TreeUpdater { // --data fields-- const TrainParam& param; // number of omp thread used during training - int nthread; + const int nthread; // Per feature: shuffle index of each feature index std::vector feat_index; // Instance Data: current node position in the tree of each instance diff --git a/src/tree/updater_histmaker.cc b/src/tree/updater_histmaker.cc index 8ff5b994e221..c8fe5c7a3092 100644 --- a/src/tree/updater_histmaker.cc +++ b/src/tree/updater_histmaker.cc @@ -336,7 +336,7 @@ class CQHistMaker: public HistMaker { auto lazy_get_hist = [&]() #endif { - thread_hist.resize(this->get_nthread()); + thread_hist.resize(omp_get_max_threads()); // start accumulating statistics dmlc::DataIter *iter = p_fmat->ColIterator(fset); iter->BeforeFirst(); @@ -410,7 +410,7 @@ class CQHistMaker: public HistMaker { } { // get smmary - thread_sketch.resize(this->get_nthread()); + thread_sketch.resize(omp_get_max_threads()); // TWOPASS: use the real set + split set in the column iteration. this->SetDefaultPostion(p_fmat, tree); @@ -695,7 +695,7 @@ class GlobalProposalHistMaker: public CQHistMaker { this->wspace.Init(this->param, 1); // to gain speedup in recovery { - this->thread_hist.resize(this->get_nthread()); + this->thread_hist.resize(omp_get_max_threads()); // TWOPASS: use the real set + split set in the column iteration. this->SetDefaultPostion(p_fmat, tree); @@ -756,7 +756,7 @@ class QuantileHistMaker: public HistMaker { const RegTree &tree) override { const MetaInfo &info = p_fmat->info(); // initialize the data structure - int nthread = BaseMaker::get_nthread(); + const int nthread = omp_get_max_threads(); sketchs.resize(this->qexpand.size() * tree.param.num_feature); for (size_t i = 0; i < sketchs.size(); ++i) { sketchs[i].Init(info.num_row, this->param.sketch_eps); diff --git a/src/tree/updater_refresh.cc b/src/tree/updater_refresh.cc index 280cea35c078..fb4e72cafa37 100644 --- a/src/tree/updater_refresh.cc +++ b/src/tree/updater_refresh.cc @@ -34,11 +34,7 @@ class TreeRefresher: public TreeUpdater { std::vector > stemp; std::vector fvec_temp; // setup temp space for each thread - int nthread; - #pragma omp parallel - { - nthread = omp_get_num_threads(); - } + const int nthread = omp_get_max_threads(); fvec_temp.resize(nthread, RegTree::FVec()); stemp.resize(nthread, std::vector()); #pragma omp parallel diff --git a/src/tree/updater_skmaker.cc b/src/tree/updater_skmaker.cc index 4a49b7b3a264..c2320a0ef072 100644 --- a/src/tree/updater_skmaker.cc +++ b/src/tree/updater_skmaker.cc @@ -141,7 +141,7 @@ class SketchMaker: public BaseMaker { for (size_t i = 0; i < sketchs.size(); ++i) { sketchs[i].Init(info.num_row, this->param.sketch_eps); } - thread_sketch.resize(this->get_nthread()); + thread_sketch.resize(omp_get_max_threads()); // number of rows in const size_t nrows = p_fmat->buffered_rowset().size(); // start accumulating statistics