Skip to content

Commit

Permalink
Fix some light OpenMP loops.
Browse files Browse the repository at this point in the history
  • Loading branch information
guolinke committed Apr 7, 2017
1 parent 7586d9a commit ddcbe71
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 26 deletions.
2 changes: 1 addition & 1 deletion include/LightGBM/tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class Tree {
* \param rate The factor of shrinkage
*/
inline void Shrinkage(double rate) {
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_leaves_ >= 1024)
for (int i = 0; i < num_leaves_; ++i) {
leaf_value_[i] *= rate;
if (leaf_value_[i] > kMaxTreeOutput) { leaf_value_[i] = kMaxTreeOutput; }
Expand Down
2 changes: 1 addition & 1 deletion include/LightGBM/utils/array_args.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class ArrayArgs {
if (array.empty()) {
return 0;
}
if (array.size() > 100) {
if (array.size() > 1024) {
return ArgMaxMT(array);
} else {
size_t arg_max = 0;
Expand Down
22 changes: 11 additions & 11 deletions src/treelearner/data_partition.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ class DataPartition {
temp_left_indices_.resize(num_data_);
temp_right_indices_.resize(num_data_);
used_data_indices_ = nullptr;
#pragma omp parallel
#pragma omp master
#pragma omp parallel
#pragma omp master
{
num_threads_ = omp_get_num_threads();
}
Expand Down Expand Up @@ -60,7 +60,7 @@ class DataPartition {
if (used_data_indices_ == nullptr) {
// if using all data
leaf_count_[0] = num_data_;
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (data_size_t i = 0; i < num_data_; ++i) {
indices_[i] = i;
}
Expand Down Expand Up @@ -92,7 +92,7 @@ class DataPartition {
* \param right_leaf index of right leaf
*/
void Split(int leaf, const Dataset* dataset, int feature, uint32_t threshold, int right_leaf) {
const data_size_t min_inner_size = 1000;
const data_size_t min_inner_size = 512;
// get leaf boundary
const data_size_t begin = leaf_begin_[leaf];
const data_size_t cnt = leaf_count_[leaf];
Expand All @@ -101,7 +101,7 @@ class DataPartition {
if (inner_size < min_inner_size) { inner_size = min_inner_size; }
// split data multi-threading
OMP_INIT_EX();
#pragma omp parallel for schedule(static, 1)
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads_; ++i) {
OMP_LOOP_EX_BEGIN();
left_cnts_buf_[i] = 0;
Expand All @@ -112,7 +112,7 @@ class DataPartition {
if (cur_start + cur_cnt > cnt) { cur_cnt = cnt - cur_start; }
// split data inner, reduce the times of function called
data_size_t cur_left_count = dataset->Split(feature, threshold, indices_.data() + begin + cur_start, cur_cnt,
temp_left_indices_.data() + cur_start, temp_right_indices_.data() + cur_start);
temp_left_indices_.data() + cur_start, temp_right_indices_.data() + cur_start);
offsets_buf_[i] = cur_start;
left_cnts_buf_[i] = cur_left_count;
right_cnts_buf_[i] = cur_cnt - cur_left_count;
Expand All @@ -128,15 +128,15 @@ class DataPartition {
}
left_cnt = left_write_pos_buf_[num_threads_ - 1] + left_cnts_buf_[num_threads_ - 1];
// copy back indices of right leaf to indices_
#pragma omp parallel for schedule(static, 1)
#pragma omp parallel for schedule(static, 1)
for (int i = 0; i < num_threads_; ++i) {
if (left_cnts_buf_[i] > 0) {
std::memcpy(indices_.data() + begin + left_write_pos_buf_[i],
temp_left_indices_.data() + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t));
std::memcpy(indices_.data() + begin + left_write_pos_buf_[i],
temp_left_indices_.data() + offsets_buf_[i], left_cnts_buf_[i] * sizeof(data_size_t));
}
if (right_cnts_buf_[i] > 0) {
std::memcpy(indices_.data() + begin + left_cnt + right_write_pos_buf_[i],
temp_right_indices_.data() + offsets_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t));
std::memcpy(indices_.data() + begin + left_cnt + right_write_pos_buf_[i],
temp_right_indices_.data() + offsets_buf_[i], right_cnts_buf_[i] * sizeof(data_size_t));
}
}
// update leaf boundary
Expand Down
12 changes: 7 additions & 5 deletions src/treelearner/feature_histogram.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,9 +359,10 @@ class HistogramPool {

void DynamicChangeSize(const Dataset* train_data, const TreeConfig* tree_config, int cache_size, int total_size) {
if (feature_metas_.empty()) {
feature_metas_.resize(train_data->num_features());
#pragma omp parallel for schedule(static)
for (int i = 0; i < train_data->num_features(); ++i) {
int num_feature = train_data->num_features();
feature_metas_.resize(num_feature);
#pragma omp parallel for schedule(static, 512) if(num_feature >= 1024)
for (int i = 0; i < num_feature; ++i) {
feature_metas_[i].num_bin = train_data->FeatureNumBin(i);
if (train_data->FeatureBinMapper(i)->GetDefaultBin() == 0) {
feature_metas_[i].bias = 1;
Expand Down Expand Up @@ -400,8 +401,9 @@ class HistogramPool {
}

void ResetConfig(const TreeConfig* tree_config) {
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(feature_metas_.size()); ++i) {
int size = static_cast<int>(feature_metas_.size());
#pragma omp parallel for schedule(static, 512) if(size >= 1024)
for (int i = 0; i < size; ++i) {
feature_metas_[i].tree_config = tree_config;
}
}
Expand Down
19 changes: 11 additions & 8 deletions src/treelearner/serial_tree_learner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,15 @@ void SerialTreeLearner::BeforeTrain() {
std::memset(is_feature_used_.data(), 0, sizeof(int8_t) * num_features_);
// Get used feature at current tree
auto used_feature_indices = random_.Sample(train_data_->num_total_features(), used_feature_cnt);
#pragma omp parallel for schedule(static)
for (int i = 0; i < static_cast<int>(used_feature_indices.size()); ++i) {
int omp_loop_size = static_cast<int>(used_feature_indices.size());
#pragma omp parallel for schedule(static, 512) if (omp_loop_size >= 1024)
for (int i = 0; i < omp_loop_size; ++i) {
int inner_feature_index = train_data_->InnerFeatureIndex(used_feature_indices[i]);
if (inner_feature_index < 0) { continue; }
is_feature_used_[inner_feature_index] = 1;
}
} else {
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if (num_features_ >= 1024)
for (int i = 0; i < num_features_; ++i) {
is_feature_used_[i] = 1;
}
Expand Down Expand Up @@ -314,7 +315,8 @@ void SerialTreeLearner::BeforeTrain() {
const data_size_t* indices = data_partition_->indices();
data_size_t begin = data_partition_->leaf_begin(0);
data_size_t end = begin + data_partition_->leaf_count(0);
#pragma omp parallel for schedule(static)
data_size_t loop_size = end - begin;
#pragma omp parallel for schedule(static, 512) if(loop_size >= 1024)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 1;
}
Expand All @@ -327,7 +329,7 @@ void SerialTreeLearner::BeforeTrain() {
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if(loop_size >= 1024)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 0;
}
Expand Down Expand Up @@ -388,12 +390,13 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
char mark = 1;
data_size_t begin = data_partition_->leaf_begin(left_leaf);
data_size_t end = begin + left_cnt;
data_size_t loop_size = end - begin;
if (left_cnt > right_cnt) {
begin = data_partition_->leaf_begin(right_leaf);
end = begin + right_cnt;
mark = 0;
}
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if(loop_size >= 1024)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 1;
}
Expand All @@ -406,7 +409,7 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
OMP_LOOP_EX_END();
}
OMP_THROW_EX();
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static, 512) if(loop_size >= 1024)
for (data_size_t i = begin; i < end; ++i) {
is_data_in_leaf_[indices[i]] = 0;
}
Expand Down Expand Up @@ -447,7 +450,7 @@ void SerialTreeLearner::ConstructHistograms(const std::vector<int8_t>& is_featur

void SerialTreeLearner::FindBestThresholds() {
std::vector<int8_t> is_feature_used(num_features_, 0);
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static,1024) if (num_features_ >= 2048)
for (int feature_index = 0; feature_index < num_features_; ++feature_index) {
if (!is_feature_used_[feature_index]) continue;
if (parent_leaf_histogram_array_ != nullptr
Expand Down

0 comments on commit ddcbe71

Please sign in to comment.