From a36eb7e76f7ff6e80358aabfd2b6f1b2c04ad41d Mon Sep 17 00:00:00 2001
From: Guolin Ke
Date: Fri, 1 Nov 2019 22:33:19 +0800
Subject: [PATCH] remove many vector.at()

---
 include/LightGBM/utils/common.h              | 28 +++++++-----
 src/io/dataset.cpp                           | 45 ++++++++++---------
 src/io/dataset_loader.cpp                    |  7 +--
 src/metric/multiclass_metric.hpp             |  8 ++--
 .../cost_effective_gradient_boosting.hpp     |  5 ++-
 5 files changed, 52 insertions(+), 41 deletions(-)

diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h
index cc5f2260ac0..1d6473a53a5 100644
--- a/include/LightGBM/utils/common.h
+++ b/include/LightGBM/utils/common.h
@@ -632,8 +632,8 @@ inline static void Softmax(const double* input, double* output, int len) {
 template<typename T>
 std::vector<const T*> ConstPtrInVectorWrapper(const std::vector<std::unique_ptr<T>>& input) {
   std::vector<const T*> ret;
-  for (size_t i = 0; i < input.size(); ++i) {
-    ret.push_back(input.at(i).get());
+  for (auto t = input.begin(); t !=input.end(); ++t) {
+    ret.push_back(t->get());
   }
   return ret;
 }
@@ -641,8 +641,10 @@ std::vector<const T*> ConstPtrInVectorWrapper(const std::vector<std::unique_ptr
 template<typename T1, typename T2>
 inline static void SortForPair(std::vector<T1>* keys, std::vector<T2>* values, size_t start, bool is_reverse = false) {
   std::vector<std::pair<T1, T2>> arr;
+  auto& ref_key = *keys;
+  auto& ref_value = *values;
   for (size_t i = start; i < keys->size(); ++i) {
-    arr.emplace_back(keys->at(i), values->at(i));
+    arr.emplace_back(ref_key[i], ref_value[i]);
   }
   if (!is_reverse) {
     std::stable_sort(arr.begin(), arr.end(), [](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
@@ -654,16 +656,17 @@ inline static void SortForPair(std::vector<T1>* keys, std::vector<T2>* values, s
     });
   }
   for (size_t i = start; i < arr.size(); ++i) {
-    keys->at(i) = arr[i].first;
-    values->at(i) = arr[i].second;
+    ref_key[i] = arr[i].first;
+    ref_value[i] = arr[i].second;
   }
 }
 
 template<typename T>
 inline static std::vector<T*> Vector2Ptr(std::vector<std::vector<T>>* data) {
   std::vector<T*> ptr(data->size());
+  auto& ref_data = *data;
   for (size_t i = 0; i < data->size(); ++i) {
-    ptr[i] = data->at(i).data();
+    ptr[i] = ref_data[i].data();
   }
   return ptr;
 }
@@ -841,12 +844,13 @@ inline static std::vector<uint32_t> EmptyBitset(int n) {
 
 template<typename T>
 inline static void InsertBitset(std::vector<uint32_t>* vec, const T val) {
-  int i1 = val / 32;
-  int i2 = val % 32;
-  if (static_cast<int>(vec->size()) < i1 + 1) {
-    vec->resize(i1 + 1, 0);
-  }
-  vec->at(i1) |= (1 << i2);
+  auto& ref_v = *vec;
+  int i1 = val / 32;
+  int i2 = val % 32;
+  if (static_cast<int>(vec->size()) < i1 + 1) {
+    vec->resize(i1 + 1, 0);
+  }
+  ref_v[i1] |= (1 << i2);
 }
 
 template<typename T>
diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp
index b0c2a483c00..0b1a4b87bcd 100644
--- a/src/io/dataset.cpp
+++ b/src/io/dataset.cpp
@@ -61,8 +61,9 @@ int GetConfilctCount(const std::vector<bool>& mark, const int* indices, int num_
   return ret;
 }
 void MarkUsed(std::vector<bool>* mark, const int* indices, int num_indices) {
+  auto& ref_mark = *mark;
   for (int i = 0; i < num_indices; ++i) {
-    mark->at(indices[i]) = true;
+    ref_mark[indices[i]] = true;
   }
 }
 
@@ -238,8 +239,9 @@ void Dataset::Construct(
   sparse_threshold_ = io_config.sparse_threshold;
   // get num_features
   std::vector<int> used_features;
+  auto& ref_bin_mappers = *bin_mappers;
   for (int i = 0; i < static_cast<int>(bin_mappers->size()); ++i) {
-    if (bin_mappers->at(i) != nullptr && !bin_mappers->at(i)->is_trivial()) {
+    if (ref_bin_mappers[i] != nullptr && !ref_bin_mappers[i]->is_trivial()) {
       used_features.emplace_back(i);
     }
   }
@@ -277,7 +279,7 @@ void Dataset::Construct(
       real_feature_idx_[cur_fidx] = real_fidx;
       feature2group_[cur_fidx] = i;
       feature2subfeature_[cur_fidx] = j;
-      cur_bin_mappers.emplace_back(bin_mappers->at(real_fidx).release());
+      cur_bin_mappers.emplace_back(ref_bin_mappers[real_fidx].release());
       ++cur_fidx;
     }
     feature_groups_.emplace_back(std::unique_ptr<FeatureGroup>(
@@ -848,6 +850,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
   int num_used_group = static_cast<int>(used_group.size());
   auto ptr_ordered_grad = gradients;
   auto ptr_ordered_hess = hessians;
+  auto& ref_ordered_bins = *ordered_bins;
   if (data_indices != nullptr && num_data < num_data_) {
     if (!is_constant_hessian) {
       #pragma omp parallel for schedule(static)
@@ -874,7 +877,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
         const int num_bin = feature_groups_[group]->num_total_bin_;
         std::memset(reinterpret_cast<void*>(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry));
         // construct histograms for smaller leaf
-        if (ordered_bins->at(group) == nullptr) {
+        if (ref_ordered_bins[group] == nullptr) {
           // if not use ordered bin
           feature_groups_[group]->bin_data_->ConstructHistogram(
             data_indices,
@@ -884,10 +887,10 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
             data_ptr);
         } else {
           // used ordered bin
-          ordered_bins->at(group)->ConstructHistogram(leaf_idx,
-                                                      gradients,
-                                                      hessians,
-                                                      data_ptr);
+          ref_ordered_bins[group]->ConstructHistogram(leaf_idx,
+                                                      gradients,
+                                                      hessians,
+                                                      data_ptr);
         }
         OMP_LOOP_EX_END();
       }
@@ -903,7 +906,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
         const int num_bin = feature_groups_[group]->num_total_bin_;
         std::memset(reinterpret_cast<void*>(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry));
         // construct histograms for smaller leaf
-        if (ordered_bins->at(group) == nullptr) {
+        if (ref_ordered_bins[group] == nullptr) {
           // if not use ordered bin
           feature_groups_[group]->bin_data_->ConstructHistogram(
             data_indices,
@@ -912,9 +915,9 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
             data_ptr);
         } else {
           // used ordered bin
-          ordered_bins->at(group)->ConstructHistogram(leaf_idx,
-                                                      gradients,
-                                                      data_ptr);
+          ref_ordered_bins[group]->ConstructHistogram(leaf_idx,
+                                                      gradients,
+                                                      data_ptr);
         }
         // fixed hessian.
         for (int i = 0; i < num_bin; ++i) {
@@ -936,7 +939,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
         const int num_bin = feature_groups_[group]->num_total_bin_;
         std::memset(reinterpret_cast<void*>(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry));
         // construct histograms for smaller leaf
-        if (ordered_bins->at(group) == nullptr) {
+        if (ref_ordered_bins[group] == nullptr) {
           // if not use ordered bin
           feature_groups_[group]->bin_data_->ConstructHistogram(
             num_data,
@@ -945,10 +948,10 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
             data_ptr);
         } else {
           // used ordered bin
-          ordered_bins->at(group)->ConstructHistogram(leaf_idx,
-                                                      gradients,
-                                                      hessians,
-                                                      data_ptr);
+          ref_ordered_bins[group]->ConstructHistogram(leaf_idx,
+                                                      gradients,
+                                                      hessians,
+                                                      data_ptr);
         }
         OMP_LOOP_EX_END();
       }
@@ -964,7 +967,7 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
         const int num_bin = feature_groups_[group]->num_total_bin_;
         std::memset(reinterpret_cast<void*>(data_ptr + 1), 0, (num_bin - 1) * sizeof(HistogramBinEntry));
         // construct histograms for smaller leaf
-        if (ordered_bins->at(group) == nullptr) {
+        if (ref_ordered_bins[group] == nullptr) {
           // if not use ordered bin
           feature_groups_[group]->bin_data_->ConstructHistogram(
             num_data,
@@ -972,9 +975,9 @@ void Dataset::ConstructHistograms(const std::vector<int8_t>& is_feature_used,
             data_ptr);
         } else {
           // used ordered bin
-          ordered_bins->at(group)->ConstructHistogram(leaf_idx,
-                                                      gradients,
-                                                      data_ptr);
+          ref_ordered_bins[group]->ConstructHistogram(leaf_idx,
+                                                      gradients,
+                                                      data_ptr);
         }
         // fixed hessian.
         for (int i = 0; i < num_bin; ++i) {
diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp
index 7c7ed051e9f..f196bdb8c0e 100644
--- a/src/io/dataset_loader.cpp
+++ b/src/io/dataset_loader.cpp
@@ -1048,6 +1048,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
 void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_data, const Parser* parser, Dataset* dataset) {
   std::vector<std::pair<int, double>> oneline_features;
   double tmp_label = 0.0f;
+  auto& ref_text_data = *text_data;
   if (predict_fun_ == nullptr) {
     OMP_INIT_EX();
     // if doesn't need to prediction with initial model
@@ -1057,11 +1058,11 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
       const int tid = omp_get_thread_num();
       oneline_features.clear();
       // parser
-      parser->ParseOneLine(text_data->at(i).c_str(), &oneline_features, &tmp_label);
+      parser->ParseOneLine(ref_text_data[i].c_str(), &oneline_features, &tmp_label);
       // set label
       dataset->metadata_.SetLabelAt(i, static_cast<label_t>(tmp_label));
       // free processed line:
-      text_data->at(i).clear();
+      ref_text_data[i].clear();
       // shrink_to_fit will be very slow in linux, and seems not free memory, disable for now
       // text_reader_->Lines()[i].shrink_to_fit();
       // push data
@@ -1094,7 +1095,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
      const int tid = omp_get_thread_num();
       oneline_features.clear();
       // parser
-      parser->ParseOneLine(text_data->at(i).c_str(), &oneline_features, &tmp_label);
+      parser->ParseOneLine(ref_text_data[i].c_str(), &oneline_features, &tmp_label);
       // set initial score
       std::vector<double> oneline_init_score(num_class_);
       predict_fun_(oneline_features, oneline_init_score.data());
diff --git a/src/metric/multiclass_metric.hpp b/src/metric/multiclass_metric.hpp
index 6c8b3e2ee86..8cf92f67ac1 100644
--- a/src/metric/multiclass_metric.hpp
+++ b/src/metric/multiclass_metric.hpp
@@ -140,9 +140,10 @@ class MultiErrorMetric: public MulticlassMetric<MultiErrorMetric> {
 
   inline static double LossOnPoint(label_t label, std::vector<double>* score, const Config& config) {
     size_t k = static_cast<size_t>(label);
+    auto& ref_score = *score;
     int num_larger = 0;
     for (size_t i = 0; i < score->size(); ++i) {
-      if (score->at(i) >= score->at(k)) ++num_larger;
+      if (ref_score[i] >= ref_score[k]) ++num_larger;
       if (num_larger > config.multi_error_top_k) return 1.0f;
     }
     return 0.0f;
@@ -164,8 +165,9 @@ class MultiSoftmaxLoglossMetric: public MulticlassMetric<MultiSoftmaxLoglossMetric> {
 
   inline static double LossOnPoint(label_t label, std::vector<double>* score, const Config&) {
     size_t k = static_cast<size_t>(label);
-    if (score->at(k) > kEpsilon) {
-      return static_cast<double>(-std::log(score->at(k)));
+    auto& ref_score = *score;
+    if (ref_score[k] > kEpsilon) {
+      return static_cast<double>(-std::log(ref_score[k]));
     } else {
       return -std::log(kEpsilon);
     }
diff --git a/src/treelearner/cost_effective_gradient_boosting.hpp b/src/treelearner/cost_effective_gradient_boosting.hpp
index 82c6b9abc12..aa97ca49a45 100644
--- a/src/treelearner/cost_effective_gradient_boosting.hpp
+++ b/src/treelearner/cost_effective_gradient_boosting.hpp
@@ -63,14 +63,15 @@ class CostEfficientGradientBoosting {
     auto config = tree_learner_->config_;
     auto train_data = tree_learner_->train_data_;
     const int inner_feature_index = train_data->InnerFeatureIndex(best_split_info->feature);
+    auto& ref_best_split_per_leaf = *best_split_per_leaf;
     if (!config->cegb_penalty_feature_coupled.empty() && !is_feature_used_in_split_[inner_feature_index]) {
       is_feature_used_in_split_[inner_feature_index] = true;
       for (int i = 0; i < tree->num_leaves(); ++i) {
         if (i == best_leaf) continue;
         auto split = &splits_per_leaf_[static_cast<size_t>(i) * train_data->num_features() + inner_feature_index];
         split->gain += config->cegb_tradeoff * config->cegb_penalty_feature_coupled[best_split_info->feature];
-        if (*split > best_split_per_leaf->at(i))
-          best_split_per_leaf->at(i) = *split;
+        if (*split > ref_best_split_per_leaf[i])
+          ref_best_split_per_leaf[i] = *split;
       }
     }
     if (!config->cegb_penalty_feature_lazy.empty()) {
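
The whole patch applies one pattern: where a std::vector is reached through a pointer, dereference the pointer once into a local reference and index it with operator[] instead of the bounds-checked at(). Below is a minimal standalone sketch of that pattern in the shape of SortForPair; the sketch (the function name, the single-comparator sort, and the main() driver) is illustrative only and is not code from this patch.

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// Sketch only: mirrors the post-patch SortForPair access pattern.
// *keys and *values are bound to references once, so the hot loops use
// ref_key[i] / ref_value[i] rather than keys->at(i) / values->at(i).
template <typename T1, typename T2>
void SortForPairSketch(std::vector<T1>* keys, std::vector<T2>* values,
                       size_t start, bool is_reverse = false) {
  std::vector<std::pair<T1, T2>> arr;
  auto& ref_key = *keys;
  auto& ref_value = *values;
  for (size_t i = start; i < ref_key.size(); ++i) {
    arr.emplace_back(ref_key[i], ref_value[i]);
  }
  std::stable_sort(arr.begin(), arr.end(),
                   [is_reverse](const std::pair<T1, T2>& a, const std::pair<T1, T2>& b) {
                     return is_reverse ? a.first > b.first : a.first < b.first;
                   });
  for (size_t i = start; i < arr.size(); ++i) {
    ref_key[i] = arr[i].first;    // write back through the cached references
    ref_value[i] = arr[i].second;
  }
}

int main() {
  std::vector<int> keys = {3, 1, 2};
  std::vector<double> values = {0.3, 0.1, 0.2};
  SortForPairSketch(&keys, &values, 0);
  for (size_t i = 0; i < keys.size(); ++i) {
    std::printf("%d -> %.1f\n", keys[i], values[i]);
  }
  return 0;
}

The references bind to the vector objects, not to their storage, so they stay valid across later resize or push_back calls; what is given up is only the range check, so the callers must already guarantee that the indices are valid.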
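The InsertBitset hunk keeps the existing layout of 32-bit words (word index val / 32, bit index val % 32) and only swaps vec->at(i1) for indexing through a cached reference. A self-contained sketch of that logic follows; the FindInBitsetSketch helper and the main() driver are invented for the demonstration and are not part of LightGBM.

#include <cstdint>
#include <cstdio>
#include <initializer_list>
#include <vector>

// Same resize-then-set logic as InsertBitset. ref_v refers to the vector
// object itself (not its buffer), so the resize below does not invalidate it.
template <typename T>
void InsertBitsetSketch(std::vector<uint32_t>* vec, const T val) {
  auto& ref_v = *vec;
  int i1 = static_cast<int>(val / 32);  // which 32-bit word
  int i2 = static_cast<int>(val % 32);  // which bit inside that word
  if (static_cast<int>(vec->size()) < i1 + 1) {
    vec->resize(i1 + 1, 0);             // grow so the target word exists
  }
  ref_v[i1] |= (1U << i2);
}

// Invented counterpart for the demo: test whether a value was inserted.
template <typename T>
bool FindInBitsetSketch(const std::vector<uint32_t>& vec, const T val) {
  int i1 = static_cast<int>(val / 32);
  int i2 = static_cast<int>(val % 32);
  if (i1 >= static_cast<int>(vec.size())) return false;
  return (vec[i1] >> i2) & 1U;
}

int main() {
  std::vector<uint32_t> bits;
  for (int v : {0, 5, 31, 32, 100}) InsertBitsetSketch(&bits, v);
  std::printf("100 set: %d, 99 set: %d, words used: %zu\n",
              static_cast<int>(FindInBitsetSketch(bits, 100)),
              static_cast<int>(FindInBitsetSketch(bits, 99)),
              bits.size());
  return 0;
}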