Skip to content

Commit

Permalink
speed up for const hessian (#2857)
Browse files Browse the repository at this point in the history
* speed up for const hessian

* rename template

* fix clang build

* template init

* add comment
  • Loading branch information
guolinke committed Mar 3, 2020
1 parent f7037fd commit bc7d2f0
Show file tree
Hide file tree
Showing 8 changed files with 272 additions and 265 deletions.
27 changes: 17 additions & 10 deletions include/LightGBM/bin.h
Expand Up @@ -30,6 +30,9 @@ enum MissingType {
};

// Value type of a single histogram slot.
using hist_t = double;
// Unsigned 64-bit integer type; required below to be exactly as wide as hist_t.
using hist_cnt_t = uint64_t;
// Compile-time guard: both slot types must occupy the same number of bytes.
static_assert(sizeof(hist_t) == sizeof(hist_cnt_t), "Histogram entry size is not correct");

// Size in bytes of one histogram entry: an entry spans two hist_t slots.
const size_t kHistEntrySize = 2 * sizeof(hist_t);
// NOTE(review): presumably the number of hist_t slots per entry -- confirm at the use sites.
const int kHistOffset = 2;
Expand Down Expand Up @@ -482,20 +485,24 @@ class MultiValBin {
const std::vector<uint32_t>& lower, const std::vector<uint32_t>& upper,
const std::vector<uint32_t>& delta) = 0;

virtual void ConstructHistogram(
const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* gradients, const score_t* hessians,
hist_t* out) const = 0;
// Pure-virtual overload that receives an explicit row-index array together
// with start/end bounds, per-row gradients and hessians, and an output buffer.
// NOTE(review): presumably accumulates the rows referenced by
// data_indices[start..end) into `out` -- confirm against the implementations.
virtual void ConstructHistogram(const data_size_t* data_indices,
data_size_t start, data_size_t end,
const score_t* gradients,
const score_t* hessians,
hist_t* out) const = 0;

virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* gradients, const score_t* hessians,
hist_t* out) const = 0;
const score_t* gradients,
const score_t* hessians,
hist_t* out) const = 0;

virtual void ConstructHistogram(const data_size_t* data_indices, data_size_t start, data_size_t end,
const score_t* ordered_gradients, hist_t* out) const = 0;

virtual void ConstructHistogram(data_size_t start, data_size_t end,
const score_t* ordered_gradients, hist_t* out) const = 0;
// Pure-virtual variant of histogram construction that takes `ordered_gradients`
// and `ordered_hessians` alongside the row-index array and start/end bounds.
// NOTE(review): presumably the ordered_* arrays are already arranged in the
// same order as `data_indices`, so they can be read sequentially -- confirm
// against the implementations and callers.
virtual void ConstructHistogramOrdered(const data_size_t* data_indices,
data_size_t start, data_size_t end,
const score_t* ordered_gradients,
const score_t* ordered_hessians,
hist_t* out) const = 0;


virtual void FinishLoad() = 0;

Expand Down
54 changes: 45 additions & 9 deletions include/LightGBM/dataset.h
Expand Up @@ -482,20 +482,56 @@ class Dataset {
void InitTrain(const std::vector<int8_t>& is_feature_used,
TrainingShareStates* share_state) const;

void ConstructHistograms(const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices,
data_size_t num_data, const score_t* gradients,
const score_t* hessians, score_t* ordered_gradients,
score_t* ordered_hessians,
TrainingShareStates* share_state,
hist_t* histogram_data) const;

// Templated worker behind ConstructHistograms(); declared here, defined elsewhere.
// ConstructHistograms() selects the instantiation at runtime:
//   USE_INDICES - true when data_indices is non-null and num_data < num_data_.
//   USE_HESSIAN - false when share_state->is_constant_hessian is set, true otherwise.
// All arguments are forwarded unchanged from ConstructHistograms().
template <bool USE_INDICES, bool USE_HESSIAN>
void ConstructHistogramsInner(const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices,
data_size_t num_data, const score_t* gradients,
const score_t* hessians,
score_t* ordered_gradients,
score_t* ordered_hessians,
TrainingShareStates* share_state,
hist_t* hist_data) const;

template <bool USE_INDICES, bool ORDERED>
void ConstructHistogramsMultiVal(const data_size_t* data_indices,
data_size_t num_data,
const score_t* gradients,
const score_t* hessians,
TrainingShareStates* share_state,
hist_t* histogram_data) const;
hist_t* hist_data) const;

inline void ConstructHistograms(
const std::vector<int8_t>& is_feature_used,
const data_size_t* data_indices, data_size_t num_data,
const score_t* gradients, const score_t* hessians,
score_t* ordered_gradients, score_t* ordered_hessians,
TrainingShareStates* share_state, hist_t* hist_data) const {
if (num_data <= 0) {
return;
}
bool use_indices = data_indices != nullptr && (num_data < num_data_);
if (share_state->is_constant_hessian) {
if (use_indices) {
ConstructHistogramsInner<true, false>(
is_feature_used, data_indices, num_data, gradients, hessians,
ordered_gradients, ordered_hessians, share_state, hist_data);
} else {
ConstructHistogramsInner<false, false>(
is_feature_used, data_indices, num_data, gradients, hessians,
ordered_gradients, ordered_hessians, share_state, hist_data);
}
} else {
if (use_indices) {
ConstructHistogramsInner<true, true>(
is_feature_used, data_indices, num_data, gradients, hessians,
ordered_gradients, ordered_hessians, share_state, hist_data);
} else {
ConstructHistogramsInner<false, true>(
is_feature_used, data_indices, num_data, gradients, hessians,
ordered_gradients, ordered_hessians, share_state, hist_data);
}
}
}

void FixHistogram(int feature_idx, double sum_gradient, double sum_hessian, hist_t* data) const;

Expand Down

0 comments on commit bc7d2f0

Please sign in to comment.