Skip to content

Commit

Permalink
Added method for customizable "boost-from-average" (#731)
Browse files Browse the repository at this point in the history
* Created objectives and metrics xentropy and xentropy1

* Some comment and code cleanup.

* Added Kullback-Leibler version of metric. Changed some warning messages.

* Fixed sign error in KL-divergence calc.

* Removed __PRETTY_FUNCTION__.

* Chose a better name for the alternative xentropy parameterization.
Documented details on the objectives / metrics in code comments.

* Common code for label interval checks. Cleanups.

* Use common utility for various weight property checks.

* Added code for customizable initial average to boost from.

* Fixed spelling error in aliases.
  • Loading branch information
olofer authored and guolinke committed Jul 27, 2017
1 parent 1f71115 commit a3c63ff
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 4 deletions.
2 changes: 1 addition & 1 deletion include/LightGBM/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ struct ParameterAlias {
{ "is_sparse", "is_enable_sparse" },
{ "enable_sparse", "is_enable_sparse" },
{ "pre_partition", "is_pre_partition" },
{ "tranining_metric", "is_training_metric" },
{ "training_metric", "is_training_metric" },
{ "train_metric", "is_training_metric" },
{ "ndcg_at", "ndcg_eval_at" },
{ "eval_at", "ndcg_eval_at" },
Expand Down
2 changes: 2 additions & 0 deletions include/LightGBM/objective_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class ObjectiveFunction {

// Whether boosting may start from an average score instead of zero (default: no).
virtual bool BoostFromAverage() const { return false; }

// Optionally writes an objective-specific initial score into the out-parameter.
// Returns true if a custom value was produced; false means the caller should
// fall back to its standard label-average computation.
virtual bool GetCustomAverage(double *) const { return false; }

// Whether empty classes can be skipped during training (default: no).
virtual bool SkipEmptyClass() const { return false; }

// Number of trees built per boosting iteration (default: one).
virtual int NumTreePerIteration() const { return 1; }
Expand Down
9 changes: 7 additions & 2 deletions src/boosting/gbdt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,13 @@ bool GBDT::TrainOneIter(const score_t* gradient, const score_t* hessian, bool is
&& num_class_ <= 1
&& objective_function_ != nullptr
&& objective_function_->BoostFromAverage()) {
auto label = train_data_->metadata().label();
double init_score = LabelAverage(label, num_data_);
double init_score = 0.0f;
// First try to poll the optional custom average score calculation for the specific objective
if (!objective_function_->GetCustomAverage(&init_score)) {
// otherwise compute a standard label average
auto label = train_data_->metadata().label();
init_score = LabelAverage(label, num_data_);
}
std::unique_ptr<Tree> new_tree(new Tree(2));
new_tree->Split(0, 0, BinType::NumericalBin, 0, 0, 0, init_score, init_score, 0, 0, -1, 0, 0, 0);
train_score_updater_->AddScore(init_score, 0);
Expand Down
40 changes: 39 additions & 1 deletion src/objective/xentropy_objective.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,31 @@ class CrossEntropy: public ObjectiveFunction {
return str_buf.str();
}

// Enable the boost-from-average option for this objective.
bool BoostFromAverage() const override { return true; }

// Implement the custom average to boost from (if enabled among options).
// Computes the (optionally weighted) mean label pavg and writes the
// corresponding log-odds initial score log(pavg / (1 - pavg)).
// Returns false only when the out-parameter is null.
// NOTE(review): assumes pavg lies strictly in (0, 1); labels at exactly
// 0 or 1 everywhere would yield an infinite initscore — confirm callers
// guarantee non-degenerate labels.
bool GetCustomAverage(double *initscore) const override {
  if (initscore == nullptr) return false;
  double suml = 0.0f;
  double sumw = 0.0f;
  // BUGFIX: the branches were swapped — the weighted loop previously ran
  // when weights_ was nullptr, dereferencing a null pointer.
  if (weights_ != nullptr) {
    // Weighted mean: sum of weight * label over sum of weights.
    for (data_size_t i = 0; i < num_data_; ++i) {
      suml += label_[i] * weights_[i];
      sumw += weights_[i];
    }
  } else {
    // Unweighted: every data point counts once.
    sumw = static_cast<double>(num_data_);
    for (data_size_t i = 0; i < num_data_; ++i) {
      suml += label_[i];
    }
  }
  double pavg = suml / sumw;
  *initscore = std::log(pavg / (1.0f - pavg));
  Log::Info("[%s:%s]: pavg=%f -> initscore=%f", GetName(), __func__, pavg, *initscore);
  return true;
}

private:
/*! \brief Number of data points */
data_size_t num_data_;
Expand Down Expand Up @@ -207,9 +230,24 @@ class CrossEntropyLambda: public ObjectiveFunction {
return str_buf.str();
}

// Might want to boost from a weighted average in general, if possible;
// enable the boost-from-average option for this objective.
bool BoostFromAverage() const override { return true; }

// Custom initial score for the lambda-parameterized cross-entropy:
// computes the average label havg and writes log(exp(havg) - 1).
// Returns false only when the out-parameter is null.
// NOTE(review): sumy is an unweighted label sum while sumw is the weight
// sum when weights are present — verify this mixed weighting is intended.
// Also assumes havg > 0 so that exp(havg) - 1 > 0 — TODO confirm.
bool GetCustomAverage(double *initscore) const override {
  if (initscore == nullptr) return false;
  double sumy = 0.0f;
  for (data_size_t i = 0; i < num_data_; ++i) sumy += label_[i];
  double sumw = 0.0f;
  // BUGFIX: the condition was inverted — the weight-summing loop ran when
  // weights_ was nullptr, dereferencing a null pointer (see review thread).
  if (weights_ != nullptr) {
    for (data_size_t i = 0; i < num_data_; ++i) sumw += weights_[i];
  } else {
    // Unweighted: every data point counts once.
    sumw = static_cast<double>(num_data_);
  }
  double havg = sumy / sumw;
  *initscore = std::log(std::exp(havg) - 1.0f);
  Log::Info("[%s:%s]: havg=%f -> initscore=%f", GetName(), __func__, havg, *initscore);
  return true;
}

private:
/*! \brief Number of data points */
data_size_t num_data_;
Expand Down

0 comments on commit a3c63ff

Please sign in to comment.