Skip to content

Commit

Permalink
fix some formats
Browse files Browse the repository at this point in the history
  • Loading branch information
guolinke committed Nov 15, 2017
1 parent ba5c745 commit 3d65d06
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 70 deletions.
10 changes: 5 additions & 5 deletions include/LightGBM/boosting.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ class LIGHTGBM_EXPORT Boosting {
virtual void PredictRaw(const double* features, double* output,
const PredictionEarlyStopInstance* early_stop) const = 0;

virtual void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
const PredictionEarlyStopInstance* early_stop) const = 0;
virtual void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
const PredictionEarlyStopInstance* early_stop) const = 0;


/*!
Expand All @@ -134,8 +134,8 @@ class LIGHTGBM_EXPORT Boosting {
virtual void Predict(const double* features, double* output,
const PredictionEarlyStopInstance* early_stop) const = 0;

virtual void PredictByMap(const std::unordered_map<int, double>& features, double* output,
const PredictionEarlyStopInstance* early_stop) const = 0;
virtual void PredictByMap(const std::unordered_map<int, double>& features, double* output,
const PredictionEarlyStopInstance* early_stop) const = 0;


/*!
Expand Down Expand Up @@ -210,7 +210,7 @@ class LIGHTGBM_EXPORT Boosting {
* \param filename Name of the file to save the model to
*/
virtual void SaveModelToProto(int num_iteration, const char* filename) const = 0;

/*!
* \brief Restore from a serialized protobuf file
* \param filename Name of the file to restore the model from
Expand Down
8 changes: 4 additions & 4 deletions include/LightGBM/tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,8 +478,8 @@ inline void Tree::PredictContrib(const double* feature_values, int num_features,
output[num_features] += ExpectedValue();
// Run the recursion with preallocated space for the unique path data
if (num_leaves_ > 1) {
const int max_path_len = MaxDepth()+1;
PathElement *unique_path_data = new PathElement[(max_path_len*(max_path_len+1))/2];
const int max_path_len = MaxDepth() + 1;
PathElement *unique_path_data = new PathElement[(max_path_len*(max_path_len + 1)) / 2];
TreeSHAP(feature_values, output, 0, 0, unique_path_data, 1, 1, -1);
delete[] unique_path_data;
}
Expand All @@ -490,8 +490,8 @@ inline void Tree::RecomputeLeafDepths(int node, int depth) {
if (node < 0) {
leaf_depth_[~node] = depth;
} else {
RecomputeLeafDepths(left_child_[node], depth+1);
RecomputeLeafDepths(right_child_[node], depth+1);
RecomputeLeafDepths(left_child_[node], depth + 1);
RecomputeLeafDepths(right_child_[node], depth + 1);
}
}

Expand Down
27 changes: 13 additions & 14 deletions src/application/predictor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class Predictor {
if (is_predict_leaf_index) {
predict_fun_ = [this](const std::vector<std::pair<int, double>>& features, double* output) {
int tid = omp_get_thread_num();
if(num_feature_ > MAX_FEATURE && num_feature_/static_cast<int>(features.size()) > SPARSITY) {
if (num_feature_ > MAX_FEATURE && num_feature_ / static_cast<int>(features.size()) > SPARSITY) {
CopyToPredictMap(tid, features);
boosting_->PredictLeafIndexByMap(predict_buf_map_[tid], output);
ClearPredictMap(tid);
Expand All @@ -88,7 +88,7 @@ class Predictor {
if (is_raw_score) {
predict_fun_ = [this](const std::vector<std::pair<int, double>>& features, double* output) {
int tid = omp_get_thread_num();
if(num_feature_ > MAX_FEATURE && num_feature_/static_cast<int>(features.size()) > SPARSITY) {
if (num_feature_ > MAX_FEATURE && num_feature_ / static_cast<int>(features.size()) > SPARSITY) {
CopyToPredictMap(tid, features);
boosting_->PredictRawByMap(predict_buf_map_[tid], output, &early_stop_);
ClearPredictMap(tid);
Expand All @@ -101,7 +101,7 @@ class Predictor {
} else {
predict_fun_ = [this](const std::vector<std::pair<int, double>>& features, double* output) {
int tid = omp_get_thread_num();
if(num_feature_ > MAX_FEATURE && num_feature_/static_cast<int>(features.size()) > SPARSITY) {
if (num_feature_ > MAX_FEATURE && num_feature_ / static_cast<int>(features.size()) > SPARSITY) {
CopyToPredictMap(tid, features);
boosting_->PredictByMap(predict_buf_map_[tid], output, &early_stop_);
ClearPredictMap(tid);
Expand Down Expand Up @@ -151,20 +151,20 @@ class Predictor {
TextReader<data_size_t> predict_data_reader(data_filename, has_header);
std::unordered_map<int, int> feature_names_map_;
bool need_adjust = false;
if(has_header) {
if (has_header) {
std::string first_line = predict_data_reader.first_line();
std::vector<std::string> header = Common::Split(first_line.c_str(), "\t,");
header.erase(header.begin() + boosting_->LabelIdx());
for(int i = 0; i < static_cast<int>(header.size()); ++i) {
for(int j = 0; j < static_cast<int>(boosting_->FeatureNames().size()); ++j) {
if(header[i] == boosting_->FeatureNames()[j]) {
for (int i = 0; i < static_cast<int>(header.size()); ++i) {
for (int j = 0; j < static_cast<int>(boosting_->FeatureNames().size()); ++j) {
if (header[i] == boosting_->FeatureNames()[j]) {
feature_names_map_[i] = j;
break;
}
}
}
for(auto s:feature_names_map_) {
if(s.first != s.second) {
for (auto s : feature_names_map_) {
if (s.first != s.second) {
need_adjust = true;
break;
}
Expand All @@ -176,14 +176,13 @@ class Predictor {
parser_fun = [this, &parser, &tmp_label, &need_adjust, &feature_names_map_]
(const char* buffer, std::vector<std::pair<int, double>>* feature) {
parser->ParseOneLine(buffer, feature, &tmp_label);
if(need_adjust) {
if (need_adjust) {
int i = 0, j = static_cast<int>(feature->size());
while(i < j) {
if(feature_names_map_.find((*feature)[i].first) != feature_names_map_.end()) {
while (i < j) {
if (feature_names_map_.find((*feature)[i].first) != feature_names_map_.end()) {
(*feature)[i].first = feature_names_map_[(*feature)[i].first];
++i;
}
else {
} else {
// move the unused features to the end of the feature vector
std::swap((*feature)[i], (*feature)[--j]);
}
Expand Down
22 changes: 11 additions & 11 deletions src/boosting/gbdt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,16 @@ std::chrono::duration<double, std::milli> tree_time;
#endif // TIMETAG

GBDT::GBDT() : iter_(0),
train_data_(nullptr),
objective_function_(nullptr),
early_stopping_round_(0),
max_feature_idx_(0),
num_tree_per_iteration_(1),
num_class_(1),
num_iteration_for_pred_(0),
shrinkage_rate_(0.1f),
num_init_iteration_(0),
need_re_bagging_(false) {
train_data_(nullptr),
objective_function_(nullptr),
early_stopping_round_(0),
max_feature_idx_(0),
num_tree_per_iteration_(1),
num_class_(1),
num_iteration_for_pred_(0),
shrinkage_rate_(0.1f),
num_init_iteration_(0),
need_re_bagging_(false) {

#pragma omp parallel
#pragma omp master
Expand Down Expand Up @@ -224,7 +224,7 @@ data_size_t GBDT::BaggingHelper(Random& cur_rand, data_size_t start, data_size_t

void GBDT::Bagging(int iter) {
// if need bagging
if ((bag_data_cnt_ < num_data_ && iter % gbdt_config_->bagging_freq == 0)
if ((bag_data_cnt_ < num_data_ && iter % gbdt_config_->bagging_freq == 0)
|| need_re_bagging_) {
need_re_bagging_ = false;
const data_size_t min_inner_size = 1000;
Expand Down
12 changes: 6 additions & 6 deletions src/boosting/gbdt.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace LightGBM {
/*!
* \brief GBDT algorithm implementation, including training, prediction, and bagging.
*/
class GBDT: public GBDTBase {
class GBDT : public GBDTBase {
public:

/*!
Expand Down Expand Up @@ -187,14 +187,14 @@ class GBDT: public GBDTBase {
void PredictRaw(const double* features, double* output,
const PredictionEarlyStopInstance* earlyStop) const override;

void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
const PredictionEarlyStopInstance* early_stop) const override;
void PredictRawByMap(const std::unordered_map<int, double>& features, double* output,
const PredictionEarlyStopInstance* early_stop) const override;

void Predict(const double* features, double* output,
const PredictionEarlyStopInstance* earlyStop) const override;

void PredictByMap(const std::unordered_map<int, double>& features, double* output,
const PredictionEarlyStopInstance* early_stop) const override;
void PredictByMap(const std::unordered_map<int, double>& features, double* output,
const PredictionEarlyStopInstance* early_stop) const override;

void PredictLeafIndex(const double* features, double* output) const override;

Expand Down Expand Up @@ -252,7 +252,7 @@ class GBDT: public GBDTBase {
* \param filename Name of the file to save the model to
*/
void SaveModelToProto(int num_iteration, const char* filename) const override;

/*!
* \brief Restore from a serialized protobuf file
* \param filename Name of the file to restore the model from
Expand Down
16 changes: 8 additions & 8 deletions src/boosting/gbdt_model_text.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ std::string GBDT::ModelToIfElse(int num_iteration) const {
// PredictRawByMap
str_buf << "double (*PredictTreeByMapPtr[])(const std::unordered_map<int, double>&) = { ";
for (int i = 0; i < num_used_model; ++i) {
if (i > 0) {
str_buf << " , ";
}
str_buf << "PredictTree" << i << "ByMap";
if (i > 0) {
str_buf << " , ";
}
str_buf << "PredictTree" << i << "ByMap";
}
str_buf << " };" << std::endl << std::endl;

Expand Down Expand Up @@ -185,10 +185,10 @@ std::string GBDT::ModelToIfElse(int num_iteration) const {
//PredictLeafIndexByMap
str_buf << "double (*PredictTreeLeafByMapPtr[])(const std::unordered_map<int, double>&) = { ";
for (int i = 0; i < num_used_model; ++i) {
if (i > 0) {
str_buf << " , ";
}
str_buf << "PredictTree" << i << "LeafByMap";
if (i > 0) {
str_buf << " , ";
}
str_buf << "PredictTree" << i << "LeafByMap";
}
str_buf << " };" << std::endl << std::endl;

Expand Down
42 changes: 20 additions & 22 deletions src/io/tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,30 +381,28 @@ std::string Tree::ToIfElse(int index, bool is_predict_leaf_index) const {
// Emit the map-based ("ByMap") predict function for this tree as if-else code
str_buf << "double PredictTree" << index;
if (is_predict_leaf_index) {
str_buf << "LeafByMap";
}
else {
str_buf << "ByMap";
str_buf << "LeafByMap";
} else {
str_buf << "ByMap";
}
str_buf << "(const std::unordered_map<int, double>& arr) { ";
if (num_leaves_ <= 1) {
str_buf << "return " << leaf_value_[0] << ";";
}
else {
str_buf << "const std::vector<uint32_t> cat_threshold = {";
for (size_t i = 0; i < cat_threshold_.size(); ++i) {
if (i != 0) {
str_buf << ",";
}
str_buf << cat_threshold_[i];
}
str_buf << "};";
// use this for the missing value conversion
str_buf << "double fval = 0.0f; ";
if (num_cat_ > 0) {
str_buf << "int int_fval = 0; ";
}
str_buf << NodeToIfElseByMap(0, is_predict_leaf_index);
str_buf << "return " << leaf_value_[0] << ";";
} else {
str_buf << "const std::vector<uint32_t> cat_threshold = {";
for (size_t i = 0; i < cat_threshold_.size(); ++i) {
if (i != 0) {
str_buf << ",";
}
str_buf << cat_threshold_[i];
}
str_buf << "};";
// use this for the missing value conversion
str_buf << "double fval = 0.0f; ";
if (num_cat_ > 0) {
str_buf << "int int_fval = 0; ";
}
str_buf << NodeToIfElseByMap(0, is_predict_leaf_index);
}
str_buf << " }" << std::endl;

Expand Down Expand Up @@ -696,7 +694,7 @@ double Tree::ExpectedValue() const {
const double total_count = internal_count_[0];
double exp_value = 0.0;
for (int i = 0; i < num_leaves(); ++i) {
exp_value += (leaf_count_[i]/total_count)*LeafOutput(i);
exp_value += (leaf_count_[i] / total_count)*LeafOutput(i);
}
return exp_value;
}
Expand Down

0 comments on commit 3d65d06

Please sign in to comment.