Skip to content

Commit

Permalink
refactor template initializations with macro
Browse files Browse the repository at this point in the history
  • Loading branch information
shiyu1994 committed Mar 28, 2024
1 parent abdb716 commit 3cdfd83
Show file tree
Hide file tree
Showing 8 changed files with 447 additions and 1,024 deletions.
6 changes: 4 additions & 2 deletions include/LightGBM/bin.h
Expand Up @@ -509,19 +509,21 @@ class Bin {
* \param num_bin Number of bin
* \param paired_ranking_item_index_map Map from data index to the original index for items in the pair
* \param diff_bin_mappers Bin mappers for differential features in this group
* \param ori_bin_mappers Bin mappers of the original features in this group
* \param bin_offsets Bin offsets in feature group
* \return The bin data object
*/
static Bin* CreateDensePairwiseRankingDiffBin(data_size_t num_original_data, int num_bin, data_size_t num_pairs, const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map, const std::vector<std::unique_ptr<const BinMapper>>* diff_bin_mappers);
static Bin* CreateDensePairwiseRankingDiffBin(data_size_t num_original_data, int num_bin, data_size_t num_pairs, const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map, const std::vector<std::unique_ptr<const BinMapper>>* diff_bin_mappers, const std::vector<std::unique_ptr<const BinMapper>>* ori_bin_mappers, const std::vector<uint32_t>* bin_offsets);

/*!
* \brief Create object for bin data of the differential feature in pair, used for pairwise ranking, for an original sparse bin
* \param num_original_data Number of data used to size the underlying (unpaired) sparse bin
* \param num_bin Number of bin; selects the integer width used to store bin values
* \param num_pairs Number of pairs, forwarded to the pairwise bin (presumably the size of the pairwise dataset -- confirm)
* \param paired_ranking_item_index_map Map from data index to the original index for items in the pair
* \param diff_bin_mappers Bin mappers for differential features in this group
* \param ori_bin_mappers Bin mappers of the original features in this group
* \param bin_offsets Bin offsets in feature group
* \return The bin data object
*/
static Bin* CreateSparsePairwiseRankingDiffBin(data_size_t num_original_data, int num_bin, data_size_t num_pairs, const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map, const std::vector<std::unique_ptr<const BinMapper>>* diff_bin_mappers);
static Bin* CreateSparsePairwiseRankingDiffBin(data_size_t num_original_data, int num_bin, data_size_t num_pairs, const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map, const std::vector<std::unique_ptr<const BinMapper>>* diff_bin_mappers, const std::vector<std::unique_ptr<const BinMapper>>* ori_bin_mappers, const std::vector<uint32_t>* bin_offsets);

/*!
* \brief Deep copy the bin
Expand Down
3 changes: 2 additions & 1 deletion include/LightGBM/dataset.h
Expand Up @@ -1065,9 +1065,10 @@ class Dataset {
/*!
* \brief Build the bin mappers of the differential features used for pairwise ranking
*        Only original features whose bin mapper is non-null, non-trivial and of type NumericalBin are considered.
* \param sample_values Sampled values, one inner vector per original feature
* \param sample_indices Sampled indices per original feature (presumably row indices matching sample_values -- confirm)
* \param bin_mappers Bin mappers of the original features, indexed by original feature; entries may be null
* \param num_total_sample_data Total number of sampled data, used to scale min_data_in_leaf into a filter count
* \param differential_feature_bin_mappers Output, bin mappers created for the differential features
* \param diff_original_feature_index Output, receives the original feature index of each processed numerical feature
* \param config Config; min_data_in_leaf and zero_as_missing are read
* NOTE(review): the definition appends to diff_original_feature_index from inside an OpenMP parallel for loop;
*               concurrent std::vector::push_back is a data race and the resulting order would not be deterministic -- verify.
* NOTE(review): the visible caller fills the unique_ptr vector passed as bin_mappers with pointers returned by
*               Dataset::FeatureBinMapper(); if the dataset still owns those mappers they would be deleted twice -- confirm ownership.
*/
void CreatePairwiseRankingDifferentialFeatures(
const std::vector<std::vector<double>>& sample_values,
const std::vector<std::vector<int>>& sample_indices,
const std::vector<const BinMapper*>& bin_mappers,
const std::vector<std::unique_ptr<const BinMapper>>& bin_mappers,
const data_size_t num_total_sample_data,
std::vector<std::unique_ptr<BinMapper>>* differential_feature_bin_mappers,
std::vector<int>* diff_original_feature_index,
const Config& config) const;

std::string data_filename_;
Expand Down
3 changes: 2 additions & 1 deletion include/LightGBM/pairwise_ranking_feature_group.h
Expand Up @@ -126,7 +126,7 @@ class PairwiseRankingDifferentialFeatureGroup: public PairwiseRankingFeatureGrou
* \param is_first_or_second_in_pairing Mark whether features in this group belong to the first or second element in the pairing
*/

// diff_feature_bin_mappers: bin mappers of the differential features in this group.
// ori_feature_bin_mappers: bin mappers of the matching original features; the caller visible in dataset.cpp
// passes copies of the original dataset's mappers.
// Both vectors are taken by non-const reference; whether their elements are moved into
// diff_feature_bin_mappers_ / ori_feature_bin_mappers_ depends on the constructor definition -- TODO confirm.
PairwiseRankingDifferentialFeatureGroup(const FeatureGroup& other, int num_original_data, const int is_first_or_second_in_pairing, int num_pairs, const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map, std::vector<std::unique_ptr<const BinMapper>>& diff_feature_bin_mappers);
PairwiseRankingDifferentialFeatureGroup(const FeatureGroup& other, int num_original_data, const int is_first_or_second_in_pairing, int num_pairs, const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map, std::vector<std::unique_ptr<BinMapper>>& diff_feature_bin_mappers, std::vector<std::unique_ptr<BinMapper>>& ori_feature_bin_mappers);

/*! \brief Destructor */
~PairwiseRankingDifferentialFeatureGroup() {}
Expand All @@ -135,6 +135,7 @@ class PairwiseRankingDifferentialFeatureGroup: public PairwiseRankingFeatureGrou
void CreateBinData(int num_data, bool is_multi_val, bool force_dense, bool force_sparse) override;

/*! \brief Bin mappers for the differential features in this group (owned by this object) */
std::vector<std::unique_ptr<const BinMapper>> diff_feature_bin_mappers_;
/*! \brief Bin mappers of the original features in this group (owned by this object) */
std::vector<std::unique_ptr<const BinMapper>> ori_feature_bin_mappers_;
};


Expand Down
24 changes: 18 additions & 6 deletions src/io/bin.cpp
Expand Up @@ -677,14 +677,26 @@ namespace LightGBM {
}
}

Bin* Bin::CreateDensePairwiseRankingDiffBin(data_size_t num_original_data, int num_bin, data_size_t num_pairs, const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map, const std::vector<std::unique_ptr<const BinMapper>>* diff_bin_mappers) {
// TODO(shiyu1994)
return nullptr;
/*!
 * \brief Create the dense differential-feature bin used for pairwise ranking.
 *
 * The storage type is chosen from num_bin: uint8_t for up to 256 bins, uint16_t for up to
 * 65536 bins and uint32_t otherwise. For at most 16 bins the second template flag is set to
 * true (presumably a packed 4-bit layout -- confirm against DenseBin).
 * In every case a new DenseBin sized by num_original_data is allocated and handed to the
 * pairwise bin together with the pair index map, both mapper lists and the bin offsets.
 *
 * \return Bin allocated with new and returned as a raw pointer
 */
Bin* Bin::CreateDensePairwiseRankingDiffBin(
    data_size_t num_original_data,
    int num_bin,
    data_size_t num_pairs,
    const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map,
    const std::vector<std::unique_ptr<const BinMapper>>* diff_bin_mappers,
    const std::vector<std::unique_ptr<const BinMapper>>* ori_bin_mappers,
    const std::vector<uint32_t>* bin_offsets) {
  // Thresholds are tested from the narrowest representation upwards.
  if (num_bin <= 16) {
    return new DensePairwiseRankingDiffBin<uint8_t, true>(
        num_pairs, paired_ranking_item_index_map,
        new DenseBin<uint8_t, true>(num_original_data),
        diff_bin_mappers, ori_bin_mappers, bin_offsets);
  }
  if (num_bin <= 256) {
    return new DensePairwiseRankingDiffBin<uint8_t, false>(
        num_pairs, paired_ranking_item_index_map,
        new DenseBin<uint8_t, false>(num_original_data),
        diff_bin_mappers, ori_bin_mappers, bin_offsets);
  }
  if (num_bin <= 65536) {
    return new DensePairwiseRankingDiffBin<uint16_t, false>(
        num_pairs, paired_ranking_item_index_map,
        new DenseBin<uint16_t, false>(num_original_data),
        diff_bin_mappers, ori_bin_mappers, bin_offsets);
  }
  // More than 65536 bins: fall back to 32-bit storage.
  return new DensePairwiseRankingDiffBin<uint32_t, false>(
      num_pairs, paired_ranking_item_index_map,
      new DenseBin<uint32_t, false>(num_original_data),
      diff_bin_mappers, ori_bin_mappers, bin_offsets);
}

Bin* Bin::CreateSparsePairwiseRankingDiffBin(data_size_t num_original_data, int num_bin, data_size_t num_pairs, const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map, const std::vector<std::unique_ptr<const BinMapper>>* diff_bin_mappers) {
// TODO(shiyu1994)
return nullptr;
/*!
 * \brief Create the sparse differential-feature bin used for pairwise ranking.
 *
 * The storage type is chosen from num_bin: uint8_t for up to 256 bins, uint16_t for up to
 * 65536 bins and uint32_t otherwise.
 * In every case a new SparseBin sized by num_original_data is allocated and handed to the
 * pairwise bin together with the pair index map, both mapper lists and the bin offsets.
 *
 * \return Bin allocated with new and returned as a raw pointer
 */
Bin* Bin::CreateSparsePairwiseRankingDiffBin(
    data_size_t num_original_data,
    int num_bin,
    data_size_t num_pairs,
    const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map,
    const std::vector<std::unique_ptr<const BinMapper>>* diff_bin_mappers,
    const std::vector<std::unique_ptr<const BinMapper>>* ori_bin_mappers,
    const std::vector<uint32_t>* bin_offsets) {
  // Thresholds are tested from the narrowest representation upwards.
  if (num_bin <= 256) {
    return new SparsePairwiseRankingDiffBin<uint8_t>(
        num_pairs, paired_ranking_item_index_map,
        new SparseBin<uint8_t>(num_original_data),
        diff_bin_mappers, ori_bin_mappers, bin_offsets);
  }
  if (num_bin <= 65536) {
    return new SparsePairwiseRankingDiffBin<uint16_t>(
        num_pairs, paired_ranking_item_index_map,
        new SparseBin<uint16_t>(num_original_data),
        diff_bin_mappers, ori_bin_mappers, bin_offsets);
  }
  // More than 65536 bins: fall back to 32-bit storage.
  return new SparsePairwiseRankingDiffBin<uint32_t>(
      num_pairs, paired_ranking_item_index_map,
      new SparseBin<uint32_t>(num_original_data),
      diff_bin_mappers, ori_bin_mappers, bin_offsets);
}

MultiValBin* MultiValBin::CreateMultiValBin(data_size_t num_data, int num_bin, int num_feature,
Expand Down
23 changes: 17 additions & 6 deletions src/io/dataset.cpp
Expand Up @@ -875,17 +875,18 @@ void Dataset::CreatePairWiseRankingData(const Dataset* dataset, const bool is_va

// create differential features
std::vector<std::unique_ptr<BinMapper>> diff_feature_bin_mappers;
std::vector<const BinMapper*> original_bin_mappers;
std::vector<std::unique_ptr<const BinMapper>> original_bin_mappers;
std::vector<int> diff_original_feature_index;
for (int i = 0; i < dataset->num_total_features_; ++i) {
const int inner_feature_index = dataset->InnerFeatureIndex(i);
if (inner_feature_index >= 0) {
original_bin_mappers.push_back(dataset->FeatureBinMapper(inner_feature_index));
original_bin_mappers.emplace_back(dataset->FeatureBinMapper(inner_feature_index));
} else {
original_bin_mappers.push_back(nullptr);
original_bin_mappers.emplace_back(nullptr);
}
}

CreatePairwiseRankingDifferentialFeatures(sampled_values_, sampled_indices_, original_bin_mappers, num_total_sampled_data_, &diff_feature_bin_mappers, config);
CreatePairwiseRankingDifferentialFeatures(sampled_values_, sampled_indices_, original_bin_mappers, num_total_sampled_data_, &diff_feature_bin_mappers, &diff_original_feature_index, config);

std::vector<int> used_diff_features;
for (int diff_feature_index = 0; diff_feature_index < static_cast<int>(diff_feature_bin_mappers.size()); ++diff_feature_index) {
Expand Down Expand Up @@ -926,6 +927,8 @@ void Dataset::CreatePairWiseRankingData(const Dataset* dataset, const bool is_va
const std::vector<int>& features_in_group = diff_feature_groups[i];
group_feature_start_.push_back(cur_feature_index);
int sub_feature_index = 0;
std::vector<std::unique_ptr<BinMapper>> ori_bin_mappers;
std::vector<std::unique_ptr<BinMapper>> diff_bin_mappers;
for (size_t j = 0; j < features_in_group.size(); ++j) {
const int diff_feature_index = features_in_group[j];
if (!diff_feature_bin_mappers[diff_feature_index]->is_trivial()) {
Expand All @@ -936,9 +939,15 @@ void Dataset::CreatePairWiseRankingData(const Dataset* dataset, const bool is_va
feature2subfeature_.push_back(sub_feature_index);
++cur_feature_index;
++sub_feature_index;
const int ori_feature_index = dataset->InnerFeatureIndex(diff_original_feature_index[diff_feature_index]);
ori_bin_mappers.emplace_back(new BinMapper(*dataset->FeatureBinMapper(ori_feature_index)));
diff_bin_mappers.emplace_back(new BinMapper(*diff_feature_bin_mappers[diff_feature_index]));
}
}

FeatureGroup feature_group(sub_feature_index, 0, &ori_bin_mappers, dataset->num_data(), i + num_groups_);
feature_groups_.emplace_back(new PairwiseRankingDifferentialFeatureGroup(feature_group, dataset->num_data(), 2, metadata_.paired_ranking_item_index_map_size(), metadata_.paired_ranking_item_global_index_map(), diff_bin_mappers, ori_bin_mappers));

group_feature_cnt_.push_back(cur_feature_index - group_feature_start_.back());
}

Expand Down Expand Up @@ -1937,16 +1946,17 @@ const void* Dataset::GetColWiseData(
void Dataset::CreatePairwiseRankingDifferentialFeatures(
const std::vector<std::vector<double>>& sample_values,
const std::vector<std::vector<int>>& sample_indices,
const std::vector<const BinMapper*>& bin_mappers,
const std::vector<std::unique_ptr<const BinMapper>>& bin_mappers,
const data_size_t num_total_sample_data,
std::vector<std::unique_ptr<BinMapper>>* differential_feature_bin_mappers,
std::vector<int>* diff_original_feature_index,
const Config& config) const {
const int num_original_features = static_cast<int>(sample_values.size());
const data_size_t filter_cnt = static_cast<data_size_t>(
static_cast<double>(config.min_data_in_leaf * num_total_sample_data) / num_data_);
std::vector<int> numerical_feature_indices;
for (int i = 0; i < num_original_features; ++i) {
if (bin_mappers[i] != nullptr && bin_mappers[i]->bin_type() == BinType::NumericalBin) {
if (bin_mappers[i] != nullptr && !bin_mappers[i]->is_trivial() && bin_mappers[i]->bin_type() == BinType::NumericalBin) {
numerical_feature_indices.push_back(i);
}
}
Expand All @@ -1959,6 +1969,7 @@ void Dataset::CreatePairwiseRankingDifferentialFeatures(
#pragma omp parallel for schedule(static) num_threads(num_threads)
for (int i = 0; i < num_numerical_features; ++i) {
const int feature_index = numerical_feature_indices[i];
diff_original_feature_index->push_back(feature_index);
const data_size_t num_samples_for_feature = static_cast<data_size_t>(sample_values[feature_index].size());
if (config.zero_as_missing) {
for (int j = 0; j < num_samples_for_feature; ++j) {
Expand Down

0 comments on commit 3cdfd83

Please sign in to comment.