-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
pairwise_ranking_feature_group.h
144 lines (120 loc) · 5.15 KB
/
pairwise_ranking_feature_group.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
/*!
* Copyright (c) 2023 Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See LICENSE file in the project root for
* license information.
*/
#ifndef LIGHTGBM_PAIRWISE_RANKING_FEATURE_GROUP_H_
#define LIGHTGBM_PAIRWISE_RANKING_FEATURE_GROUP_H_
#include <cstdio>
#include <memory>
#include <utility>
#include <vector>
#include "feature_group.h"
namespace LightGBM {
/*!
 * \brief Feature group for pairwise ranking: stores the data of one pairwise
 *        feature group and provides some operations on it.
 */
class PairwiseRankingFeatureGroup: public FeatureGroup {
 public:
  /*!
   * \brief Constructor
   * \param other Existing feature group this pairwise group is created from
   *        (NOTE(review): which parts of it are copied is decided in the implementation file)
   * \param num_original_data Number of original data (presumably the count before pairing - TODO confirm)
   * \param is_first_or_second_in_pairing Mark whether features in this group belong to the first or second element in the pairing
   * \param num_pairs Number of pairs (presumably the number of pairwise data - TODO confirm)
   * \param paired_ranking_item_index_map Pairwise data index to original data indices for ranking with pairwise features
   */
  PairwiseRankingFeatureGroup(
    const FeatureGroup& other,
    int num_original_data,
    const int is_first_or_second_in_pairing,
    int num_pairs,
    const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map);

  // The two constructors below are not implemented yet and are kept as
  // commented-out placeholders.
  //
  // Constructor from memory when data is present
  //   memory             Pointer of memory
  //   num_all_data       Number of global data
  //   local_used_indices Local used indices, empty means using all data
  //   group_id           Id of group
  //
  // PairwiseRankingFeatureGroup(const void* memory,
  //                             data_size_t num_all_data,
  //                             const std::vector<data_size_t>& local_used_indices,
  //                             int group_id) {
  //   // TODO(shiyu1994)
  // }
  //
  // Constructor from definition in memory (without data)
  //   memory             Pointer of memory
  //   local_used_indices Local used indices, empty means using all data
  //
  // PairwiseRankingFeatureGroup(const void* memory, data_size_t num_data, int group_id): FeatureGroup(memory, num_data, group_id) {
  //   // TODO(shiyu1994)
  // }

  /*! \brief Destructor */
  ~PairwiseRankingFeatureGroup() {}

  /*!
   * \brief Load the overall definition of the feature group from binary serialized data
   * \param memory Pointer of memory (currently unused)
   * \param group_id Id of group (currently unused)
   * \return Always nullptr for now; loading is not implemented yet
   */
  const char* LoadDefinitionFromMemory(const void* /*memory*/, int /*group_id*/) {
    // TODO(shiyu1994)
    return nullptr;
  }

  /*!
   * \brief Iterator over one sub-feature of this group
   * \param sub_feature Index of the sub-feature (currently unused)
   * \return Always nullptr for now; not implemented yet
   */
  BinIterator* SubFeatureIterator(int /*sub_feature*/) {
    // TODO(shiyu1994)
    return nullptr;
  }

  /*!
   * \brief Finish loading of the bin data
   *
   * Checks that the group is not multi-value, then forwards to the
   * single bin data container.
   */
  void FinishLoad() {
    CHECK(!is_multi_val_);
    bin_data_->FinishLoad();
  }

  /*!
   * \brief Iterator over the whole feature group
   * \return Always nullptr for now; not implemented yet
   */
  BinIterator* FeatureGroupIterator() {
    // TODO(shiyu1994)
    return nullptr;
  }

  /*!
   * \brief Push one already-binned record into the bin data
   * \param tid Thread id
   * \param sub_feature_idx Index of the subfeature
   * \param line_idx Index of record
   * \param bin feature bin value of record
   */
  void PushBinData(int tid, int sub_feature_idx, data_size_t line_idx, uint32_t bin) {
    const auto& mapper = bin_mappers_[sub_feature_idx];

    // Records falling into the most frequent bin are not pushed at all.
    if (bin == mapper->GetMostFreqBin()) {
      return;
    }

    // When the most frequent bin is bin 0, the remaining bins are shifted down by one.
    if (mapper->GetMostFreqBin() == 0) {
      bin -= 1;
    }

    if (is_multi_val_) {
      // Each sub-feature has its own container; the pushed value is shifted up by one.
      multi_bin_data_[sub_feature_idx]->Push(tid, line_idx, bin + 1);
      return;
    }

    // Single shared container: add this sub-feature's offset to the bin value.
    bin += bin_offsets_[sub_feature_idx];
    bin_data_->Push(tid, line_idx, bin);
  }

 protected:
  /*!
   * \brief Create the bin data container(s) of this group (overrides the FeatureGroup version)
   * \param num_data Number of data
   * \param is_multi_val Whether to use multi-value bin data
   * \param force_dense Force the dense representation
   * \param force_sparse Force the sparse representation
   */
  void CreateBinData(int num_data, bool is_multi_val, bool force_dense, bool force_sparse) override;

  /*! \brief Pairwise data index to original data indices for ranking with pairwise features */
  const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map_;
  /*! \brief Number of pairwise data */
  data_size_t num_data_;
  /*! \brief Mark whether features in this group belong to the first or second element in the pairing */
  const int is_first_or_second_in_pairing_;
};
/*! \brief One differential feature group in pairwise ranking */
class PairwiseRankingDifferentialFeatureGroup: public PairwiseRankingFeatureGroup {
 public:
  /*!
   * \brief Constructor
   * \param other Existing feature group this differential group is created from
   *        (NOTE(review): which parts of it are copied is decided in the implementation file)
   * \param num_original_data Number of original data (presumably the count before pairing - TODO confirm)
   * \param is_first_or_second_in_pairing Mark whether features in this group belong to the first or second element in the pairing
   * \param num_pairs Number of pairs (presumably the number of pairwise data - TODO confirm)
   * \param paired_ranking_item_index_map Pairwise data index to original data indices for ranking with pairwise features
   * \param diff_feature_bin_mappers Bin mappers of the differential features; taken by non-const
   *        reference (NOTE(review): presumably ownership is moved into this group - confirm)
   * \param ori_feature_bin_mappers Bin mappers of the original features; taken by non-const
   *        reference (NOTE(review): presumably ownership is moved into this group - confirm)
   */
  PairwiseRankingDifferentialFeatureGroup(
    const FeatureGroup& other,
    int num_original_data,
    const int is_first_or_second_in_pairing,
    int num_pairs,
    const std::pair<data_size_t, data_size_t>* paired_ranking_item_index_map,
    std::vector<std::unique_ptr<BinMapper>>& diff_feature_bin_mappers,
    std::vector<std::unique_ptr<BinMapper>>& ori_feature_bin_mappers);

  /*! \brief Destructor */
  ~PairwiseRankingDifferentialFeatureGroup() {}

 private:
  /*!
   * \brief Create the bin data container(s) of this group (overrides the PairwiseRankingFeatureGroup version)
   * \param num_data Number of data
   * \param is_multi_val Whether to use multi-value bin data
   * \param force_dense Force the dense representation
   * \param force_sparse Force the sparse representation
   */
  void CreateBinData(int num_data, bool is_multi_val, bool force_dense, bool force_sparse) override;

  /*! \brief Bin mappers of the differential features, owned by this group */
  std::vector<std::unique_ptr<const BinMapper>> diff_feature_bin_mappers_;
  /*! \brief Bin mappers of the original features, owned by this group */
  std::vector<std::unique_ptr<const BinMapper>> ori_feature_bin_mappers_;
};
} // namespace LightGBM
#endif // LIGHTGBM_PAIRWISE_RANKING_FEATURE_GROUP_H_