Skip to content

Commit

Permalink
[fix](inverted/bitmap index) fix apply bitmap index with bloom filter…
Browse files Browse the repository at this point in the history
… which is built by a runtime filter, and support the optimization where a bitmap index makes reading the raw data unnecessary (apache#1188)
  • Loading branch information
Tanya-W committed Nov 30, 2022
1 parent 938d6bd commit e4e281d
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 16 deletions.
4 changes: 4 additions & 0 deletions be/src/exec/olap_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,10 @@ class ColumnValueRange {
_marked_runtime_filter_predicate = is_runtime_filter_predicate;
}

// Returns the stored _marked_runtime_filter_predicate flag, i.e. whether this
// value range has been marked as a runtime filter predicate.
bool get_marked_by_runtime_filter() const {
return _marked_runtime_filter_predicate;
}

TCompoundType::type get_compound_type() const;

int scale() const { return _scale; }
Expand Down
20 changes: 12 additions & 8 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
RowRanges zone_map_row_ranges = RowRanges::create_single(num_rows());
// second filter data by zone map
for (auto& cid : cids) {
if (_inverted_index_iterators[cid] != nullptr && field_is_slice_type(_schema.column(cid)->type())) {
if (_inverted_index_iterators[cid] != nullptr && field_is_slice_type(_schema.column(cid)->type())) {
continue;
}
// get row ranges by zone map of this column,
Expand Down Expand Up @@ -521,17 +521,18 @@ Status SegmentIterator::_apply_bitmap_index() {
for (auto pred : _col_predicates) {
int32_t unique_id = _schema.unique_id(pred->column_id());
if (_bitmap_index_iterators.count(unique_id) < 1 ||
_bitmap_index_iterators[unique_id] == nullptr) {
_bitmap_index_iterators[unique_id] == nullptr ||
pred->type() == PredicateType::BF) {
// no bitmap index for this column, or the predicate is a bloom filter predicate
remaining_predicates.push_back(pred);
} else {
RETURN_IF_ERROR(pred->evaluate(_bitmap_index_iterators[unique_id], _segment->num_rows(),
&_row_bitmap));

// if (_check_column_pred_all_push_down(pred) &&
// !pred->predicate_params()->marked_by_runtime_filter) {
// _need_read_data_indices[unique_id] = false;
// }
if (_check_column_pred_all_push_down(pred) &&
!pred->predicate_params()->marked_by_runtime_filter) {
_need_read_data_indices[unique_id] = false;
}

if (_row_bitmap.isEmpty()) {
break; // all rows have been pruned, no need to process further predicates
Expand Down Expand Up @@ -592,6 +593,7 @@ Status SegmentIterator::_apply_inverted_index() {
// 2. equal or range for fulltext index
// 3. is_null or is_not_null predicate in OrPredicate
// 4. in_list or not_in_list predicate produced by runtime filter
// 5. bloom filter predicate
remaining_predicates.push_back(pred);
} else {
roaring::Roaring bitmap = _row_bitmap;
Expand Down Expand Up @@ -1067,6 +1069,7 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred) {
// 1. this column without inverted index
// 2. equal or range query for fulltext index
// 3. is_null or is_not_null predicate
// 4. bloom filter predicate
return false;
}
return true;
Expand Down Expand Up @@ -1128,9 +1131,10 @@ Status SegmentIterator::_apply_index_in_compound() {
}

for (auto pred : _all_compound_col_predicates) {
int32_t unique_id = _schema.unique_id(pred->column_id());
if (_check_column_pred_all_push_down(pred, true) &&
if (_remaining_vconjunct_root != nullptr &&
_check_column_pred_all_push_down(pred, true) &&
!pred->predicate_params()->marked_by_runtime_filter) {
int32_t unique_id = _schema.unique_id(pred->column_id());
_need_read_data_indices[unique_id] = false;
}
}
Expand Down
15 changes: 11 additions & 4 deletions be/src/vec/exec/scan/vscan_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ VExpr* VScanNode::_normalize_predicate(VExpr* conjunct_expr_root) {
if (pdt == PushDownType::UNACCEPTABLE && is_compound_predicate) {
std::vector<ColumnValueRangeType> column_value_rangs;
_normalize_compound_predicate(cur_expr, *(_vconjunct_ctx_ptr.get()),
&pdt, &column_value_rangs,
&pdt, is_runtimer_filter_predicate, &column_value_rangs,
in_predicate_checker, eq_predicate_checker);
_compound_value_ranges.push_back(column_value_rangs);
return conjunct_expr_root;
Expand Down Expand Up @@ -891,6 +891,7 @@ Status VScanNode::_normalize_match_predicate(VExpr* expr, VExprContext* expr_ctx
Status VScanNode::_normalize_compound_predicate(vectorized::VExpr* expr,
VExprContext* expr_ctx,
PushDownType* pdt,
bool is_runtimer_filter_predicate,
std::vector<ColumnValueRangeType>* column_value_rangs,
const std::function<bool(const std::vector<VExpr*>&, const VSlotRef**, VExpr**)>& in_predicate_checker,
const std::function<bool(const std::vector<VExpr*>&, const VSlotRef**, VExpr**)>& eq_predicate_checker) {
Expand All @@ -907,7 +908,10 @@ Status VScanNode::_normalize_compound_predicate(vectorized::VExpr* expr,
ColumnValueRangeType active_range = *range_on_slot; // copy, in order not to affect the range in the _colname_to_value_range
std::visit(
[&](auto& value_range) {
_normalize_binary_in_compound_predicate(
Defer mark_runtime_filter_flag {[&]() {
value_range.mark_runtime_filter_predicate(is_runtimer_filter_predicate);
}};
_normalize_binary_in_compound_predicate(
child_expr, expr_ctx, slot, value_range, pdt,
_get_compound_type_by_fn_name(compound_fn_name));
},
Expand All @@ -923,7 +927,10 @@ Status VScanNode::_normalize_compound_predicate(vectorized::VExpr* expr,
ColumnValueRangeType active_range = *range_on_slot; // copy, in order not to affect the range in the _colname_to_value_range
std::visit(
[&](auto& value_range) {
_normalize_match_in_compound_predicate(
Defer mark_runtime_filter_flag {[&]() {
value_range.mark_runtime_filter_predicate(is_runtimer_filter_predicate);
}};
_normalize_match_in_compound_predicate(
child_expr, expr_ctx, slot, value_range, pdt,
_get_compound_type_by_fn_name(compound_fn_name));
},
Expand All @@ -934,7 +941,7 @@ Status VScanNode::_normalize_compound_predicate(vectorized::VExpr* expr,
} else if (TExprNodeType::COMPOUND_PRED == child_expr->node_type()) {
_normalize_compound_predicate(
child_expr, expr_ctx,
pdt, column_value_rangs,
pdt, is_runtimer_filter_predicate, column_value_rangs,
in_predicate_checker, eq_predicate_checker);
}
}
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/exec/scan/vscan_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ class VScanNode : public ExecNode {
// Normalizes the children of a compound predicate expression into column
// value ranges collected in `column_value_rangs`.
// `is_runtimer_filter_predicate` is applied to each per-child value range via
// mark_runtime_filter_predicate() and is forwarded unchanged when recursing
// into nested compound predicates.
// NOTE(review): the exact contract of `pdt` and of the two checker callbacks
// is not visible from this declaration -- confirm against the definition.
Status _normalize_compound_predicate(vectorized::VExpr* expr,
VExprContext* expr_ctx,
PushDownType* pdt,
bool is_runtimer_filter_predicate,
std::vector<ColumnValueRangeType>* column_value_rangs,
const std::function<bool(const std::vector<VExpr*>&, const VSlotRef**, VExpr**)>& in_predicate_checker,
const std::function<bool(const std::vector<VExpr*>&, const VSlotRef**, VExpr**)>& eq_predicate_checker);
Expand Down
15 changes: 11 additions & 4 deletions be/src/vec/exec/volap_scan_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1926,6 +1926,7 @@ Status VOlapScanNode::_normalize_match_predicate(VExpr* expr, VExprContext* expr
Status VOlapScanNode::_normalize_compound_predicate(vectorized::VExpr* expr,
VExprContext* expr_ctx,
bool* push_down,
bool is_runtimer_filter_predicate,
std::vector<ColumnValueRangeType>* column_value_rangs,
const std::function<bool(const std::vector<VExpr*>&, const VSlotRef**, VExpr**)>& in_predicate_checker,
const std::function<bool(const std::vector<VExpr*>&, const VSlotRef**, VExpr**)>& eq_predicate_checker) {
Expand All @@ -1942,7 +1943,10 @@ Status VOlapScanNode::_normalize_compound_predicate(vectorized::VExpr* expr,
ColumnValueRangeType active_range = *range_on_slot; // copy, in order not to affect the range in the _colname_to_value_range
std::visit(
[&](auto& value_range) {
_normalize_binary_in_compound_predicate(
Defer mark_runtime_filter_flag {[&]() {
value_range.mark_runtime_filter_predicate(is_runtimer_filter_predicate);
}};
_normalize_binary_in_compound_predicate(
child_expr, expr_ctx, slot, value_range, push_down,
_get_compound_type_by_fn_name(compound_fn_name));
},
Expand All @@ -1958,7 +1962,10 @@ Status VOlapScanNode::_normalize_compound_predicate(vectorized::VExpr* expr,
ColumnValueRangeType active_range = *range_on_slot; // copy, in order not to affect the range in the _colname_to_value_range
std::visit(
[&](auto& value_range) {
_normalize_match_in_compound_predicate(
Defer mark_runtime_filter_flag {[&]() {
value_range.mark_runtime_filter_predicate(is_runtimer_filter_predicate);
}};
_normalize_match_in_compound_predicate(
child_expr, expr_ctx, slot, value_range, push_down,
_get_compound_type_by_fn_name(compound_fn_name));
},
Expand All @@ -1969,7 +1976,7 @@ Status VOlapScanNode::_normalize_compound_predicate(vectorized::VExpr* expr,
} else if (TExprNodeType::COMPOUND_PRED == child_expr->node_type()) {
_normalize_compound_predicate(
child_expr, expr_ctx,
push_down, column_value_rangs,
push_down, is_runtimer_filter_predicate, column_value_rangs,
in_predicate_checker, eq_predicate_checker);
}
}
Expand Down Expand Up @@ -2267,7 +2274,7 @@ VExpr* VOlapScanNode::_normalize_predicate(RuntimeState* state, VExpr* conjunct_
if (!push_down && is_compound_predicate) {
std::vector<ColumnValueRangeType> column_value_rangs;
_normalize_compound_predicate(cur_expr, *(_vconjunct_ctx_ptr.get()),
&push_down, &column_value_rangs,
&push_down, is_runtimer_filter_predicate, &column_value_rangs,
in_predicate_checker, eq_predicate_checker);
_compound_value_ranges.push_back(column_value_rangs);
return conjunct_expr_root;
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/exec/volap_scan_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ class VOlapScanNode final : public ScanNode {
// Normalizes the children of a compound predicate expression into column
// value ranges collected in `column_value_rangs`.
// `is_runtimer_filter_predicate` is applied to each per-child value range via
// mark_runtime_filter_predicate() and is forwarded unchanged when recursing
// into nested compound predicates.
// NOTE(review): the exact contract of `push_down` and of the two checker
// callbacks is not visible from this declaration -- confirm against the
// definition.
Status _normalize_compound_predicate(vectorized::VExpr* expr,
VExprContext* expr_ctx,
bool* push_down,
bool is_runtimer_filter_predicate,
std::vector<ColumnValueRangeType>* column_value_rangs,
const std::function<bool(const std::vector<VExpr*>&, const VSlotRef**, VExpr**)>& in_predicate_checker,
const std::function<bool(const std::vector<VExpr*>&, const VSlotRef**, VExpr**)>& eq_predicate_checker);
Expand Down

0 comments on commit e4e281d

Please sign in to comment.