From e4e281d896abc4d5c400325d83aeeba49b07fc96 Mon Sep 17 00:00:00 2001 From: YueW <45946325+Tanya-W@users.noreply.github.com> Date: Wed, 30 Nov 2022 18:35:39 +0800 Subject: [PATCH] [fix](inverted/bitmap index) fix apply bitmap index with bloom filter which is built by runtime filter, and support optimization for bitmap index no need to read raw data (#1188) --- be/src/exec/olap_common.h | 4 ++++ .../rowset/segment_v2/segment_iterator.cpp | 20 +++++++++++-------- be/src/vec/exec/scan/vscan_node.cpp | 15 ++++++++++---- be/src/vec/exec/scan/vscan_node.h | 1 + be/src/vec/exec/volap_scan_node.cpp | 15 ++++++++++---- be/src/vec/exec/volap_scan_node.h | 1 + 6 files changed, 40 insertions(+), 16 deletions(-) diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index 6898a64ed77ab1..c3e4ddbfe00149 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -330,6 +330,10 @@ class ColumnValueRange { _marked_runtime_filter_predicate = is_runtime_filter_predicate; } + bool get_marked_by_runtime_filter() const { + return _marked_runtime_filter_predicate; + } + TCompoundType::type get_compound_type() const; int scale() const { return _scale; } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index d63f86a66656d9..caba91cc63fe2c 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -465,7 +465,7 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row RowRanges zone_map_row_ranges = RowRanges::create_single(num_rows()); // second filter data by zone map for (auto& cid : cids) { - if (_inverted_index_iterators[cid] != nullptr && field_is_slice_type(_schema.column(cid)->type())) { + if (_inverted_index_iterators[cid] != nullptr && field_is_slice_type(_schema.column(cid)->type())) { continue; } // get row ranges by zone map of this column, @@ -521,17 +521,18 @@ Status SegmentIterator::_apply_bitmap_index() { for (auto pred : _col_predicates) { int32_t unique_id = _schema.unique_id(pred->column_id()); if (_bitmap_index_iterators.count(unique_id) < 1 || - _bitmap_index_iterators[unique_id] == nullptr) { + _bitmap_index_iterators[unique_id] == nullptr || + pred->type() == PredicateType::BF) { // no bitmap index for this column remaining_predicates.push_back(pred); } else { RETURN_IF_ERROR(pred->evaluate(_bitmap_index_iterators[unique_id], _segment->num_rows(), &_row_bitmap)); - // if (_check_column_pred_all_push_down(pred) && - // !pred->predicate_params()->marked_by_runtime_filter) { - // _need_read_data_indices[unique_id] = false; - // } + if (_check_column_pred_all_push_down(pred) && + !pred->predicate_params()->marked_by_runtime_filter) { + _need_read_data_indices[unique_id] = false; + } if (_row_bitmap.isEmpty()) { break; // all rows have been pruned, no need to process further predicates @@ -592,6 +593,7 @@ Status SegmentIterator::_apply_inverted_index() { // 2. equal or range for fulltext index // 3. is_null or is_not_null predicate in OrPredicate // 4. in_list or not_in_list predicate produced by runtime filter + // 5. bloom filter predicate remaining_predicates.push_back(pred); } else { roaring::Roaring bitmap = _row_bitmap; @@ -1067,6 +1069,7 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred) { // 1. this column without inverted index // 2. equal or range qeury for fulltext index // 3. is_null or is_not_null predicate + // 4. bloom filter predicate return false; } return true; @@ -1128,9 +1131,10 @@ Status SegmentIterator::_apply_index_in_compound() { } for (auto pred : _all_compound_col_predicates) { - int32_t unique_id = _schema.unique_id(pred->column_id()); - if (_check_column_pred_all_push_down(pred, true) && + if (_remaining_vconjunct_root != nullptr && + _check_column_pred_all_push_down(pred, true) && !pred->predicate_params()->marked_by_runtime_filter) { + int32_t unique_id = _schema.unique_id(pred->column_id()); _need_read_data_indices[unique_id] = false; } } diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index f55cefa2ad5809..960622557c7df0 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -481,7 +481,7 @@ VExpr* VScanNode::_normalize_predicate(VExpr* conjunct_expr_root) { if (pdt == PushDownType::UNACCEPTABLE && is_compound_predicate) { std::vector column_value_rangs; _normalize_compound_predicate(cur_expr, *(_vconjunct_ctx_ptr.get()), - &pdt, &column_value_rangs, + &pdt, is_runtimer_filter_predicate, &column_value_rangs, in_predicate_checker, eq_predicate_checker); _compound_value_ranges.push_back(column_value_rangs); return conjunct_expr_root; @@ -891,6 +891,7 @@ Status VScanNode::_normalize_match_predicate(VExpr* expr, VExprContext* expr_ctx Status VScanNode::_normalize_compound_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, PushDownType* pdt, + bool is_runtimer_filter_predicate, std::vector* column_value_rangs, const std::function&, const VSlotRef**, VExpr**)>& in_predicate_checker, const std::function&, const VSlotRef**, VExpr**)>& eq_predicate_checker) { @@ -907,7 +908,10 @@ Status VScanNode::_normalize_compound_predicate(vectorized::VExpr* expr, ColumnValueRangeType active_range = *range_on_slot; // copy, in order not to affect the range in the _colname_to_value_range std::visit( [&](auto& value_range) { - _normalize_binary_in_compound_predicate( + Defer mark_runtime_filter_flag {[&]() { + value_range.mark_runtime_filter_predicate(is_runtimer_filter_predicate); + }}; + _normalize_binary_in_compound_predicate( child_expr, expr_ctx, slot, value_range, pdt, _get_compound_type_by_fn_name(compound_fn_name)); }, @@ -923,7 +927,10 @@ Status VScanNode::_normalize_compound_predicate(vectorized::VExpr* expr, ColumnValueRangeType active_range = *range_on_slot; // copy, in order not to affect the range in the _colname_to_value_range std::visit( [&](auto& value_range) { - _normalize_match_in_compound_predicate( + Defer mark_runtime_filter_flag {[&]() { + value_range.mark_runtime_filter_predicate(is_runtimer_filter_predicate); + }}; + _normalize_match_in_compound_predicate( child_expr, expr_ctx, slot, value_range, pdt, _get_compound_type_by_fn_name(compound_fn_name)); }, @@ -934,7 +941,7 @@ Status VScanNode::_normalize_compound_predicate(vectorized::VExpr* expr, } else if (TExprNodeType::COMPOUND_PRED == child_expr->node_type()) { _normalize_compound_predicate( child_expr, expr_ctx, - pdt, column_value_rangs, + pdt, is_runtimer_filter_predicate, column_value_rangs, in_predicate_checker, eq_predicate_checker); } } diff --git a/be/src/vec/exec/scan/vscan_node.h b/be/src/vec/exec/scan/vscan_node.h index 50c7922348d657..f91840c88cb26d 100644 --- a/be/src/vec/exec/scan/vscan_node.h +++ b/be/src/vec/exec/scan/vscan_node.h @@ -290,6 +290,7 @@ class VScanNode : public ExecNode { Status _normalize_compound_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, PushDownType* pdt, + bool is_runtimer_filter_predicate, std::vector* column_value_rangs, const std::function&, const VSlotRef**, VExpr**)>& in_predicate_checker, const std::function&, const VSlotRef**, VExpr**)>& eq_predicate_checker); diff --git a/be/src/vec/exec/volap_scan_node.cpp b/be/src/vec/exec/volap_scan_node.cpp index 5c4dd945d75ddf..ce9352b3483f5f 100644 --- a/be/src/vec/exec/volap_scan_node.cpp +++ b/be/src/vec/exec/volap_scan_node.cpp @@ -1926,6 +1926,7 @@ Status VOlapScanNode::_normalize_match_predicate(VExpr* expr, VExprContext* expr Status VOlapScanNode::_normalize_compound_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, bool* push_down, + bool is_runtimer_filter_predicate, std::vector* column_value_rangs, const std::function&, const VSlotRef**, VExpr**)>& in_predicate_checker, const std::function&, const VSlotRef**, VExpr**)>& eq_predicate_checker) { @@ -1942,7 +1943,10 @@ Status VOlapScanNode::_normalize_compound_predicate(vectorized::VExpr* expr, ColumnValueRangeType active_range = *range_on_slot; // copy, in order not to affect the range in the _colname_to_value_range std::visit( [&](auto& value_range) { - _normalize_binary_in_compound_predicate( + Defer mark_runtime_filter_flag {[&]() { + value_range.mark_runtime_filter_predicate(is_runtimer_filter_predicate); + }}; + _normalize_binary_in_compound_predicate( child_expr, expr_ctx, slot, value_range, push_down, _get_compound_type_by_fn_name(compound_fn_name)); }, @@ -1958,7 +1962,10 @@ Status VOlapScanNode::_normalize_compound_predicate(vectorized::VExpr* expr, ColumnValueRangeType active_range = *range_on_slot; // copy, in order not to affect the range in the _colname_to_value_range std::visit( [&](auto& value_range) { - _normalize_match_in_compound_predicate( + Defer mark_runtime_filter_flag {[&]() { + value_range.mark_runtime_filter_predicate(is_runtimer_filter_predicate); + }}; + _normalize_match_in_compound_predicate( child_expr, expr_ctx, slot, value_range, push_down, _get_compound_type_by_fn_name(compound_fn_name)); }, @@ -1969,7 +1976,7 @@ Status VOlapScanNode::_normalize_compound_predicate(vectorized::VExpr* expr, } else if (TExprNodeType::COMPOUND_PRED == child_expr->node_type()) { _normalize_compound_predicate( child_expr, expr_ctx, - push_down, column_value_rangs, + push_down, is_runtimer_filter_predicate, column_value_rangs, in_predicate_checker, eq_predicate_checker); } } @@ -2267,7 +2274,7 @@ VExpr* VOlapScanNode::_normalize_predicate(RuntimeState* state, VExpr* conjunct_ if (!push_down && is_compound_predicate) { std::vector column_value_rangs; _normalize_compound_predicate(cur_expr, *(_vconjunct_ctx_ptr.get()), - &push_down, &column_value_rangs, + &push_down, is_runtimer_filter_predicate, &column_value_rangs, in_predicate_checker, eq_predicate_checker); _compound_value_ranges.push_back(column_value_rangs); return conjunct_expr_root; diff --git a/be/src/vec/exec/volap_scan_node.h b/be/src/vec/exec/volap_scan_node.h index 814540beb15430..c2bac0c949c893 100644 --- a/be/src/vec/exec/volap_scan_node.h +++ b/be/src/vec/exec/volap_scan_node.h @@ -122,6 +122,7 @@ class VOlapScanNode final : public ScanNode { Status _normalize_compound_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, bool* push_down, + bool is_runtimer_filter_predicate, std::vector* column_value_rangs, const std::function&, const VSlotRef**, VExpr**)>& in_predicate_checker, const std::function&, const VSlotRef**, VExpr**)>& eq_predicate_checker);