From 5da846a4a867a0770cda617be8667716f25ecab6 Mon Sep 17 00:00:00 2001 From: Dejan Mircevski Date: Sun, 8 Aug 2021 07:16:55 +0200 Subject: [PATCH 1/4] cql3: Add statement_restrictions::to_string Useful for error messages and debugging. Signed-off-by: Dejan Mircevski --- cql3/restrictions/statement_restrictions.cc | 4 ++++ cql3/restrictions/statement_restrictions.hh | 2 ++ 2 files changed, 6 insertions(+) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 9cf0305aa7fb..5891d6b8a377 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1672,5 +1672,9 @@ std::vector statement_restrictions::get_local_index_clu return get_single_column_clustering_bounds(options, idx_tbl_schema, *_idx_tbl_ck_prefix); } +sstring statement_restrictions::to_string() const { + return _where ? expr::to_string(*_where) : ""; +} + } // namespace restrictions } // namespace cql3 diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 11ad70f4ad91..443703dd60ec 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -520,6 +520,8 @@ public: /// Calculates clustering ranges for querying a local-index table. std::vector get_local_index_clustering_ranges( const query_options& options, const schema& idx_tbl_schema) const; + + sstring to_string() const; }; } From ba55769f808bc15cd32083943f188ca868f0fdfe Mon Sep 17 00:00:00 2001 From: Dejan Mircevski Date: Sun, 8 Aug 2021 07:23:44 +0200 Subject: [PATCH 2/4] test: Use ALLOW FILTERING more strictly Prepare for the upcoming strict ALLOW FILTERING check by modifying unit-test queries that need it. Current code allows such queries both with and without ALLOW FILTERING; future code will reject them without ALLOW FILTERING. Signed-off-by: Dejan Mircevski --- test/boost/cql_query_group_test.cc | 4 +- test/boost/cql_query_test.cc | 16 ++-- test/boost/filtering_test.cc | 20 ++-- test/boost/large_paging_state_test.cc | 4 +- test/boost/query_processor_test.cc | 2 +- test/boost/restrictions_test.cc | 129 +++++++++++++------------- test/boost/secondary_index_test.cc | 20 ++-- 7 files changed, 98 insertions(+), 97 deletions(-) diff --git a/test/boost/cql_query_group_test.cc b/test/boost/cql_query_group_test.cc index 1f4f96b88185..286c0fb8fb9c 100644 --- a/test/boost/cql_query_group_test.cc +++ b/test/boost/cql_query_group_test.cc @@ -105,7 +105,7 @@ SEASTAR_TEST_CASE(test_group_by_syntax) { BOOST_REQUIRE_EXCEPTION( e.execute_cql("select * from t1 where p1 > 0 group by p2 allow filtering").get(), ire, order); BOOST_REQUIRE_EXCEPTION( - e.execute_cql("select * from t1 where (c1,c2) > (0,0) group by p1, p2, c3").get(), ire, order); + e.execute_cql("select * from t1 where (c1,c2) > (0,0) group by p1, p2, c3 allow filtering").get(), ire, order); BOOST_REQUIRE_EXCEPTION( e.execute_cql("select * from t1 where p1>0 and p2=0 group by c1 allow filtering").get(), ire, order); // Even when GROUP BY lists all primary-key columns: @@ -224,7 +224,7 @@ SEASTAR_TEST_CASE(test_group_by_text_key) { cquery_nofail(e, "insert into t2 (p, c1, c2, v) values (' ', 'a', 'b', 40)"); require_rows(e, "select avg(v) from t2 group by p", {{I(25), T(" ")}}); require_rows(e, "select avg(v) from t2 group by p, c1", {{I(15), T(" "), T("")}, {I(35), T(" "), T("a")}}); - require_rows(e, "select sum(v) from t2 where c1='' group by p, c2", + require_rows(e, "select sum(v) from t2 where c1='' group by p, c2 allow filtering", {{I(10), T(" "), T("")}, {I(20), T(" "), T("b")}}); return make_ready_future<>(); }); diff --git a/test/boost/cql_query_test.cc b/test/boost/cql_query_test.cc index 0e116aa6c0a4..2eed88f3bcec 100644 --- a/test/boost/cql_query_test.cc +++ b/test/boost/cql_query_test.cc @@ -249,7 +249,7 @@ SEASTAR_TEST_CASE(test_map_elements_validation) { SEASTAR_TEST_CASE(test_in_clause_validation) { return do_with_cql_env_thread([](cql_test_env& e) { auto test_inline = [&] (sstring value, bool should_throw) { - auto cql = sprint("SELECT r1 FROM tbl WHERE (c1,r1) IN ((1, '%s'))", value); + auto cql = sprint("SELECT r1 FROM tbl WHERE (c1,r1) IN ((1, '%s')) ALLOW FILTERING", value); if (should_throw) { BOOST_REQUIRE_THROW(e.execute_cql(cql).get(), exceptions::invalid_request_exception); } else { @@ -260,7 +260,7 @@ SEASTAR_TEST_CASE(test_in_clause_validation) { test_inline("definietly not a date value", true); test_inline("2015-05-03", false); e.execute_cql("CREATE TABLE tbl2 (p1 int, c1 int, r1 text, PRIMARY KEY (p1, c1,r1))").get(); - auto id = e.prepare("SELECT r1 FROM tbl2 WHERE (c1,r1) IN ?").get0(); + auto id = e.prepare("SELECT r1 FROM tbl2 WHERE (c1,r1) IN ? ALLOW FILTERING").get0(); auto test_bind = [&] (sstring value, bool should_throw) { auto my_tuple_type = tuple_type_impl::get_instance({int32_type, utf8_type}); auto my_list_type = list_type_impl::get_instance(my_tuple_type, true); @@ -449,7 +449,7 @@ SEASTAR_TEST_CASE(test_select_statement) { }); }).then([&e] { // Test full partition range, singular clustering range - return e.execute_cql("select * from cf where c1 = 1 and c2 = 2;").then([] (shared_ptr msg) { + return e.execute_cql("select * from cf where c1 = 1 and c2 = 2 allow filtering;").then([] (shared_ptr msg) { assert_that(msg).is_rows() .with_size(3) .with_row({ @@ -2405,14 +2405,14 @@ SEASTAR_TEST_CASE(test_in_restriction) { e.execute_cql("insert into tir2 (p1, c1, r1) values (1, 2, 3);").get(); e.execute_cql("insert into tir2 (p1, c1, r1) values (2, 3, 4);").get(); { - auto msg = e.execute_cql("select r1 from tir2 where (c1,r1) in ((0, 1),(1,2),(0,1),(1,2),(3,3));").get0(); + auto msg = e.execute_cql("select r1 from tir2 where (c1,r1) in ((0, 1),(1,2),(0,1),(1,2),(3,3)) allow filtering;").get0(); assert_that(msg).is_rows().with_rows({ {int32_type->decompose(1)}, {int32_type->decompose(2)}, }); } { - auto prepared_id = e.prepare("select r1 from tir2 where (c1,r1) in ?;").get0(); + auto prepared_id = e.prepare("select r1 from tir2 where (c1,r1) in ? allow filtering;").get0(); auto my_tuple_type = tuple_type_impl::get_instance({int32_type,int32_type}); auto my_list_type = list_type_impl::get_instance(my_tuple_type, true); std::vector native_tuples = { @@ -4523,7 +4523,7 @@ SEASTAR_TEST_CASE(test_select_serial_consistency) { } }; check_fails("select * from t allow filtering"); - check_fails("select * from t where b > 0"); + check_fails("select * from t where b > 0 allow filtering"); check_fails("select * from t where a in (1, 3)"); prepared_on_shard(e, "select * from t where a = 1", {}, {{I(1), I(1)}, {I(1), I(2)}}, db::consistency_level::SERIAL); }); @@ -4580,8 +4580,8 @@ SEASTAR_TEST_CASE(test_impossible_where) { cquery_nofail(e, "INSERT INTO t2(p,c) VALUES (0, 0)"); cquery_nofail(e, "INSERT INTO t2(p,c) VALUES (1, 10)"); cquery_nofail(e, "INSERT INTO t2(p,c) VALUES (2, 20)"); - require_rows(e, "SELECT * FROM t2 WHERE c>10 AND c<10", {}); - require_rows(e, "SELECT * FROM t2 WHERE c>=10 AND c<=0", {}); + require_rows(e, "SELECT * FROM t2 WHERE c>10 AND c<10 ALLOW FILTERING", {}); + require_rows(e, "SELECT * FROM t2 WHERE c>=10 AND c<=0 ALLOW FILTERING", {}); }); } diff --git a/test/boost/filtering_test.cc b/test/boost/filtering_test.cc index db73f7571d10..0c32b64bfcb4 100644 --- a/test/boost/filtering_test.cc +++ b/test/boost/filtering_test.cc @@ -198,7 +198,7 @@ SEASTAR_TEST_CASE(test_allow_filtering_multi_column) { e.execute_cql("INSERT INTO t (a,b,c,d,e) VALUES (1, 2, 1, 2, 25)").get(); e.execute_cql("INSERT INTO t (a,b,c,d,e) VALUES (1, 2, 1, 3, 35)").get(); - auto msg = e.execute_cql("SELECT * FROM t WHERE (c, d) = (1, 2)").get0(); + auto msg = e.execute_cql("SELECT * FROM t WHERE (c, d) = (1, 2) ALLOW FILTERING").get0(); assert_that(msg).is_rows().with_rows_ignore_order({ { int32_type->decompose(1), @@ -216,7 +216,7 @@ SEASTAR_TEST_CASE(test_allow_filtering_multi_column) { }, }); - msg = e.execute_cql("SELECT * FROM t WHERE (c, d) IN ((1, 2), (1,3), (1,4))").get0(); + msg = e.execute_cql("SELECT * FROM t WHERE (c, d) IN ((1, 2), (1,3), (1,4)) ALLOW FILTERING").get0(); assert_that(msg).is_rows().with_rows_ignore_order({ { int32_type->decompose(1), @@ -241,7 +241,7 @@ SEASTAR_TEST_CASE(test_allow_filtering_multi_column) { }, }); - msg = e.execute_cql("SELECT * FROM t WHERE (c, d) < (1, 3)").get0(); + msg = e.execute_cql("SELECT * FROM t WHERE (c, d) < (1, 3) ALLOW FILTERING").get0(); assert_that(msg).is_rows().with_rows_ignore_order({ { int32_type->decompose(1), @@ -266,7 +266,7 @@ SEASTAR_TEST_CASE(test_allow_filtering_multi_column) { }, }); - msg = e.execute_cql("SELECT * FROM t WHERE (c, d) < (1, 3) AND (c, d) > (1, 1)").get0(); + msg = e.execute_cql("SELECT * FROM t WHERE (c, d) < (1, 3) AND (c, d) > (1, 1) ALLOW FILTERING").get0(); assert_that(msg).is_rows().with_rows_ignore_order({ { int32_type->decompose(1), @@ -334,7 +334,7 @@ SEASTAR_TEST_CASE(test_allow_filtering_clustering_column) { int32_type->decompose(1) }}); - msg = e.execute_cql("SELECT * FROM t WHERE c = 2").get0(); + msg = e.execute_cql("SELECT * FROM t WHERE c = 2 ALLOW FILTERING").get0(); assert_that(msg).is_rows().with_rows({ { int32_type->decompose(1), @@ -348,7 +348,7 @@ SEASTAR_TEST_CASE(test_allow_filtering_clustering_column) { } }); - msg = e.execute_cql("SELECT * FROM t WHERE c > 2 AND c <= 4").get0(); + msg = e.execute_cql("SELECT * FROM t WHERE c > 2 AND c <= 4 ALLOW FILTERING").get0(); assert_that(msg).is_rows().with_rows({{ int32_type->decompose(1), int32_type->decompose(3), @@ -601,7 +601,7 @@ SEASTAR_TEST_CASE(test_allow_filtering_desc) { e.execute_cql("INSERT INTO t (a, b, c, d, e) VALUES (1, 2, 5, 1, 9)").get(); e.execute_cql("INSERT INTO t (a, b, c, d, e) VALUES (1, 2, 6, 7, 5)").get(); - auto msg = e.execute_cql("SELECT a, b, c, d, e FROM t WHERE c > 3").get0(); + auto msg = e.execute_cql("SELECT a, b, c, d, e FROM t WHERE c > 3 ALLOW FILTERING").get0(); assert_that(msg).is_rows().with_rows({ { int32_type->decompose(1), @@ -619,7 +619,7 @@ SEASTAR_TEST_CASE(test_allow_filtering_desc) { } }); - msg = e.execute_cql("SELECT a, b, c, d, e FROM t WHERE c < 4").get0(); + msg = e.execute_cql("SELECT a, b, c, d, e FROM t WHERE c < 4 ALLOW FILTERING").get0(); assert_that(msg).is_rows().with_rows({ { int32_type->decompose(1), @@ -637,7 +637,7 @@ SEASTAR_TEST_CASE(test_allow_filtering_desc) { } }); - msg = e.execute_cql("SELECT a, b, c, d, e FROM t WHERE c = 4").get0(); + msg = e.execute_cql("SELECT a, b, c, d, e FROM t WHERE c = 4 ALLOW FILTERING").get0(); assert_that(msg).is_rows().with_size(0); }); } @@ -1076,7 +1076,7 @@ SEASTAR_TEST_CASE(test_filtering_on_empty_partition_with_a_static_row) { cquery_nofail(e, "INSERT INTO t (p, s) VALUES (1, 1);"); auto msg = cquery_nofail(e, "SELECT * FROM t WHERE s = 2 ALLOW FILTERING;"); assert_that(msg).is_rows().is_empty(); - msg = cquery_nofail(e, "SELECT * FROM t WHERE c = 1"); + msg = cquery_nofail(e, "SELECT * FROM t WHERE c = 1 ALLOW FILTERING"); assert_that(msg).is_rows().is_empty(); cquery_nofail(e, "INSERT INTO t (p, c, s) VALUES (2, 2, 2);"); msg = cquery_nofail(e, "SELECT * FROM t WHERE s = 1 ALLOW FILTERING"); diff --git a/test/boost/large_paging_state_test.cc b/test/boost/large_paging_state_test.cc index d44814f6b0f2..e430fe2ae2a3 100644 --- a/test/boost/large_paging_state_test.cc +++ b/test/boost/large_paging_state_test.cc @@ -103,7 +103,7 @@ SEASTAR_TEST_CASE(test_use_high_bits_of_remaining_rows_in_paging_state_filtering auto qo = std::make_unique(db::consistency_level::LOCAL_ONE, std::vector{}, cql3::query_options::specific_options{5, nullptr, {}, api::new_timestamp()}); - auto msg = e.execute_cql("select * from test where ck > 10;", std::move(qo)).get0(); + auto msg = e.execute_cql("select * from test where ck > 10 allow filtering;", std::move(qo)).get0(); auto paging_state = extract_paging_state(msg); uint64_t rows_fetched = count_rows_fetched(msg); BOOST_REQUIRE_EQUAL(paging_state->get_remaining() + rows_fetched, query::max_rows); @@ -115,7 +115,7 @@ SEASTAR_TEST_CASE(test_use_high_bits_of_remaining_rows_in_paging_state_filtering while (has_more_pages(msg)) { qo = std::make_unique(db::consistency_level::LOCAL_ONE, std::vector{}, cql3::query_options::specific_options{5, paging_state, {}, api::new_timestamp()}); - msg = e.execute_cql("SELECT * FROM test where ck > 10;", std::move(qo)).get0(); + msg = e.execute_cql("SELECT * FROM test where ck > 10 allow filtering;", std::move(qo)).get0(); rows_fetched = count_rows_fetched(msg); test_remaining = test_remaining - rows_fetched; if (has_more_pages(msg)) { diff --git a/test/boost/query_processor_test.cc b/test/boost/query_processor_test.cc index abf8bde9e599..38eb0b5ee1b6 100644 --- a/test/boost/query_processor_test.cc +++ b/test/boost/query_processor_test.cc @@ -341,7 +341,7 @@ SEASTAR_TEST_CASE(test_select_full_scan_metrics) { // Filtered by ck but not filtered by pk auto stat_ps4 = qp.get_cql_stats().select_partition_range_scan; - qp.execute_internal("select * from ks.fsm where ck = 1;").get(); + qp.execute_internal("select * from ks.fsm where ck = 1 allow filtering;").get(); BOOST_CHECK_EQUAL(stat_ps4 + 1, qp.get_cql_stats().select_partition_range_scan); // Filtered by unindexed non-cluster column diff --git a/test/boost/restrictions_test.cc b/test/boost/restrictions_test.cc index 70eb643dfd83..9c3d9a00bd00 100644 --- a/test/boost/restrictions_test.cc +++ b/test/boost/restrictions_test.cc @@ -182,10 +182,11 @@ SEASTAR_THREAD_TEST_CASE(tuple_of_list) { cquery_nofail(e, "create table t (p int, l1 frozen>, l2 frozen>, primary key(p,l1,l2))"); cquery_nofail(e, "insert into t (p, l1, l2) values (1, [11,12], [101,102])"); cquery_nofail(e, "insert into t (p, l1, l2) values (2, [21,22], [201,202])"); - require_rows(e, "select * from t where (l1,l2)<([],[])", {}); - require_rows(e, "select l1 from t where (l1,l2)<([20],[200])", {{LI({11, 12})}}); - require_rows(e, "select l1 from t where (l1,l2)>=([11,12],[101,102])", {{LI({11, 12})}, {LI({21, 22})}}); - require_rows(e, "select l1 from t where (l1,l2)<([11,12],[101,103])", {{LI({11, 12})}}); + require_rows(e, "select * from t where (l1,l2)<([],[]) allow filtering", {}); + require_rows(e, "select l1 from t where (l1,l2)<([20],[200]) allow filtering", {{LI({11, 12})}}); + require_rows(e, "select l1 from t where (l1,l2)>=([11,12],[101,102]) allow filtering", + {{LI({11, 12})}, {LI({21, 22})}}); + require_rows(e, "select l1 from t where (l1,l2)<([11,12],[101,103]) allow filtering", {{LI({11, 12})}}); }).get(); } @@ -242,10 +243,10 @@ SEASTAR_THREAD_TEST_CASE(regular_col_slice_reversed) { do_with_cql_env_thread([](cql_test_env& e) { cquery_nofail(e, "create table t (p int, c int, primary key(p, c)) with clustering order by (c desc)"); cquery_nofail(e, "insert into t(p,c) values (1,11)"); - require_rows(e, "select c from t where c>10", {{I(11)}}); + require_rows(e, "select c from t where c>10 allow filtering", {{I(11)}}); cquery_nofail(e, "insert into t(p,c) values (1,12)"); - require_rows(e, "select c from t where c>10", {{I(11)}, {I(12)}}); - require_rows(e, "select c from t where c<100", {{I(11)}, {I(12)}}); + require_rows(e, "select c from t where c>10 allow filtering", {{I(11)}, {I(12)}}); + require_rows(e, "select c from t where c<100 allow filtering", {{I(11)}, {I(12)}}); }).get(); } @@ -311,7 +312,7 @@ SEASTAR_THREAD_TEST_CASE(null_rhs) { require_rows(e, "select * from t where pk1=0 and pk2=null", {}); BOOST_REQUIRE_EXCEPTION(q("select * from t where pk1=0 and pk2=0 and (ck1,ck2)>=(0,null)"), ire, message_contains(expect)); - require_rows(e, "select * from t where ck1=null", {}); + require_rows(e, "select * from t where ck1=null allow filtering", {}); require_rows(e, "select * from t where r=null and ck1=null allow filtering", {}); require_rows(e, "select * from t where pk1=0 and pk2=0 and ck1null and ck1('a',20)", {{F(2)}, {F(13)}}); - require_rows(e, "select p from t where (c1,c2)>=('a',20) and (c1,c2)<('b',3)", {{I(2)}}); - require_rows(e, "select * from t where (c1,c2)<('a',11)", {}); - require_rows(e, "select c1 from t where (c1,c2)<('a',12)", {{T("a")}}); - require_rows(e, "select c1 from t where (c1)>=('c')", {{T("c")}}); - require_rows(e, "select c1 from t where (c1,c2)<=('c',13)", {{T("a")}, {T("b")}, {T("c")}}); - require_rows(e, "select c1 from t where (c1,c2)>=('b',2) and (c1,c2)<=('b',2)", {{T("b")}}); - auto stmt = e.prepare("select c1 from t where (c1,c2)('a',20) allow filtering", {{F(2)}, {F(13)}}); + require_rows(e, "select p from t where (c1,c2)>=('a',20) and (c1,c2)<('b',3) allow filtering", {{I(2)}}); + require_rows(e, "select * from t where (c1,c2)<('a',11) allow filtering", {}); + require_rows(e, "select c1 from t where (c1,c2)<('a',12) allow filtering", {{T("a")}}); + require_rows(e, "select c1 from t where (c1)>=('c') allow filtering", {{T("c")}}); + require_rows(e, "select c1 from t where (c1,c2)<=('c',13) allow filtering", {{T("a")}, {T("b")}, {T("c")}}); + require_rows(e, "select c1 from t where (c1,c2)>=('b',2) and (c1,c2)<=('b',2) allow filtering", {{T("b")}}); + auto stmt = e.prepare("select c1 from t where (c1,c2)=?").get0(); + stmt = e.prepare("select c1 from t where (c1)>=? allow filtering").get0(); require_rows(e, stmt, {}, {make_tuple({utf8_type}, {sstring("c")})}, {{T("c")}}); require_rows(e, stmt, {}, {make_tuple({utf8_type}, {sstring("x")})}, {}); - stmt = e.prepare("select c1 from t where (c1)>=(:c1)").get0(); + stmt = e.prepare("select c1 from t where (c1)>=(:c1) allow filtering").get0(); require_rows(e, stmt, {{"c1"}}, {T("c")}, {{T("c")}}); require_rows(e, stmt, {{"c1"}}, {T("x")}, {}); }).get(); @@ -400,11 +401,11 @@ SEASTAR_THREAD_TEST_CASE(multi_col_slice_reversed) { cquery_nofail(e, "insert into t(p,c1,c2) values (1,11,21)"); cquery_nofail(e, "insert into t(p,c1,c2) values (1,12,22)"); cquery_nofail(e, "insert into t(p,c1,c2) values (1,12,23)"); - require_rows(e, "select c1 from t where (c1,c2)>(10,99)", {{I(11)}, {I(12)}, {I(12)}}); - require_rows(e, "select c1 from t where (c1,c2)<(12,0)", {{I(11)}}); - require_rows(e, "select c1 from t where (c1,c2)>(12,22)", {{I(12)}}); - require_rows(e, "select c1 from t where (c1)>(12)", {}); - require_rows(e, "select c1 from t where (c1)<=(12)", {{I(11)}, {I(12)}, {I(12)}}); + require_rows(e, "select c1 from t where (c1,c2)>(10,99) allow filtering", {{I(11)}, {I(12)}, {I(12)}}); + require_rows(e, "select c1 from t where (c1,c2)<(12,0) allow filtering", {{I(11)}}); + require_rows(e, "select c1 from t where (c1,c2)>(12,22) allow filtering", {{I(12)}}); + require_rows(e, "select c1 from t where (c1)>(12) allow filtering", {}); + require_rows(e, "select c1 from t where (c1)<=(12) allow filtering", {{I(11)}, {I(12)}, {I(12)}}); }).get(); } @@ -645,17 +646,17 @@ SEASTAR_THREAD_TEST_CASE(scalar_in) { cquery_nofail(e, "create table t (p int, c int, r float, s text static, primary key (p, c))"); require_rows(e, "select c from t where c in (11,12,13) allow filtering", {}); cquery_nofail(e, "insert into t(p,c) values (1,11)"); - require_rows(e, "select c from t where c in (11,12,13)", {{I(11)}}); + require_rows(e, "select c from t where c in (11,12,13) allow filtering", {{I(11)}}); cquery_nofail(e, "insert into t(p,c,r) values (1,11,21)"); cquery_nofail(e, "insert into t(p,c,r) values (2,12,22)"); cquery_nofail(e, "insert into t(p,c,r) values (3,13,23)"); cquery_nofail(e, "insert into t(p,c,r) values (4,14,24)"); cquery_nofail(e, "insert into t(p,c,r,s) values (4,15,24,'34')"); cquery_nofail(e, "insert into t(p,c,r,s) values (5,15,25,'35')"); - require_rows(e, "select c from t where c in (11,12,13)", {{I(11)}, {I(12)}, {I(13)}}); - require_rows(e, "select c from t where c in (11)", {{I(11)}}); - require_rows(e, "select c from t where c in (999)", {}); - require_rows(e, "select c from t where c in (11,999)", {{I(11)}}); + require_rows(e, "select c from t where c in (11,12,13) allow filtering", {{I(11)}, {I(12)}, {I(13)}}); + require_rows(e, "select c from t where c in (11) allow filtering", {{I(11)}}); + require_rows(e, "select c from t where c in (999) allow filtering", {}); + require_rows(e, "select c from t where c in (11,999) allow filtering", {{I(11)}}); require_rows(e, "select c from t where c in (11,12,13) and r in (21,24) allow filtering", {{I(11), F(21)}}); require_rows(e, "select c from t where c in (11,12,13) and r in (21,22) allow filtering", {{I(11), F(21)}, {I(12), F(22)}}); @@ -695,25 +696,25 @@ SEASTAR_THREAD_TEST_CASE(list_in) { cquery_nofail(e, "insert into t (p, c) values ([4], [41,42,43])"); cquery_nofail(e, "insert into t (p, c) values ([4], [])"); cquery_nofail(e, "insert into t (p, c) values ([5], [51,52,53])"); - require_rows(e, "select c from t where c in ([11,12],[11,13])", {}); - require_rows(e, "select c from t where c in ([11,12,13],[11,13,12])", {{LI({11,12,13})}}); - require_rows(e, "select c from t where c in ([11,12,13],[11,13,12],[41,42,43])", + require_rows(e, "select c from t where c in ([11,12],[11,13]) allow filtering", {}); + require_rows(e, "select c from t where c in ([11,12,13],[11,13,12]) allow filtering", {{LI({11,12,13})}}); + require_rows(e, "select c from t where c in ([11,12,13],[11,13,12],[41,42,43]) allow filtering", {{LI({11,12,13})}, {LI({41,42,43})}}); require_rows(e, "select c from t where p in ([1],[2],[4]) and c in ([11,12,13], [41,42,43])", {{LI({11,12,13})}, {LI({41,42,43})}}); - require_rows(e, "select c from t where c in ([],[11,13,12])", {{LI({})}}); + require_rows(e, "select c from t where c in ([],[11,13,12]) allow filtering", {{LI({})}}); }).get(); } SEASTAR_THREAD_TEST_CASE(set_in) { do_with_cql_env_thread([](cql_test_env& e) { cquery_nofail(e, "create table t (p frozen>, c frozen>, r text, primary key (p, c))"); - require_rows(e, "select * from t where c in ({222})", {}); + require_rows(e, "select * from t where c in ({222}) allow filtering", {}); cquery_nofail(e, "insert into t (p, c) values ({1,11}, {21,201})"); cquery_nofail(e, "insert into t (p, c, r) values ({1,11}, {22,202}, '2')"); - require_rows(e, "select * from t where c in ({222}, {21})", {}); - require_rows(e, "select c from t where c in ({222}, {21,201})", {{SI({21, 201})}}); - require_rows(e, "select c from t where c in ({22,202}, {21,201})", {{SI({21, 201})}, {SI({22, 202})}}); + require_rows(e, "select * from t where c in ({222}, {21}) allow filtering", {}); + require_rows(e, "select c from t where c in ({222}, {21,201}) allow filtering", {{SI({21, 201})}}); + require_rows(e, "select c from t where c in ({22,202}, {21,201}) allow filtering", {{SI({21, 201})}, {SI({22, 202})}}); require_rows(e, "select c from t where c in ({222}, {21,201}) and r='' allow filtering", {}); require_rows(e, "select c from t where c in ({222}, {21,201}) and r='x' allow filtering", {}); require_rows(e, "select c from t where c in ({22,202}, {21,201}) and r='2' allow filtering", @@ -728,13 +729,13 @@ SEASTAR_THREAD_TEST_CASE(map_in) { cquery_nofail(e, "create table t (p frozen>, c frozen>, r int, primary key(p, c))"); cquery_nofail(e, "insert into t (p, c) values ({1:1}, {10:10})"); cquery_nofail(e, "insert into t (p, c, r) values ({1:1}, {10:10,11:11}, 12)"); - require_rows(e, "select * from t where c in ({10:11},{10:11},{11:11})", {}); + require_rows(e, "select * from t where c in ({10:11},{10:11},{11:11}) allow filtering", {}); const auto my_map_type = map_type_impl::get_instance(int32_type, int32_type, true); const auto c1a = my_map_type->decompose(make_map_value(my_map_type, map_type_impl::native_type({{10, 10}}))); - require_rows(e, "select c from t where c in ({10:11}, {10:10}, {11:11})", {{c1a}}); + require_rows(e, "select c from t where c in ({10:11}, {10:10}, {11:11}) allow filtering", {{c1a}}); const auto c1b = my_map_type->decompose( make_map_value(my_map_type, map_type_impl::native_type({{10, 10}, {11, 11}}))); - require_rows(e, "select c from t where c in ({10:11}, {10:10}, {10:10,11:11})", + require_rows(e, "select c from t where c in ({10:11}, {10:10}, {10:10,11:11}) allow filtering", {{c1a}, {c1b}}); require_rows(e, "select c from t where c in ({10:11}, {10:10}, {10:10,11:11}) and r=12 allow filtering", {{c1b, I(12)}}); @@ -748,24 +749,24 @@ SEASTAR_THREAD_TEST_CASE(map_in) { SEASTAR_THREAD_TEST_CASE(multi_col_in) { do_with_cql_env_thread([](cql_test_env& e) { cquery_nofail(e, "create table t (pk int, ck1 int, ck2 float, r text, primary key (pk, ck1, ck2))"); - require_rows(e, "select ck1 from t where (ck1,ck2) in ((11,21),(12,22))", {}); + require_rows(e, "select ck1 from t where (ck1,ck2) in ((11,21),(12,22)) allow filtering", {}); cquery_nofail(e, "insert into t(pk,ck1,ck2) values (1,11,21)"); - require_rows(e, "select ck1 from t where (ck1,ck2) in ((11,21),(12,22))", {{I(11)}}); - require_rows(e, "select ck1 from t where (ck1,ck2) in ((11,21))", {{I(11)}}); + require_rows(e, "select ck1 from t where (ck1,ck2) in ((11,21),(12,22)) allow filtering", {{I(11)}}); + require_rows(e, "select ck1 from t where (ck1,ck2) in ((11,21)) allow filtering", {{I(11)}}); cquery_nofail(e, "insert into t(pk,ck1,ck2) values (2,12,22)"); - require_rows(e, "select ck1 from t where (ck1,ck2) in ((11,21),(12,22))", {{I(11)}, {I(12)}}); + require_rows(e, "select ck1 from t where (ck1,ck2) in ((11,21),(12,22)) allow filtering", {{I(11)}, {I(12)}}); cquery_nofail(e, "insert into t(pk,ck1,ck2) values (3,13,23)"); - require_rows(e, "select ck1 from t where (ck1,ck2) in ((11,21),(12,22))", {{I(11)}, {I(12)}}); - require_rows(e, "select ck1 from t where (ck1,ck2) in ((13,23))", {{I(13)}}); + require_rows(e, "select ck1 from t where (ck1,ck2) in ((11,21),(12,22)) allow filtering", {{I(11)}, {I(12)}}); + require_rows(e, "select ck1 from t where (ck1,ck2) in ((13,23)) allow filtering", {{I(13)}}); cquery_nofail(e, "insert into t(pk,ck1,ck2,r) values (4,13,23,'a')"); - require_rows(e, "select pk from t where (ck1,ck2) in ((13,23))", {{I(3)}, {I(4)}}); - require_rows(e, "select pk from t where (ck1) in ((13),(33),(44))", {{I(3)}, {I(4)}}); + require_rows(e, "select pk from t where (ck1,ck2) in ((13,23)) allow filtering", {{I(3)}, {I(4)}}); + require_rows(e, "select pk from t where (ck1) in ((13),(33),(44)) allow filtering", {{I(3)}, {I(4)}}); // TODO: uncomment when #6200 is fixed. // require_rows(e, "select pk from t where (ck1,ck2) in ((13,23)) and r='a' allow filtering", // {{I(4), I(13), F(23), T("a")}}); cquery_nofail(e, "delete from t where pk=4"); - require_rows(e, "select pk from t where (ck1,ck2) in ((13,23))", {{I(3)}}); - auto stmt = e.prepare("select ck1 from t where (ck1,ck2) in ?").get0(); + require_rows(e, "select pk from t where (ck1,ck2) in ((13,23)) allow filtering", {{I(3)}}); + auto stmt = e.prepare("select ck1 from t where (ck1,ck2) in ? allow filtering").get0(); auto bound_tuples = [] (std::vector> tuples) { const auto tuple_type = tuple_type_impl::get_instance({int32_type, float_type}); const auto list_type = list_type_impl::get_instance(tuple_type, true); @@ -781,21 +782,21 @@ SEASTAR_THREAD_TEST_CASE(multi_col_in) { require_rows(e, stmt, {}, {bound_tuples({{13, 13}, {12, 22}})}, {{I(12)}}); require_rows(e, stmt, {}, {bound_tuples({{12, 21}})}, {}); require_rows(e, stmt, {}, {bound_tuples({{12, 21}, {12, 21}, {13, 21}, {14, 21}})}, {}); - stmt = e.prepare("select ck1 from t where (ck1,ck2) in (?)").get0(); + stmt = e.prepare("select ck1 from t where (ck1,ck2) in (?) allow filtering").get0(); auto tpl = [] (int32_t e1, float e2) { return make_tuple({int32_type, float_type}, {e1, e2}); }; require_rows(e, stmt, {}, {tpl(11, 21)}, {{I(11)}}); require_rows(e, stmt, {}, {tpl(12, 22)}, {{I(12)}}); require_rows(e, stmt, {}, {tpl(12, 21)}, {}); - stmt = e.prepare("select ck1 from t where (ck1,ck2) in (:t1,:t2)").get0(); + stmt = e.prepare("select ck1 from t where (ck1,ck2) in (:t1,:t2) allow filtering").get0(); require_rows(e, stmt, {{"t1", "t2"}}, {tpl(11, 21), tpl(12, 22)}, {{I(11)}, {I(12)}}); require_rows(e, stmt, {{"t1", "t2"}}, {tpl(11, 21), tpl(11, 21)}, {{I(11)}}); require_rows(e, stmt, {{"t1", "t2"}}, {tpl(11, 21), tpl(99, 99)}, {{I(11)}}); require_rows(e, stmt, {{"t1", "t2"}}, {tpl(9, 9), tpl(99, 99)}, {}); // Parsing error: // stmt = e.prepare("select ck1 from t where (ck1,ck2) in ((13,23),:p1)").get0(); - stmt = e.prepare("select ck1 from t where (ck1,ck2) in ((13,23),(?,?))").get0(); + stmt = e.prepare("select ck1 from t where (ck1,ck2) in ((13,23),(?,?)) allow filtering").get0(); require_rows(e, stmt, {}, {I(0), F(0)}, {{I(13)}}); require_rows(e, stmt, {}, {I(11), F(21)}, {{I(11)}, {I(13)}}); }).get(); @@ -851,8 +852,8 @@ SEASTAR_THREAD_TEST_CASE(multi_eq_on_primary) { cquery_nofail(e, "insert into t (p, c) values (3, 13);"); require_rows(e, "select p from t where p=1 and p=1", {{I(1)}}); require_rows(e, "select p from t where p=1 and p=2", {}); - require_rows(e, "select c from t where c=11 and c=11", {{I(11)}}); - require_rows(e, "select c from t where c=11 and c=999", {}); + require_rows(e, "select c from t where c=11 and c=11 allow filtering", {{I(11)}}); + require_rows(e, "select c from t where c=11 and c=999 allow filtering", {}); cquery_nofail(e, "create table t2 (pk1 int, pk2 int, ck1 int, ck2 int, primary key ((pk1, pk2), ck1, ck2))"); cquery_nofail(e, "insert into t2 (pk1, pk2, ck1, ck2) values (1, 12, 21, 22);"); diff --git a/test/boost/secondary_index_test.cc b/test/boost/secondary_index_test.cc index a9f1bed25e89..92e9c74ad867 100644 --- a/test/boost/secondary_index_test.cc +++ b/test/boost/secondary_index_test.cc @@ -1832,7 +1832,7 @@ SEASTAR_TEST_CASE(test_filtering_indexed_column) { }); }); eventually([&] { - auto msg = cquery_nofail(e, "select d from ks.test_index where c > 25;"); + auto msg = cquery_nofail(e, "select d from ks.test_index where c > 25 allow filtering;"); assert_that(msg).is_rows().with_rows({{int32_type->decompose(44)}}); }); }); @@ -1850,16 +1850,16 @@ SEASTAR_TEST_CASE(indexed_multicolumn_where) { cquery_nofail(e, "create index i1 on t(c1)"); cquery_nofail(e, "insert into t(p, c1, c2) values (1, 11, 21)"); cquery_nofail(e, "insert into t(p, c1, c2) values (2, 12, 22)"); - eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,21)", {{I(11)}}); - eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,22)", {}); - eventually_require_rows(e, "select c1 from t where (c1)=(12)", {{I(12)}}); + eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,21) allow filtering", {{I(11)}}); + eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,22) allow filtering", {}); + eventually_require_rows(e, "select c1 from t where (c1)=(12) allow filtering", {{I(12)}}); cquery_nofail(e, "create index i2 on t(c2)"); - eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,21)", {{I(11)}}); - eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,22)", {}); - eventually_require_rows(e, "select c1 from t where (c1)=(12)", {{I(12)}}); + eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,21) allow filtering", {{I(11)}}); + eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,22) allow filtering", {}); + eventually_require_rows(e, "select c1 from t where (c1)=(12) allow filtering", {{I(12)}}); cquery_nofail(e, "drop index i1"); - eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,21)", {{I(11)}}); - eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,22)", {}); - eventually_require_rows(e, "select c1 from t where (c1)=(12)", {{I(12)}}); + eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,21) allow filtering", {{I(11)}}); + eventually_require_rows(e, "select c1 from t where (c1,c2)=(11,22) allow filtering", {}); + eventually_require_rows(e, "select c1 from t where (c1)=(12) allow filtering", {{I(12)}}); }); } From 5a4ac002c17a842f2e60a462dff233e8e695072a Mon Sep 17 00:00:00 2001 From: Dejan Mircevski Date: Wed, 25 Aug 2021 12:36:41 -0400 Subject: [PATCH 3/4] cql3: Track warnings in prepared_statement Preparation should be able to record warnings that make it back to the user via the query response. Signed-off-by: Dejan Mircevski --- cql3/query_processor.cc | 12 ++++++++++-- cql3/query_processor.hh | 9 +++++++-- cql3/statements/prepared_statement.hh | 1 + 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/cql3/query_processor.cc b/cql3/query_processor.cc index fdf1d68eb4c7..38842c4d2e0e 100644 --- a/cql3/query_processor.cc +++ b/cql3/query_processor.cc @@ -478,6 +478,7 @@ query_processor::execute_direct(const sstring_view& query_string, service::query tracing::trace(query_state.get_trace_state(), "Parsing a statement"); auto p = get_statement(query_string, query_state.get_client_state()); auto cql_statement = p->statement; + const auto warnings = std::move(p->warnings); if (cql_statement->get_bound_terms() != options.get_values_count()) { const auto msg = format("Invalid amount of bind variables: expected {:d} received {:d}", cql_statement->get_bound_terms(), @@ -492,8 +493,15 @@ query_processor::execute_direct(const sstring_view& query_string, service::query metrics.regularStatementsExecuted.inc(); #endif tracing::trace(query_state.get_trace_state(), "Processing a statement"); - return cql_statement->check_access(_proxy, query_state.get_client_state()).then([this, cql_statement, &query_state, &options] () mutable { - return process_authorized_statement(std::move(cql_statement), query_state, options); + return cql_statement->check_access(_proxy, query_state.get_client_state()).then( + [this, cql_statement, &query_state, &options, warnings = move(warnings)] () mutable { + return process_authorized_statement(std::move(cql_statement), query_state, options).then( + [warnings = move(warnings)] (::shared_ptr m) { + for (const auto& w : warnings) { + m->add_warning(w); + } + return make_ready_future<::shared_ptr>(m); + }); }); } diff --git a/cql3/query_processor.hh b/cql3/query_processor.hh index cc3d201e6f1c..b1181102677c 100644 --- a/cql3/query_processor.hh +++ b/cql3/query_processor.hh @@ -405,9 +405,14 @@ private: assert(bound_terms == prepared->bound_names.size()); return make_ready_future>(std::move(prepared)); }).then([&key, &id_getter, &client_state] (auto prep_ptr) { - return make_ready_future<::shared_ptr>( + const auto& warnings = prep_ptr->warnings; + const auto msg = ::make_shared(id_getter(key), std::move(prep_ptr), - client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK))); + client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK)); + for (const auto& w : warnings) { + msg->add_warning(w); + } + return make_ready_future<::shared_ptr>(std::move(msg)); }).handle_exception_type([&query_string] (typename prepared_statements_cache::statement_is_too_big&) { return make_exception_future<::shared_ptr>( prepared_statement_is_too_big(query_string)); diff --git a/cql3/statements/prepared_statement.hh b/cql3/statements/prepared_statement.hh index 152595472e23..00b0c46bc3b4 100644 --- a/cql3/statements/prepared_statement.hh +++ b/cql3/statements/prepared_statement.hh @@ -71,6 +71,7 @@ public: const seastar::shared_ptr statement; const std::vector> bound_names; std::vector partition_key_bind_indices; + std::vector warnings; prepared_statement(seastar::shared_ptr statement_, std::vector> bound_names_, std::vector partition_key_bind_indices); From 2f28f68e84b76e7d3eb3ce78b6200ec3f0c65ca9 Mon Sep 17 00:00:00 2001 From: Dejan Mircevski Date: Sun, 8 Aug 2021 07:43:45 +0200 Subject: [PATCH 4/4] cql3: Demand ALLOW FILTERING for unlimited, sliced partitions When a query requests a partition slice but doesn't limit the number of partitions, require that it also says ALLOW FILTERING. Although do_filter() isn't invoked for such queries, the performance can still be unexpectedly slow, and we want to signal that to the user by demanding they explicitly say ALLOW FILTERING. Because we now reject queries that worked fine before, existing applications can break. Therefore, the behavior is controlled by a flag currently defaulting to off. We will default to "on" in the next Scylla version. Fixes #7608; see comments there for justification. Signed-off-by: Dejan Mircevski --- cql3/statements/prepared_statement.hh | 6 ++- cql3/statements/raw/parsed_statement.cc | 11 ++++-- cql3/statements/raw/select_statement.hh | 6 ++- cql3/statements/select_statement.cc | 40 ++++++++++++++++---- db/config.cc | 5 +++ db/config.hh | 1 + test/boost/restrictions_test.cc | 49 +++++++++++++++++++++++++ test/cql-pytest/run.py | 1 + test/cql-pytest/test_allow_filtering.py | 9 +++-- 9 files changed, 112 insertions(+), 16 deletions(-) diff --git a/cql3/statements/prepared_statement.hh b/cql3/statements/prepared_statement.hh index 00b0c46bc3b4..1b64449c4015 100644 --- a/cql3/statements/prepared_statement.hh +++ b/cql3/statements/prepared_statement.hh @@ -73,9 +73,11 @@ public: std::vector partition_key_bind_indices; std::vector warnings; - prepared_statement(seastar::shared_ptr statement_, std::vector> bound_names_, std::vector partition_key_bind_indices); + prepared_statement(seastar::shared_ptr statement_, std::vector> bound_names_, + std::vector partition_key_bind_indices, std::vector warnings = {}); - prepared_statement(seastar::shared_ptr statement_, const prepare_context& ctx, const std::vector& partition_key_bind_indices); + prepared_statement(seastar::shared_ptr statement_, const prepare_context& ctx, const std::vector& partition_key_bind_indices, + std::vector warnings = {}); prepared_statement(seastar::shared_ptr statement_, prepare_context&& ctx, std::vector&& partition_key_bind_indices); diff --git a/cql3/statements/raw/parsed_statement.cc b/cql3/statements/raw/parsed_statement.cc index e075f83f8471..a1fd0f735f56 100644 --- a/cql3/statements/raw/parsed_statement.cc +++ b/cql3/statements/raw/parsed_statement.cc @@ -68,14 +68,19 @@ void parsed_statement::set_bound_variables(const std::vector<::shared_ptr statement_, std::vector> bound_names_, std::vector partition_key_bind_indices) +prepared_statement::prepared_statement( + ::shared_ptr statement_, std::vector> bound_names_, + std::vector partition_key_bind_indices, std::vector warnings) : statement(std::move(statement_)) , bound_names(std::move(bound_names_)) , partition_key_bind_indices(std::move(partition_key_bind_indices)) + , warnings(move(warnings)) { } -prepared_statement::prepared_statement(::shared_ptr statement_, const prepare_context& ctx, const std::vector& partition_key_bind_indices) - : prepared_statement(statement_, ctx.get_variable_specifications(), partition_key_bind_indices) +prepared_statement::prepared_statement( + ::shared_ptr statement_, const prepare_context& ctx, + const std::vector& partition_key_bind_indices, std::vector warnings) + : prepared_statement(statement_, ctx.get_variable_specifications(), partition_key_bind_indices, move(warnings)) { } prepared_statement::prepared_statement(::shared_ptr statement_, prepare_context&& ctx, std::vector&& partition_key_bind_indices) diff --git a/cql3/statements/raw/select_statement.hh b/cql3/statements/raw/select_statement.hh index bd35291f7f0f..673a9789993d 100644 --- a/cql3/statements/raw/select_statement.hh +++ b/cql3/statements/raw/select_statement.hh @@ -45,6 +45,7 @@ #include "cql3/statements/prepared_statement.hh" #include "cql3/relation.hh" #include "cql3/attributes.hh" +#include "db/config.hh" #include namespace cql3 { @@ -150,7 +151,10 @@ private: bool is_reversed(const schema& schema) const; /** If ALLOW FILTERING was not specified, this verifies that it is not needed */ - void check_needs_filtering(const restrictions::statement_restrictions& restrictions); + void check_needs_filtering( + const restrictions::statement_restrictions& restrictions, + db::tri_mode_restriction_t::mode strict_allow_filtering, + std::vector& warnings); void ensure_filtering_columns_retrieval(database& db, selection::selection& selection, diff --git a/cql3/statements/select_statement.cc b/cql3/statements/select_statement.cc index e5a69e78269f..eae58b33fec1 100644 --- a/cql3/statements/select_statement.cc +++ b/cql3/statements/select_statement.cc @@ -1413,7 +1413,8 @@ std::unique_ptr select_statement::prepare(database& db, cql_ is_reversed_ = is_reversed(*schema); } - check_needs_filtering(*restrictions); + std::vector warnings; + check_needs_filtering(*restrictions, db.get_config().strict_allow_filtering(), warnings); ensure_filtering_columns_retrieval(db, *selection, *restrictions); auto group_by_cell_indices = ::make_shared>(prepare_group_by(*schema, *selection)); @@ -1453,7 +1454,7 @@ std::unique_ptr select_statement::prepare(database& db, cql_ auto partition_key_bind_indices = ctx.get_partition_key_bind_indexes(*schema); - return std::make_unique(std::move(stmt), ctx, std::move(partition_key_bind_indices)); + return make_unique(std::move(stmt), ctx, move(partition_key_bind_indices), move(warnings)); } ::shared_ptr @@ -1627,15 +1628,40 @@ bool select_statement::is_reversed(const schema& schema) const { return is_reversed_; } +/// True iff restrictions require ALLOW FILTERING despite there being no coordinator-side filtering. +static bool needs_allow_filtering_anyway( + const restrictions::statement_restrictions& restrictions, + db::tri_mode_restriction_t::mode strict_allow_filtering, + std::vector& warnings) { + using flag_t = db::tri_mode_restriction_t::mode; + if (strict_allow_filtering == flag_t::FALSE) { + return false; + } + const auto& ck_restrictions = *restrictions.get_clustering_columns_restrictions(); + const auto& pk_restrictions = *restrictions.get_partition_key_restrictions(); + // Even if no filtering happens on the coordinator, we still warn about poor performance when partition + // slice is defined but in potentially unlimited number of partitions (see #7608). + if ((pk_restrictions.empty() || has_token(pk_restrictions.expression)) // Potentially unlimited partitions. + && !ck_restrictions.empty() // Slice defined. + && !restrictions.uses_secondary_indexing()) { // Base-table is used. (Index-table use always limits partitions.) + if (strict_allow_filtering == flag_t::WARN) { + warnings.emplace_back("This query should use ALLOW FILTERING and will be rejected in future versions."); + return false; + } + return true; + } + return false; +} + /** If ALLOW FILTERING was not specified, this verifies that it is not needed */ -void select_statement::check_needs_filtering(const restrictions::statement_restrictions& restrictions) +void select_statement::check_needs_filtering( + const restrictions::statement_restrictions& restrictions, + db::tri_mode_restriction_t::mode strict_allow_filtering, + std::vector& warnings) { // non-key-range non-indexed queries cannot involve filtering underneath if (!_parameters->allow_filtering() && (restrictions.is_key_range() || restrictions.uses_secondary_indexing())) { - // We will potentially filter data if either: - // - Have more than one IndexExpression - // - Have no index expression and the column filter is not the identity - if (restrictions.need_filtering()) { + if (restrictions.need_filtering() || needs_allow_filtering_anyway(restrictions, strict_allow_filtering, warnings)) { throw exceptions::invalid_request_exception( "Cannot execute this query as it might involve data filtering and " "thus may have unpredictable performance. If you want to execute " diff --git a/db/config.cc b/db/config.cc index 00897e100a10..eff01940b866 100644 --- a/db/config.cc +++ b/db/config.cc @@ -229,6 +229,10 @@ class convert> { #define str(x) #x #define _mk_init(name, type, deflt, status, desc, ...) , name(this, str(name), value_status::status, type(deflt), desc) +static db::tri_mode_restriction_t::mode strict_allow_filtering_default() { + return db::tri_mode_restriction_t::mode::WARN; // TODO: make it TRUE after Scylla 4.6. +} + db::config::config(std::shared_ptr exts) : utils::config_file() @@ -802,6 +806,7 @@ db::config::config(std::shared_ptr exts) "Maximum number of concurrent requests a single shard can handle before it starts shedding extra load. By default, no requests will be shed.") , cdc_dont_rewrite_streams(this, "cdc_dont_rewrite_streams", value_status::Used, false, "Disable rewriting streams from cdc_streams_descriptions to cdc_streams_descriptions_v2. Should not be necessary, but the procedure is expensive and prone to failures; this config option is left as a backdoor in case some user requires manual intervention.") + , strict_allow_filtering(this, "strict_allow_filtering", liveness::LiveUpdate, value_status::Used, strict_allow_filtering_default(), "Match Cassandra in requiring ALLOW FILTERING on slow queries. Can be true, false, or warn. When false, Scylla accepts some slow queries even without ALLOW FILTERING that Cassandra rejects. Warn is same as false, but with warning.") , alternator_port(this, "alternator_port", value_status::Used, 0, "Alternator API port") , alternator_https_port(this, "alternator_https_port", value_status::Used, 0, "Alternator API HTTPS port") , alternator_address(this, "alternator_address", value_status::Used, "0.0.0.0", "Alternator API listening address") diff --git a/db/config.hh b/db/config.hh index c87b813f9bd0..82f5b02a7c97 100644 --- a/db/config.hh +++ b/db/config.hh @@ -336,6 +336,7 @@ public: named_value schema_registry_grace_period; named_value max_concurrent_requests_per_shard; named_value cdc_dont_rewrite_streams; + named_value strict_allow_filtering; named_value alternator_port; named_value alternator_https_port; diff --git a/test/boost/restrictions_test.cc b/test/boost/restrictions_test.cc index 9c3d9a00bd00..2e40b7a87d94 100644 --- a/test/boost/restrictions_test.cc +++ b/test/boost/restrictions_test.cc @@ -26,6 +26,7 @@ #include "cql3/cql_config.hh" #include "cql3/values.hh" +#include "db/config.hh" #include "test/lib/cql_assertions.hh" #include "test/lib/cql_test_env.hh" #include "test/lib/exception_utils.hh" @@ -927,3 +928,51 @@ SEASTAR_THREAD_TEST_CASE(tuples) { require_rows(e, "select q from t where q>(99) allow filtering", {}); }).get(); } + +SEASTAR_THREAD_TEST_CASE(strict_allow_filtering) { + auto cfg = make_shared(); + cfg->strict_allow_filtering(db::tri_mode_restriction_t::mode::TRUE); + do_with_cql_env_thread([](cql_test_env& e) { + cquery_nofail(e, "create table t (p int, c int, primary key (p,c))"); + require_rows(e, "select p from t where c>0 allow filtering", {}); + BOOST_REQUIRE_EXCEPTION( + e.execute_cql("select p from t where c>0").get(), + exceptions::invalid_request_exception, + exception_predicate::message_contains("use ALLOW FILTERING")); + }, cfg).get(); +} + +SEASTAR_THREAD_TEST_CASE(nonstrict_allow_filtering) { + auto cfg = make_shared(); + cfg->strict_allow_filtering(db::tri_mode_restriction_t::mode::FALSE); + do_with_cql_env_thread([](cql_test_env& e) { + cquery_nofail(e, "create table t (p int, c int, primary key (p,c))"); + require_rows(e, "select p from t where c>0 allow filtering", {}); + require_rows(e, "select p from t where c>0", {}); + }, cfg).get(); +} + +SEASTAR_THREAD_TEST_CASE(warn_strict_allow_filtering) { + auto cfg = make_shared(); + cfg->strict_allow_filtering(db::tri_mode_restriction_t::mode::WARN); + do_with_cql_env_thread([](cql_test_env& e) { + cquery_nofail(e, "create table t (p int, c int, primary key (p,c))"); + require_rows(e, "select p from t where c>0 allow filtering", {}); + require_rows(e, "select p from t where c>0", {}); + }, cfg).get(); +} + +SEASTAR_THREAD_TEST_CASE(strict_allow_filtering_live_update) { + auto cfg = make_shared(); + cfg->strict_allow_filtering(db::tri_mode_restriction_t::mode::FALSE); + do_with_cql_env_thread([&] (cql_test_env& e) { + cquery_nofail(e, "create table t (p int, c int, primary key (p,c))"); + const char* stmt = "select p from t where c>0"; + require_rows(e, stmt, {}); + cfg->strict_allow_filtering(db::tri_mode_restriction_t::mode::TRUE); + BOOST_REQUIRE_EXCEPTION( + e.execute_cql(stmt).get(), + exceptions::invalid_request_exception, + exception_predicate::message_contains("use ALLOW FILTERING")); + }, cfg).get(); +} diff --git a/test/cql-pytest/run.py b/test/cql-pytest/run.py index 76433adf8e19..c13905335ebe 100755 --- a/test/cql-pytest/run.py +++ b/test/cql-pytest/run.py @@ -216,6 +216,7 @@ def run_scylla_cmd(pid, dir): # Set up authentication in order to allow testing this module # and other modules dependent on it: e.g. service levels '--authenticator', 'PasswordAuthenticator', + '--strict-allow-filtering', 'true', ], {}) # Same as run_scylla_cmd, just use SSL encryption for the CQL port (same diff --git a/test/cql-pytest/test_allow_filtering.py b/test/cql-pytest/test_allow_filtering.py index 338214d3ba7d..856c471b4fd2 100644 --- a/test/cql-pytest/test_allow_filtering.py +++ b/test/cql-pytest/test_allow_filtering.py @@ -121,7 +121,6 @@ def test_allow_filtering_regular_column(cql, table1): # partition it needs to inspect the promoted index and in the worst case, # also read a chunk of rows to look for the matching clustering-column value. # Reproduces issue #7608. -@pytest.mark.xfail(reason="issue #7608") def test_allow_filtering_clustering_key(cql, table1): check_af_mandatory(cql, table1, 'c=2', lambda row: row.c==2) check_af_mandatory(cql, table1, 'c>2', lambda row: row.c>2) @@ -136,7 +135,6 @@ def test_allow_filtering_clustering_key(cql, table1): # ranges - because although they are finite, they may still contain a lot # of partitions. # Reproduces issue #7608. -@pytest.mark.xfail(reason="issue #7608") def test_allow_filtering_clustering_key_token_range(cql, table1): # TODO: the current implementation of check_af_mandatory() doesn't know # how to request or compare results with the token, so we pass None to @@ -153,7 +151,6 @@ def test_allow_filtering_clustering_key_token_range(cql, table1): # this case. Note that 'c=2 AND k = 123' (without the "token"), which # is guaranteed to match just one partition, doesn't need ALLOW FILTERING. # Reproduces issue #7608. -@pytest.mark.xfail(reason="issue #7608") def test_allow_filtering_clustering_key_token_specific(cql, table1): check_af_mandatory(cql, table1, 'c=2 AND token(k) = 123', None) check_af_optional(cql, table1, 'c=2 AND k = 123', None) @@ -237,6 +234,12 @@ def table3(cql, test_keyspace): yield (table, everything) cql.execute("DROP TABLE " + table) +def test_allow_filtering_multi_column(cql, table3): + """Multi-column restrictions are just like other clustering restrictions""" + check_af_mandatory(cql, table3, '(c1,c2)=(10,10)', lambda row: row.c1==10 and row.c2==10) + check_af_optional(cql, table3, '(c1,c2)=(10,10) and k1=1 and k2=1', + lambda row: row.c1==10 and row.c2==10 and row.k1==1 and row.k2==1) + # In test_allow_filtering_indexed_a_and_k() above we noted that the # combination of an indexed column and the partition key can be searched # without filtering, and explained why. The same cannot be said for the