Skip to content

Commit

Permalink
Merge ' cql3: Demand ALLOW FILTERING for unlimited, sliced partitions…
Browse files Browse the repository at this point in the history
… ' from Dejan Mircevski

Return the pre- 6773563 behavior of demanding ALLOW FILTERING when partition slice is requested but on potentially unlimited number of partitions.  Put it on a flag defaulting to "off" for now.

Fixes #7608; see comments there for justification.

Tests: unit (debug, dev), dtest (cql_additional_test, paging_test)

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>

Closes #9126

* github.com:scylladb/scylla:
  cql3: Demand ALLOW FILTERING for unlimited, sliced partitions
  cql3: Track warnings in prepared_statement
  test: Use ALLOW FILTERING more strictly
  cql3: Add statement_restrictions::to_string
  • Loading branch information
avikivity committed Aug 31, 2021
2 parents 9666921 + 2f28f68 commit 8b59e3a
Show file tree
Hide file tree
Showing 19 changed files with 234 additions and 117 deletions.
12 changes: 10 additions & 2 deletions cql3/query_processor.cc
Expand Up @@ -478,6 +478,7 @@ query_processor::execute_direct(const sstring_view& query_string, service::query
tracing::trace(query_state.get_trace_state(), "Parsing a statement");
auto p = get_statement(query_string, query_state.get_client_state());
auto cql_statement = p->statement;
const auto warnings = std::move(p->warnings);
if (cql_statement->get_bound_terms() != options.get_values_count()) {
const auto msg = format("Invalid amount of bind variables: expected {:d} received {:d}",
cql_statement->get_bound_terms(),
Expand All @@ -492,8 +493,15 @@ query_processor::execute_direct(const sstring_view& query_string, service::query
metrics.regularStatementsExecuted.inc();
#endif
tracing::trace(query_state.get_trace_state(), "Processing a statement");
return cql_statement->check_access(_proxy, query_state.get_client_state()).then([this, cql_statement, &query_state, &options] () mutable {
return process_authorized_statement(std::move(cql_statement), query_state, options);
return cql_statement->check_access(_proxy, query_state.get_client_state()).then(
[this, cql_statement, &query_state, &options, warnings = move(warnings)] () mutable {
return process_authorized_statement(std::move(cql_statement), query_state, options).then(
[warnings = move(warnings)] (::shared_ptr<result_message> m) {
for (const auto& w : warnings) {
m->add_warning(w);
}
return make_ready_future<::shared_ptr<result_message>>(m);
});
});
}

Expand Down
9 changes: 7 additions & 2 deletions cql3/query_processor.hh
Expand Up @@ -401,9 +401,14 @@ private:
assert(bound_terms == prepared->bound_names.size());
return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
}).then([&key, &id_getter, &client_state] (auto prep_ptr) {
return make_ready_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(
const auto& warnings = prep_ptr->warnings;
const auto msg =
::make_shared<ResultMsgType>(id_getter(key), std::move(prep_ptr),
client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK)));
client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK));
for (const auto& w : warnings) {
msg->add_warning(w);
}
return make_ready_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(std::move(msg));
}).handle_exception_type([&query_string] (typename prepared_statements_cache::statement_is_too_big&) {
return make_exception_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(
prepared_statement_is_too_big(query_string));
Expand Down
4 changes: 4 additions & 0 deletions cql3/restrictions/statement_restrictions.cc
Expand Up @@ -1744,5 +1744,9 @@ std::vector<query::clustering_range> statement_restrictions::get_local_index_clu
return get_single_column_clustering_bounds(options, idx_tbl_schema, *_idx_tbl_ck_prefix);
}

sstring statement_restrictions::to_string() const {
return _where ? expr::to_string(*_where) : "";
}

} // namespace restrictions
} // namespace cql3
2 changes: 2 additions & 0 deletions cql3/restrictions/statement_restrictions.hh
Expand Up @@ -520,6 +520,8 @@ public:
/// Calculates clustering ranges for querying a local-index table.
std::vector<query::clustering_range> get_local_index_clustering_ranges(
const query_options& options, const schema& idx_tbl_schema) const;

sstring to_string() const;
};

}
Expand Down
7 changes: 5 additions & 2 deletions cql3/statements/prepared_statement.hh
Expand Up @@ -71,10 +71,13 @@ public:
const seastar::shared_ptr<cql_statement> statement;
const std::vector<seastar::lw_shared_ptr<column_specification>> bound_names;
std::vector<uint16_t> partition_key_bind_indices;
std::vector<sstring> warnings;

prepared_statement(seastar::shared_ptr<cql_statement> statement_, std::vector<seastar::lw_shared_ptr<column_specification>> bound_names_, std::vector<uint16_t> partition_key_bind_indices);
prepared_statement(seastar::shared_ptr<cql_statement> statement_, std::vector<seastar::lw_shared_ptr<column_specification>> bound_names_,
std::vector<uint16_t> partition_key_bind_indices, std::vector<sstring> warnings = {});

prepared_statement(seastar::shared_ptr<cql_statement> statement_, const prepare_context& ctx, const std::vector<uint16_t>& partition_key_bind_indices);
prepared_statement(seastar::shared_ptr<cql_statement> statement_, const prepare_context& ctx, const std::vector<uint16_t>& partition_key_bind_indices,
std::vector<sstring> warnings = {});

prepared_statement(seastar::shared_ptr<cql_statement> statement_, prepare_context&& ctx, std::vector<uint16_t>&& partition_key_bind_indices);

Expand Down
11 changes: 8 additions & 3 deletions cql3/statements/raw/parsed_statement.cc
Expand Up @@ -68,14 +68,19 @@ void parsed_statement::set_bound_variables(const std::vector<::shared_ptr<column

}

prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, std::vector<lw_shared_ptr<column_specification>> bound_names_, std::vector<uint16_t> partition_key_bind_indices)
prepared_statement::prepared_statement(
::shared_ptr<cql_statement> statement_, std::vector<lw_shared_ptr<column_specification>> bound_names_,
std::vector<uint16_t> partition_key_bind_indices, std::vector<sstring> warnings)
: statement(std::move(statement_))
, bound_names(std::move(bound_names_))
, partition_key_bind_indices(std::move(partition_key_bind_indices))
, warnings(move(warnings))
{ }

prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, const prepare_context& ctx, const std::vector<uint16_t>& partition_key_bind_indices)
: prepared_statement(statement_, ctx.get_variable_specifications(), partition_key_bind_indices)
prepared_statement::prepared_statement(
::shared_ptr<cql_statement> statement_, const prepare_context& ctx,
const std::vector<uint16_t>& partition_key_bind_indices, std::vector<sstring> warnings)
: prepared_statement(statement_, ctx.get_variable_specifications(), partition_key_bind_indices, move(warnings))
{ }

prepared_statement::prepared_statement(::shared_ptr<cql_statement> statement_, prepare_context&& ctx, std::vector<uint16_t>&& partition_key_bind_indices)
Expand Down
6 changes: 5 additions & 1 deletion cql3/statements/raw/select_statement.hh
Expand Up @@ -45,6 +45,7 @@
#include "cql3/statements/prepared_statement.hh"
#include "cql3/relation.hh"
#include "cql3/attributes.hh"
#include "db/config.hh"
#include <seastar/core/shared_ptr.hh>

namespace cql3 {
Expand Down Expand Up @@ -150,7 +151,10 @@ private:
bool is_reversed(const schema& schema) const;

/** If ALLOW FILTERING was not specified, this verifies that it is not needed */
void check_needs_filtering(const restrictions::statement_restrictions& restrictions);
void check_needs_filtering(
const restrictions::statement_restrictions& restrictions,
db::tri_mode_restriction_t::mode strict_allow_filtering,
std::vector<sstring>& warnings);

void ensure_filtering_columns_retrieval(database& db,
selection::selection& selection,
Expand Down
40 changes: 33 additions & 7 deletions cql3/statements/select_statement.cc
Expand Up @@ -1413,7 +1413,8 @@ std::unique_ptr<prepared_statement> select_statement::prepare(database& db, cql_
is_reversed_ = is_reversed(*schema);
}

check_needs_filtering(*restrictions);
std::vector<sstring> warnings;
check_needs_filtering(*restrictions, db.get_config().strict_allow_filtering(), warnings);
ensure_filtering_columns_retrieval(db, *selection, *restrictions);
auto group_by_cell_indices = ::make_shared<std::vector<size_t>>(prepare_group_by(*schema, *selection));

Expand Down Expand Up @@ -1453,7 +1454,7 @@ std::unique_ptr<prepared_statement> select_statement::prepare(database& db, cql_

auto partition_key_bind_indices = ctx.get_partition_key_bind_indexes(*schema);

return std::make_unique<prepared_statement>(std::move(stmt), ctx, std::move(partition_key_bind_indices));
return make_unique<prepared_statement>(std::move(stmt), ctx, move(partition_key_bind_indices), move(warnings));
}

::shared_ptr<restrictions::statement_restrictions>
Expand Down Expand Up @@ -1627,15 +1628,40 @@ bool select_statement::is_reversed(const schema& schema) const {
return is_reversed_;
}

/// True iff restrictions require ALLOW FILTERING despite there being no coordinator-side filtering.
static bool needs_allow_filtering_anyway(
const restrictions::statement_restrictions& restrictions,
db::tri_mode_restriction_t::mode strict_allow_filtering,
std::vector<sstring>& warnings) {
using flag_t = db::tri_mode_restriction_t::mode;
if (strict_allow_filtering == flag_t::FALSE) {
return false;
}
const auto& ck_restrictions = *restrictions.get_clustering_columns_restrictions();
const auto& pk_restrictions = *restrictions.get_partition_key_restrictions();
// Even if no filtering happens on the coordinator, we still warn about poor performance when partition
// slice is defined but in potentially unlimited number of partitions (see #7608).
if ((pk_restrictions.empty() || has_token(pk_restrictions.expression)) // Potentially unlimited partitions.
&& !ck_restrictions.empty() // Slice defined.
&& !restrictions.uses_secondary_indexing()) { // Base-table is used. (Index-table use always limits partitions.)
if (strict_allow_filtering == flag_t::WARN) {
warnings.emplace_back("This query should use ALLOW FILTERING and will be rejected in future versions.");
return false;
}
return true;
}
return false;
}

/** If ALLOW FILTERING was not specified, this verifies that it is not needed */
void select_statement::check_needs_filtering(const restrictions::statement_restrictions& restrictions)
void select_statement::check_needs_filtering(
const restrictions::statement_restrictions& restrictions,
db::tri_mode_restriction_t::mode strict_allow_filtering,
std::vector<sstring>& warnings)
{
// non-key-range non-indexed queries cannot involve filtering underneath
if (!_parameters->allow_filtering() && (restrictions.is_key_range() || restrictions.uses_secondary_indexing())) {
// We will potentially filter data if either:
// - Have more than one IndexExpression
// - Have no index expression and the column filter is not the identity
if (restrictions.need_filtering()) {
if (restrictions.need_filtering() || needs_allow_filtering_anyway(restrictions, strict_allow_filtering, warnings)) {
throw exceptions::invalid_request_exception(
"Cannot execute this query as it might involve data filtering and "
"thus may have unpredictable performance. If you want to execute "
Expand Down
5 changes: 5 additions & 0 deletions db/config.cc
Expand Up @@ -229,6 +229,10 @@ class convert<enum_option<db::tri_mode_restriction_t>> {
#define str(x) #x
#define _mk_init(name, type, deflt, status, desc, ...) , name(this, str(name), value_status::status, type(deflt), desc)

static db::tri_mode_restriction_t::mode strict_allow_filtering_default() {
return db::tri_mode_restriction_t::mode::WARN; // TODO: make it TRUE after Scylla 4.6.
}

db::config::config(std::shared_ptr<db::extensions> exts)
: utils::config_file()

Expand Down Expand Up @@ -804,6 +808,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"Maximum number of concurrent requests a single shard can handle before it starts shedding extra load. By default, no requests will be shed.")
, cdc_dont_rewrite_streams(this, "cdc_dont_rewrite_streams", value_status::Used, false,
"Disable rewriting streams from cdc_streams_descriptions to cdc_streams_descriptions_v2. Should not be necessary, but the procedure is expensive and prone to failures; this config option is left as a backdoor in case some user requires manual intervention.")
, strict_allow_filtering(this, "strict_allow_filtering", liveness::LiveUpdate, value_status::Used, strict_allow_filtering_default(), "Match Cassandra in requiring ALLOW FILTERING on slow queries. Can be true, false, or warn. When false, Scylla accepts some slow queries even without ALLOW FILTERING that Cassandra rejects. Warn is same as false, but with warning.")
, alternator_port(this, "alternator_port", value_status::Used, 0, "Alternator API port")
, alternator_https_port(this, "alternator_https_port", value_status::Used, 0, "Alternator API HTTPS port")
, alternator_address(this, "alternator_address", value_status::Used, "0.0.0.0", "Alternator API listening address")
Expand Down
1 change: 1 addition & 0 deletions db/config.hh
Expand Up @@ -339,6 +339,7 @@ public:
named_value<uint32_t> schema_registry_grace_period;
named_value<uint32_t> max_concurrent_requests_per_shard;
named_value<bool> cdc_dont_rewrite_streams;
named_value<tri_mode_restriction> strict_allow_filtering;

named_value<uint16_t> alternator_port;
named_value<uint16_t> alternator_https_port;
Expand Down
4 changes: 2 additions & 2 deletions test/boost/cql_query_group_test.cc
Expand Up @@ -105,7 +105,7 @@ SEASTAR_TEST_CASE(test_group_by_syntax) {
BOOST_REQUIRE_EXCEPTION(
e.execute_cql("select * from t1 where p1 > 0 group by p2 allow filtering").get(), ire, order);
BOOST_REQUIRE_EXCEPTION(
e.execute_cql("select * from t1 where (c1,c2) > (0,0) group by p1, p2, c3").get(), ire, order);
e.execute_cql("select * from t1 where (c1,c2) > (0,0) group by p1, p2, c3 allow filtering").get(), ire, order);
BOOST_REQUIRE_EXCEPTION(
e.execute_cql("select * from t1 where p1>0 and p2=0 group by c1 allow filtering").get(), ire, order);
// Even when GROUP BY lists all primary-key columns:
Expand Down Expand Up @@ -224,7 +224,7 @@ SEASTAR_TEST_CASE(test_group_by_text_key) {
cquery_nofail(e, "insert into t2 (p, c1, c2, v) values (' ', 'a', 'b', 40)");
require_rows(e, "select avg(v) from t2 group by p", {{I(25), T(" ")}});
require_rows(e, "select avg(v) from t2 group by p, c1", {{I(15), T(" "), T("")}, {I(35), T(" "), T("a")}});
require_rows(e, "select sum(v) from t2 where c1='' group by p, c2",
require_rows(e, "select sum(v) from t2 where c1='' group by p, c2 allow filtering",
{{I(10), T(" "), T("")}, {I(20), T(" "), T("b")}});
return make_ready_future<>();
});
Expand Down
16 changes: 8 additions & 8 deletions test/boost/cql_query_test.cc
Expand Up @@ -249,7 +249,7 @@ SEASTAR_TEST_CASE(test_map_elements_validation) {
SEASTAR_TEST_CASE(test_in_clause_validation) {
return do_with_cql_env_thread([](cql_test_env& e) {
auto test_inline = [&] (sstring value, bool should_throw) {
auto cql = sprint("SELECT r1 FROM tbl WHERE (c1,r1) IN ((1, '%s'))", value);
auto cql = sprint("SELECT r1 FROM tbl WHERE (c1,r1) IN ((1, '%s')) ALLOW FILTERING", value);
if (should_throw) {
BOOST_REQUIRE_THROW(e.execute_cql(cql).get(), exceptions::invalid_request_exception);
} else {
Expand All @@ -260,7 +260,7 @@ SEASTAR_TEST_CASE(test_in_clause_validation) {
test_inline("definietly not a date value", true);
test_inline("2015-05-03", false);
e.execute_cql("CREATE TABLE tbl2 (p1 int, c1 int, r1 text, PRIMARY KEY (p1, c1,r1))").get();
auto id = e.prepare("SELECT r1 FROM tbl2 WHERE (c1,r1) IN ?").get0();
auto id = e.prepare("SELECT r1 FROM tbl2 WHERE (c1,r1) IN ? ALLOW FILTERING").get0();
auto test_bind = [&] (sstring value, bool should_throw) {
auto my_tuple_type = tuple_type_impl::get_instance({int32_type, utf8_type});
auto my_list_type = list_type_impl::get_instance(my_tuple_type, true);
Expand Down Expand Up @@ -449,7 +449,7 @@ SEASTAR_TEST_CASE(test_select_statement) {
});
}).then([&e] {
// Test full partition range, singular clustering range
return e.execute_cql("select * from cf where c1 = 1 and c2 = 2;").then([] (shared_ptr<cql_transport::messages::result_message> msg) {
return e.execute_cql("select * from cf where c1 = 1 and c2 = 2 allow filtering;").then([] (shared_ptr<cql_transport::messages::result_message> msg) {
assert_that(msg).is_rows()
.with_size(3)
.with_row({
Expand Down Expand Up @@ -2405,14 +2405,14 @@ SEASTAR_TEST_CASE(test_in_restriction) {
e.execute_cql("insert into tir2 (p1, c1, r1) values (1, 2, 3);").get();
e.execute_cql("insert into tir2 (p1, c1, r1) values (2, 3, 4);").get();
{
auto msg = e.execute_cql("select r1 from tir2 where (c1,r1) in ((0, 1),(1,2),(0,1),(1,2),(3,3));").get0();
auto msg = e.execute_cql("select r1 from tir2 where (c1,r1) in ((0, 1),(1,2),(0,1),(1,2),(3,3)) allow filtering;").get0();
assert_that(msg).is_rows().with_rows({
{int32_type->decompose(1)},
{int32_type->decompose(2)},
});
}
{
auto prepared_id = e.prepare("select r1 from tir2 where (c1,r1) in ?;").get0();
auto prepared_id = e.prepare("select r1 from tir2 where (c1,r1) in ? allow filtering;").get0();
auto my_tuple_type = tuple_type_impl::get_instance({int32_type,int32_type});
auto my_list_type = list_type_impl::get_instance(my_tuple_type, true);
std::vector<tuple_type_impl::native_type> native_tuples = {
Expand Down Expand Up @@ -4523,7 +4523,7 @@ SEASTAR_TEST_CASE(test_select_serial_consistency) {
}
};
check_fails("select * from t allow filtering");
check_fails("select * from t where b > 0");
check_fails("select * from t where b > 0 allow filtering");
check_fails("select * from t where a in (1, 3)");
prepared_on_shard(e, "select * from t where a = 1", {}, {{I(1), I(1)}, {I(1), I(2)}}, db::consistency_level::SERIAL);
});
Expand Down Expand Up @@ -4580,8 +4580,8 @@ SEASTAR_TEST_CASE(test_impossible_where) {
cquery_nofail(e, "INSERT INTO t2(p,c) VALUES (0, 0)");
cquery_nofail(e, "INSERT INTO t2(p,c) VALUES (1, 10)");
cquery_nofail(e, "INSERT INTO t2(p,c) VALUES (2, 20)");
require_rows(e, "SELECT * FROM t2 WHERE c>10 AND c<10", {});
require_rows(e, "SELECT * FROM t2 WHERE c>=10 AND c<=0", {});
require_rows(e, "SELECT * FROM t2 WHERE c>10 AND c<10 ALLOW FILTERING", {});
require_rows(e, "SELECT * FROM t2 WHERE c>=10 AND c<=0 ALLOW FILTERING", {});
});
}

Expand Down

0 comments on commit 8b59e3a

Please sign in to comment.