diff --git a/cql3/Cql.g b/cql3/Cql.g index 6c784204fc1a..e7b00db6aceb 100644 --- a/cql3/Cql.g +++ b/cql3/Cql.g @@ -147,8 +147,11 @@ using uexpression = uninitialized; listener_type* listener; - std::vector<::shared_ptr> _bind_variables; - // index into _bind_variables + // Keeps the names of all bind variables. For bind variables without a name ('?'), the name is nullptr. + // Maps bind_index -> name. + std::vector<::shared_ptr> _bind_variable_names; + + // Maps name -> bind_index for all named bind variables. std::unordered_map _named_bind_variables_indexes; std::vector> _missing_tokens; @@ -172,8 +175,8 @@ using uexpression = uninitialized; if (name && _named_bind_variables_indexes.contains(*name)) { return bind_variable{_named_bind_variables_indexes[*name]}; } - auto marker = bind_variable{_bind_variables.size()}; - _bind_variables.push_back(name); + auto marker = bind_variable{_bind_variable_names.size()}; + _bind_variable_names.push_back(name); if (name) { _named_bind_variables_indexes[*name] = marker.bind_index; } @@ -321,7 +324,7 @@ query returns [std::unique_ptr stmnt] ; cqlStatement returns [std::unique_ptr stmt] - @after{ if (stmt) { stmt->set_bound_variables(_bind_variables); } } + @after{ if (stmt) { stmt->set_bound_variables(_bind_variable_names); } } : st1= selectStatement { $stmt = std::move(st1); } | st2= insertStatement { $stmt = std::move(st2); } | st3= updateStatement { $stmt = std::move(st3); } diff --git a/cql3/prepare_context.cc b/cql3/prepare_context.cc index 8e02f6f5491a..5f54237f8347 100644 --- a/cql3/prepare_context.cc +++ b/cql3/prepare_context.cc @@ -19,22 +19,22 @@ size_t prepare_context::bound_variables_size() const { } const std::vector>& prepare_context::get_variable_specifications() const & { - return _specs; + return _variable_specs; } std::vector> prepare_context::get_variable_specifications() && { - return std::move(_specs); + return std::move(_variable_specs); } std::vector prepare_context::get_partition_key_bind_indexes(const schema& schema) const { auto count = schema.partition_key_columns().size(); std::vector partition_key_positions(count, uint16_t(0)); std::vector set(count, false); - for (size_t i = 0; i < _target_columns.size(); i++) { - auto& target_column = _target_columns[i]; - const auto* cdef = target_column ? schema.get_column_definition(target_column->name->name()) : nullptr; + + for (auto&& [bind_index, target_spec] : _targets) { + const auto* cdef = target_spec ? schema.get_column_definition(target_spec->name->name()) : nullptr; if (cdef && cdef->is_partition_key()) { - partition_key_positions[cdef->position()] = i; + partition_key_positions[cdef->position()] = bind_index; set[cdef->position()] = true; } } @@ -48,30 +48,30 @@ std::vector prepare_context::get_partition_key_bind_indexes(const sche void prepare_context::add_variable_specification(int32_t bind_index, lw_shared_ptr spec) { auto name = _variable_names[bind_index]; - if (_specs[bind_index]) { + if (_variable_specs[bind_index]) { // If the same variable is used in multiple places, check that the types are compatible - if (&spec->type->without_reversed() != &_specs[bind_index]->type->without_reversed()) { + if (&spec->type->without_reversed() != &_variable_specs[bind_index]->type->without_reversed()) { throw exceptions::invalid_request_exception( fmt::format("variable :{} has type {} which doesn't match {}", - *name, _specs[bind_index]->type->as_cql3_type(), spec->name)); + *name, _variable_specs[bind_index]->type->as_cql3_type(), spec->name)); } } - _target_columns[bind_index] = spec; + _targets.emplace_back(bind_index, spec); // Use the user name, if there is one if (name) { spec = make_lw_shared(spec->ks_name, spec->cf_name, name, spec->type); } - _specs[bind_index] = spec; + _variable_specs[bind_index] = spec; } -void prepare_context::set_bound_variables(const std::vector>& prepare_meta) { - _variable_names = prepare_meta; - _specs.clear(); - _target_columns.clear(); +void prepare_context::set_bound_variables(const std::vector>& bind_variable_names) { + _variable_names = bind_variable_names; + _variable_specs.clear(); + _targets.clear(); - const size_t bn_size = prepare_meta.size(); - _specs.resize(bn_size); - _target_columns.resize(bn_size); + const size_t bn_size = bind_variable_names.size(); + _variable_specs.resize(bn_size); + _targets.resize(bn_size); } void prepare_context::clear_pk_function_calls_cache() { diff --git a/cql3/prepare_context.hh b/cql3/prepare_context.hh index 603c60018708..341829ef49af 100644 --- a/cql3/prepare_context.hh +++ b/cql3/prepare_context.hh @@ -33,9 +33,18 @@ namespace functions { class function_call; } */ class prepare_context final { private: + // Keeps names of all the bind variables. For bind variables without a name ('?'), the name is nullptr. + // Maps bind_index -> name. std::vector> _variable_names; - std::vector> _specs; - std::vector> _target_columns; + + // Keeps column_specification for every bind_index. column_specification describes the name and type of this variable. + std::vector> _variable_specs; + + // For every expression like ( = ), there's a pair of (bind_index, target column_specification) in _targets. + // Collecting all equalities of bind variables allows to determine which of the variables set the value of partition key columns. + // The driver needs this information in order to compute the partition token and send the request to the right node. + std::vector>> _targets; + // A list of pointers to prepared `function_call` cache ids, that // participate in partition key ranges computation within an LWT statement. std::vector<::shared_ptr>> _pk_function_calls_cache_ids; @@ -61,7 +70,7 @@ public: void add_variable_specification(int32_t bind_index, lw_shared_ptr spec); - void set_bound_variables(const std::vector>& prepare_meta); + void set_bound_variables(const std::vector>& bind_variable_names); void clear_pk_function_calls_cache(); diff --git a/test/cql-pytest/test_prepare.py b/test/cql-pytest/test_prepare.py new file mode 100644 index 000000000000..90227883cdff --- /dev/null +++ b/test/cql-pytest/test_prepare.py @@ -0,0 +1,152 @@ +# Copyright 2023-present ScyllaDB +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +############################################################################# +# Tests for preparing various kinds of statements. When a client asks to prepare +# a statement, Scylla has to process it and return the correct prepared statement +# metadata. The metadata contains information about the keyspace, table and bind variables. +# Let's ensure that this information is correct. +# Here's the description of prepared metadata in CQL protocol spec: +# https://github.com/apache/cassandra/blob/1959502d8b16212479eecb076c89945c3f0f180c/doc/native_protocol_v4.spec#L675 + +import pytest +from util import new_test_table + + +@pytest.fixture(scope="module") +def table1(cql, test_keyspace): + with new_test_table(cql, test_keyspace, "p int, c int, PRIMARY KEY (p, c)") as table: + yield table + + +@pytest.fixture(scope="module") +def table2(cql, test_keyspace): + with new_test_table(cql, test_keyspace, "p1 int, p2 int, p3 int, p4 int, c1 int, c2 int, r1 int, r2 int, PRIMARY KEY ((p1, p2, p3, p4), c1, c2)") as table: + yield table + +# The following tests test the generation of "pk indexes" +# "pk indexes" tell the driver which bind variable values it should use to calculate the partition token, so that it can send queries to the correct shard. +# https://github.com/apache/cassandra/blob/1959502d8b16212479eecb076c89945c3f0f180c/doc/native_protocol_v4.spec#L699-L707 + + +# Test generating pk indexes for a single column partition key. +def test_single_pk_indexes(cql, table1): + prepared = cql.prepare(f"SELECT p FROM {table1} WHERE p = ?") + assert prepared.routing_key_indexes == [0] + + prepared = cql.prepare(f"SELECT p, c FROM {table1} WHERE c = ? AND p = ?") + assert prepared.routing_key_indexes == [1] + +# Test that pk indexes aren't generated when the partition key column isn't restricted using a bind variable. +# In this situation the driver won't be able to calculate the token, so pk indexes should be empty (None). +def test_single_pk_no_indexes(cql, table1): + prepared = cql.prepare(f"SELECT p, c FROM {table1}") + assert prepared.routing_key_indexes is None + + prepared = cql.prepare(f"SELECT p, c FROM {table1} WHERE c = ? ALLOW FILTERING") + assert prepared.routing_key_indexes is None + + prepared = cql.prepare(f"SELECT p FROM {table1} WHERE p = 0 AND c = ?") + assert prepared.routing_key_indexes is None + +# Test generating pk indexes for a composite partition key. +def test_composite_pk_indexes(cql, table2): + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE p1 = ? AND p2 = ? AND p3 = ? AND p4 = ? AND c1 = ? AND c2 = ?") + assert prepared.routing_key_indexes == [0, 1, 2, 3] + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE p4 = ? AND p3 = ? AND p2 = ? AND p1 = ? AND c1 = ? AND c2 = ?") + assert prepared.routing_key_indexes == [3, 2, 1, 0] + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE r1 = ? AND c2 = ? AND p3 = ? AND p1 = ? AND r2 = ? AND p4 = ? AND c1 = ? AND p2 = ? ALLOW FILTERING") + assert prepared.routing_key_indexes == [3, 7, 2, 5] + +# Test that pk indexes aren't generated when not all partition key columns are restricted using bind variables. +# In this situation the driver won't be able to calculate the token, so pk indexes should be empty (None). +def test_composite_pk_no_indexes(cql, table2): + prepared = cql.prepare( + f"SELECT * FROM {table2}") + assert prepared.routing_key_indexes is None + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE p1 = ? ALLOW FILTERING") + assert prepared.routing_key_indexes is None + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE p1 = ? AND p2 = ? AND p4 = ? ALLOW FILTERING") + assert prepared.routing_key_indexes is None + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE p1 = ? AND p2 = 0 AND p3 = ? AND p4 = ?") + assert prepared.routing_key_indexes is None + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE p1 = ? AND p2 = 0 AND p3 = ? AND p4 = ? AND c1 = ? AND c2 = ?") + assert prepared.routing_key_indexes is None + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE p1 = 0 AND p2 = 1 AND p3 = 2 AND p4 = 3 AND c1 = 5 AND c2 = 5") + assert prepared.routing_key_indexes is None + +# Test generating pk indexes for a single column partition key using named bind variables. +def test_single_pk_indexes_named_variables(cql, table1): + prepared = cql.prepare(f"SELECT p FROM {table1} WHERE p = :a") + assert prepared.routing_key_indexes == [0] + + prepared = cql.prepare(f"SELECT p, c FROM {table1} WHERE c = :a AND p = :b") + assert prepared.routing_key_indexes == [1] + +# Test generating pk indexes for a composite partition key using named bind variables. +def test_composite_pk_indexes_named_variables(cql, table2): + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE p1 = :a AND p2 = :b AND p3 = :c AND p4 = :d AND c1 = :e AND c2 = :f") + assert prepared.routing_key_indexes == [0, 1, 2, 3] + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE p1 = :f AND p2 = :e AND p3 = :d AND p4 = :c AND c1 = :b AND c2 = :a") + assert prepared.routing_key_indexes == [0, 1, 2, 3] + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE c1 = :a AND c2 = :b AND p1 = :f AND p2 = :e AND p3 = :d AND p4 = :c") + assert prepared.routing_key_indexes == [2, 3, 4, 5] + +# Test generating pk indexes with named bind variables where the same variable is used multiple times. +# The test is scylla_only because Scylla treats :x as a single bind variable, but Cassandra thinks +# that there are two bind variables, both of them named :x. +def test_single_pk_indexes_duplicate_named_variables(cql, table1, scylla_only): + prepared = cql.prepare(f"SELECT p FROM {table1} WHERE p = :x") + assert prepared.routing_key_indexes == [0] + + prepared = cql.prepare(f"SELECT p FROM {table1} WHERE p = :x AND c = :x") + assert prepared.routing_key_indexes == [0] + + prepared = cql.prepare(f"SELECT p FROM {table1} WHERE c = :x AND p = :x") + assert prepared.routing_key_indexes == [0] + +# Test generating pk indexes with named bind variables where the same variable is used multiple times. +# The test is scylla_only because Scylla treats :x as a single bind variable, but Cassandra thinks +# that there are multiple bind variables, all of them named :x. +def test_composite_pk_indexes_duplicate_named_variables(cql, table2, scylla_only): + prepared = cql.prepare(f"SELECT * FROM {table2} WHERE p1 = :x AND p2 = :x AND p3 = :x AND p4 = :x") + assert prepared.routing_key_indexes == [0, 0, 0, 0] + + prepared = cql.prepare(f"SELECT * FROM {table2} WHERE p1 = :a AND p2 = :a AND p3 = :b AND p4 = :b") + assert prepared.routing_key_indexes == [0, 0, 1, 1] + + prepared = cql.prepare(f"SELECT * FROM {table2} WHERE p1 = :a AND p2 = :b AND p3 = :a AND p4 = :b") + assert prepared.routing_key_indexes == [0, 1, 0, 1] + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE c1 = :a AND c2 = :b AND p1 = :a AND p2 = :b AND p3 = :a AND p4 = :b") + assert prepared.routing_key_indexes == [0, 1, 0, 1] + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE p1 = :a AND p2 = :b AND p3 = :a AND p4 = :b AND c1 = :a AND c2 = :b ") + assert prepared.routing_key_indexes == [0, 1, 0, 1] + + prepared = cql.prepare( + f"SELECT * FROM {table2} WHERE p1 = :x AND p2 = :x AND p3 = :z AND p4 = :y AND c1 = :y AND c2 = :z ") + assert prepared.routing_key_indexes == [0, 0, 1, 2]