forked from scylladb/scylladb
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
cql: fix empty aggregation, and add more tests
This patch fixes scylladb#12475, where an aggregation (e.g., COUNT(*), MIN(v)) of absolutely no partitions (e.g., "WHERE p = null" or "WHERE p in ()") resulted in an internal error instead of the "zero" result that each aggregator expects (e.g., 0 for COUNT, null for MIN). The problem is that normally our aggregator forwarder picks the nodes which hold the relevant partition(s), forwards the request to each of them, and then combines these results. When there are no partitions, the query is sent to no node, and we end up with an empty result set instead of the "zero" results. So in this patch we recognize this case and build those "zero" results (as mentioned above, these aren't always 0 and depend on the aggregation function!). The patch also adds two tests reproducing this issue in a fairly general way (e.g., several aggregators, different aggregation functions) and confirming the patch fixes the bug. The test file also includes two additional tests for COUNT aggregation, which uncovered an incompatibility with Cassandra which is still not fixed - so these tests are marked "xfail": Refs scylladb#12477: Combining COUNT with GROUP BY on a query that matches nothing yields an empty result set in Cassandra, but one row with an empty count in Scylla. Signed-off-by: Nadav Har'El <nyh@scylladb.com>
- Loading branch information
Showing
2 changed files
with
108 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# Copyright 2023-present ScyllaDB | ||
# | ||
# SPDX-License-Identifier: AGPL-3.0-or-later | ||
|
||
############################################################################# | ||
# Tests for the COUNT() aggregation function | ||
############################################################################# | ||
|
||
import pytest | ||
from util import new_test_table, unique_key_int | ||
|
||
@pytest.fixture(scope="module")
def table1(cql, test_keyspace):
    """Module-scoped fixture yielding a test table with columns p, c and v.

    The table is created through new_test_table() with (p, c) as its
    primary key, and the context manager is exited once the fixture is
    torn down.
    """
    schema = "p int, c int, v int, PRIMARY KEY (p, c)"
    with new_test_table(cql, test_keyspace, schema) as created:
        yield created
|
||
# When there is no row matching the selection, the count should be 0. | ||
# First check a "=" expression matching no row: | ||
def test_count_empty_eq(cql, table1):
    """Aggregate over "p = <key>" for a key this test never writes to.

    count(*) is expected to return a single row holding 0, while min(v)
    is expected to return a single row holding null (None).
    """
    key = unique_key_int()
    count_rows = list(cql.execute(f"select count(*) from {table1} where p = {key}"))
    assert [(0,)] == count_rows
    # "0" is the empty-input result of count() only; other aggregators
    # differ - min() gives null when there is nothing to aggregate.
    min_rows = list(cql.execute(f"select min(v) from {table1} where p = {key}"))
    assert [(None,)] == min_rows
|
||
# The query "p = null" also matches no row so should have a count 0, but
# it's a special case where no partition belongs to the query range so the
# aggregator doesn't need to send the query to any node. This reproduces
# issue #12475.
# This test is scylla_only because Cassandra doesn't support "p = null".
# It does support "p IN ()" with the same effect, though, so that will
# be the next test.
def test_count_empty_eq_null(cql, table1, scylla_only):
    """Aggregate over the restriction "p = null".

    Expects count(*) alone to return [(0,)], and the combination
    count(*), count(v), min(v) to return [(0, 0, None)].
    """
    single = f"select count(*) from {table1} where p = null"
    assert [(0,)] == list(cql.execute(single))
    # Several aggregators in one select: for an empty input some of them
    # produce zero and others produce null.
    combined = f"select count(*), count(v), min(v) from {table1} where p = null"
    assert [(0,0,None)] == list(cql.execute(combined))
|
||
# Another special case of a query which matches no partition - an IN with | ||
# an empty list. Reproduces #12475. | ||
def test_count_empty_in(cql, table1):
    """Aggregate over the restriction "p in ()" (an empty IN list).

    Expects count(*) alone to return [(0,)], and the combination
    count(*), count(v), min(v) to return [(0, 0, None)].
    """
    single = f"select count(*) from {table1} where p in ()"
    assert [(0,)] == list(cql.execute(single))
    combined = f"select count(*), count(v), min(v) from {table1} where p in ()"
    assert [(0,0,None)] == list(cql.execute(combined))
|
||
# Simple test of counting the number of rows in a single partition | ||
def test_count_in_partition(cql, table1):
    """Insert three rows under one partition key and expect count(*) == 3."""
    key = unique_key_int()
    insert = cql.prepare(f"insert into {table1} (p, c, v) values (?, ?, ?)")
    # Rows (c, v) = (1, 1), (2, 2), (3, 3), written in that order.
    for n in (1, 2, 3):
        cql.execute(insert, [key, n, n])
    counted = list(cql.execute(f"select count(*) from {table1} where p = {key}"))
    assert [(3,)] == counted
|
||
# Using count(v) instead of count(*) allows counting only rows with a set | ||
# value in v | ||
def test_count_specific_column(cql, table1):
    """Compare count(*) with count(v) when one row has v bound to None.

    Four rows are inserted; the last one passes None for v. count(*) is
    expected to be 4 and count(v) is expected to be 3.
    """
    key = unique_key_int()
    insert = cql.prepare(f"insert into {table1} (p, c, v) values (?, ?, ?)")
    for c, v in ((1, 1), (2, 2), (3, 3), (4, None)):
        cql.execute(insert, [key, c, v])
    all_rows = list(cql.execute(f"select count(*) from {table1} where p = {key}"))
    assert [(4,)] == all_rows
    rows_with_v = list(cql.execute(f"select count(v) from {table1} where p = {key}"))
    assert [(3,)] == rows_with_v
|
||
# COUNT can be combined with GROUP BY to count separately for each partition | ||
# or row. | ||
def test_count_and_group_by_row(cql, table1):
    """Run count(v) with "group by p,c" and expect one result per row.

    Rows whose v was inserted as a number are expected to count 1, and
    the row whose v was inserted as None is expected to count 0.
    """
    key = unique_key_int()
    insert = cql.prepare(f"insert into {table1} (p, c, v) values (?, ?, ?)")
    for c, v in ((1, 1), (2, 2), (3, 3), (4, None)):
        cql.execute(insert, [key, c, v])
    expected = [(key, 1, 1), (key, 2, 1), (key, 3, 1), (key, 4, 0)]
    query = f"select p, c, count(v) from {table1} where p = {key} group by p,c"
    assert expected == list(cql.execute(query))
|
||
def test_count_and_group_by_partition(cql, table1):
    """Run count(v) with "group by p" over two partitions.

    The first partition gets two rows with v set (expected count 2); the
    second gets one row with v set and one with v given as None (expected
    count 1). The results are expected in the order first key, second key.
    """
    first = unique_key_int()
    second = unique_key_int()
    insert = cql.prepare(f"insert into {table1} (p, c, v) values (?, ?, ?)")
    rows_to_write = (
        (first, 1, 1),
        (first, 2, 2),
        (second, 3, 3),
        (second, 4, None),
    )
    for p, c, v in rows_to_write:
        cql.execute(insert, [p, c, v])
    query = f"select p, count(v) from {table1} where p in ({first},{second}) group by p"
    assert [(first, 2), (second, 1)] == list(cql.execute(query))
|
||
# In the above tests we looked for per-row or per-partition counts and got | ||
# back more than one count. But if our query matches no row, we should get | ||
# back no count. | ||
@pytest.mark.xfail(reason="issue #12477")
def test_count_and_group_by_row_none(cql, table1):
    """Expect no result rows from a per-row grouped count on an unused key.

    Marked xfail with reason "issue #12477".
    """
    key = unique_key_int()
    query = f"select p, c, count(v) from {table1} where p = {key} group by p,c"
    result = list(cql.execute(query))
    assert [] == result
|
||
@pytest.mark.xfail(reason="issue #12477")
def test_count_and_group_by_partition_none(cql, table1):
    """Expect no result rows from a per-partition grouped count on an unused key.

    Marked xfail with reason "issue #12477".
    """
    key = unique_key_int()
    query = f"select p, count(v) from {table1} where p = {key} group by p"
    result = list(cql.execute(query))
    assert [] == result