Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix handling of expressions evaluated to Const in OR #6596

Merged
merged 1 commit into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 16 additions & 3 deletions .github/workflows/sqlsmith.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ jobs:
/usr/lib/postgresql/${{ matrix.pg }}/bin/pg_ctl initdb -D ~/pgdata
/usr/lib/postgresql/${{ matrix.pg }}/bin/pg_ctl -D ~/pgdata start \
-o "-cshared_preload_libraries=timescaledb" -o "-cmax_connections=200" \
-o "-cmax_prepared_transactions=100" -o "-cunix_socket_directories=/tmp"
-o "-cmax_prepared_transactions=100" -o "-cunix_socket_directories=/tmp" \
-o "-clog_statement=all" -l postgres.log
psql -h /tmp postgres -c 'CREATE DATABASE smith;'
psql -h /tmp smith -c 'CREATE EXTENSION timescaledb;'
psql -h /tmp smith -c '\i ${{ github.workspace }}/tsl/test/shared/sql/include/shared_setup.sql'
Expand All @@ -84,6 +85,7 @@ jobs:
# so total runtime should be around 200 minutes in nightly run and 40 minutes otherwise
- name: Run SQLsmith
run: |
set -xeu
set -o pipefail
if [ "${{ github.event_name }}" == "schedule" ]; then
LOOPS=20
Expand All @@ -96,6 +98,10 @@ jobs:
./sqlsmith --seed=$((16#$(openssl rand -hex 3))) --exclude-catalog \
--target="host=/tmp dbname=smith" --max-queries=10000 \
2>&1 | tee -a sqlsmith.log

psql "host=/tmp dbname=smith" -c "select 1"

truncate --size=0 postgres.log
done

- name: Check for coredumps
Expand All @@ -116,9 +122,9 @@ jobs:
set verbose on
set trace-commands on
show debug-file-directory
printf "'"'"query = '%s'\n\n"'"'", debug_query_string
printf "'"'"query = '%s'\n\n"'"'", (char *) debug_query_string
frame function ExceptionalCondition
printf "'"'"condition = '%s'\n"'"'", conditionName
printf "'"'"condition = '%s'\n"'"'", (char *) conditionName
up 1
l
info args
Expand All @@ -135,6 +141,13 @@ jobs:
name: Coredumps sqlsmith ${{ matrix.os }} PG${{ matrix.pg }}
path: coredumps

- name: Save PostgreSQL log
if: always() && steps.collectlogs.outputs.coredumps == 'true'
uses: actions/upload-artifact@v4
with:
name: PostgreSQL log for PG${{ matrix.pg }}
path: postgres.log

- name: Upload test results to the database
if: always()
env:
Expand Down
35 changes: 34 additions & 1 deletion tsl/src/nodes/decompress_chunk/compressed_batch.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,36 @@ static void
compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_state, Node *qual,
uint64 *restrict result)
{
/*
* Some predicates can be evaluated to a Const at run time.
*/
if (IsA(qual, Const))
{
Const *c = castNode(Const, qual);
if (c->constisnull || !DatumGetBool(c->constvalue))
{
/*
* Some predicates are evaluated to a null Const, like a
* strict comparison with stable expression that evaluates to null.
* No rows pass.
*/
const size_t n_batch_result_words = (batch_state->total_batch_rows + 63) / 64;
for (size_t i = 0; i < n_batch_result_words; i++)
{
result[i] = 0;
}
}
else
{
/*
* This is a constant true qual, every row passes and we can
* just ignore it. No idea how it can happen though.
*/
Assert(false);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should log an error here to ensure we also catch these cases in production builds.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The good thing is that this case doesn't require any handling, so I'd just leave it as is, the query will work correctly even if a user manages to hit it somehow.

}
return;
}

/*
* For now, we support NullTest, "Var ? Const" predicates and
* ScalarArrayOperations.
Expand All @@ -247,6 +277,7 @@ compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_stat
}
else
{
Ensure(IsA(qual, OpExpr), "expected OpExpr");
opexpr = castNode(OpExpr, qual);
args = opexpr->args;
vector_const_opcode = get_opcode(opexpr->opno);
Expand Down Expand Up @@ -368,6 +399,8 @@ compute_plain_qual(DecompressContext *dcontext, DecompressBatchState *batch_stat
* rows in the batch, if the column has a default value in this batch.
*/
const size_t n_vector_result_words = (vector->length + 63) / 64;
Assert((predicate_result != &default_value_predicate_result) ||
n_vector_result_words == 1); /* to placate Coverity. */
const uint64 *restrict validity = (uint64 *restrict) vector->buffers[0];
for (size_t i = 0; i < n_vector_result_words; i++)
{
Expand Down Expand Up @@ -480,7 +513,7 @@ compute_one_qual(DecompressContext *dcontext, DecompressBatchState *batch_state,
* Postgres removes NOT for operators we can vectorize, so we don't support
* NOT and consider it non-vectorizable at planning time. So only OR is left.
*/
Assert(boolexpr->boolop == OR_EXPR);
Ensure(boolexpr->boolop == OR_EXPR, "expected OR");
compute_qual_disjunction(dcontext, batch_state, boolexpr->args, result);
}

Expand Down
31 changes: 0 additions & 31 deletions tsl/src/nodes/decompress_chunk/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,6 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags)
}

/* Constify stable expressions in vectorized predicates. */
chunk_state->have_constant_false_vectorized_qual = false;
PlannerGlobal glob = {
.boundParams = node->ss.ps.state->es_param_list_info,
};
Expand All @@ -433,31 +432,6 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags)
{
Node *constified = estimate_expression_value(&root, (Node *) lfirst(lc));

/*
* Note that some expressions are evaluated to a null Const, like a
* strict comparison with stable expression that evaluates to null. If
* we have such filter, no rows can pass, so we set a special flag to
* return early.
*/
if (IsA(constified, Const))
{
Const *c = castNode(Const, constified);
if (c->constisnull || !DatumGetBool(c->constvalue))
{
chunk_state->have_constant_false_vectorized_qual = true;
break;
}
else
{
/*
* This is a constant true qual, every row passes and we can
* just ignore it. No idea how it can happen though.
*/
Assert(false);
continue;
}
}

dcontext->vectorized_quals_constified =
lappend(dcontext->vectorized_quals_constified, constified);
}
Expand Down Expand Up @@ -738,11 +712,6 @@ decompress_chunk_exec_impl(DecompressChunkState *chunk_state, const BatchQueueFu
return perform_vectorized_aggregation(chunk_state);
}

if (chunk_state->have_constant_false_vectorized_qual)
{
return NULL;
}

bqfuncs->pop(bq, dcontext);

while (bqfuncs->needs_next_batch(bq))
Expand Down
1 change: 0 additions & 1 deletion tsl/src/nodes/decompress_chunk/exec.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ typedef struct DecompressChunkState
* evaluate to constant false, hence the flag.
*/
List *vectorized_quals_original;
bool have_constant_false_vectorized_qual;
} DecompressChunkState;

extern Node *decompress_chunk_state_create(CustomScan *cscan);
102 changes: 100 additions & 2 deletions tsl/test/expected/decompress_vector_qual.out
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\c :TEST_DBNAME :ROLE_SUPERUSER
create function stable_identity(x anyelement) returns anyelement as $$ select x $$ language sql stable;
create table vectorqual(metric1 int8, ts timestamp, metric2 int8, device int8);
select create_hypertable('vectorqual', 'ts');
WARNING: column type "timestamp without time zone" used for "ts" does not follow best practices
Expand Down Expand Up @@ -201,7 +202,6 @@ execute p(33);

deallocate p;
-- Also try query parameter in combination with a stable function.
create function stable_identity(x anyelement) returns anyelement as $$ select x $$ language sql stable;
prepare p(int4) as select count(*) from vectorqual where metric3 = stable_identity($1);
execute p(33);
count
Expand Down Expand Up @@ -304,6 +304,14 @@ select count(*) from vectorqual where metric2 < 0 or metric3 < -1;
0
(1 row)

-- expression evaluated to null at run time
select count(*) from vectorqual where metric3 = 777
or metric3 > case when now() > now() - interval '1s' then null else 1 end;
count
-------
2
(1 row)

reset timescaledb.enable_bulk_decompression;
-- Test with unary operator.
set timescaledb.debug_require_vector_qual to 'forbid';
Expand Down Expand Up @@ -348,13 +356,91 @@ select count(*) from vectorqual where metric3 = 777 or metric3 === 777;
2
(1 row)

-- It also doesn't have a commutator.
-- Custom operator that can be vectorized but doesn't have a negator.
create operator !!! (function = 'int4ne', rightarg = int4, leftarg = int4);
set timescaledb.debug_require_vector_qual to 'only';
select count(*) from vectorqual where metric3 !!! 777;
count
-------
3
(1 row)

select count(*) from vectorqual where metric3 !!! any(array[777, 888]);
count
-------
5
(1 row)

select count(*) from vectorqual where metric3 !!! 777 or metric3 !!! 888;
count
-------
5
(1 row)

select count(*) from vectorqual where metric3 !!! 666 and (metric3 !!! 777 or metric3 !!! 888);
count
-------
5
(1 row)

select count(*) from vectorqual where metric3 !!! 666 and (metric3 !!! 777 or metric3 !!! stable_identity(888));
count
-------
5
(1 row)

set timescaledb.debug_require_vector_qual to 'forbid';
select count(*) from vectorqual where not metric3 !!! 777;
count
-------
2
(1 row)

select count(*) from vectorqual where metric3 !!! 666 or (metric3 !!! 777 and not metric3 !!! 888);
count
-------
5
(1 row)

select count(*) from vectorqual where metric3 !!! 666 or not (metric3 !!! 777 and not metric3 !!! 888);
count
-------
5
(1 row)

set timescaledb.debug_require_vector_qual to 'allow';
select count(*) from vectorqual where metric3 !!! 777 or not metric3 !!! 888;
count
-------
3
(1 row)

select count(*) from vectorqual where metric3 !!! 777 and not metric3 !!! 888;
count
-------
0
(1 row)

select count(*) from vectorqual where not(metric3 !!! 666 or not (metric3 !!! 777 and not metric3 !!! 888));
count
-------
0
(1 row)

-- These operators don't have a commutator.
set timescaledb.debug_require_vector_qual to 'forbid';
select count(*) from vectorqual where 777 === metric3;
count
-------
2
(1 row)

select count(*) from vectorqual where 777 !!! metric3;
count
-------
3
(1 row)

-- NullTest is vectorized.
set timescaledb.debug_require_vector_qual to 'only';
select count(*) from vectorqual where metric4 is null;
Expand All @@ -369,6 +455,18 @@ select count(*) from vectorqual where metric4 is not null;
2
(1 row)

select count(*) from vectorqual where metric3 = 777 or metric4 is not null;
count
-------
4
(1 row)

select count(*) from vectorqual where metric3 = stable_identity(777) or metric4 is null;
count
-------
3
(1 row)

-- Can't vectorize conditions on system columns. Have to check this on a single
-- chunk, otherwise the whole-row var will be masked by ConvertRowType.
select show_chunks('vectorqual') chunk1 limit 1 \gset
Expand Down
32 changes: 30 additions & 2 deletions tsl/test/sql/decompress_vector_qual.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

\c :TEST_DBNAME :ROLE_SUPERUSER

create function stable_identity(x anyelement) returns anyelement as $$ select x $$ language sql stable;

create table vectorqual(metric1 int8, ts timestamp, metric2 int8, device int8);
select create_hypertable('vectorqual', 'ts');
alter table vectorqual set (timescaledb.compress, timescaledb.compress_segmentby = 'device');
Expand Down Expand Up @@ -87,7 +89,6 @@ execute p(33);
deallocate p;

-- Also try query parameter in combination with a stable function.
create function stable_identity(x anyelement) returns anyelement as $$ select x $$ language sql stable;
prepare p(int4) as select count(*) from vectorqual where metric3 = stable_identity($1);
execute p(33);
deallocate p;
Expand Down Expand Up @@ -133,6 +134,10 @@ select count(*) from vectorqual where metric2 < 0 or (metric4 < -1 and 40 >= met
-- early exit after OR BoolExpr
select count(*) from vectorqual where metric2 < 0 or metric3 < -1;

-- expression evaluated to null at run time
select count(*) from vectorqual where metric3 = 777
or metric3 > case when now() > now() - interval '1s' then null else 1 end;

reset timescaledb.enable_bulk_decompression;


Expand All @@ -152,14 +157,37 @@ select count(*) from vectorqual where metric3 === any(array[777, 888]);
select count(*) from vectorqual where not metric3 === 777;
select count(*) from vectorqual where metric3 = 777 or metric3 === 777;

-- It also doesn't have a commutator.
-- Custom operator that can be vectorized but doesn't have a negator.
create operator !!! (function = 'int4ne', rightarg = int4, leftarg = int4);
set timescaledb.debug_require_vector_qual to 'only';
select count(*) from vectorqual where metric3 !!! 777;
select count(*) from vectorqual where metric3 !!! any(array[777, 888]);
select count(*) from vectorqual where metric3 !!! 777 or metric3 !!! 888;
select count(*) from vectorqual where metric3 !!! 666 and (metric3 !!! 777 or metric3 !!! 888);
select count(*) from vectorqual where metric3 !!! 666 and (metric3 !!! 777 or metric3 !!! stable_identity(888));

set timescaledb.debug_require_vector_qual to 'forbid';
select count(*) from vectorqual where not metric3 !!! 777;
select count(*) from vectorqual where metric3 !!! 666 or (metric3 !!! 777 and not metric3 !!! 888);
select count(*) from vectorqual where metric3 !!! 666 or not (metric3 !!! 777 and not metric3 !!! 888);

set timescaledb.debug_require_vector_qual to 'allow';
select count(*) from vectorqual where metric3 !!! 777 or not metric3 !!! 888;
select count(*) from vectorqual where metric3 !!! 777 and not metric3 !!! 888;
select count(*) from vectorqual where not(metric3 !!! 666 or not (metric3 !!! 777 and not metric3 !!! 888));

-- These operators don't have a commutator.
set timescaledb.debug_require_vector_qual to 'forbid';
select count(*) from vectorqual where 777 === metric3;
select count(*) from vectorqual where 777 !!! metric3;


-- NullTest is vectorized.
set timescaledb.debug_require_vector_qual to 'only';
select count(*) from vectorqual where metric4 is null;
select count(*) from vectorqual where metric4 is not null;
select count(*) from vectorqual where metric3 = 777 or metric4 is not null;
select count(*) from vectorqual where metric3 = stable_identity(777) or metric4 is null;


-- Can't vectorize conditions on system columns. Have to check this on a single
Expand Down