Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Don't assume a subquery's output is unique if there's a SRF in its tl…

…ist.

While the x output of "select x from t group by x" can be presumed unique,
this does not hold for "select x, generate_series(1,10) from t group by x",
because we may expand the set-returning function after the grouping step.
(Perhaps that should be re-thought; but considering all the other oddities
involved with SRFs in targetlists, it seems unlikely we'll change it.)
Put a check in query_is_distinct_for() so it's not fooled by such cases.

Back-patch to all supported branches.

David Rowley
  • Loading branch information...
commit fa21a760b2e16b68196da685f29033304b41d4bc 1 parent d9d125d
Tom Lane authored
11 src/backend/optimizer/util/pathnode.c
@@ -1205,6 +1205,17 @@ query_is_distinct_for(Query *query, List *colnos, List *opids)
1205 1205 Assert(list_length(colnos) == list_length(opids));
1206 1206
1207 1207 /*
  1208 + * A set-returning function in the query's targetlist can result in
  1209 + * returning duplicate rows, if the SRF is evaluated after the
  1210 + * de-duplication step; so we play it safe and say "no" if there are any
  1211 + * SRFs. (We could be certain that it's okay if SRFs appear only in the
  1212 + * specified columns, since those must be evaluated before de-duplication;
  1213 + * but it doesn't presently seem worth the complication to check that.)
  1214 + */
  1215 + if (expression_returns_set((Node *) query->targetList))
  1216 + return false;
  1217 +
  1218 + /*
1208 1219 * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
1209 1220 * columns in the DISTINCT clause appear in colnos and operator semantics
1210 1221 * match.
33 src/test/regress/expected/subselect.out
@@ -742,3 +742,36 @@ select * from int4_tbl where
742 742 0
743 743 (1 row)
744 744
  745 +--
  746 +-- Check for incorrect optimization when IN subquery contains a SRF
  747 +--
  748 +set enable_hashjoin to 0;
  749 +explain (verbose, costs off)
  750 +select * from int4_tbl o where (f1, f1) in
  751 + (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
  752 + QUERY PLAN
  753 +----------------------------------------------------------------
  754 + Nested Loop Semi Join
  755 + Output: o.f1
  756 + Join Filter: (o.f1 = "ANY_subquery".f1)
  757 + -> Seq Scan on public.int4_tbl o
  758 + Output: o.f1
  759 + -> Materialize
  760 + Output: "ANY_subquery".f1, "ANY_subquery".g
  761 + -> Subquery Scan on "ANY_subquery"
  762 + Output: "ANY_subquery".f1, "ANY_subquery".g
  763 + Filter: ("ANY_subquery".f1 = "ANY_subquery".g)
  764 + -> HashAggregate
  765 + Output: i.f1, (generate_series(1, 2) / 10)
  766 + -> Seq Scan on public.int4_tbl i
  767 + Output: i.f1
  768 +(14 rows)
  769 +
  770 +select * from int4_tbl o where (f1, f1) in
  771 + (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
  772 + f1
  773 +----
  774 + 0
  775 +(1 row)
  776 +
  777 +reset enable_hashjoin;
11 src/test/regress/sql/subselect.sql
@@ -422,3 +422,14 @@ select * from int4_tbl where
422 422 select * from int4_tbl where
423 423 (case when f1 in (select unique1 from tenk1 a) then f1 else null end) in
424 424 (select ten from tenk1 b);
  425 +
  426 +--
  427 +-- Check for incorrect optimization when IN subquery contains a SRF
  428 +--
  429 +set enable_hashjoin to 0;
  430 +explain (verbose, costs off)
  431 +select * from int4_tbl o where (f1, f1) in
  432 + (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
  433 +select * from int4_tbl o where (f1, f1) in
  434 + (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
  435 +reset enable_hashjoin;

0 comments on commit fa21a76

Please sign in to comment.
Something went wrong with that request. Please try again.