Allow user control of CTE materialization, and change the default beh…

…avior. Historically we've always materialized the full output of a CTE query, treating WITH as an optimization fence (so that, for example, restrictions from the outer query cannot be pushed into it). This is appropriate when the CTE query is INSERT/UPDATE/DELETE, or is recursive; but when the CTE query is non-recursive and side-effect-free, there's no hazard of changing the query results by pushing restrictions down. Another argument for materialization is that it can avoid duplicate computation of an expensive WITH query --- but that only applies if the WITH query is called more than once in the outer query. Even then it could still be a net loss, if each call has restrictions that would allow just a small part of the WITH query to be computed. Hence, let's change the behavior for WITH queries that are non-recursive and side-effect-free. By default, we will inline them into the outer query (removing the optimization fence) if they are called just once. If they are called more than once, we will keep the old behavior by default, but the user can override this and force inlining by specifying NOT MATERIALIZED. Lastly, the user can force the old behavior by specifying MATERIALIZED; this would mainly be useful when the query had deliberately been employing WITH as an optimization fence to prevent a poor choice of plan. Andreas Karlsson, Andrew Gierth, David Fetter Discussion: https://postgr.es/m/87sh48ffhb.fsf@news-spur.riddles.org.uk
postgres · Feb 16, 2019 · 608b167 · 608b167
1 parent 79730e2
commit 608b167
Show file tree

Hide file tree

Showing 23 changed files with 580 additions and 44 deletions.
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2927,6 +2927,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
 
 				/* we store the string name because RTE_CTE RTEs need it */
 				APP_JUMB_STRING(cte->ctename);
+				APP_JUMB(cte->ctematerialized);
 				JumbleQuery(jstate, castNode(Query, cte->ctequery));
 			}
 			break;

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -1888,7 +1888,7 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 
 -- join in CTE
 EXPLAIN (VERBOSE, COSTS OFF)
-WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
+WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
                                                              QUERY PLAN                                                              
 -------------------------------------------------------------------------------------------------------------------------------------
  Limit
@@ -1905,7 +1905,7 @@ WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2
                Output: t.c1_1, t.c2_1, t.c1_3
 (12 rows)
 
-WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
+WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
  c1_1 | c2_1 
 ------+------
   101 |  101

diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -493,8 +493,8 @@ SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t
 SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10 FOR SHARE;
 -- join in CTE
 EXPLAIN (VERBOSE, COSTS OFF)
-WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
-WITH t (c1_1, c1_3, c2_1) AS (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
+WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
+WITH t (c1_1, c1_3, c2_1) AS MATERIALIZED (SELECT t1.c1, t1.c3, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) SELECT c1_1, c2_1 FROM t ORDER BY c1_3, c1_1 OFFSET 100 LIMIT 10;
 -- ctid with whole-row reference
 EXPLAIN (VERBOSE, COSTS OFF)
 SELECT t1.ctid, t1, t2, t1.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c3, t1.c1 OFFSET 100 LIMIT 10;

diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
@@ -2199,22 +2199,94 @@ SELECT n FROM t LIMIT 100;
   </para>
 
   <para>
-   A useful property of <literal>WITH</literal> queries is that they are evaluated
-   only once per execution of the parent query, even if they are referred to
-   more than once by the parent query or sibling <literal>WITH</literal> queries.
+   A useful property of <literal>WITH</literal> queries is that they are
+   normally evaluated only once per execution of the parent query, even if
+   they are referred to more than once by the parent query or
+   sibling <literal>WITH</literal> queries.
    Thus, expensive calculations that are needed in multiple places can be
    placed within a <literal>WITH</literal> query to avoid redundant work.  Another
    possible application is to prevent unwanted multiple evaluations of
    functions with side-effects.
-   However, the other side of this coin is that the optimizer is less able to
-   push restrictions from the parent query down into a <literal>WITH</literal> query
-   than an ordinary subquery.  The <literal>WITH</literal> query will generally be
+   However, the other side of this coin is that the optimizer is not able to
+   push restrictions from the parent query down into a multiply-referenced
+   <literal>WITH</literal> query, since that might affect all uses of the
+   <literal>WITH</literal> query's output when it should affect only one.
+   The multiply-referenced <literal>WITH</literal> query will be
    evaluated as written, without suppression of rows that the parent query
    might discard afterwards.  (But, as mentioned above, evaluation might stop
    early if the reference(s) to the query demand only a limited number of
    rows.)
   </para>
 
+  <para>
+   However, if a <literal>WITH</literal> query is non-recursive and
+   side-effect-free (that is, it is a <literal>SELECT</literal> containing
+   no volatile functions) then it can be folded into the parent query,
+   allowing joint optimization of the two query levels.  By default, this
+   happens if the parent query references the <literal>WITH</literal> query
+   just once, but not if it references the <literal>WITH</literal> query
+   more than once.  You can override that decision by
+   specifying <literal>MATERIALIZED</literal> to force separate calculation
+   of the <literal>WITH</literal> query, or by specifying <literal>NOT
+   MATERIALIZED</literal> to force it to be merged into the parent query.
+   The latter choice risks duplicate computation of
+   the <literal>WITH</literal> query, but it can still give a net savings if
+   each usage of the <literal>WITH</literal> query needs only a small part
+   of the <literal>WITH</literal> query's full output.
+  </para>
+
+  <para>
+   A simple example of these rules is
+<programlisting>
+WITH w AS (
+    SELECT * FROM big_table
+)
+SELECT * FROM w WHERE key = 123;
+</programlisting>
+   This <literal>WITH</literal> query will be folded, producing the same
+   execution plan as
+<programlisting>
+SELECT * FROM big_table WHERE key = 123;
+</programlisting>
+   In particular, if there's an index on <structfield>key</structfield>,
+   it will probably be used to fetch just the rows having <literal>key =
+   123</literal>.  On the other hand, in
+<programlisting>
+WITH w AS (
+    SELECT * FROM big_table
+)
+SELECT * FROM w AS w1 JOIN w AS w2 ON w1.key = w2.ref
+WHERE w2.key = 123;
+</programlisting>
+   the <literal>WITH</literal> query will be materialized, producing a
+   temporary copy of <structname>big_table</structname> that is then
+   joined with itself &mdash; without benefit of any index.  This query
+   will be executed much more efficiently if written as
+<programlisting>
+WITH w AS NOT MATERIALIZED (
+    SELECT * FROM big_table
+)
+SELECT * FROM w AS w1 JOIN w AS w2 ON w1.key = w2.ref
+WHERE w2.key = 123;
+</programlisting>
+   so that the parent query's restrictions can be applied directly
+   to scans of <structname>big_table</structname>.
+  </para>
+
+  <para>
+   An example where <literal>NOT MATERIALIZED</literal> could be
+   undesirable is
+<programlisting>
+WITH w AS (
+    SELECT key, very_expensive_function(val) as f FROM some_table
+)
+SELECT * FROM w AS w1 JOIN w AS w2 ON w1.f = w2.f;
+</programlisting>
+   Here, materialization of the <literal>WITH</literal> query ensures
+   that <function>very_expensive_function</function> is evaluated only
+   once per table row, not twice.
+  </para>
+
   <para>
    The examples above only show <literal>WITH</literal> being used with
    <command>SELECT</command>, but it can be attached in the same way to

diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
@@ -72,7 +72,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
 
 <phrase>and <replaceable class="parameter">with_query</replaceable> is:</phrase>
 
-    <replaceable class="parameter">with_query_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] AS ( <replaceable class="parameter">select</replaceable> | <replaceable class="parameter">values</replaceable> | <replaceable class="parameter">insert</replaceable> | <replaceable class="parameter">update</replaceable> | <replaceable class="parameter">delete</replaceable> )
+    <replaceable class="parameter">with_query_name</replaceable> [ ( <replaceable class="parameter">column_name</replaceable> [, ...] ) ] AS [ [ NOT ] MATERIALIZED ] ( <replaceable class="parameter">select</replaceable> | <replaceable class="parameter">values</replaceable> | <replaceable class="parameter">insert</replaceable> | <replaceable class="parameter">update</replaceable> | <replaceable class="parameter">delete</replaceable> )
 
 TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
 </synopsis>
@@ -93,7 +93,8 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
       These effectively serve as temporary tables that can be referenced
       in the <literal>FROM</literal> list.  A <literal>WITH</literal> query
       that is referenced more than once in <literal>FROM</literal> is
-      computed only once.
+      computed only once,
+      unless specified otherwise with <literal>NOT MATERIALIZED</literal>.
       (See <xref linkend="sql-with" endterm="sql-with-title"/> below.)
      </para>
     </listitem>
@@ -272,22 +273,54 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
     that are earlier in the <literal>WITH</literal> list.
    </para>
 
+   <para>
+    The primary query and the <literal>WITH</literal> queries are all
+    (notionally) executed at the same time.  This implies that the effects of
+    a data-modifying statement in <literal>WITH</literal> cannot be seen from
+    other parts of the query, other than by reading its <literal>RETURNING</literal>
+    output.  If two such data-modifying statements attempt to modify the same
+    row, the results are unspecified.
+   </para>
+
    <para>
     A key property of <literal>WITH</literal> queries is that they
-    are evaluated only once per execution of the primary query,
+    are normally evaluated only once per execution of the primary query,
     even if the primary query refers to them more than once.
     In particular, data-modifying statements are guaranteed to be
     executed once and only once, regardless of whether the primary query
     reads all or any of their output.
    </para>
 
    <para>
-    The primary query and the <literal>WITH</literal> queries are all
-    (notionally) executed at the same time.  This implies that the effects of
-    a data-modifying statement in <literal>WITH</literal> cannot be seen from
-    other parts of the query, other than by reading its <literal>RETURNING</literal>
-    output.  If two such data-modifying statements attempt to modify the same
-    row, the results are unspecified.
+    However, a <literal>WITH</literal> query can be marked
+    <literal>NOT MATERIALIZED</literal> to remove this guarantee.  In that
+    case, the <literal>WITH</literal> query can be folded into the primary
+    query much as though it were a simple sub-<literal>SELECT</literal> in
+    the primary query's <literal>FROM</literal> clause.  This results in
+    duplicate computations if the primary query refers to
+    that <literal>WITH</literal> query more than once; but if each such use
+    requires only a few rows of the <literal>WITH</literal> query's total
+    output, <literal>NOT MATERIALIZED</literal> can provide a net savings by
+    allowing the queries to be optimized jointly.
+    <literal>NOT MATERIALIZED</literal> is ignored if it is attached to
+    a <literal>WITH</literal> query that is recursive or is not
+    side-effect-free (i.e., is not a plain <literal>SELECT</literal>
+    containing no volatile functions).
+   </para>
+
+   <para>
+    By default, a side-effect-free <literal>WITH</literal> query is folded
+    into the primary query if it is used exactly once in the primary
+    query's <literal>FROM</literal> clause.  This allows joint optimization
+    of the two query levels in situations where that should be semantically
+    invisible.  However, such folding can be prevented by marking the
+    <literal>WITH</literal> query as <literal>MATERIALIZED</literal>.
+    That might be useful, for example, if the <literal>WITH</literal> query
+    is being used as an optimization fence to prevent the planner from
+    choosing a bad plan.
+    <productname>PostgreSQL</productname> versions before v12 never did
+    such folding, so queries written for older versions might rely on
+    <literal>WITH</literal> to act as an optimization fence.
    </para>
 
    <para>
@@ -2087,6 +2120,12 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
    <para>
     <literal>ROWS FROM( ... )</literal> is an extension of the SQL standard.
    </para>
+
+   <para>
+    The <literal>MATERIALIZED</literal> and <literal>NOT
+    MATERIALIZED</literal> options of <literal>WITH</literal> are extensions
+    of the SQL standard.
+   </para>
   </refsect2>
 
  </refsect1>

diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
@@ -2536,6 +2536,7 @@ _copyCommonTableExpr(const CommonTableExpr *from)
 
 	COPY_STRING_FIELD(ctename);
 	COPY_NODE_FIELD(aliascolnames);
+	COPY_SCALAR_FIELD(ctematerialized);
 	COPY_NODE_FIELD(ctequery);
 	COPY_LOCATION_FIELD(location);
 	COPY_SCALAR_FIELD(cterecursive);

diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
@@ -2791,6 +2791,7 @@ _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b)
 {
 	COMPARE_STRING_FIELD(ctename);
 	COMPARE_NODE_FIELD(aliascolnames);
+	COMPARE_SCALAR_FIELD(ctematerialized);
 	COMPARE_NODE_FIELD(ctequery);
 	COMPARE_LOCATION_FIELD(location);
 	COMPARE_SCALAR_FIELD(cterecursive);

diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
@@ -2989,6 +2989,7 @@ _outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
 
 	WRITE_STRING_FIELD(ctename);
 	WRITE_NODE_FIELD(aliascolnames);
+	WRITE_ENUM_FIELD(ctematerialized, CTEMaterialize);
 	WRITE_NODE_FIELD(ctequery);
 	WRITE_LOCATION_FIELD(location);
 	WRITE_BOOL_FIELD(cterecursive);

diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
@@ -418,6 +418,7 @@ _readCommonTableExpr(void)
 
 	READ_STRING_FIELD(ctename);
 	READ_NODE_FIELD(aliascolnames);
+	READ_ENUM_FIELD(ctematerialized, CTEMaterialize);
 	READ_NODE_FIELD(ctequery);
 	READ_LOCATION_FIELD(location);
 	READ_BOOL_FIELD(cterecursive);

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
@@ -646,8 +646,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	root->partColsUpdated = false;
 
 	/*
-	 * If there is a WITH list, process each WITH query and build an initplan
-	 * SubPlan structure for it.
+	 * If there is a WITH list, process each WITH query and either convert it
+	 * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
 	 */
 	if (parse->cteList)
 		SS_process_ctes(root);