Skip to content

Commit

Permalink
Support hashed aggregation with grouping sets.
Browse files Browse the repository at this point in the history
This extends the Aggregate node with two new features: HashAggregate
can now run multiple hashtables concurrently, and a new strategy
MixedAggregate populates hashtables while doing sorted grouping.

The planner will now attempt to save as many sorts as possible when
planning grouping sets queries, while not exceeding work_mem for the
estimated combined sizes of all hashtables used.  No SQL-level changes
are required.  There should be no user-visible impact other than the
new EXPLAIN output and possible changes to result ordering when ORDER
BY was not used (which affected a few regression tests).  The
enable_hashagg option is respected.

Author: Andrew Gierth
Reviewers: Mark Dilger, Andres Freund
Discussion: https://postgr.es/m/87vatszyhj.fsf@news-spur.riddles.org.uk
  • Loading branch information
RhodiumToad committed Mar 27, 2017
1 parent f0a6046 commit b563594
Show file tree
Hide file tree
Showing 22 changed files with 2,544 additions and 594 deletions.
61 changes: 33 additions & 28 deletions contrib/postgres_fdw/expected/postgres_fdw.out
Original file line number Diff line number Diff line change
Expand Up @@ -3276,16 +3276,19 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2.
-- Grouping sets
explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
QUERY PLAN
---------------------------------------------------------------------------------------------------
GroupAggregate
Output: c2, sum(c1)
Group Key: ft1.c2
Group Key: ()
-> Foreign Scan on public.ft1
Output: c2, c1
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
(7 rows)
QUERY PLAN
------------------------------------------------------------------------------
Sort
Output: c2, (sum(c1))
Sort Key: ft1.c2
-> MixedAggregate
Output: c2, sum(c1)
Hash Key: ft1.c2
Group Key: ()
-> Foreign Scan on public.ft1
Output: c2, c1
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
(10 rows)

select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
c2 | sum
Expand All @@ -3298,16 +3301,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls la

explain (verbose, costs off)
select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
QUERY PLAN
---------------------------------------------------------------------------------------------------
GroupAggregate
Output: c2, sum(c1)
Group Key: ft1.c2
Group Key: ()
-> Foreign Scan on public.ft1
Output: c2, c1
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
(7 rows)
QUERY PLAN
------------------------------------------------------------------------------
Sort
Output: c2, (sum(c1))
Sort Key: ft1.c2
-> MixedAggregate
Output: c2, sum(c1)
Hash Key: ft1.c2
Group Key: ()
-> Foreign Scan on public.ft1
Output: c2, c1
Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
(10 rows)

select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
c2 | sum
Expand All @@ -3320,20 +3326,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last

explain (verbose, costs off)
select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------
Sort
Output: c2, c6, (sum(c1))
Sort Key: ft1.c2, ft1.c6
-> GroupAggregate
-> HashAggregate
Output: c2, c6, sum(c1)
Group Key: ft1.c2
Sort Key: ft1.c6
Group Key: ft1.c6
Hash Key: ft1.c2
Hash Key: ft1.c6
-> Foreign Scan on public.ft1
Output: c2, c6, c1
Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST
(11 rows)
Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3))
(10 rows)

select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
c2 | c6 | sum
Expand Down
25 changes: 21 additions & 4 deletions src/backend/commands/explain.c
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
pname = "HashAggregate";
strategy = "Hashed";
break;
case AGG_MIXED:
pname = "MixedAggregate";
strategy = "Mixed";
break;
default:
pname = "Aggregate ???";
strategy = "???";
Expand Down Expand Up @@ -1978,6 +1982,19 @@ show_grouping_set_keys(PlanState *planstate,
ListCell *lc;
List *gsets = aggnode->groupingSets;
AttrNumber *keycols = aggnode->grpColIdx;
const char *keyname;
const char *keysetname;

if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED)
{
keyname = "Hash Key";
keysetname = "Hash Keys";
}
else
{
keyname = "Group Key";
keysetname = "Group Keys";
}

ExplainOpenGroup("Grouping Set", NULL, true, es);

Expand All @@ -1992,7 +2009,7 @@ show_grouping_set_keys(PlanState *planstate,
es->indent++;
}

ExplainOpenGroup("Group Keys", "Group Keys", false, es);
ExplainOpenGroup(keysetname, keysetname, false, es);

foreach(lc, gsets)
{
Expand All @@ -2016,12 +2033,12 @@ show_grouping_set_keys(PlanState *planstate,
}

if (!result && es->format == EXPLAIN_FORMAT_TEXT)
ExplainPropertyText("Group Key", "()", es);
ExplainPropertyText(keyname, "()", es);
else
ExplainPropertyListNested("Group Key", result, es);
ExplainPropertyListNested(keyname, result, es);
}

ExplainCloseGroup("Group Keys", "Group Keys", false, es);
ExplainCloseGroup(keysetname, keysetname, false, es);

if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
es->indent--;
Expand Down
Loading

0 comments on commit b563594

Please sign in to comment.