Skip to content

Commit

Permalink
Fix ORDER BY on certain GROUPING SETS. (apache#16268)
Browse files Browse the repository at this point in the history
* Fix ORDER BY on certain GROUPING SETS.

DefaultLimitSpec (part of native groupBy) had a bug where it would assume
that results are naturally ordered by dimensions even when subtotalsSpec
is present. However, this is not necessarily the case. For certain
combinations of ORDER BY and GROUPING SETS, this would cause the ORDER BY
to be ignored.

* Fix test testGroupByWithSubtotalsSpecWithOrderLimitForcePushdown. Resorting was necessary.
  • Loading branch information
gianm committed Apr 12, 2024
1 parent 7f06a53 commit b0c5184
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,12 @@ public Function<Sequence<ResultRow>, Sequence<ResultRow>> build(final GroupByQue
{
final List<DimensionSpec> dimensions = query.getDimensions();

// Can avoid re-sorting if the natural ordering is good enough.
boolean sortingNeeded = dimensions.size() < columns.size();
// Can avoid re-sorting if the natural ordering is good enough. Set sortingNeeded to true if we definitely must
// sort, due to query dimensions list being shorter than the sort columns list, or due to having subtotalsSpec
// (when subtotalsSpec is set, dimensions are not naturally sorted).
//
// If sortingNeeded is false here, we may set it to true later on in this method. False is just a starting point.
boolean sortingNeeded = dimensions.size() < columns.size() || query.getSubtotalsSpec() != null;

final Set<String> aggAndPostAggNames = new HashSet<>();
for (AggregatorFactory agg : query.getAggregatorSpecs()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8204,16 +8204,16 @@ public void testGroupByWithSubtotalsSpecWithOrderLimitForcePushdown()
.build();

List<ResultRow> expectedResults = Arrays.asList(
makeRow(query, "2011-04-01", "placement", "preferred", "market", null, "rows", 13L, "idx", 6619L),
makeRow(query, "2011-04-02", "placement", "preferred", "market", null, "rows", 13L, "idx", 5827L),
makeRow(query, "2011-04-01", "placement", null, "market", null, "rows", 13L, "idx", 6619L),
makeRow(query, "2011-04-01", "placement", null, "market", "spot", "rows", 9L, "idx", 1102L),
makeRow(query, "2011-04-01", "placement", null, "market", "total_market", "rows", 2L, "idx", 2836L),
makeRow(query, "2011-04-01", "placement", null, "market", "upfront", "rows", 2L, "idx", 2681L),
makeRow(query, "2011-04-01", "placement", "preferred", "market", null, "rows", 13L, "idx", 6619L),
makeRow(query, "2011-04-02", "placement", null, "market", null, "rows", 13L, "idx", 5827L),
makeRow(query, "2011-04-02", "placement", null, "market", "spot", "rows", 9L, "idx", 1120L),
makeRow(query, "2011-04-02", "placement", null, "market", "total_market", "rows", 2L, "idx", 2514L),
makeRow(query, "2011-04-02", "placement", null, "market", "upfront", "rows", 2L, "idx", 2193L),
makeRow(query, "2011-04-01", "placement", null, "market", null, "rows", 13L, "idx", 6619L),
makeRow(query, "2011-04-02", "placement", null, "market", null, "rows", 13L, "idx", 5827L)
makeRow(query, "2011-04-02", "placement", "preferred", "market", null, "rows", 13L, "idx", 5827L)
);

Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,80 @@ public void testWithSortByDimsFirst()
);
}

@Test
public void testSortByDimNullSubtotals()
{
DefaultLimitSpec limitSpec = new DefaultLimitSpec(
ImmutableList.of(new OrderByColumnSpec("k1", OrderByColumnSpec.Direction.ASCENDING, StringComparators.NUMERIC)),
null
);

Function<Sequence<ResultRow>, Sequence<ResultRow>> limitFn = limitSpec.build(
GroupByQuery.builder()
.setDataSource("dummy")
.setInterval("1000/3000")
.setDimensions(new DefaultDimensionSpec("k1", "k1", ColumnType.DOUBLE))
.setGranularity(Granularities.ALL)
.build()
);

// No sorting, because the limit spec thinks sorting isn't necessary (it expects results naturally ordered on k1)
Assert.assertEquals(
testRowsList,
limitFn.apply(Sequences.simple(testRowsList)).toList()
);
}

@Test
public void testSortByDimEmptySubtotals()
{
DefaultLimitSpec limitSpec = new DefaultLimitSpec(
ImmutableList.of(new OrderByColumnSpec("k1", OrderByColumnSpec.Direction.ASCENDING, StringComparators.NUMERIC)),
null
);

Function<Sequence<ResultRow>, Sequence<ResultRow>> limitFn = limitSpec.build(
GroupByQuery.builder()
.setDataSource("dummy")
.setInterval("1000/3000")
.setDimensions(new DefaultDimensionSpec("k1", "k1", ColumnType.DOUBLE))
.setGranularity(Granularities.ALL)
.setSubtotalsSpec(ImmutableList.of())
.build()
);

// limit spec sorts rows because subtotalsSpec is set. (Otherwise, it wouldn't; see testSortByDimNullSubtotals.)
Assert.assertEquals(
ImmutableList.of(testRowsList.get(2), testRowsList.get(0), testRowsList.get(1)),
limitFn.apply(Sequences.simple(testRowsList)).toList()
);
}

@Test
public void testSortByDimSomeSubtotals()
{
DefaultLimitSpec limitSpec = new DefaultLimitSpec(
ImmutableList.of(new OrderByColumnSpec("k1", OrderByColumnSpec.Direction.ASCENDING, StringComparators.NUMERIC)),
null
);

Function<Sequence<ResultRow>, Sequence<ResultRow>> limitFn = limitSpec.build(
GroupByQuery.builder()
.setDataSource("dummy")
.setInterval("1000/3000")
.setDimensions(new DefaultDimensionSpec("k1", "k1", ColumnType.DOUBLE))
.setGranularity(Granularities.ALL)
.setSubtotalsSpec(ImmutableList.of(ImmutableList.of("k1"), ImmutableList.of()))
.build()
);

// limit spec sorts rows because subtotalsSpec is set. (Otherwise, it wouldn't; see testSortByDimNullSubtotals.)
Assert.assertEquals(
ImmutableList.of(testRowsList.get(2), testRowsList.get(0), testRowsList.get(1)),
limitFn.apply(Sequences.simple(testRowsList)).toList()
);
}

@Test
public void testSortDimensionDescending()
{
Expand Down

0 comments on commit b0c5184

Please sign in to comment.