Permalink
Browse files

Merge `LIMIT` on top of `DISTINCT`

On databases which support `DISTINCT ON` (like PostgreSQL) we skip the
`rewriteDistinct` phase and the queries already collapse nicely to a
single comprehension. In many cases the `DISTINCT ON` can still be
rewritten to a `DISTINCT` later in the code generator (if the columns
match up). On databases that do not support `DISTINCT ON` (like MySQL)
we have to eliminate these operations early on (in `rewriteDistinct`)
and replace them by either `DISTINCT` (where possible) or `GROUP BY`.
The catch is that we have to inject an artificial subquery boundary on
top of a `DISTINCT` to prevent mappings from being applied across the
`DISTINCT` (which could change the set of columns that determine
distinctness).

This subquery boundary then prevents the `Take` operation from being
merged into the existing comprehension in `mergeComprehensions`. The
solution is to push the always distinctness-preserving operations `Take`
and `Drop` down under `Subquery.AboveDistinct` in `reorderOperations`.

The test case (`q7` in `AggregateTest.testDistinct`) also triggers a bug
when running on the special H2Rownum test profile: When
`resolveZipJoins` uses `rownumStyle=true` you can end up with a
`Subquery.BelowRownum` boundary between a `Distinct` and its enclosing
`Bind`, in which case `rewriteDistinct` doesn't perform the rewriting.
The solution is to ensure that there is always a `Bind` below the
boundary in `resolveZipJoins` and create an identity `Bind` where
necessary to preserve this invariant (which should hold in all phases
between `forceOuterBinds` and `mergeToComprehensions`).
  • Loading branch information...
1 parent fc04186 commit 5cfbd2ef0d11cf0f331d124246bf1f7f6ea1cd25 @szeiger szeiger committed Nov 15, 2016
@@ -329,6 +329,7 @@ class AggregateTest extends AsyncTest[RelationalTestDB] {
val q5b = as.distinct.map(_.id)
val q5c = as.distinct.map(a => (a.id, a.a))
val q6 = as.distinct.length
+ val q7 = as.map(a => (a.a, a.b)).distinct.take(10)
if(tdb.driver == H2Driver) {
assertNesting(q1a, 1)
@@ -338,6 +339,7 @@ class AggregateTest extends AsyncTest[RelationalTestDB] {
assertNesting(q5a, 1)
assertNesting(q5b, 1)
assertNesting(q5c, 1)
+ assertNesting(q7, 1)
} else if(tdb.driver == PostgresDriver) {
assertNesting(q1a, 1)
assertNesting(q1b, 1)
@@ -346,6 +348,7 @@ class AggregateTest extends AsyncTest[RelationalTestDB] {
assertNesting(q5a, 1)
assertNesting(q5b, 1)
assertNesting(q5c, 1)
+ assertNesting(q7, 1)
}
DBIO.seq(
@@ -359,7 +362,8 @@ class AggregateTest extends AsyncTest[RelationalTestDB] {
mark("q5a", q5a.result).map(_.sortBy(identity) shouldBe Seq(1, 3)),
mark("q5b", q5b.result).map(_.sortBy(identity) should (r => r == Seq(1, 3) || r == Seq(2, 3))),
mark("q5c", q5c.result).map(_.sortBy(identity) should (r => r == Seq((1, "a"), (3, "c")) || r == Seq((2, "a"), (3, "c")))),
- mark("q6", q6.result).map(_ shouldBe 2)
+ mark("q6", q6.result).map(_ shouldBe 2),
+ mark("q7", q7.result).map(_.sortBy(identity) shouldBe Seq(("a", "a"), ("a", "b"), ("c", "b")))
)
}
}
@@ -3,7 +3,7 @@ package slick.compiler
import slick.ast._
import slick.ast.Util._
import slick.ast.TypeUtil._
-import slick.util.{ConstArray, Ellipsis, ??}
+import slick.util.{ConstArray, Ellipsis}
/** Reorder certain stream operations for more efficient merging in `mergeToComprehensions`. */
class ReorderOperations extends Phase {
@@ -46,11 +46,17 @@ class ReorderOperations extends Phase {
// Remove Subquery boundary on top of TableNode and Join
case Subquery(n @ (_: TableNode | _: Join), _) => n
- // Push distincness-preserving aliasing / literal projection into Subquery.AboveDistinct
+ // Push distinctness-preserving aliasing / literal projection into Subquery.AboveDistinct
case n @ Bind(s, Subquery(from :@ CollectionType(_, tpe), Subquery.AboveDistinct), Pure(StructNode(defs), ts1))
if isAliasingOrLiteral(s, defs) && isDistinctnessPreserving(s, defs, tpe) =>
Subquery(n.copy(from = from), Subquery.AboveDistinct).infer()
+ // Push Take and Drop (always distinctness-preserving) into Subquery.AboveDistinct
+ case Take(Subquery(from, Subquery.AboveDistinct), count) =>
+ Subquery(Take(from, count), Subquery.AboveDistinct).infer()
+ case Drop(Subquery(from, Subquery.AboveDistinct), count) =>
+ Subquery(Drop(from, count), Subquery.AboveDistinct).infer()
+
// Push any aliasing / literal projection into other Subquery
case n @ Bind(s, Subquery(from, cond), Pure(StructNode(defs), ts1)) if cond != Subquery.AboveDistinct && isAliasingOrLiteral(s, defs) =>
Subquery(n.copy(from = from), cond).infer()
@@ -51,7 +51,16 @@ class ResolveZipJoins(rownumStyle: Boolean = false) extends Phase {
val idxExpr =
if(offset == 1L) RowNumber()
else Library.-.typed[Long](RowNumber(), LiteralNode(1L - offset))
- val lbind = Bind(ls, Subquery(from, condBelow), Pure(StructNode(defs :+ (idxSym, idxExpr))))
+ val lbind = from match { // Ensure there is a Bind under the new Subquery, as required after forceOuterBinds
+ case from: Bind => // Already a Bind -> wrap in Subquery
+ Bind(ls, Subquery(from, condBelow), Pure(StructNode(defs :+ (idxSym, idxExpr))))
+ case n => // Other node -> First wrap in identity Bind, then in Subquery
+ val ils = new AnonSymbol
+ val mappings = defs.map(t => (t, new AnonSymbol))
+ val ifrom = Bind(ls, from, Pure(StructNode(mappings.map { case ((_, n), ns) => (ns, n) })))
+ val mappingDefs = mappings.map { case ((s, _), ns) => (s, Select(Ref(ils), ns): Node) }
+ Bind(ils, Subquery(ifrom, condBelow), Pure(StructNode(mappingDefs :+ (idxSym, idxExpr))))
+ }
Bind(s1, Subquery(lbind, condAbove), p.replace {
case Select(Ref(s), ElementSymbol(1)) if s == s1 => Ref(s1)
case Select(Ref(s), ElementSymbol(2)) if s == s1 => Select(Ref(s1), idxSym)

0 comments on commit 5cfbd2e

Please sign in to comment.