Permalink
Browse files

Add `countDefined` operator:

Following on from #1239, which actually changed `.length` semantics for
all queries, not only those of a single column as documented, we add a
new `.countDefined` operator that performs a count of a single column,
omitting NULL rows.

As revealed by the new test cases, 3VL (three-valued logic, i.e. SQL NULL) semantics
for aggregation operators were broken in QueryInterpreter. This is fixed, too.

Tests in AggregateTest. Fixes #1285.
  • Loading branch information...
szeiger committed Sep 22, 2015
1 parent 079c110 commit 7199b510bc7078b3ed274fd1b4cdcb76270616a1
@@ -14,13 +14,13 @@ class AggregateTest extends AsyncTest[RelationalTestDB] {
}
val ts = TableQuery[T]
def q1(i: Int) = for { t <- ts if t.a === i } yield t
def q2(i: Int) = (q1(i).length, q1(i).map(_.a).sum, q1(i).map(_.b).sum, q1(i).map(_.b).avg)
def q2(i: Int) = (q1(i).length, q1(i).map(_.b).length, q1(i).map(_.b).countDefined, q1(i).map(_.a).sum, q1(i).map(_.b).sum, q1(i).map(_.b).avg)
val q2_0 = q2(0).shaped
val q2_1 = q2(1).shaped
ts.schema.create >>
(ts ++= Seq((1, Some(1)), (1, Some(2)), (1, Some(3)))) >>
q2_0.result.map(_ shouldBe (0, None, None, None)) >>
q2_1.result.map(_ shouldBe (3, Some(3), Some(6), Some(2)))
(ts ++= Seq((1, Some(1)), (1, Some(3)), (1, None))) >>
q2_0.result.map(_ shouldBe (0, 0, 0, None, None, None)) >>
q2_1.result.map(_ shouldBe (3, 3, 2, Some(3), Some(4), Some(2)))
}
def testGroupBy = {
@@ -97,9 +97,9 @@ class AggregateTest extends AsyncTest[RelationalTestDB] {
val q6 = ((for {
(u, t) <- us joinLeft ts on (_.id === _.a)
} yield (u, t)).groupBy(_._1.id).map {
case (id, q) => (id, q.length, q.map(_._1).length, q.map(_._2).length)
case (id, q) => (id, q.length, q.map(_._1).length, q.map(_._2).length, q.map(_._2.map(_.a)).length, q.map(_._2.map(_.a)).countDefined)
}).to[Set]
db.run(mark("q6", q6.result)).map(_ shouldBe Set((1, 3, 3, 3), (2, 3, 3, 3), (3, 2, 2, 2), (4, 1, 1, 1)))
db.run(mark("q6", q6.result)).map(_ shouldBe Set((1, 3, 3, 3, 3, 3), (2, 3, 3, 3, 3, 3), (3, 2, 2, 2, 2, 2), (4, 1, 1, 1, 1, 0)))
}.flatMap { _ =>
val q7 = ts.groupBy(_.a).map { case (a, ts) =>
(a, ts.map(_.b).sum, ts.map(_.b).min, ts.map(_.b).max, ts.map(_.b).avg)
@@ -184,7 +184,7 @@ class AggregateTest extends AsyncTest[RelationalTestDB] {
DBIO.seq(
as.schema.create,
as += 1,
q1.result
q1.result.map(_ shouldBe Seq((Some(1), 1)))
)
}
@@ -26,6 +26,10 @@ class OptimizeScalar extends Phase {
if v == v2 && (z == null || z == None) =>
v
// if(!false) v else _
case n @ IfThenElse(ConstArray(Library.Not(LiteralNode(false)), v, _)) =>
v
// Redundant cast to non-nullable within OptionApply
case o @ OptionApply(Library.SilentCast(n)) if o.nodeType == n.nodeType => n
@@ -155,10 +155,21 @@ final class AnyExtensionMethods(val n: Node) extends AnyVal {
/** Extension methods for Queries of a single column */
final class SingleColumnQueryExtensionMethods[B1, P1, C[_]](val q: Query[Rep[P1], _, C]) extends AnyVal {
// Shorthand for the evidence that `Option[B1]` has a known column type,
// required by all aggregation operators below.
type OptionTM = TypedType[Option[B1]]
/** Compute the minimum value of a single-column Query, or `None` if the Query is empty */
def min(implicit tm: OptionTM) = Library.Min.column[Option[B1]](q.toNode)
/** Compute the maximum value of a single-column Query, or `None` if the Query is empty */
def max(implicit tm: OptionTM) = Library.Max.column[Option[B1]](q.toNode)
/** Compute the average of a single-column Query, or `None` if the Query is empty */
def avg(implicit tm: OptionTM) = Library.Avg.column[Option[B1]](q.toNode)
/** Compute the sum of a single-column Query, or `None` if the Query is empty */
def sum(implicit tm: OptionTM) = Library.Sum.column[Option[B1]](q.toNode)
/** Count the number of `Some` elements of a single-column Query, i.e. a count
  * that omits `None`/NULL rows (per the commit description, the SQL
  * `COUNT(column)` form rather than `COUNT(*)`).
  * Only available for queries whose column has an `Option` type; the unused
  * `ev` evidence enforces this restriction at compile time. */
def countDefined(implicit ev: P1 <:< Option[_]) = Library.Count.column[Int](q.toNode)
}
/** Extension methods for Options of single- and multi-column values */
@@ -282,17 +282,17 @@ class QueryInterpreter(db: HeapBackend#Database, params: Any) extends Logging {
case t: ScalaOptionType[_] => (t.elementType.asInstanceOf[ScalaNumericType[Any]].numeric, true)
case t => (t.asInstanceOf[ScalaNumericType[Any]].numeric, false)
}
reduceOptionIt(it, opt, (a, b) => num.plus(a, b))
reduceOptionIt[Any](it, opt, identity, (a, b) => num.plus(a, b))
case Library.Avg(ch) =>
val coll = run(ch).asInstanceOf[Coll]
val (it, itType) = unwrapSingleColumn(coll, ch.nodeType)
val (num, opt) = itType match {
case t: ScalaOptionType[_] => (t.elementType.asInstanceOf[ScalaNumericType[Any]].numeric, true)
case t => (t.asInstanceOf[ScalaNumericType[Any]].numeric, false)
}
reduceOptionIt(it, opt, (a, b) => num.plus(a, b)).map { sum =>
if(num.isInstanceOf[Fractional[_]]) num.asInstanceOf[Fractional[Any]].div(sum, num.fromInt(coll.size))
else num.fromInt(num.toInt(sum) / coll.size)
reduceOptionIt[(Int, Any)](it, opt, (1, _), { case ((ai, a), (bi, b)) => (ai + bi, num.plus(a, b)) }).map { case (count, sum) =>
if(num.isInstanceOf[Fractional[_]]) num.asInstanceOf[Fractional[Any]].div(sum, num.fromInt(count))
else num.fromInt(num.toInt(sum) / count)
}
case Library.Min(ch) =>
val coll = run(ch).asInstanceOf[Coll]
@@ -301,15 +301,15 @@ class QueryInterpreter(db: HeapBackend#Database, params: Any) extends Logging {
case t: ScalaOptionType[_] => (t.elementType.asInstanceOf[ScalaBaseType[Any]].ordering, true)
case t => (t.asInstanceOf[ScalaBaseType[Any]].ordering, false)
}
reduceOptionIt(it, opt, (a, b) => if(ord.lt(b, a)) b else a)
reduceOptionIt[Any](it, opt, identity, (a, b) => if(ord.lt(b, a)) b else a)
case Library.Max(ch) =>
val coll = run(ch).asInstanceOf[Coll]
val (it, itType) = unwrapSingleColumn(coll, ch.nodeType)
val (ord, opt) = itType match {
case t: ScalaOptionType[_] => (t.elementType.asInstanceOf[ScalaBaseType[Any]].ordering, true)
case t => (t.asInstanceOf[ScalaBaseType[Any]].ordering, false)
}
reduceOptionIt(it, opt, (a, b) => if(ord.gt(b, a)) b else a)
reduceOptionIt[Any](it, opt, identity, (a, b) => if(ord.gt(b, a)) b else a)
case Library.==(ch, LiteralNode(null)) =>
val chV = run(ch)
chV == null || chV.asInstanceOf[Option[_]].isEmpty
@@ -425,12 +425,20 @@ class QueryInterpreter(db: HeapBackend#Database, params: Any) extends Logging {
case t => (coll.iterator, t)
}
// NOTE(review): this span is a rendered diff, not a single valid definition — the
// first `reduceOptionIt` line is the pre-change signature with its removed
// `else if(opt) ... reduceLeft` body, and the generic `[T]` signature plus the
// `else { ... }` block below form the replacement. Both versions are kept here
// verbatim as the diff view shows them.
def reduceOptionIt(it: Iterator[Any], opt: Boolean, f: (Any, Any) => Any): Option[Any] = {
// New version: maps each element with `map` before folding with `reduce`, so a
// caller (e.g. the Avg case) can carry extra state such as a (count, sum) pair.
// Returns None when there is no defined value to aggregate.
def reduceOptionIt[T](it: Iterator[Any], opt: Boolean, map: Any => T, reduce: (T, T) => T): Option[T] = {
if(!it.hasNext) None
// Removed body: the for-comprehension made a single None element collapse the
// whole result to None — the broken 3VL aggregation behavior named in the
// commit message.
else if(opt) it.reduceLeft { (z, b) =>
for(z <- z.asInstanceOf[Option[Any]]; b <- b.asInstanceOf[Option[Any]]) yield f(z, b)
}.asInstanceOf[Option[Any]]
else Some(it.reduceLeft { (z, b) => f(z, b) })
else {
// Replacement body: for optional columns, drop None elements entirely so the
// aggregate is computed over the defined values only (SQL's NULL-skipping
// aggregate behavior, per the migration notes in this commit).
val it2 = if(opt) it.collect { case Some(b) => b} else it
var res: T = null.asInstanceOf[T]
var first = true
it2.foreach { b =>
if(first) {
first = false
res = map(b)
} else res = reduce(res, map(b))
}
// `first` still true means every element was filtered out (all None) or the
// input was empty: no aggregate value exists.
if(first) None else Some(res)
}
}
def createNullRow(tpe: Type): Any = tpe match {
@@ -58,13 +58,18 @@ was moved from package ``slick.jdbc`` to ``slick.jdbc.hikaricp``.
Counting Option columns
-----------------------
Counting any multi-column collection with `.length` now ignores nullability of the columns. The previous
approach of picking a random column led to inconsistent results. This is particularly relevant when you
try to count one side of an outer join. Up to Slick 3.0 the goal (although not achieved in all cases due
to a design problem) was not to include non-matching rows in the total (equivalent to counting the
discriminator column only). This does not make sense anymore for the new outer join operators (introduced
in 3.0) with correct `Option` types. The new semantics are identical to those of Scala collections.
Semantics for counts of single columns remain unchanged.
Counting collection-valued queries with ``.length`` now ignores nullability of the columns, i.e. it
is equivalent to ``COUNT(*)`` in SQL, no matter what is being counted. The previous approach of
picking a random column led to inconsistent results. This is particularly relevant when you try to
count one side of an outer join. Up to Slick 3.0 the goal (although not achieved in all cases due
to a design problem) was not to include non-matching rows in the total (equivalent to counting only
the discriminator column). This does not make sense anymore for the new outer join operators
(introduced in 3.0) with correct ``Option`` types. The new semantics are identical to those of
Scala collections.
There is a new operator ``.countDefined`` for counting only the defined / matching (i.e. non-NULL
in SQL) rows. To avoid any ambiguities in the definition, it is only available for
collection-valued queries of a single column with an ``Option`` type.
Default String type on MySQL
----------------------------

0 comments on commit 7199b51

Please sign in to comment.