Permalink
Browse files

Split array_length from length (which is for strings) and implemented…

… in mimir
  • Loading branch information...
djspiewak committed Sep 17, 2018
1 parent 6134140 commit 309c6fa441219d7a26e192b2d5f0ccb7ed420eb6
@@ -16,12 +16,12 @@
package quasar.std
import quasar.{BinaryFunc, Mapping}
import quasar.{UnaryFunc, Mapping}
trait ArrayLib extends Library {
val ArrayLength = BinaryFunc(
val ArrayLength = UnaryFunc(
Mapping,
"Gets the length of a given dimension of an array.",
"Gets the length an array.",
noSimplification)
}
@@ -188,6 +188,34 @@ abstract class StdLibSpec extends Qspec {
}
}
"ArrayLib" >> {
import ArrayLib._
"ArrayLength" >> {
"empty" >> {
unary(ArrayLength(_).embed, Data.Arr(Nil), Data.Int(0))
}
"singleton" >> {
unary(ArrayLength(_).embed, Data.Arr(List(Data.Int(42))), Data.Int(1))
}
"three things" >> {
unary(
ArrayLength(_).embed,
Data.Arr(List(Data.Int(5), Data.Int(6), Data.Int(7))),
Data.Int(3))
}
"undefined" >> {
unary(
ArrayLength(_).embed,
Data.NA,
Data.NA)
}
}
}
"StringLib" >> {
import StringLib._
@@ -1531,7 +1559,7 @@ abstract class StdLibSpec extends Qspec {
"arbitrary double" >> prop { (d: Double) =>
val n = BigDecimal(d)
val data =
// testing with the generated double if it is not a Long or
// testing with the generated double if it is not a Long or
// if it is an exact double
if ((d != d.toLong) || (n.isExactDouble)) Data.Dec(n)
// .. but if it is a Long but not an exact double then we test with
@@ -140,13 +140,13 @@ class MimirStdLibSpec extends StdLibSpec with PrecogCake {
val result = actual(table)
lazy val primary = ((result must haveSize(1)) and
lazy val primary = ((result must haveSize(0)) or
(result.head must beCloseTo(expected)))
lazy val fallback = ((result must haveSize(0))) and
(expected mustEqual Data.NA)
lazy val fallback = (expected mustNotEqual Data.NA) or
((result must haveSize(0)))
(primary or fallback).toResult
(primary and fallback).toResult
}
}
@@ -56,6 +56,9 @@ final class MapFuncCorePlanner[T[_[_]]: RecursiveT, F[_]: Applicative]
case MapFuncsCore.JoinSideName(_) => errorImpossible // should never be received
case MapFuncsCore.ArrayLength(a1) =>
(ArrayLength[A](a1): TransSpec[A]).point[F]
case MapFuncsCore.Length(a1) => length.spec(a1).point[F]
case MapFuncsCore.ExtractCentury(a1) =>
@@ -34,6 +34,7 @@ object MapFunc {
def translateUnaryMapping[T[_[_]], MF[a] <: ACopK[a], A]
(implicit MFC: MapFuncCore[T, ?] :<<: MF, MFD: MapFuncDerived[T, ?] :<<: MF)
: scala.PartialFunction[UnaryFunc, A => MF[A]] = {
case array.ArrayLength => a => MFC(C.ArrayLength(a))
case date.ExtractCentury => a => MFC(C.ExtractCentury(a))
case date.ExtractDayOfMonth => a => MFC(C.ExtractDayOfMonth(a))
case date.ExtractDecade => a => MFC(C.ExtractDecade(a))
@@ -92,7 +93,6 @@ object MapFunc {
: scala.PartialFunction[BinaryFunc, (A, A) => MF[A]] = {
// NB: ArrayLength takes 2 params because of SQL, but we really don’t care
// about the second. And it shouldn’t even have two in LP.
case array.ArrayLength => (a1, a2) => MFC(C.Length(a1))
case date.SetTimeZone => (a1, a2) => MFC(C.SetTimeZone(a1, a2))
case date.SetTimeZoneHour => (a1, a2) => MFC(C.SetTimeZoneHour(a1, a2))
case date.SetTimeZoneMinute => (a1, a2) => MFC(C.SetTimeZoneMinute(a1, a2))
@@ -516,6 +516,7 @@ object MapFuncCore {
case TypeOf(a1) => f(a1) ∘ (TypeOf(_))
case Negate(a1) => f(a1) ∘ (Negate(_))
case Not(a1) => f(a1) ∘ (Not(_))
case ArrayLength(a1) => f(a1) ∘ (ArrayLength(_))
case Length(a1) => f(a1) ∘ (Length(_))
case Lower(a1) => f(a1) ∘ (Lower(_))
case Upper(a1) => f(a1) ∘ (Upper(_))
@@ -618,6 +619,7 @@ object MapFuncCore {
case (TypeOf(a1), TypeOf(b1)) => in.equal(a1, b1)
case (Negate(a1), Negate(b1)) => in.equal(a1, b1)
case (Not(a1), Not(b1)) => in.equal(a1, b1)
case (ArrayLength(a1), ArrayLength(b1)) => in.equal(a1, b1)
case (Length(a1), Length(b1)) => in.equal(a1, b1)
case (Lower(a1), Lower(b1)) => in.equal(a1, b1)
case (Upper(a1), Upper(b1)) => in.equal(a1, b1)
@@ -724,6 +726,7 @@ object MapFuncCore {
case TypeOf(a1) => shz("TypeOf", a1)
case Negate(a1) => shz("Negate", a1)
case Not(a1) => shz("Not", a1)
case ArrayLength(a1) => shz("ArrayLength", a1)
case Length(a1) => shz("Length", a1)
case Lower(a1) => shz("Lower", a1)
case Upper(a1) => shz("Upper", a1)
@@ -839,6 +842,7 @@ object MapFuncCore {
case TypeOf(a1) => nAry("TypeOf", a1)
case Negate(a1) => nAry("Negate", a1)
case Not(a1) => nAry("Not", a1)
case ArrayLength(a1) => nAry("ArrayLength", a1)
case Length(a1) => nAry("Length", a1)
case Lower(a1) => nAry("Lower", a1)
case Upper(a1) => nAry("Upper", a1)
@@ -913,6 +917,9 @@ object MapFuncsCore {
@Lenses final case class JoinSideName[T[_[_]], A](name: Symbol) extends Nullary[T, A]
// array
@Lenses final case class ArrayLength[T[_[_]], A](a1: A) extends Unary[T, A]
// string
@Lenses final case class Length[T[_[_]], A](a1: A) extends Unary[T, A]
// date
@@ -113,7 +113,7 @@ object RenderQScriptDSL {
}
}
}
def ejsonRenderQScriptDSLDelay: Delay[RenderQScriptDSL, EJson] = new Delay[RenderQScriptDSL, EJson] {
def apply[A](fa: RenderQScriptDSL[A]): RenderQScriptDSL[EJson[A]] = {
(base: String, a: EJson[A]) =>
@@ -225,6 +225,7 @@ object RenderQScriptDSL {
case TypeOf(a1) => ("TypeOf", (fa(base, a1).right :: Nil).some)
case Negate(a1) => ("Negate", (fa(base, a1).right :: Nil).some)
case Not(a1) => ("Not", (fa(base, a1).right :: Nil).some)
case ArrayLength(a1) => ("ArrayLength", (fa(base, a1).right :: Nil).some)
case Length(a1) => ("Length", (fa(base, a1).right :: Nil).some)
case Lower(a1) => ("Lower", (fa(base, a1).right :: Nil).some)
case Upper(a1) => ("Upper", (fa(base, a1).right :: Nil).some)
@@ -119,6 +119,9 @@ trait TransSpecModule {
// this has to be primitive because of how nutso equality is
case class Within[+A <: SourceType](item: TransSpec[A], in: TransSpec[A]) extends TransSpec[A]
// deals with arrays in a non-trivial way
case class ArrayLength[+A <: SourceType](source: TransSpec[A]) extends TransSpec[A]
// this has to be primitive because it produces an array
case class Range[+A <: SourceType](lower: TransSpec[A], upper: TransSpec[A]) extends TransSpec[A]
@@ -215,6 +218,7 @@ trait TransSpecModule {
case trans.EqualLiteral(source, value, invert) => trans.EqualLiteral(mapSources(source)(f), value, invert)
case trans.Within(item, in) => trans.Within(mapSources(item)(f), mapSources(in)(f))
case trans.ArrayLength(source) => trans.ArrayLength(mapSources(source)(f))
case trans.Range(upper, lower) => trans.Range(mapSources(upper)(f), mapSources(lower)(f))
case trans.Cond(pred, left, right) => trans.Cond(mapSources(pred)(f), mapSources(left)(f), mapSources(right)(f))
@@ -269,6 +273,7 @@ trait TransSpecModule {
case trans.EqualLiteral(source, value, invert) => trans.EqualLiteral(deepMap(source)(f), value, invert)
case trans.Within(item, in) => trans.Within(deepMap(item)(f), deepMap(in)(f))
case trans.ArrayLength(source) => trans.ArrayLength(deepMap(source)(f))
case trans.Range(upper, lower) => trans.Range(deepMap(upper)(f), deepMap(lower)(f))
case trans.Cond(pred, left, right) => trans.Cond(deepMap(pred)(f), deepMap(left)(f), deepMap(right)(f))
@@ -380,6 +385,7 @@ trait TransSpecModule {
case Equal(f, s) => Equal(normalize(f, undef), normalize(s, undef))
case EqualLiteral(f, v, i) => EqualLiteral(normalize(f, undef), v, i)
case Within(item, in) => Within(normalize(item, undef), normalize(in, undef))
case ArrayLength(source) => ArrayLength(normalize(source, undef))
case Range(upper, lower) => Range(normalize(upper, undef), normalize(lower, undef))
case Cond(p, l, r) => Cond(normalize(p, undef), normalize(l, undef), normalize(r, undef))
case Filter(s, t) => Filter(normalize(s, undef), normalize(t, undef))
@@ -455,6 +461,7 @@ trait TransSpecModule {
case Equal(f, s) => paths(f) ++ paths(s) + r
case EqualLiteral(s, _, _) => paths(s) + r
case Within(item, in) => paths(item) ++ paths(in) + r
case ArrayLength(source) => paths(source) + r
case Range(upper, lower) => paths(upper) ++ paths(lower) + r
case Filter(f, p) => paths(f) ++ paths(p) + r
case FilterDefined(s, p, _) => paths(s) ++ paths(p) + r
@@ -490,6 +490,38 @@ abstract class Slice { source =>
Slice (size, columns)
}
def arrayLength: Slice = {
val emptyCol = columns.get(ColumnRef(CPath.Identity, CEmptyArray))
val emptyBS = emptyCol.map(_.definedAt(0, size)).getOrElse(new BitSet)
val results = new Array[Long](size)
val resultsDefined = emptyBS // start by defining everywhere that we're empty
val bitsetIndexes = columns collect {
case (ColumnRef(CPath(CPathIndex(i), _*), _), col) =>
(i -> col.definedAt(0, size))
}
val collapsed = bitsetIndexes.groupBy(_._1).toList map {
case (i, cols) =>
i -> cols.map(_._2).reduceOption(_ | _).getOrElse(new BitSet)
}
val reversed = collapsed.sortWith(_._1 > _._1)
val mask = collapsed.map(_._2).reduceOption(_ | _).getOrElse(new BitSet)
Loop.range(0, size) { row =>
if (!emptyBS(row) && mask(row)) {
// this could be less naive on performance, I think...
results(row) = reversed.find(_._2(row)).get._1 + 1 // the + 1 is because arrays are zero indexed
resultsDefined.set(row)
}
}
val col = new ArrayLongColumn(resultsDefined, results)
Slice(size, Map(ColumnRef(CPath.Identity, CLong) -> col))
}
def toNumber: Slice = {
val size = source.size
val columns = source.columns.get(ColumnRef(CPath.Identity, CString)) match {
@@ -399,6 +399,9 @@ trait SliceTransforms extends TableModule with ColumnarTableTypes with ObjectCon
Map(ColumnRef(CPath.Identity, CBoolean) -> results))
}
case ArrayLength(source) =>
composeSliceTransform2(source).map(_.arrayLength)
case Range(lower, upper) =>
composeSliceTransform2(upper).zip(composeSliceTransform2(lower)) { (upperS, lowerS) =>
val upperColumns: Map[ColumnRef, Column] = upperS.materialized.columns

0 comments on commit 309c6fa

Please sign in to comment.