Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Range.foreach optimization.

This makes code like

  0 to 100 foreach (x += _)

as fast as (often faster than, in fact) a while loop.  See the
comment in Range for the gory details.  More investigation should
be done regarding total impact on inlining behavior.
Review by @odersky.
  • Loading branch information...
commit 4cfc633fc6cb2ab0f473c2e5141724017d444dc6 1 parent d1e3b46
@paulp paulp authored
View
89 src/library/scala/collection/immutable/Range.scala
@@ -71,18 +71,6 @@ extends collection.AbstractSeq[Int]
def isInclusive = false
- @inline final override def foreach[@specialized(Unit) U](f: Int => U) {
- if (length > 0) {
- val last = this.last
- var i = start
- while (i != last) {
- f(i)
- i += step
- }
- f(i)
- }
- }
-
override def length: Int = numRangeElements
override lazy val last: Int =
if (length == 0) Nil.last
@@ -95,6 +83,83 @@ extends collection.AbstractSeq[Int]
if (idx < 0 || idx >= length) throw new IndexOutOfBoundsException(idx.toString)
locationAfterN(idx)
}
+
+ /** @note Making foreach run as fast as a while loop is a challenge.
+ * The key elements which I can observe making a difference are:
+ *
+ * - the inner loop should be as small as possible
+ * - the inner loop should be monomorphic
+ * - the inner loop should perform no boxing and no avoidable tests
+ *
+ * This is achieved by:
+ *
+ * - keeping initialization logic out of the inner loop
+ * - dispatching to custom variations based on initial conditions
+ * - tricking the compiler into always calling Function1#apply$mcVI$sp
+ *
+ * The last one is important and less than obvious. Even when foreach
+ * was specialized on Unit, only Int => Unit arguments benefited from it.
+ * Other function types would be accepted, but in the absence of full
+ * specialization the integer argument was boxed on every call. For example:
+ *
+ class A {
+ final def f(x: Int): Int = x + 1
+ // Calls Range.foreach, which calls Function1.apply
+ def g1 = 1 until 100 foreach { x => f(x) }
+ // Calls Range.foreach$mVc$sp, which calls Function1.apply$mcVI$sp
+ def g2 = 1 until 100 foreach { x => f(x) ; () }
+ }
+ *
+ * However! Since the result of the closure is always discarded, we
+ * simply cast it to Int => Unit, thereby executing the fast version.
+ * The seemingly looming ClassCastException can never arrive.
+ */
+ @inline final override def foreach[U](f: Int => U) { // chooses one of four loop variants up front, so the loops themselves need no per-element direction/inclusiveness test
+ if (step < 0) { // descending range
+ if (isInclusive) foreachDownIn(f.asInstanceOf[Int => Unit]) // cast is deliberate: the closure's result is discarded, so it is invoked as Int => Unit
+ else foreachDownEx(f.asInstanceOf[Int => Unit])
+ }
+ else { // step is not negative: ascending range
+ if (isInclusive) foreachUpIn(f.asInstanceOf[Int => Unit])
+ else foreachUpEx(f.asInstanceOf[Int => Unit])
+ }
+ }
+
+ /** !!! These methods must be public or they will not be inlined.
+ * But they are certainly not intended to be part of the API.
+ * This collision between inlining requirements and access semantics
+ * is highly unfortunate and must be resolved.
+ *
+ * Proposed band-aid: an @internal annotation.
+ */
+ @inline final def foreachDownIn(f: Int => Unit) {
+   // Descending, inclusive: applies f to start, start + step, ... while the
+   // value is still >= end (step is negative on this path).
+   //
+   // The cursor is a Long on purpose. With an Int cursor, stepping below
+   // Int.MinValue wraps around to a large positive value, `i >= end` holds
+   // again, and the loop never terminates (for example
+   // `(Int.MinValue + 1) to Int.MinValue by -1`).
+   // NOTE(review): re-run the range benchmark to confirm the Long cursor
+   // does not cost anything measurable.
+   var i: Long = start
+   while (i >= end) {
+     f(i.toInt) // exact: here end <= i <= start, so i fits in an Int
+     i += step
+   }
+ }
+ @inline final def foreachUpIn(f: Int => Unit) {
+   // Ascending, inclusive: applies f to start, start + step, ... while the
+   // value is still <= end.
+   //
+   // The cursor is a Long on purpose. With an Int cursor, stepping past
+   // Int.MaxValue wraps around to a negative value, `i <= end` holds again,
+   // and the loop never terminates (for example
+   // `(Int.MaxValue - 1) to Int.MaxValue`).
+   // NOTE(review): re-run the range benchmark to confirm the Long cursor
+   // does not cost anything measurable.
+   var i: Long = start
+   while (i <= end) {
+     f(i.toInt) // exact: here start <= i <= end, so i fits in an Int
+     i += step
+   }
+ }
+ @inline final def foreachDownEx(f: Int => Unit) {
+   // Descending, exclusive: applies f to start, start + step, ... while the
+   // value is still > end (step is negative on this path).
+   //
+   // The cursor is a Long on purpose. With an Int cursor and a step below -1,
+   // the last addition can wrap below Int.MinValue to a large positive value,
+   // `i > end` holds again, and the loop never terminates (for example
+   // `0 until Int.MinValue by -2`).
+   // NOTE(review): re-run the range benchmark to confirm the Long cursor
+   // does not cost anything measurable.
+   var i: Long = start
+   while (i > end) {
+     f(i.toInt) // exact: here end < i <= start, so i fits in an Int
+     i += step
+   }
+ }
+ @inline final def foreachUpEx(f: Int => Unit) {
+   // Ascending, exclusive: applies f to start, start + step, ... while the
+   // value is still < end.
+   //
+   // The cursor is a Long on purpose. With an Int cursor and a step above 1,
+   // the last addition can wrap past Int.MaxValue to a negative value,
+   // `i < end` holds again, and the loop never terminates (for example
+   // `0 until Int.MaxValue by 2`).
+   // NOTE(review): re-run the range benchmark to confirm the Long cursor
+   // does not cost anything measurable.
+   var i: Long = start
+   while (i < end) {
+     f(i.toInt) // exact: here start <= i < end, so i fits in an Int
+     i += step
+   }
+ }
/** Creates a new range containing the first `n` elements of this range.
*
View
61 test/benchmarks/src/scala/collection/immutable/range-bench.scala
@@ -0,0 +1,61 @@
+package scala.collection.immutable
+package benchmarks
+
+/** Micro-benchmark comparing `Range.foreach` against a hand-written `while` loop.
+ *
+ *  Each measurement sums the integers 1..max with both variants, times them with
+ *  `System.nanoTime`, and prints which one was faster.
+ */
+object RangeTest {
+  // not inlined any more, needs investigation
+  //
+  // class XXS {
+  //   private val array = Array.range(0, 100)
+  //   def tst = { var sum = 0; for (i <- 0 until array.length) sum += array(i); sum }
+  // }
+
+  /** Receives every computed sum; presumably a sink so the summing work has an
+   *  observable effect and cannot be optimized away -- TODO confirm.
+   */
+  var x: Int = 0
+
+  /** Sums 1..max using `Range.foreach` (the code path under test). */
+  def foreachSum(max: Int): Int = {
+    var sum = 0
+    1 to max foreach (sum += _)
+    sum
+  }
+
+  /** Sums 1..max using a plain `while` loop (the baseline). */
+  def whileSum(max: Int) = {
+    var sum = 0
+    var num = 1
+    while (num <= max) {
+      sum += num
+      num += 1
+    }
+    sum
+  }
+
+  /** Prints one result line: the winner, the faster/slower time ratio, and both
+   *  timings converted from nanoseconds to milliseconds.
+   *
+   *  @param max          upper bound of the summed range
+   *  @param foreachNanos elapsed nanoseconds of the `foreach` variant
+   *  @param whileNanos   elapsed nanoseconds of the `while` variant
+   */
+  def show(max: Int, foreachNanos: Long, whileNanos: Long) {
+    val foreachWins = foreachNanos < whileNanos
+    val winner = if (foreachWins) "foreachSum" else "whileSum"
+    // Always faster / slower, so the printed ratio is at most 1.
+    val ratio =
+      if (foreachWins) foreachNanos.toDouble / whileNanos
+      else whileNanos.toDouble / foreachNanos
+    println("1 to %d:, %12s wins, %.3f: foreach %.3f while %.3f".format(
+      max, winner, ratio,
+      foreachNanos.toDouble / 1000000L,
+      whileNanos.toDouble / 1000000L)
+    )
+  }
+
+  /** Times both variants once for the given `max` and prints the comparison.
+   *  Which variant runs first is chosen at random on every call.
+   */
+  def run(max: Int) = {
+    val foreachFirst = util.Random.nextBoolean
+    val t1 = System.nanoTime
+    x = if (foreachFirst) foreachSum(max) else whileSum(max)
+    val t2 = System.nanoTime
+    x = if (foreachFirst) whileSum(max) else foreachSum(max)
+    val t3 = System.nanoTime
+
+    // t2 - t1 belongs to whichever variant ran first, t3 - t2 to the other.
+    val foreachNanos = if (foreachFirst) t2 - t1 else t3 - t2
+    val whileNanos = if (foreachFirst) t3 - t2 else t2 - t1
+    show(max, foreachNanos, whileNanos)
+  }
+
+  /** Runs the comparison three times per size, starting at `args(0)` (default 100)
+   *  and growing the size by roughly one seventh each round. Nothing is run when
+   *  the starting size is not positive.
+   */
+  def main(args: Array[String]): Unit = {
+    var max = if (args.isEmpty) 100 else args(0).toInt
+    while (max > 0) {
+      run(max)
+      run(max)
+      run(max)
+      // Grow by at least 1: for max < 7 the integer division yields 0, which
+      // would repeat the same size forever. The loop ends once the addition
+      // overflows Int and max turns negative.
+      max += math.max(1, max / 7)
+    }
+  }
+}
Please sign in to comment.
Something went wrong with that request. Please try again.