Permalink
Browse files

Range.foreach optimization.

This makes code like

  0 to 100 foreach (x += _)

as fast as (often faster than, in fact) a while loop.  See the
comment in Range for the gory details.  More investigation should
be done regarding total impact on inlining behavior.
Review by @odersky.
  • Loading branch information...
paulp committed Dec 12, 2011
1 parent d1e3b46 commit 4cfc633fc6cb2ab0f473c2e5141724017d444dc6
@@ -71,18 +71,6 @@ extends collection.AbstractSeq[Int]
/** Reports whether this range is inclusive; this definition always answers `false`.
 *  NOTE(review): presumably an inclusive variant defined elsewhere overrides this - confirm.
 */
def isInclusive = false
/** Applies `f` to every element of this range, from `start` through `last`,
 *  advancing by `step` each time. Does nothing when `length` is not positive.
 *
 *  @param f the function applied to each element; its result is discarded
 */
@inline final override def foreach[@specialized(Unit) U](f: Int => U) {
  if (length > 0) {
    val terminal = this.last
    var current = start
    // Visit the first element unconditionally, then keep stepping until the
    // element just visited is the terminal one. The exit test is `!=`, not an
    // ordering comparison, so the same loop serves both step directions.
    f(current)
    while (current != terminal) {
      current += step
      f(current)
    }
  }
}
/** The number of elements in this range, as given by `numRangeElements`. */
override def length: Int = numRangeElements
override lazy val last: Int =
if (length == 0) Nil.last
@@ -95,6 +83,83 @@ extends collection.AbstractSeq[Int]
if (idx < 0 || idx >= length) throw new IndexOutOfBoundsException(idx.toString)
locationAfterN(idx)
}
/** @note Making foreach run as fast as a while loop is a challenge.
* The key elements which I can observe making a difference are:
*
* - the inner loop should be as small as possible
* - the inner loop should be monomorphic
* - the inner loop should perform no boxing and no avoidable tests
*
* This is achieved by:
*
* - keeping initialization logic out of the inner loop
* - dispatching to custom variations based on initial conditions
* - tricking the compiler into always calling Function1#apply$mcVI$sp
*
* The last one is important and less than obvious. Even when foreach
* was specialized on Unit, only Int => Unit arguments benefited from it.
* Other function types would be accepted, but in the absence of full
* specialization the integer argument was boxed on every call. For example:
*
class A {
final def f(x: Int): Int = x + 1
// Calls Range.foreach, which calls Function1.apply
def g1 = 1 until 100 foreach { x => f(x) }
// Calls Range.foreach$mVc$sp, which calls Function1.apply$mcVI$sp
def g2 = 1 until 100 foreach { x => f(x) ; () }
}
*
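* However! Since the result of the closure is always discarded, we
* simply cast it to Int => Unit, thereby executing the fast version.
* The seemingly looming ClassCastException can never arrive: function
* types are erased at runtime, so the cast only verifies that the
* argument is a Function1, which it always is.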
*/
@inline final override def foreach[U](f: Int => U) {
  // The closure's result is never used, so it is viewed as Int => Unit once,
  // up front, and handed to one of four loops chosen by the direction of
  // `step` and by whether the range is inclusive.
  val body = f.asInstanceOf[Int => Unit]
  if (step >= 0) {
    if (isInclusive) foreachUpIn(body)
    else foreachUpEx(body)
  }
  else {
    if (isInclusive) foreachDownIn(body)
    else foreachDownEx(body)
  }
}
/** !!! These methods must be public or they will not be inlined.
* But they are certainly not intended to be part of the API.
* This collision between inlining requirements and access semantics
* is highly unfortunate and must be resolved.
*
* Proposed band-aid: an @internal annotation.
*/
@inline final def foreachDownIn(f: Int => Unit) {
  // Applies `f` to start, start + step, ... for as long as the value is >= end.
  // `foreach` dispatches here when `step` is negative and the range is inclusive.
  //
  // The index is a Long on purpose. With an Int index, `i += step` wraps around
  // to a large positive value whenever `end` lies within |step| of Int.MinValue,
  // so `i >= end` would stay true and the loop would never terminate. In Long
  // arithmetic start + k * step cannot overflow, and every value handed to `f`
  // lies between `end` and `start`, so narrowing it back to Int is exact.
  val bound = end.toLong
  val delta = step.toLong
  var i = start.toLong
  while (i >= bound) {
    f(i.toInt)
    i += delta
  }
}
/** Applies `f` to start, start + step, ... for as long as the value is <= end.
 *  `foreach` dispatches here when `step` is not negative and the range is inclusive.
 *
 *  The index is a Long on purpose. With an Int index, `i += step` wraps around
 *  to a negative value whenever `end` lies within `step` of Int.MaxValue
 *  (for instance `(Int.MaxValue - 1) to Int.MaxValue`), so `i <= end` would stay
 *  true and the loop would never terminate. In Long arithmetic the sum cannot
 *  overflow, and every value handed to `f` lies between `start` and `end`,
 *  so narrowing it back to Int is exact.
 *
 *  @param f the function applied to each element
 */
@inline final def foreachUpIn(f: Int => Unit) {
  val bound = end.toLong
  val delta = step.toLong
  var i = start.toLong
  while (i <= bound) {
    f(i.toInt)
    i += delta
  }
}
/** Applies `f` to start, start + step, ... for as long as the value is > end.
 *  `foreach` dispatches here when `step` is negative and the range is exclusive.
 *
 *  The index is a Long on purpose. With an Int index, `i += step` wraps around
 *  to a large positive value whenever the last element lies within |step| of
 *  Int.MinValue (for instance `0 until Int.MinValue by -2`), so `i > end` would
 *  stay true and the loop would never terminate. In Long arithmetic the sum
 *  cannot overflow, and every value handed to `f` lies between `end` and
 *  `start`, so narrowing it back to Int is exact.
 *
 *  @param f the function applied to each element
 */
@inline final def foreachDownEx(f: Int => Unit) {
  val bound = end.toLong
  val delta = step.toLong
  var i = start.toLong
  while (i > bound) {
    f(i.toInt)
    i += delta
  }
}
/** Applies `f` to start, start + step, ... for as long as the value is < end.
 *  `foreach` dispatches here when `step` is not negative and the range is exclusive.
 *
 *  The index is a Long on purpose. With an Int index, `i += step` wraps around
 *  to a negative value whenever the last element lies within `step` of
 *  Int.MaxValue (for instance `0 until Int.MaxValue by 2`), so `i < end` would
 *  stay true and the loop would never terminate. In Long arithmetic the sum
 *  cannot overflow, and every value handed to `f` lies between `start` and
 *  `end`, so narrowing it back to Int is exact.
 *
 *  @param f the function applied to each element
 */
@inline final def foreachUpEx(f: Int => Unit) {
  val bound = end.toLong
  val delta = step.toLong
  var i = start.toLong
  while (i < bound) {
    f(i.toInt)
    i += delta
  }
}
/** Creates a new range containing the first `n` elements of this range.
*
@@ -0,0 +1,61 @@
package scala.collection.immutable
package benchmarks
/** Micro-benchmark that times summing `1 to max` with `Range.foreach`
 *  against the equivalent hand-written `while` loop, for growing values of `max`.
 */
object RangeTest {
  // not inlined any more, needs investigation
  //
  // class XXS {
  //   private val array = Array.range(0, 100)
  //   def tst = { var sum = 0; for (i <- 0 until array.length) sum += array(i); sum }
  // }

  /** Receives the result of every timed computation.
   *  NOTE(review): presumably a sink that keeps the summing work observable - confirm.
   */
  var x: Int = 0

  /** Sums the integers `1 to max` using `Range.foreach`. */
  def foreachSum(max: Int): Int = {
    var sum = 0
    1 to max foreach (sum += _)
    sum
  }

  /** Sums the integers `1 to max` using an explicit `while` loop. */
  def whileSum(max: Int): Int = {
    var sum = 0
    var num = 1
    while (num <= max) {
      sum += num
      num += 1
    }
    sum
  }

  /** Prints one result line: the faster variant, the faster/slower time ratio,
   *  and both timings converted from nanoseconds to milliseconds.
   *
   *  @param max          upper bound of the summed range
   *  @param foreachNanos elapsed nanoseconds of the `foreach` variant
   *  @param whileNanos   elapsed nanoseconds of the `while` variant
   */
  def show(max: Int, foreachNanos: Long, whileNanos: Long): Unit = {
    val foreachWon = foreachNanos < whileNanos
    val winner = if (foreachWon) "foreachSum" else "whileSum"
    val ratio =
      if (foreachWon) foreachNanos.toDouble / whileNanos
      else whileNanos.toDouble / foreachNanos
    println("1 to %d:, %12s wins, %.3f: foreach %.3f while %.3f".format(
      max, winner, ratio,
      foreachNanos.toDouble / 1000000L,
      whileNanos.toDouble / 1000000L)
    )
  }

  /** Times both variants once for the given `max` and prints the comparison.
   *  Which variant runs first is chosen at random on every call.
   */
  def run(max: Int): Unit = {
    val foreachFirst = scala.util.Random.nextBoolean
    val t1 = System.nanoTime
    x = if (foreachFirst) foreachSum(max) else whileSum(max)
    val t2 = System.nanoTime
    x = if (foreachFirst) whileSum(max) else foreachSum(max)
    val t3 = System.nanoTime
    val foreachNanos = if (foreachFirst) t2 - t1 else t3 - t2
    val whileNanos = if (foreachFirst) t3 - t2 else t2 - t1
    show(max, foreachNanos, whileNanos)
  }

  /** The benchmark size that follows `max`: one seventh larger, but always at
   *  least 1 larger. Without the minimum, sizes 1 through 6 would grow by
   *  `max / 7 == 0` and the driver loop in `main` would repeat the same size forever.
   *  The result is a Long so that the caller can detect growth beyond Int.MaxValue.
   */
  def nextMax(max: Int): Long = max.toLong + math.max(1, max / 7)

  /** Runs the benchmark three times per size, starting at `args(0)` (100 when no
   *  argument is given) and growing the size until it would exceed Int.MaxValue.
   */
  def main(args: Array[String]): Unit = {
    var max: Long = (if (args.isEmpty) 100 else args(0).toInt).toLong
    // The explicit upper bound replaces the original reliance on Int overflow
    // turning `max` negative in order to leave the loop.
    while (max > 0 && max <= Int.MaxValue) {
      val size = max.toInt
      run(size)
      run(size)
      run(size)
      max = nextMax(size)
    }
  }
}

0 comments on commit 4cfc633

Please sign in to comment.