Skip to content

Commit

Permalink
SI-6584, Stream#distinct uses too much memory.
Browse files Browse the repository at this point in the history
[backport]
Nesting recursive calls in Stream is always a dicey business.
  • Loading branch information
paulp committed Jan 30, 2013
1 parent d2316df commit 98534b2
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 3 deletions.
13 changes: 10 additions & 3 deletions src/library/scala/collection/immutable/Stream.scala
Expand Up @@ -841,9 +841,16 @@ self =>
* // produces: "1, 2, 3, 4, 5, 6"
* }}}
*/
override def distinct: Stream[A] = {
  // This should use max memory proportional to N, whereas
  // recursively calling distinct on the tail is N^2.
  //
  // `seen` holds every element that has already been emitted; `rest` is the
  // part of the stream that has not been examined yet.
  def loop(seen: Set[A], rest: Stream[A]): Stream[A] = {
    if (rest.isEmpty) rest
    // Already emitted: drop this element and keep scanning.
    else if (seen(rest.head)) loop(seen, rest.tail)
    // First occurrence: emit it and remember it for the remainder.
    else cons(rest.head, loop(seen + rest.head, rest.tail))
  }
  loop(Set(), this)
}


/** Returns a new sequence of given length containing the elements of this
* sequence followed by zero or more occurrences of given elements.
Expand Down
8 changes: 8 additions & 0 deletions test/files/run/t6584.check
@@ -0,0 +1,8 @@
Array: 102400
Vector: 102400
List: 102400
Stream: 102400
Array: 102400
Vector: 102400
List: 102400
Stream: 102400
16 changes: 16 additions & 0 deletions test/files/run/t6584.scala
@@ -0,0 +1,16 @@
/** Run test for SI-6584: prints the number of distinct elements found in an
 *  Array, Vector, List and Stream, one line per collection.
 */
object Test {
  /** Prints two groups of four lines. The first group is built from
   *  collections whose elements are all different; the second from a range
   *  concatenated with itself, so every value occurs twice.
   *
   *  @param args ignored
   */
  def main(args: Array[String]): Unit = {
    val elemCount = 100 * 1024
    val twice = (1 to elemCount) ++ (1 to elemCount)

    // Emits one output line: the label immediately followed by the count.
    def report(label: String, distinctCount: Int): Unit =
      println(label + distinctCount)

    // Inputs without any repeated element.
    report("Array: ", Array.tabulate(elemCount)(i => i).distinct.size)
    report("Vector: ", Vector.tabulate(elemCount)(i => i).distinct.size)
    report("List: ", List.tabulate(elemCount)(i => i).distinct.size)
    report("Stream: ", Stream.tabulate(elemCount)(i => i).distinct.size)

    // Inputs in which each value appears exactly twice.
    report("Array: ", twice.toArray.distinct.size)
    report("Vector: ", twice.toVector.distinct.size)
    report("List: ", twice.toList.distinct.size)
    report("Stream: ", twice.toStream.distinct.size)
  }
}

0 comments on commit 98534b2

Please sign in to comment.