Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Merge pull request #2605 from JamesIry/weak_hashset_uniques

SI-7149 Use a WeakHashSet for type uniqueness
  • Loading branch information...
commit 803d451a28824af17f0cab446e4c76f51003fd01 2 parents 4a8887c + b7e0fd2
@gkossakowski gkossakowski authored
View
13 src/reflect/scala/reflect/internal/SymbolTable.scala
@@ -307,27 +307,20 @@ abstract class SymbolTable extends macros.Universe
}
object perRunCaches {
- import java.lang.ref.WeakReference
import scala.collection.generic.Clearable
// Weak references so the garbage collector will take care of
// letting us know when a cache is really out of commission.
- private val caches = mutable.HashSet[WeakReference[Clearable]]()
+ private val caches = WeakHashSet[Clearable]()
def recordCache[T <: Clearable](cache: T): T = {
- caches += new WeakReference(cache)
+ caches += cache
cache
}
def clearAll() = {
debuglog("Clearing " + caches.size + " caches.")
- caches foreach { ref =>
- val cache = ref.get()
- if (cache == null)
- caches -= ref
- else
- cache.clear()
- }
+ caches foreach (_.clear)
}
def newWeakMap[K, V]() = recordCache(mutable.WeakHashMap[K, V]())
View
4 src/reflect/scala/reflect/internal/Types.scala
@@ -3675,13 +3675,13 @@ trait Types
// Hash consing --------------------------------------------------------------
private val initialUniquesCapacity = 4096
- private var uniques: util.HashSet[Type] = _
+ private var uniques: util.WeakHashSet[Type] = _
private var uniqueRunId = NoRunId
protected def unique[T <: Type](tp: T): T = {
if (Statistics.canEnable) Statistics.incCounter(rawTypeCount)
if (uniqueRunId != currentRunId) {
- uniques = util.HashSet[Type]("uniques", initialUniquesCapacity)
+ uniques = util.WeakHashSet[Type](initialUniquesCapacity)
perRunCaches.recordCache(uniques)
uniqueRunId = currentRunId
}
View
447 src/reflect/scala/reflect/internal/util/WeakHashSet.scala
@@ -1,59 +1,430 @@
package scala
package reflect.internal.util
-import scala.collection.mutable
+import java.lang.ref.{WeakReference, ReferenceQueue}
+import scala.annotation.tailrec
import scala.collection.generic.Clearable
-import scala.runtime.AbstractFunction1
+import scala.collection.mutable.{Set => mSet}
-/** A bare-bones implementation of a mutable `Set` that uses weak references
- * to hold the elements.
+/**
+ * A HashSet where the elements are stored weakly. Elements in this set are elligible for GC if no other
+ * hard references are associated with them. Its primary use case is as a canonical reference
+ * identity holder (aka "hash-consing") via findEntryOrUpdate
*
- * This implementation offers only add/remove/test operations,
- * therefore it does not fulfill the contract of Scala collection sets.
+ * This Set implementation cannot hold null. Any attempt to put a null in it will result in a NullPointerException
+ *
+ * This set implmeentation is not in general thread safe without external concurrency control. However it behaves
+ * properly when GC concurrently collects elements in this set.
*/
-class WeakHashSet[T <: AnyRef] extends AbstractFunction1[T, Boolean] with Clearable {
- private val underlying = mutable.HashSet[WeakReferenceWithEquals[T]]()
+final class WeakHashSet[A <: AnyRef](val initialCapacity: Int, val loadFactor: Double) extends Set[A] with Function1[A, Boolean] with mSet[A] {
+
+ import WeakHashSet._
+
+ def this() = this(initialCapacity = WeakHashSet.defaultInitialCapacity, loadFactor = WeakHashSet.defaultLoadFactor)
+
+ type This = WeakHashSet[A]
+
+ /**
+ * queue of Entries that hold elements scheduled for GC
+ * the removeStaleEntries() method works through the queue to remeove
+ * stale entries from the table
+ */
+ private[this] val queue = new ReferenceQueue[A]
+
+ /**
+ * the number of elements in this set
+ */
+ private[this] var count = 0
+
+ /**
+ * from a specified initial capacity compute the capacity we'll use as being the next
+ * power of two equal to or greater than the specified initial capacity
+ */
+ private def computeCapacity = {
+ if (initialCapacity < 0) throw new IllegalArgumentException("initial capacity cannot be less than 0");
+ var candidate = 1
+ while (candidate < initialCapacity) {
+ candidate *= 2
+ }
+ candidate
+ }
+
+ /**
+ * the underlying table of entries which is an array of Entry linked lists
+ */
+ private[this] var table = new Array[Entry[A]](computeCapacity)
+
+ /**
+ * the limit at which we'll increase the size of the hash table
+ */
+ var threshhold = computeThreshHold
+
+ private[this] def computeThreshHold: Int = (table.size * loadFactor).ceil.toInt
- /** Add the given element to this set. */
- def +=(elem: T): this.type = {
- underlying += new WeakReferenceWithEquals(elem)
- this
+ /**
+ * find the bucket associated with an elements's hash code
+ */
+ private[this] def bucketFor(hash: Int): Int = {
+ // spread the bits around to try to avoid accidental collisions using the
+ // same algorithm as java.util.HashMap
+ var h = hash
+ h ^= h >>> 20 ^ h >>> 12
+ h ^= h >>> 7 ^ h >>> 4
+
+ // this is finding h % table.length, but takes advantage of the
+ // fact that table length is a power of 2,
+ // if you don't do bit flipping in your head, if table.length
+ // is binary 100000.. (with n 0s) then table.length - 1
+ // is 1111.. with n 1's.
+ // In other words this masks on the last n bits in the hash
+ h & (table.length - 1)
}
- /** Remove the given element from this set. */
- def -=(elem: T): this.type = {
- underlying -= new WeakReferenceWithEquals(elem)
- this
+ /**
+ * remove a single entry from a linked list in a given bucket
+ */
+ private[this] def remove(bucket: Int, prevEntry: Entry[A], entry: Entry[A]) {
+ prevEntry match {
+ case null => table(bucket) = entry.tail
+ case _ => prevEntry.tail = entry.tail
+ }
+ count -= 1
}
- /** Does the given element belong to this set? */
- def contains(elem: T): Boolean =
- underlying.contains(new WeakReferenceWithEquals(elem))
+ /**
+ * remove entries associated with elements that have been gc'ed
+ */
+ private[this] def removeStaleEntries() {
+ def poll(): Entry[A] = queue.poll().asInstanceOf[Entry[A]]
- /** Does the given element belong to this set? */
- def apply(elem: T): Boolean = contains(elem)
+ @tailrec
+ def queueLoop {
+ val stale = poll()
+ if (stale != null) {
+ val bucket = bucketFor(stale.hash)
- /** Return the number of elements in this set, including reclaimed elements. */
- def size = underlying.size
+ @tailrec
+ def linkedListLoop(prevEntry: Entry[A], entry: Entry[A]): Unit = if (stale eq entry) remove(bucket, prevEntry, entry)
+ else if (entry != null) linkedListLoop(entry, entry.tail)
- /** Remove all elements in this set. */
- def clear() = underlying.clear()
-}
+ linkedListLoop(null, table(bucket))
-/** A WeakReference implementation that implements equals and hashCode by
- * delegating to the referent.
- */
-class WeakReferenceWithEquals[T <: AnyRef](ref: T) {
- def get(): T = underlying.get()
+ queueLoop
+ }
+ }
+
+ queueLoop
+ }
+
+ /**
+ * Double the size of the internal table
+ */
+ private[this] def resize() {
+ val oldTable = table
+ table = new Array[Entry[A]](oldTable.size * 2)
+ threshhold = computeThreshHold
+
+ @tailrec
+ def tableLoop(oldBucket: Int): Unit = if (oldBucket < oldTable.size) {
+ @tailrec
+ def linkedListLoop(entry: Entry[A]): Unit = entry match {
+ case null => ()
+ case _ => {
+ val bucket = bucketFor(entry.hash)
+ val oldNext = entry.tail
+ entry.tail = table(bucket)
+ table(bucket) = entry
+ linkedListLoop(oldNext)
+ }
+ }
+ linkedListLoop(oldTable(oldBucket))
+
+ tableLoop(oldBucket + 1)
+ }
+ tableLoop(0)
+ }
+
+ // from scala.reflect.internal.Set, find an element or null if it isn't contained
+ override def findEntry(elem: A): A = elem match {
+ case null => throw new NullPointerException("WeakHashSet cannot hold nulls")
+ case _ => {
+ removeStaleEntries()
+ val hash = elem.hashCode
+ val bucket = bucketFor(hash)
+
+ @tailrec
+ def linkedListLoop(entry: Entry[A]): A = entry match {
+ case null => null.asInstanceOf[A]
+ case _ => {
+ val entryElem = entry.get
+ if (elem == entryElem) entryElem
+ else linkedListLoop(entry.tail)
+ }
+ }
+
+ linkedListLoop(table(bucket))
+ }
+ }
+ // add an element to this set unless it's already in there and return the element
+ def findEntryOrUpdate(elem: A): A = elem match {
+ case null => throw new NullPointerException("WeakHashSet cannot hold nulls")
+ case _ => {
+ removeStaleEntries()
+ val hash = elem.hashCode
+ val bucket = bucketFor(hash)
+ val oldHead = table(bucket)
+
+ def add() = {
+ table(bucket) = new Entry(elem, hash, oldHead, queue)
+ count += 1
+ if (count > threshhold) resize()
+ elem
+ }
+
+ @tailrec
+ def linkedListLoop(entry: Entry[A]): A = entry match {
+ case null => add()
+ case _ => {
+ val entryElem = entry.get
+ if (elem == entryElem) entryElem
+ else linkedListLoop(entry.tail)
+ }
+ }
+
+ linkedListLoop(oldHead)
+ }
+ }
+
+ // add an element to this set unless it's already in there and return this set
+ override def +(elem: A): this.type = elem match {
+ case null => throw new NullPointerException("WeakHashSet cannot hold nulls")
+ case _ => {
+ removeStaleEntries()
+ val hash = elem.hashCode
+ val bucket = bucketFor(hash)
+ val oldHead = table(bucket)
+
+ def add() {
+ table(bucket) = new Entry(elem, hash, oldHead, queue)
+ count += 1
+ if (count > threshhold) resize()
+ }
+
+ @tailrec
+ def linkedListLoop(entry: Entry[A]): Unit = entry match {
+ case null => add()
+ case _ if (elem == entry.get) => ()
+ case _ => linkedListLoop(entry.tail)
+ }
+
+ linkedListLoop(oldHead)
+ this
+ }
+ }
+
+ def +=(elem: A) = this + elem
+
+ // from scala.reflect.interanl.Set
+ override def addEntry(x: A) { this += x }
+
+ // remove an element from this set and return this set
+ override def -(elem: A): this.type = elem match {
+ case null => this
+ case _ => {
+ removeStaleEntries()
+ val bucket = bucketFor(elem.hashCode)
+
+
+
+ @tailrec
+ def linkedListLoop(prevEntry: Entry[A], entry: Entry[A]): Unit = entry match {
+ case null => ()
+ case _ if (elem == entry.get) => remove(bucket, prevEntry, entry)
+ case _ => linkedListLoop(entry, entry.tail)
+ }
- override val hashCode = ref.hashCode
+ linkedListLoop(null, table(bucket))
+ this
+ }
+ }
+
+ def -=(elem: A) = this - elem
+
+ // empty this set
+ override def clear(): Unit = {
+ table = new Array[Entry[A]](table.size)
+ threshhold = computeThreshHold
+ count = 0
+
+ // drain the queue - doesn't do anything because we're throwing away all the values anyway
+ @tailrec def queueLoop(): Unit = if (queue.poll() != null) queueLoop()
+ queueLoop()
+ }
+
+ // true if this set is empty
+ override def empty: This = new WeakHashSet[A](initialCapacity, loadFactor)
+
+ // the number of elements in this set
+ override def size: Int = {
+ removeStaleEntries()
+ count
+ }
+
+ override def apply(x: A): Boolean = this contains x
+
+ override def foreach[U](f: A => U): Unit = iterator foreach f
+
+ override def toList(): List[A] = iterator.toList
+
+ // Iterator over all the elements in this set in no particular order
+ override def iterator: Iterator[A] = {
+ removeStaleEntries()
+
+ new Iterator[A] {
+
+ /**
+ * the bucket currently being examined. Initially it's set past the last bucket and will be decremented
+ */
+ private[this] var currentBucket: Int = table.size
+
+ /**
+ * the entry that was last examined
+ */
+ private[this] var entry: Entry[A] = null
+
+ /**
+ * the element that will be the result of the next call to next()
+ */
+ private[this] var lookaheadelement: A = null.asInstanceOf[A]
+
+ @tailrec
+ def hasNext: Boolean = {
+ while (entry == null && currentBucket > 0) {
+ currentBucket -= 1
+ entry = table(currentBucket)
+ }
+
+ if (entry == null) false
+ else {
+ lookaheadelement = entry.get
+ if (lookaheadelement == null) {
+ // element null means the weakref has been cleared since we last did a removeStaleEntries(), move to the next entry
+ entry = entry.tail
+ hasNext
+ } else {
+ true
+ }
+ }
+ }
- override def equals(other: Any): Boolean = other match {
- case wf: WeakReferenceWithEquals[_] =>
- underlying.get() == wf.get()
- case _ =>
- false
+ def next(): A = if (lookaheadelement == null)
+ throw new IndexOutOfBoundsException("next on an empty iterator")
+ else {
+ val result = lookaheadelement
+ lookaheadelement = null.asInstanceOf[A]
+ entry = entry.tail
+ result
+ }
+ }
}
- private val underlying = new java.lang.ref.WeakReference(ref)
+ /**
+ * Diagnostic information about the internals of this set. Not normally
+ * needed by ordinary code, but may be useful for diagnosing performance problems
+ */
+ private[util] class Diagnostics {
+ /**
+ * Verify that the internal structure of this hash set is fully consistent.
+ * Throws an assertion error on any problem. In order for it to be reliable
+ * the entries must be stable. If any are garbage collected during validation
+ * then an assertion may inappropriately fire.
+ */
+ def fullyValidate {
+ var computedCount = 0
+ var bucket = 0
+ while (bucket < table.size) {
+ var entry = table(bucket)
+ while (entry != null) {
+ assert(entry.get != null, s"$entry had a null value indicated that gc activity was happening during diagnostic validation or that a null value was inserted")
+ computedCount += 1
+ val cachedHash = entry.hash
+ val realHash = entry.get.hashCode
+ assert(cachedHash == realHash, s"for $entry cached hash was $cachedHash but should have been $realHash")
+ val computedBucket = bucketFor(realHash)
+ assert(computedBucket == bucket, s"for $entry the computed bucket was $computedBucket but should have been $bucket")
+
+ entry = entry.tail
+ }
+
+ bucket += 1
+ }
+
+ assert(computedCount == count, s"The computed count was $computedCount but should have been $count")
+ }
+
+ /**
+ * Produces a diagnostic dump of the table that underlies this hash set.
+ */
+ def dump = table.deep
+
+ /**
+ * Number of buckets that hold collisions. Useful for diagnosing performance issues.
+ */
+ def collisionBucketsCount: Int =
+ (table filter (entry => entry != null && entry.tail != null)).size
+
+ /**
+ * Number of buckets that are occupied in this hash table.
+ */
+ def fullBucketsCount: Int =
+ (table filter (entry => entry != null)).size
+
+ /**
+ * Number of buckets in the table
+ */
+ def bucketsCount: Int = table.size
+
+ /**
+ * Number of buckets that don't hold anything
+ */
+ def emptyBucketsCount = bucketsCount - fullBucketsCount
+
+ /**
+ * Number of elements that are in collision. Useful for diagnosing performance issues.
+ */
+ def collisionsCount = size - (fullBucketsCount - collisionBucketsCount)
+
+ /**
+ * A map from a count of elements to the number of buckets with that count
+ */
+ def elementCountDistribution = table map linkedListSize groupBy identity map {case (size, list) => (size, list.size)}
+
+ private def linkedListSize(entry: Entry[A]) = {
+ var e = entry
+ var count = 0
+ while (e != null) {
+ count += 1
+ e = e.tail
+ }
+ count
+ }
+ }
+
+ private[util] def diagnostics = new Diagnostics
}
+
+/**
+ * Companion object for WeakHashSet
+ */
+object WeakHashSet {
+ /**
+ * A single entry in a WeakHashSet. It's a WeakReference plus a cached hash code and
+ * a link to the next Entry in the same bucket
+ */
+ private class Entry[A](element: A, val hash:Int, var tail: Entry[A], queue: ReferenceQueue[A]) extends WeakReference[A](element, queue)
+
+ val defaultInitialCapacity = 16
+ val defaultLoadFactor = .75
+
+ def apply[A <: AnyRef](initialCapacity: Int = WeakHashSet.defaultInitialCapacity, loadFactor: Double = WeakHashSet.defaultLoadFactor) = new WeakHashSet[A](initialCapacity, defaultLoadFactor)
+}
View
174 test/files/run/WeakHashSetTest.scala
@@ -0,0 +1,174 @@
+object Test {
+ def main(args: Array[String]) {
+ val test = scala.reflect.internal.util.WeakHashSetTest
+ test.checkEmpty
+ test.checkPlusEquals
+ test.checkPlusEqualsCollisions
+ test.checkRehashing
+ test.checkRehashCollisions
+ test.checkFindOrUpdate
+ test.checkMinusEquals
+ test.checkMinusEqualsCollisions
+ test.checkClear
+ test.checkIterator
+ test.checkIteratorCollisions
+
+ // This test is commented out because it relies on gc behavior which isn't reliable enough in an automated environment
+ // test.checkRemoveUnreferencedObjects
+ }
+}
+
+// put the main test object in the same package as WeakHashSet because
+// it uses the package private "diagnostics" method
+package scala.reflect.internal.util {
+
+ object WeakHashSetTest {
+ // a class guaranteed to provide hash collisions
+ case class Collider(x : String) extends Comparable[Collider] with Serializable {
+ override def hashCode = 0
+ def compareTo(y : Collider) = this.x compareTo y.x
+ }
+
+ // basic emptiness check
+ def checkEmpty {
+ val hs = new WeakHashSet[String]()
+ assert(hs.size == 0)
+ hs.diagnostics.fullyValidate
+ }
+
+ // make sure += works
+ def checkPlusEquals {
+ val hs = new WeakHashSet[String]()
+ val elements = List("hello", "goodbye")
+ elements foreach (hs += _)
+ assert(hs.size == 2)
+ assert(hs contains "hello")
+ assert(hs contains "goodbye")
+ hs.diagnostics.fullyValidate
+ }
+
+ // make sure += works when there are collisions
+ def checkPlusEqualsCollisions {
+ val hs = new WeakHashSet[Collider]()
+ val elements = List("hello", "goodbye") map Collider
+ elements foreach (hs += _)
+ assert(hs.size == 2)
+ assert(hs contains Collider("hello"))
+ assert(hs contains Collider("goodbye"))
+ hs.diagnostics.fullyValidate
+ }
+
+ // add a large number of elements to force rehashing and then validate
+ def checkRehashing {
+ val size = 200
+ val hs = new WeakHashSet[String]()
+ val elements = (0 until size).toList map ("a" + _)
+ elements foreach (hs += _)
+ elements foreach {i => assert(hs contains i)}
+ hs.diagnostics.fullyValidate
+ }
+
+ // make sure rehashing works properly when the set is rehashed
+ def checkRehashCollisions {
+ val size = 200
+ val hs = new WeakHashSet[Collider]()
+ val elements = (0 until size).toList map {x => Collider("a" + x)}
+ elements foreach (hs += _)
+ elements foreach {i => assert(hs contains i)}
+ hs.diagnostics.fullyValidate
+ }
+
+ // test that unreferenced objects are removed
+ // not run in an automated environment because gc behavior can't be relied on
+ def checkRemoveUnreferencedObjects {
+ val size = 200
+ val hs = new WeakHashSet[Collider]()
+ val elements = (0 until size).toList map {x => Collider("a" + x)}
+ elements foreach (hs += _)
+ // don't throw the following into a retained collection so gc
+ // can remove them
+ for (i <- 0 until size) {
+ hs += Collider("b" + i)
+ }
+ System.gc()
+ Thread.sleep(1000)
+ assert(hs.size == 200)
+ elements foreach {i => assert(hs contains i)}
+ for (i <- 0 until size) {
+ assert(!(hs contains Collider("b" + i)))
+ }
+ hs.diagnostics.fullyValidate
+ }
+
+ // make sure findOrUpdate returns the originally entered element
+ def checkFindOrUpdate {
+ val size = 200
+ val hs = new WeakHashSet[Collider]()
+ val elements = (0 until size).toList map {x => Collider("a" + x)}
+ elements foreach {x => assert(hs findEntryOrUpdate x eq x)}
+ for (i <- 0 until size) {
+ // when we do a lookup the result should be the same reference we
+ // original put in
+ assert(hs findEntryOrUpdate(Collider("a" + i)) eq elements(i))
+ }
+ hs.diagnostics.fullyValidate
+ }
+
+ // check -= functionality
+ def checkMinusEquals {
+ val hs = new WeakHashSet[String]()
+ val elements = List("hello", "goodbye")
+ elements foreach (hs += _)
+ hs -= "goodbye"
+ assert(hs.size == 1)
+ assert(hs contains "hello")
+ assert(!(hs contains "goodbye"))
+ hs.diagnostics.fullyValidate
+ }
+
+ // check -= when there are collisions
+ def checkMinusEqualsCollisions {
+ val hs = new WeakHashSet[Collider]
+ val elements = List(Collider("hello"), Collider("goodbye"))
+ elements foreach (hs += _)
+ hs -= Collider("goodbye")
+ assert(hs.size == 1)
+ assert(hs contains Collider("hello"))
+ assert(!(hs contains Collider("goodbye")))
+ hs -= Collider("hello")
+ assert(hs.size == 0)
+ assert(!(hs contains Collider("hello")))
+ hs.diagnostics.fullyValidate
+ }
+
+ // check that the clear method actually cleans everything
+ def checkClear {
+ val size = 200
+ val hs = new WeakHashSet[String]()
+ val elements = (0 until size).toList map ("a" + _)
+ elements foreach (hs += _)
+ hs.clear()
+ assert(hs.size == 0)
+ elements foreach {i => assert(!(hs contains i))}
+ hs.diagnostics.fullyValidate
+ }
+
+ // check that the iterator covers all the contents
+ def checkIterator {
+ val hs = new WeakHashSet[String]()
+ val elements = (0 until 20).toList map ("a" + _)
+ elements foreach (hs += _)
+ assert(elements.iterator.toList.sorted == elements.sorted)
+ hs.diagnostics.fullyValidate
+ }
+
+ // check that the iterator covers all the contents even when there is a collision
+ def checkIteratorCollisions {
+ val hs = new WeakHashSet[Collider]
+ val elements = (0 until 20).toList map {x => Collider("a" + x)}
+ elements foreach (hs += _)
+ assert(elements.iterator.toList.sorted == elements.sorted)
+ hs.diagnostics.fullyValidate
+ }
+ }
+}
Please sign in to comment.
Something went wrong with that request. Please try again.