Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Merge pull request #3322 from rklaehn/issue/6253

SI-6253 HashSet should implement union
  • Loading branch information...
commit 695b9e11a02175c2df47222e7a80627453f39e52 2 parents fe408de + 034f6b9
@Ichoran Ichoran authored
View
525 src/library/scala/collection/immutable/HashSet.scala
@@ -15,6 +15,7 @@ package immutable
import generic._
import scala.collection.parallel.immutable.ParHashSet
import scala.collection.GenSet
+import scala.annotation.tailrec
/** This class implements immutable sets using a hash trie.
*
@@ -38,7 +39,7 @@ class HashSet[A] extends AbstractSet[A]
with CustomParallelizable[A, ParHashSet[A]]
with Serializable
{
- import HashSet.{nullToEmpty, bufferSize}
+ import HashSet.{nullToEmpty, bufferSize, LeafHashSet}
override def companion: GenericCompanion[HashSet] = HashSet
@@ -85,8 +86,81 @@ class HashSet[A] extends AbstractSet[A]
override def + (elem1: A, elem2: A, elems: A*): HashSet[A] =
this + elem1 + elem2 ++ elems
+ override def union(that: GenSet[A]): HashSet[A] = that match {
+ case that: HashSet[A] =>
+ val buffer = new Array[HashSet[A]](bufferSize(this.size + that.size))
+ nullToEmpty(union0(that, 0, buffer, 0))
+ case _ => super.union(that)
+ }
+
+ override def intersect(that: GenSet[A]): HashSet[A] = that match {
+ case that: HashSet[A] =>
+ val buffer = new Array[HashSet[A]](bufferSize(this.size min that.size))
+ nullToEmpty(intersect0(that, 0, buffer, 0))
+ case _ => super.intersect(that)
+ }
+
+ override def diff(that: GenSet[A]): HashSet[A] = that match {
+ case that: HashSet[A] =>
+ val buffer = new Array[HashSet[A]](bufferSize(this.size))
+ nullToEmpty(diff0(that, 0, buffer, 0))
+ case _ => super.diff(that)
+ }
+
+ /**
+ * Union with a leaf HashSet at a given level.
+ * @param that a leaf HashSet
+ * @param level the depth in the tree. We need this when we have to create a branch node on top of this and that
+ * @return The union of this and that at the given level. Unless level is zero, the result is not a self-contained
+ * HashSet but needs to be stored at the correct depth
+ */
+ private[immutable] def union0(that: LeafHashSet[A], level: Int): HashSet[A] = {
+ // the default implementation is for the empty set, so we just return that
+ that
+ }
+
+ /**
+ * Union with a HashSet at a given level
+ * @param that a HashSet
+ * @param level the depth in the tree. We need to keep track of the level to know how deep we are in the tree
+ * @param buffer a temporary buffer that is used for temporarily storing elements when creating new branch nodes
+ * @param offset0 the first offset into the buffer in which we are allowed to write
+ * @return The union of this and that at the given level. Unless level is zero, the result is not a self-contained
+ * HashSet but needs to be stored at the correct depth
+ */
+ private[immutable] def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
+ // the default implementation is for the empty set, so we just return that
+ that
+ }
+
+ /**
+ * Intersection with another hash set at a given level
+ * @param level the depth in the tree. We need to keep track of the level to know how deep we are in the tree
+ * @param buffer a temporary buffer that is used for temporarily storing elements when creating new branch nodes
+ * @param offset0 the first offset into the buffer in which we are allowed to write
+ * @return The intersection of this and that at the given level. Unless level is zero, the result is not a
+ * self-contained HashSet but needs to be stored at the correct depth
+ */
+ private[immutable] def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
+ // the default implementation is for the empty set, so we just return the empty set
+ null
+ }
+
+ /**
+ * Diff with another hash set at a given level
+ * @param level the depth in the tree. We need to keep track of the level to know how deep we are in the tree
+ * @param buffer a temporary buffer that is used for temporarily storing elements when creating new branch nodes
+ * @param offset0 the first offset into the buffer in which we are allowed to write
+ * @return The diff of this and that at the given level. Unless level is zero, the result is not a
+ * self-contained HashSet but needs to be stored at the correct depth
+ */
+ private[immutable] def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
+ // the default implementation is for the empty set, so we just return the empty set
+ null
+ }
+
def - (e: A): HashSet[A] =
- removed0(e, computeHash(e), 0)
+ nullToEmpty(removed0(e, computeHash(e), 0))
override def filter(p: A => Boolean) = {
val buffer = new Array[HashSet[A]](bufferSize(size))
@@ -165,7 +239,14 @@ object HashSet extends ImmutableSetFactory[HashSet] {
}
}
- class HashSet1[A](private[HashSet] val key: A, private[HashSet] val hash: Int) extends HashSet[A] {
+ /**
+ * Common superclass of HashSet1 and HashSetCollision1, which are the two possible leaves of the Trie
+ */
+ private[HashSet] sealed abstract class LeafHashSet[A] extends HashSet[A] {
+ private[HashSet] def hash:Int
+ }
+
+ class HashSet1[A](private[HashSet] val key: A, private[HashSet] val hash: Int) extends LeafHashSet[A] {
override def size = 1
override def get0(key: A, hash: Int, level: Int): Boolean =
@@ -190,8 +271,42 @@ object HashSet extends ImmutableSetFactory[HashSet] {
}
}
+ override private[immutable] def union0(that: LeafHashSet[A], level: Int): HashSet[A] = that match {
+ case that if that.hash != this.hash =>
+ // different hash code, so there is no need to investigate further.
+ // Just create a branch node containing the two.
+ makeHashTrieSet(this.hash, this, that.hash, that, level)
+ case that: HashSet1[A] =>
+ if (this.key == that.key) {
+ this
+ } else {
+ // 32-bit hash collision (rare, but not impossible)
+ new HashSetCollision1[A](hash, ListSet.empty + this.key + that.key)
+ }
+ case that: HashSetCollision1[A] =>
+ val ks1 = that.ks + key
+ // Could use eq check (faster) if ListSet was guaranteed to return itself
+ if (ks1.size == that.ks.size) {
+ that
+ } else {
+ new HashSetCollision1[A](hash, ks1)
+ }
+ }
+
+ override private[immutable] def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int) = {
+ // switch to the Leaf version of union
+ // we can exchange the arguments because union is symmetrical
+ that.union0(this, level)
+ }
+
+ override private[immutable] def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] =
+ if (that.get0(key, hash, level)) this else null
+
+ override private[immutable] def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] =
+ if (that.get0(key, hash, level)) null else this
+
override def removed0(key: A, hash: Int, level: Int): HashSet[A] =
- if (hash == this.hash && key == this.key) HashSet.empty[A] else this
+ if (hash == this.hash && key == this.key) null else this
override protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] =
if (negate ^ p(key)) this else null
@@ -200,8 +315,7 @@ object HashSet extends ImmutableSetFactory[HashSet] {
override def foreach[U](f: A => U): Unit = f(key)
}
- private[immutable] class HashSetCollision1[A](private[HashSet] val hash: Int, val ks: ListSet[A])
- extends HashSet[A] {
+ private[immutable] class HashSetCollision1[A](private[HashSet] val hash: Int, val ks: ListSet[A]) extends LeafHashSet[A] {
override def size = ks.size
@@ -220,15 +334,116 @@ object HashSet extends ImmutableSetFactory[HashSet] {
if (hash == this.hash) new HashSetCollision1(hash, ks + key)
else makeHashTrieSet(this.hash, this, hash, new HashSet1(key, hash), level)
+ override def union0(that: LeafHashSet[A], level: Int): HashSet[A] = that match {
+ case that if that.hash != this.hash =>
+ // different hash code, so there is no need to investigate further.
+ // Just create a branch node containing the two.
+ makeHashTrieSet(this.hash, this, that.hash, that, level)
+ case that: HashSet1[A] =>
+ val ks1 = ks + that.key
+ // Could use eq check (faster) if ListSet was guaranteed to return itself
+ if (ks1.size == ks.size) {
+ this
+ } else {
+ // create a new HashSetCollision with the existing hash
+ // we don't have to check for size=1 because union is never going to remove elements
+ new HashSetCollision1[A](hash, ks1)
+ }
+ case that: HashSetCollision1[A] =>
+ val ks1 = this.ks ++ that.ks
+ ks1.size match {
+ case size if size == this.ks.size =>
+ // could this check be made faster by doing an eq check?
+ // I am not sure we can rely on ListSet returning itself when all elements are already in the set,
+ // so it seems unwise to rely on it.
+ this
+ case size if size == that.ks.size =>
+ // we have to check this as well, since we don't want to create a new instance if this is a subset of that
+ that
+ case _ =>
+ // create a new HashSetCollision with the existing hash
+ // we don't have to check for size=1 because union is never going to remove elements
+ new HashSetCollision1[A](hash, ks1)
+ }
+ }
+
+ override def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = that match {
+ case that: LeafHashSet[A] =>
+ // switch to the simpler Tree/Leaf implementation
+ this.union0(that, level)
+ case that: HashTrieSet[A] =>
+ // switch to the simpler Tree/Leaf implementation
+ // we can swap this and that because union is symmetrical
+ that.union0(this, level)
+ case _ => this
+ }
+
+ override private[immutable] def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
+ // filter the keys, taking advantage of the fact that we know their hash code
+ val ks1 = ks.filter(that.get0(_, hash, level))
+ ks1.size match {
+ case 0 =>
+ // the empty set
+ null
+ case size if size == this.size =>
+ // unchanged
+ // We do this check first since even if the result is of size 1 since
+ // it is preferable to return the existing set for better structural sharing
+ this
+ case size if size == that.size =>
+ // the other set
+ // We do this check first since even if the result is of size 1 since
+ // it is preferable to return the existing set for better structural sharing
+ that
+ case 1 =>
+ // create a new HashSet1 with the hash we already know
+ new HashSet1(ks1.head, hash)
+ case _ =>
+ // create a new HashSetCollison with the hash we already know and the new keys
+ new HashSetCollision1(hash, ks1)
+ }
+ }
+
+ override private[immutable] def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
+ val ks1 = ks.filterNot(that.get0(_, hash, level))
+ ks1.size match {
+ case 0 =>
+ // the empty set
+ null
+ case size if size == this.size =>
+ // unchanged
+ // We do this check first since even if the result is of size 1 since
+ // it is preferable to return the existing set for better structural sharing
+ this
+ case 1 =>
+ // create a new HashSet1 with the hash we already know
+ new HashSet1(ks1.head, hash)
+ case _ =>
+ // create a new HashSetCollison with the hash we already know and the new keys
+ new HashSetCollision1(hash, ks1)
+ }
+ }
+
override def removed0(key: A, hash: Int, level: Int): HashSet[A] =
if (hash == this.hash) {
val ks1 = ks - key
- if(ks1.isEmpty)
- HashSet.empty[A]
- else if(ks1.tail.isEmpty)
- new HashSet1(ks1.head, hash)
- else
- new HashSetCollision1(hash, ks1)
+ ks1.size match {
+ case 0 =>
+ // the empty set
+ null
+ case size if size == ks.size =>
+ // We do this check first since even if the result is of size 1 since
+ // it is preferable to return the existing set for better structural sharing
+ // we can not rely on ks.- returning the same instance if we subtract an element that is not in it
+ // so we need to do the size check
+ this
+ case 1 =>
+ // create a new HashSet1 with the hash we already know
+ new HashSet1(ks1.head, hash)
+ case _ =>
+ // create a new HashSetCollison with the hash we already know and the new keys
+ new HashSetCollision1(hash, ks1)
+ }
} else this
override protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
@@ -345,6 +560,284 @@ object HashSet extends ImmutableSetFactory[HashSet] {
}
}
+ override private[immutable] def union0(that: LeafHashSet[A], level: Int): HashSet[A] = {
+ val index = (that.hash >>> level) & 0x1f
+ val mask = (1 << index)
+ val offset = Integer.bitCount(bitmap & (mask - 1))
+ if ((bitmap & mask) != 0) {
+ val sub = elems(offset)
+ val sub1 = sub.union0(that, level + 5)
+ if (sub eq sub1) this
+ else {
+ val elems1 = new Array[HashSet[A]](elems.length)
+ Array.copy(elems, 0, elems1, 0, elems.length)
+ elems1(offset) = sub1
+ new HashTrieSet(bitmap, elems1, size + (sub1.size - sub.size))
+ }
+ } else {
+ val elems1 = new Array[HashSet[A]](elems.length + 1)
+ Array.copy(elems, 0, elems1, 0, offset)
+ elems1(offset) = that
+ Array.copy(elems, offset, elems1, offset + 1, elems.length - offset)
+ val bitmap1 = bitmap | mask
+ new HashTrieSet(bitmap1, elems1, size + that.size)
+ }
+ }
+
+ override private[immutable] def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = that match {
+ case that if that eq this =>
+ // shortcut for when that is this
+ // this happens often for nodes deeper in the tree, especially when that and this share a common "heritage"
+ // e.g. you have a large set A and do some small operations (adding and removing elements) to it to create B
+ // then A and B will have the vast majority of nodes in common, and this eq check will allow not even looking
+ // at these nodes.
+ this
+ case that: LeafHashSet[A] =>
+ // when that is a leaf, we can switch to the simpler Tree/Leaf implementation
+ this.union0(that, level)
+ case that: HashTrieSet[A] =>
+ val a = this.elems
+ var abm = this.bitmap
+ var ai = 0
+
+ val b = that.elems
+ var bbm = that.bitmap
+ var bi = 0
+
+ // fetch a new temporary array that is guaranteed to be big enough (32 elements)
+ var offset = offset0
+ var rs = 0
+
+ // loop as long as there are bits left in either abm or bbm
+ while ((abm | bbm) != 0) {
+ // lowest remaining bit in abm
+ val alsb = abm ^ (abm & (abm - 1))
+ // lowest remaining bit in bbm
+ val blsb = bbm ^ (bbm & (bbm - 1))
+ if (alsb == blsb) {
+ val sub1 = a(ai).union0(b(bi), level + 5, buffer, offset)
+ rs += sub1.size
+ buffer(offset) = sub1
+ offset += 1
+ // clear lowest remaining one bit in abm and increase the a index
+ abm &= ~alsb
+ ai += 1
+ // clear lowest remaining one bit in bbm and increase the b index
+ bbm &= ~blsb
+ bi += 1
+ } else if (unsignedCompare(alsb - 1, blsb - 1)) {
+ // alsb is smaller than blsb, or alsb is set and blsb is 0
+ // in any case, alsb is guaranteed to be set here!
+ val sub1 = a(ai)
+ rs += sub1.size
+ buffer(offset) = sub1
+ offset += 1
+ // clear lowest remaining one bit in abm and increase the a index
+ abm &= ~alsb
+ ai += 1
+ } else {
+ // blsb is smaller than alsb, or blsb is set and alsb is 0
+ // in any case, blsb is guaranteed to be set here!
+ val sub1 = b(bi)
+ rs += sub1.size
+ buffer(offset) = sub1
+ offset += 1
+ // clear lowest remaining one bit in bbm and increase the b index
+ bbm &= ~blsb
+ bi += 1
+ }
+ }
+ if (rs == this.size) {
+ // if the result would be identical to this, we might as well return this
+ this
+ } else if (rs == that.size) {
+ // if the result would be identical to that, we might as well return that
+ that
+ } else {
+ // we don't have to check whether the result is a leaf, since union will only make the set larger
+ // and this is not a leaf to begin with.
+ val length = offset - offset0
+ val elems = new Array[HashSet[A]](length)
+ System.arraycopy(buffer, offset0, elems, 0, length)
+ new HashTrieSet(this.bitmap | that.bitmap, elems, rs)
+ }
+ case _ => this
+ }
+
+ override private[immutable] def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = that match {
+ case that if that eq this =>
+ // shortcut for when that is this
+ // this happens often for nodes deeper in the tree, especially when that and this share a common "heritage"
+ // e.g. you have a large set A and do some small operations (adding and removing elements) to it to create B
+ // then A and B will have the vast majority of nodes in common, and this eq check will allow not even looking
+ // at these nodes!
+ this
+ case that: LeafHashSet[A] =>
+ // when that is a leaf, we can switch to the simpler Tree/Leaf implementation
+ // it is OK to swap the arguments because intersect is symmetric
+ // (we can't do this in case of diff, which is not symmetric)
+ that.intersect0(this, level, buffer, offset0)
+ case that: HashTrieSet[A] =>
+ val a = this.elems
+ var abm = this.bitmap
+ var ai = 0
+
+ val b = that.elems
+ var bbm = that.bitmap
+ var bi = 0
+
+ // if the bitmasks do not overlap, the result is definitely empty so we can abort here
+ if ((abm & bbm) == 0)
+ return null
+
+ // fetch a new temporary array that is guaranteed to be big enough (32 elements)
+ var offset = offset0
+ var rs = 0
+ var rbm = 0
+
+ // loop as long as there are bits left that are set in both abm and bbm
+ while ((abm & bbm) != 0) {
+ // highest remaining bit in abm
+ val alsb = abm ^ (abm & (abm - 1))
+ // highest remaining bit in bbm
+ val blsb = bbm ^ (bbm & (bbm - 1))
+ if (alsb == blsb) {
+ val sub1 = a(ai).intersect0(b(bi), level + 5, buffer, offset)
+ if (sub1 ne null) {
+ rs += sub1.size
+ rbm |= alsb
+ buffer(offset) = sub1
+ offset += 1
+ }
+ // clear lowest remaining one bit in abm and increase the a index
+ abm &= ~alsb;
+ ai += 1
+ // clear lowest remaining one bit in bbm and increase the b index
+ bbm &= ~blsb;
+ bi += 1
+ } else if (unsignedCompare(alsb - 1, blsb - 1)) {
+ // alsb is smaller than blsb, or alsb is set and blsb is 0
+ // in any case, alsb is guaranteed to be set here!
+ // clear lowest remaining one bit in abm and increase the a index
+ abm &= ~alsb;
+ ai += 1
+ } else {
+ // blsb is smaller than alsb, or blsb is set and alsb is 0
+ // in any case, blsb is guaranteed to be set here!
+ // clear lowest remaining one bit in bbm and increase the b index
+ bbm &= ~blsb;
+ bi += 1
+ }
+ }
+
+ if (rbm == 0) {
+ // if the result bitmap is empty, the result is the empty set
+ null
+ } else if (rs == size0) {
+ // if the result has the same number of elements as this, it must be identical to this,
+ // so we might as well return this
+ this
+ } else if (rs == that.size0) {
+ // if the result has the same number of elements as that, it must be identical to that,
+ // so we might as well return that
+ that
+ } else {
+ val length = offset - offset0
+ if (length == 1 && !buffer(offset0).isInstanceOf[HashTrieSet[A]])
+ buffer(offset0)
+ else {
+ val elems = new Array[HashSet[A]](length)
+ System.arraycopy(buffer, offset0, elems, 0, length)
+ new HashTrieSet[A](rbm, elems, rs)
+ }
+ }
+ case _ => null
+ }
+
+ override private[immutable] def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = that match {
+ case that if that eq this =>
+ // shortcut for when that is this
+ // this happens often for nodes deeper in the tree, especially when that and this share a common "heritage"
+ // e.g. you have a large set A and do some small operations (adding and removing elements) to it to create B
+ // then A and B will have the vast majority of nodes in common, and this eq check will allow not even looking
+ // at these nodes!
+ null
+ case that: HashSet1[A] =>
+ removed0(that.key, that.hash, level)
+ case that: HashTrieSet[A] =>
+ val a = this.elems
+ var abm = this.bitmap
+ var ai = 0
+
+ val b = that.elems
+ var bbm = that.bitmap
+ var bi = 0
+
+ // fetch a new temporary array that is guaranteed to be big enough (32 elements)
+ var offset = offset0
+ var rs = 0
+ var rbm = 0
+
+ // loop until there are no more bits in abm
+ while(abm!=0) {
+ // highest remaining bit in abm
+ val alsb = abm ^ (abm & (abm - 1))
+ // highest remaining bit in bbm
+ val blsb = bbm ^ (bbm & (bbm - 1))
+ if (alsb == blsb) {
+ val sub1 = a(ai).diff0(b(bi), level + 5, buffer, offset)
+ if (sub1 ne null) {
+ rs += sub1.size
+ rbm |= alsb
+ buffer(offset) = sub1
+ offset += 1
+ }
+ // clear lowest remaining one bit in abm and increase the a index
+ abm &= ~alsb; ai += 1
+ // clear lowest remaining one bit in bbm and increase the b index
+ bbm &= ~blsb; bi += 1
+ } else if (unsignedCompare(alsb - 1, blsb - 1)) {
+ // alsb is smaller than blsb, or alsb is set and blsb is 0
+ // in any case, alsb is guaranteed to be set here!
+ val sub1 = a(ai)
+ rs += sub1.size
+ rbm |= alsb
+ buffer(offset) = sub1; offset += 1
+ // clear lowest remaining one bit in abm and increase the a index
+ abm &= ~alsb; ai += 1
+ } else {
+ // blsb is smaller than alsb, or blsb is set and alsb is 0
+ // in any case, blsb is guaranteed to be set here!
+ // clear lowest remaining one bit in bbm and increase the b index
+ bbm &= ~blsb; bi += 1
+ }
+ }
+ if (rbm == 0) {
+ null
+ } else if (rs == this.size0) {
+ // if the result has the same number of elements as this, it must be identical to this,
+ // so we might as well return this
+ this
+ } else {
+ val length = offset - offset0
+ if (length == 1 && !buffer(offset0).isInstanceOf[HashTrieSet[A]])
+ buffer(offset0)
+ else {
+ val elems = new Array[HashSet[A]](length)
+ System.arraycopy(buffer, offset0, elems, 0, length)
+ new HashTrieSet[A](rbm, elems, rs)
+ }
+ }
+ case that: HashSetCollision1[A] =>
+ // we remove the elements using removed0 so we can use the fact that we know the hash of all elements
+ // to be removed
+ @tailrec def removeAll(s:HashSet[A], r:ListSet[A]) : HashSet[A] =
+ if(r.isEmpty || (s eq null)) s
+ else removeAll(s.removed0(r.head, that.hash, level), r.tail)
+ removeAll(this, that.ks)
+ case _ => this
+ }
+
override def removed0(key: A, hash: Int, level: Int): HashSet[A] = {
val index = (hash >>> level) & 0x1f
val mask = (1 << index)
@@ -353,7 +846,7 @@ object HashSet extends ImmutableSetFactory[HashSet] {
val sub = elems(offset)
val subNew = sub.removed0(key, hash, level + 5)
if (sub eq subNew) this
- else if (subNew.isEmpty) {
+ else if (subNew eq null) {
val bitmapNew = bitmap ^ mask
if (bitmapNew != 0) {
val elemsNew = new Array[HashSet[A]](elems.length - 1)
@@ -367,7 +860,7 @@ object HashSet extends ImmutableSetFactory[HashSet] {
else
new HashTrieSet(bitmapNew, elemsNew, sizeNew)
} else
- HashSet.empty[A]
+ null
} else {
val elemsNew = new Array[HashSet[A]](elems.length)
Array.copy(elems, 0, elemsNew, 0, elems.length)
@@ -526,6 +1019,10 @@ object HashSet extends ImmutableSetFactory[HashSet] {
result
}
+ // unsigned comparison
+ @inline private[this] def unsignedCompare(i: Int, j: Int) =
+ (i < j) ^ (i < 0) ^ (j < 0)
+
@SerialVersionUID(2L) private class SerializationProxy[A,B](@transient private var orig: HashSet[A]) extends Serializable {
private def writeObject(out: java.io.ObjectOutputStream) {
val s = orig.size
View
64 test/files/run/t6253a.scala
@@ -0,0 +1,64 @@
+import scala.collection.immutable.HashSet
+
+object Test extends App {
+
+ var hashCount = 0
+
+ /**
+ * A key that produces lots of hash collisions, to exercise the part of the code that deals with those
+ */
+ case class Collision(value: Int) {
+
+ override def hashCode = {
+ // we do not check hash counts for Collision keys because ListSet.++ uses a mutable hashset internally,
+ // so when we have hash collisions, union will call key.hashCode.
+ // hashCount += 1
+ value / 5
+ }
+ }
+
+ /**
+ * A key that is identical to int other than that it counts hashCode invocations
+ */
+ case class HashCounter(value: Int) {
+
+ override def hashCode = {
+ hashCount += 1
+ value
+ }
+ }
+
+ def testUnion[T](sizes: Seq[Int], offsets: Seq[Double], keyType: String, mkKey: Int => T) {
+ for {
+ i <- sizes
+ o <- offsets
+ } {
+ val e = HashSet.empty[T]
+ val j = (i * o).toInt
+ // create two sets of size i with overlap o
+ val a = e ++ (0 until i).map(mkKey)
+ require(a.size == i, s"Building HashSet of size $i failed. Key type $keyType.")
+ val b = e ++ (j until (i + j)).map(mkKey)
+ require(b.size == i, s"Building HashSet of size $i failed. Key type $keyType.")
+ val as = e ++ (0 until j).map(mkKey)
+ require(as.size == j, s"Building HashSet of size $j failed. Key type $keyType.")
+ val hashCount0 = hashCount
+ val u = a union b
+ require(hashCount == hashCount0, s"key.hashCode should not be called, but has been called ${hashCount - hashCount0} times. Key type $keyType.")
+ require(u == (a union scala.collection.mutable.HashSet(b.toSeq: _*)), s"Operation must still work for other sets!")
+ require(u.size == i + j, s"Expected size ${i+j}. Real size ${u.size}. Key type $keyType.")
+ for (x <- 0 until i + j)
+ require(u.contains(mkKey(x)), s"Key type $keyType. Set (0 until ${i + j}) should contain $x but does not.")
+ val a_as = a union as
+ val as_a = as union a
+ require((a_as eq a) || (a_as eq as), s"No structural sharing in a union as. Key type $keyType, a=(0 until $i) as=(0 until $j)")
+ require((as_a eq a) || (as_a eq as), s"No structural sharing in as union a. Key type $keyType, a=(0 until $i) as=(0 until $j)")
+ }
+ }
+
+ val sizes = Seq(1, 10, 100, 1000, 10000, 100000)
+ val offsets = Seq(0.0, 0.25, 0.5, 0.75, 1.0)
+ testUnion(sizes, offsets, "int", identity[Int])
+ testUnion(sizes, offsets, "hashcounter", HashCounter.apply)
+ testUnion(sizes, offsets, "collision", Collision.apply)
+}
View
62 test/files/run/t6253b.scala
@@ -0,0 +1,62 @@
+import scala.collection.immutable.HashSet
+
+object Test extends App {
+
+ var hashCount = 0
+
+ /**
+ * A key that produces lots of hash collisions, to exercise the part of the code that deals with those
+ */
+ case class Collision(value: Int) {
+
+ override def hashCode = {
+ hashCount += 1
+ value / 5
+ }
+ }
+
+ /**
+ * A key that is identical to int other than that it counts hashCode invocations
+ */
+ case class HashCounter(value: Int) {
+
+ override def hashCode = {
+ hashCount += 1
+ value
+ }
+ }
+
+ def testIntersect[T](sizes: Seq[Int], offsets: Seq[Double], keyType: String, mkKey: Int => T) {
+ for {
+ i <- sizes
+ o <- offsets
+ } {
+ val e = HashSet.empty[T]
+ val j = (i * o).toInt
+ // create two sets of size i with overlap o
+ val a = e ++ (0 until i).map(mkKey)
+ require(a.size == i, s"Building HashSet of size $i failed. Key type $keyType.")
+ val b = e ++ (j until (i + j)).map(mkKey)
+ require(b.size == i, s"Building HashSet of size $i failed. Key type $keyType.")
+ val as = e ++ (0 until j).map(mkKey)
+ require(as.size == j, s"Building HashSet of size $j failed. Key type $keyType.")
+ val hashCount0 = hashCount
+ val u = a intersect b
+ require(hashCount == hashCount0, s"key.hashCode should not be called, but has been called ${hashCount - hashCount0} times. Key type $keyType.")
+ require(u == (a intersect scala.collection.mutable.HashSet(b.toSeq: _*)), s"Operation must still work for other sets!")
+ require(u.size == i - j, s"Expected size ${i + j}. Real size ${u.size}. Key type $keyType.")
+ for (x <- j until i)
+ require(u.contains(mkKey(x)), s"Key type $keyType. Set (0 until ${i + j}) should contain $x but does not.")
+ val a_as = a intersect as
+ val as_a = as intersect a
+ require((a_as eq as) || (a_as eq a), s"No structural sharing in a intersect as. Key type $keyType, a=(0 until $i) as=(0 until $j)")
+ require((as_a eq as) || (as_a eq a), s"No structural sharing in as intersect a. Key type $keyType, a=(0 until $i) as=(0 until $j)")
+ }
+ }
+
+ val sizes = Seq(1, 10, 100, 1000, 10000, 100000)
+ val offsets = Seq(0.0, 0.25, 0.5, 0.75, 1.0)
+ testIntersect(sizes, offsets, "int", identity[Int])
+ testIntersect(sizes, offsets, "hashcounter", HashCounter.apply)
+ testIntersect(sizes, offsets, "collision", Collision.apply)
+}
View
63 test/files/run/t6253c.scala
@@ -0,0 +1,63 @@
+import scala.collection.immutable.HashSet
+
+object Test extends App {
+
+ var hashCount = 0
+
+ /**
+ * A key that produces lots of hash collisions, to exercise the part of the code that deals with those
+ */
+ case class Collision(value: Int) {
+
+ override def hashCode = {
+ hashCount += 1
+ value / 5
+ }
+ }
+
+ /**
+ * A key that is identical to int other than that it counts hashCode invocations
+ */
+ case class HashCounter(value: Int) {
+
+ override def hashCode = {
+ hashCount += 1
+ value
+ }
+ }
+
+ def testDiff[T](sizes: Seq[Int], offsets: Seq[Double], keyType: String, mkKey: Int => T) {
+ for {
+ i <- sizes
+ o <- offsets
+ } {
+ val e = HashSet.empty[T]
+ val j = (i * o).toInt
+ // create two sets of size i with overlap o
+ val a = e ++ (0 until i).map(mkKey)
+ require(a.size == i, s"Building HashSet of size $i failed. Key type $keyType.")
+ val b = e ++ (j until (i + j)).map(mkKey)
+ require(b.size == i, s"Building HashSet of size $i failed. Key type $keyType.")
+ val as = e ++ (0 until j).map(mkKey)
+ require(as.size == j, s"Building HashSet of size $j failed. Key type $keyType.")
+ val hashCount0 = hashCount
+ val u = a diff b
+ require(hashCount == hashCount0, s"key.hashCode should not be called, but has been called ${hashCount - hashCount0} times. Key type $keyType.")
+ require(u == (a diff scala.collection.mutable.HashSet(b.toSeq: _*)), s"Operation must still work for other sets!")
+ require(u.size == j, s"Expected size $j. Real size ${u.size}. Key type $keyType.")
+ for (x <- 0 until j)
+ require(u.contains(mkKey(x)), s"Key type $keyType. Set (0 until ${i + j}) should contain $x but does not.")
+ require((as intersect b).isEmpty)
+ val b_as = b diff as
+ val as_b = as diff b
+ require((b_as eq b) || (b_as eq as), s"No structural sharing in b diff as. Key type $keyType, b=($j until ${i + j}) as=(0 until $j)")
+ require((as_b eq b) || (as_b eq as), s"No structural sharing in as diff b. Key type $keyType, b=($j until ${i + j}) as=(0 until $j)")
+ }
+ }
+
+ val sizes = Seq(1, 10, 100, 1000, 10000, 100000)
+ val offsets = Seq(0.0, 0.25, 0.5, 0.75, 1.0)
+ testDiff(sizes, offsets, "int", identity[Int])
+ testDiff(sizes, offsets, "hashCounter", HashCounter.apply)
+ testDiff(sizes, offsets, "collision", Collision.apply)
+}
Please sign in to comment.
Something went wrong with that request. Please try again.