From af64e304dbe6b27aaf82098e15711afd97f36226 Mon Sep 17 00:00:00 2001
From: Lukas Rytz
Date: Fri, 25 Nov 2022 14:48:57 +0100
Subject: [PATCH] Delete HashTable

---
 .../scala/collection/mutable/HashTable.scala  | 417 ------------------
 .../collection/mutable/OpenHashMap.scala      |   6 +-
 2 files changed, 4 insertions(+), 419 deletions(-)
 delete mode 100644 src/library/scala/collection/mutable/HashTable.scala

diff --git a/src/library/scala/collection/mutable/HashTable.scala b/src/library/scala/collection/mutable/HashTable.scala
deleted file mode 100644
index dc559e86ce98..000000000000
--- a/src/library/scala/collection/mutable/HashTable.scala
+++ /dev/null
@@ -1,417 +0,0 @@
-/*
- * Scala (https://www.scala-lang.org)
- *
- * Copyright EPFL and Lightbend, Inc.
- *
- * Licensed under Apache License 2.0
- * (http://www.apache.org/licenses/LICENSE-2.0).
- *
- * See the NOTICE file distributed with this work for
- * additional information regarding copyright ownership.
- */
-
-package scala
-package collection.mutable
-
-import collection.{AbstractIterator, Iterator}
-
-import java.lang.Integer.{numberOfLeadingZeros, rotateRight}
-import scala.util.hashing.byteswap32
-
-import java.lang.Integer
-
-/** This class can be used to construct data structures that are based
- *  on hashtables. Class `HashTable[A]` implements a hashtable
- *  that maps keys of type `A` to values of the fully abstract
- *  member type `Entry`. Classes that make use of `HashTable`
- *  have to provide an implementation for `Entry`.
- *
- *  There are mainly two parameters that affect the performance of a hashtable:
- *  the initial size and the load factor. The size
- *  refers to the number of buckets in the hashtable, and the load
- *  factor is a measure of how full the hashtable is allowed to get before
- *  its size is automatically doubled. Both parameters may be changed by
- *  overriding the corresponding values in class `HashTable`.
- *
- *  @tparam A  type of the elements contained in this hash table.
- */
-// Was an abstract class, but to simplify the upgrade of the parallel collections I’ve made it a trait
-private[collection] /*abstract class*/ trait HashTable[A, B, Entry >: Null <: HashEntry[A, Entry]] extends HashTable.HashUtils[A] {
-  // Replacing Entry type parameter by abstract type member here allows to not expose to public
-  // implementation-specific entry classes such as `DefaultEntry` or `LinkedEntry`.
-  // However, I'm afraid it's too late now for such breaking change.
-  import HashTable._
-
-  protected var _loadFactor = defaultLoadFactor
-
-  /** The actual hash table.
-   */
-  protected[collection] var table: Array[HashEntry[A, Entry]] = new Array(initialCapacity)
-
-  /** The number of mappings contained in this hash table.
-   */
-  protected[collection] var tableSize: Int = 0
-
-  final def size: Int = tableSize
-
-  /** The next size value at which to resize (capacity * load factor).
-   */
-  protected[collection] var threshold: Int = initialThreshold(_loadFactor)
-
-  /** The array keeping track of the number of elements in 32 element blocks.
-   */
-  protected var sizemap: Array[Int] = null
-
-  protected var seedvalue: Int = tableSizeSeed
-
-  protected def tableSizeSeed = Integer.bitCount(table.length - 1)
-
-  /** The initial size of the hash table.
-   */
-  protected def initialSize: Int = 16
-
-  /** The initial threshold.
-   */
-  private def initialThreshold(_loadFactor: Int): Int = newThreshold(_loadFactor, initialCapacity)
-
-  private def initialCapacity = capacity(initialSize)
-
-  private def lastPopulatedIndex = {
-    var idx = table.length - 1
-    while (table(idx) == null && idx > 0)
-      idx -= 1
-
-    idx
-  }
-
-  /**
-   * Initializes the collection from the input stream. `readEntry` will be called for each
-   * entry to be read from the input stream.
-   */
-  private[collection] def init(in: java.io.ObjectInputStream, readEntry: => Entry): Unit = {
-    _loadFactor = in.readInt()
-    assert(_loadFactor > 0)
-
-    val size = in.readInt()
-    tableSize = 0
-    assert(size >= 0)
-
-    seedvalue = in.readInt()
-
-    val smDefined = in.readBoolean()
-
-    table = new Array(capacity(sizeForThreshold(_loadFactor, size)))
-    threshold = newThreshold(_loadFactor, table.length)
-
-    if (smDefined) sizeMapInit(table.length) else sizemap = null
-
-    var index = 0
-    while (index < size) {
-      addEntry(readEntry)
-      index += 1
-    }
-  }
-
-  /**
-   * Serializes the collection to the output stream by saving the load factor, collection
-   * size and collection entries. `writeEntry` is responsible for writing an entry to the stream.
-   *
-   * `foreachEntry` determines the order in which the key/value pairs are saved to the stream. To
-   * deserialize, `init` should be used.
-   */
-  private[collection] def serializeTo(out: java.io.ObjectOutputStream, writeEntry: Entry => Unit): Unit = {
-    out.writeInt(_loadFactor)
-    out.writeInt(tableSize)
-    out.writeInt(seedvalue)
-    out.writeBoolean(isSizeMapDefined)
-
-    foreachEntry(writeEntry)
-  }
-
-  /** Find entry with given key in table, null if not found.
-   */
-  final def findEntry(key: A): Entry =
-    findEntry0(key, index(elemHashCode(key)))
-
-  protected[collection] final def findEntry0(key: A, h: Int): Entry = {
-    var e = table(h).asInstanceOf[Entry]
-    while (e != null && !elemEquals(e.key, key)) e = e.next
-    e
-  }
-
-  /** Add entry to table
-   *  pre: no entry with same key exists
-   */
-  protected[collection] final def addEntry(e: Entry): Unit = {
-    addEntry0(e, index(elemHashCode(e.key)))
-  }
-
-  protected[collection] final def addEntry0(e: Entry, h: Int): Unit = {
-    e.next = table(h).asInstanceOf[Entry]
-    table(h) = e
-    tableSize = tableSize + 1
-    nnSizeMapAdd(h)
-    if (tableSize > threshold)
-      resize(2 * table.length)
-  }
-
-  /** Find entry with given key in table, or add new one if not found.
-   *  May be somewhat faster then `findEntry`/`addEntry` pair as it
-   *  computes entry's hash index only once.
-   *  Returns entry found in table or null.
-   *  New entries are created by calling `createNewEntry` method.
-   */
-  def findOrAddEntry(key: A, value: B): Entry = {
-    val h = index(elemHashCode(key))
-    val e = findEntry0(key, h)
-    if (e ne null) e else { addEntry0(createNewEntry(key, value), h); null }
-  }
-
-  /** Creates new entry to be immediately inserted into the hashtable.
-   *  This method is guaranteed to be called only once and in case that the entry
-   *  will be added. In other words, an implementation may be side-effecting.
-   */
-  def createNewEntry(key: A, value: B): Entry
-
-  /** Remove entry from table if present.
-   */
-  final def removeEntry(key: A) : Entry = {
-    removeEntry0(key, index(elemHashCode(key)))
-  }
-  /** Remove entry from table if present.
-   */
-  private[collection] final def removeEntry0(key: A, h: Int) : Entry = {
-    var e = table(h).asInstanceOf[Entry]
-    if (e != null) {
-      if (elemEquals(e.key, key)) {
-        table(h) = e.next
-        tableSize = tableSize - 1
-        nnSizeMapRemove(h)
-        e.next = null
-        return e
-      } else {
-        var e1 = e.next
-        while (e1 != null && !elemEquals(e1.key, key)) {
-          e = e1
-          e1 = e1.next
-        }
-        if (e1 != null) {
-          e.next = e1.next
-          tableSize = tableSize - 1
-          nnSizeMapRemove(h)
-          e1.next = null
-          return e1
-        }
-      }
-    }
-    null
-  }
-
-  /** An iterator returning all entries.
-   */
-  def entriesIterator: Iterator[Entry] = new AbstractIterator[Entry] {
-    val iterTable = table
-    var idx = lastPopulatedIndex
-    var es = iterTable(idx)
-
-    def hasNext = es != null
-    def next() = {
-      val res = es
-      es = es.next
-      while (es == null && idx > 0) {
-        idx = idx - 1
-        es = iterTable(idx)
-      }
-      res.asInstanceOf[Entry]
-    }
-  }
-
-  /** Avoid iterator for a 2x faster traversal. */
-  def foreachEntry[U](f: Entry => U): Unit = {
-    val iterTable = table
-    var idx = lastPopulatedIndex
-    var es = iterTable(idx)
-
-    while (es != null) {
-      val next = es.next // Cache next in case f removes es.
-      f(es.asInstanceOf[Entry])
-      es = next
-
-      while (es == null && idx > 0) {
-        idx -= 1
-        es = iterTable(idx)
-      }
-    }
-  }
-
-  /** Remove all entries from table
-   */
-  def clearTable(): Unit = {
-    var i = table.length - 1
-    while (i >= 0) { table(i) = null; i = i - 1 }
-    tableSize = 0
-    nnSizeMapReset(0)
-  }
-
-  private def resize(newSize: Int): Unit = {
-    val oldTable = table
-    table = new Array(newSize)
-    nnSizeMapReset(table.length)
-    var i = oldTable.length - 1
-    while (i >= 0) {
-      var e = oldTable(i)
-      while (e != null) {
-        val h = index(elemHashCode(e.key))
-        val e1 = e.next
-        e.next = table(h).asInstanceOf[Entry]
-        table(h) = e
-        e = e1
-        nnSizeMapAdd(h)
-      }
-      i = i - 1
-    }
-    threshold = newThreshold(_loadFactor, newSize)
-  }
-
-  /* Size map handling code */
-
-  /*
-   * The following three sizeMap* functions (Add, Remove, Reset)
-   * are used to update the size map of the hash table.
-   *
-   * The size map logically divides the hash table into `sizeMapBucketSize` element buckets
-   * by keeping an integer entry for each such bucket. Each integer entry simply denotes
-   * the number of elements in the corresponding bucket.
-   * Best understood through an example, see:
-   * table   = [/, 1, /, 6, 90, /, -3, 5]    (8 entries)
-   * sizemap = [     2     |     3      ]    (2 entries)
-   * where sizeMapBucketSize == 4.
-   *
-   * By default the size map is not initialized, so these methods don't do anything, thus,
-   * their impact on hash table performance is negligible. However, if the hash table
-   * is converted into a parallel hash table, the size map is initialized, as it will be needed
-   * there.
-   */
-  protected final def nnSizeMapAdd(h: Int) = if (sizemap ne null) {
-    sizemap(h >> sizeMapBucketBitSize) += 1
-  }
-
-  protected final def nnSizeMapRemove(h: Int) = if (sizemap ne null) {
-    sizemap(h >> sizeMapBucketBitSize) -= 1
-  }
-
-  protected final def nnSizeMapReset(tableLength: Int) = if (sizemap ne null) {
-    val nsize = calcSizeMapSize(tableLength)
-    if (sizemap.length != nsize) sizemap = new Array[Int](nsize)
-    else java.util.Arrays.fill(sizemap, 0)
-  }
-
-  private[collection] final def totalSizeMapBuckets = if (sizeMapBucketSize < table.length) 1 else table.length / sizeMapBucketSize
-
-  protected final def calcSizeMapSize(tableLength: Int) = (tableLength >> sizeMapBucketBitSize) + 1
-
-  // discards the previous sizemap and only allocates a new one
-  protected def sizeMapInit(tableLength: Int): Unit = {
-    sizemap = new Array[Int](calcSizeMapSize(tableLength))
-  }
-
-  // discards the previous sizemap and populates the new one
-  protected final def sizeMapInitAndRebuild() = {
-    sizeMapInit(table.length)
-
-    // go through the buckets, count elements
-    var tableidx = 0
-    var bucketidx = 0
-    val tbl = table
-    var tableuntil = 0
-    if (tbl.length < sizeMapBucketSize) tableuntil = tbl.length else tableuntil = sizeMapBucketSize
-    val totalbuckets = totalSizeMapBuckets
-    while (bucketidx < totalbuckets) {
-      var currbucketsize = 0
-      while (tableidx < tableuntil) {
-        var e = tbl(tableidx)
-        while (e ne null) {
-          currbucketsize += 1
-          e = e.next
-        }
-        tableidx += 1
-      }
-      sizemap(bucketidx) = currbucketsize
-      tableuntil += sizeMapBucketSize
-      bucketidx += 1
-    }
-  }
-
-  private[collection] def printSizeMap() = {
-    println(sizemap.to(collection.immutable.List))
-  }
-
-  protected final def sizeMapDisable() = sizemap = null
-
-  protected final def isSizeMapDefined = sizemap ne null
-
-  // override to automatically initialize the size map
-  protected def alwaysInitSizeMap = false
-
-  /* End of size map handling code */
-
-  protected def elemEquals(key1: A, key2: A): Boolean = (key1 == key2)
-
-  /**
-   * Note: we take the most significant bits of the hashcode, not the lower ones
-   * this is of crucial importance when populating the table in parallel
-   */
-  protected[collection] final def index(hcode: Int): Int = {
-    val ones = table.length - 1
-    val exponent = Integer.numberOfLeadingZeros(ones)
-    (improve(hcode, seedvalue) >>> exponent) & ones
-  }
-}
-
-private[collection] object HashTable {
-  /** The load factor for the hash table (in 0.001 step).
-   */
-  private[collection] final def defaultLoadFactor: Int = 750 // corresponds to 75%
-  private[collection] final def loadFactorDenum = 1000 // should be loadFactorDenom, but changing that isn't binary compatible
-
-  private[collection] final def newThreshold(_loadFactor: Int, size: Int) = ((size.toLong * _loadFactor) / loadFactorDenum).toInt
-
-  private[collection] final def sizeForThreshold(_loadFactor: Int, thr: Int) = ((thr.toLong * loadFactorDenum) / _loadFactor).toInt
-
-  private[collection] final def capacity(expectedSize: Int) = nextPositivePowerOfTwo(expectedSize)
-
-  trait HashUtils[KeyType] {
-    protected final def sizeMapBucketBitSize = 5
-    // so that:
-    protected final def sizeMapBucketSize = 1 << sizeMapBucketBitSize
-
-    protected[collection] def elemHashCode(key: KeyType) = key.##
-
-    /**
-     * Defer to a high-quality hash in [[scala.util.hashing]].
-     * The goal is to distribute across bins as well as possible even if a hash code has low entropy at some bits.
-     *
-     * OLD VERSION - quick, but bad for sequence 0-10000 - little entropy in higher bits - since 2003
-     * {{{
-     * var h: Int = hcode + ~(hcode << 9)
-     * h = h ^ (h >>> 14)
-     * h = h + (h << 4)
-     * h ^ (h >>> 10)
-     * }}}
-     * the rest of the computation is due to SI-5293
-     */
-    protected final def improve(hcode: Int, seed: Int): Int = rotateRight(byteswap32(hcode), seed)
-  }
-
-  /**
-   * Returns a power of two >= `target`.
-   */
-  private[collection] def nextPositivePowerOfTwo(target: Int): Int = 1 << -numberOfLeadingZeros(target - 1)
-}
-
-/** Class used internally.
- */
-private[collection] trait HashEntry[A, E <: HashEntry[A, E]] {
-  val key: A
-  var next: E = _
-}
diff --git a/src/library/scala/collection/mutable/OpenHashMap.scala b/src/library/scala/collection/mutable/OpenHashMap.scala
index 481317337663..22e99d4650d1 100644
--- a/src/library/scala/collection/mutable/OpenHashMap.scala
+++ b/src/library/scala/collection/mutable/OpenHashMap.scala
@@ -13,8 +13,8 @@
 package scala.collection
 package mutable
 
+import java.lang.Integer.numberOfLeadingZeros
 import java.util.ConcurrentModificationException
-
 import scala.collection.generic.DefaultSerializable
 
 /**
@@ -41,6 +41,8 @@ object OpenHashMap extends MapFactory[OpenHashMap] {
   final private class OpenEntry[Key, Value](var key: Key,
                                             var hash: Int,
                                             var value: Option[Value])
+
+  private[mutable] def nextPositivePowerOfTwo(target: Int): Int = 1 << -numberOfLeadingZeros(target - 1)
 }
 
 /** A mutable hash map based on an open addressing method. The precise scheme is
@@ -75,7 +77,7 @@ class OpenHashMap[Key, Value](initialSize : Int)
 
   override def mapFactory: MapFactory[OpenHashMap] = OpenHashMap
 
-  private[this] val actualInitialSize = HashTable.nextPositivePowerOfTwo(initialSize)
+  private[this] val actualInitialSize = OpenHashMap.nextPositivePowerOfTwo(initialSize)
 
   private[this] var mask = actualInitialSize - 1
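
For reference (not part of the patch itself): the only logic kept alive by this change is the power-of-two rounding helper, which the patch re-creates as a private method on the OpenHashMap companion object. A minimal sketch of what it computes, assuming a Scala REPL or worksheet:

    import java.lang.Integer.numberOfLeadingZeros

    // Rounds target up to the nearest power of two >= target.
    // Int shift amounts use only the low 5 bits, so -numberOfLeadingZeros(n)
    // behaves like (32 - numberOfLeadingZeros(n)) in the cases below.
    def nextPositivePowerOfTwo(target: Int): Int = 1 << -numberOfLeadingZeros(target - 1)

    nextPositivePowerOfTwo(1)   // 1
    nextPositivePowerOfTwo(16)  // 16 (powers of two are unchanged)
    nextPositivePowerOfTwo(17)  // 32 (rounds up)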