From 299b96de83ce9ddd33d372b87e1998762e7298bb Mon Sep 17 00:00:00 2001 From: nelson-brochado Date: Mon, 13 Feb 2017 23:03:09 +0100 Subject: [PATCH] Fixed a few things in HashTable and Heap --- ands/ds/HashTable.py | 177 ++++++++++++++++++------------------- ands/ds/Heap.py | 17 ++-- tests/ds/test_HashTable.py | 26 +++--- 3 files changed, 109 insertions(+), 111 deletions(-) diff --git a/ands/ds/HashTable.py b/ands/ds/HashTable.py index 9cb77295..6337adc3 100644 --- a/ands/ds/HashTable.py +++ b/ands/ds/HashTable.py @@ -8,15 +8,15 @@ Created: 01/06/2015 -Updated: 21/02/2016 +Updated: 13/02/2017 # Description -Hash table that re-sizes if no more slot is available. +Hash table that re-sizes if no more slots are available. The process of re-sizing doubles the current capacity of the hash table each time (for now). It uses [linear probing](https://en.wikipedia.org/wiki/Linear_probing) when there's a collision. The hash function uses both the Python's built-in `hash` function and the `%` operator. 
-You can access and put an item in the hash table by using the same convinient notation +You can access and put an item in the hash table by using the same convenient notation that is used by the Python's standard `dict` class, that is: h = HashTable() @@ -27,21 +27,36 @@ - [http://interactivepython.org/runestone/static/pythonds/SortSearch/Hashing.html](http://interactivepython.org/runestone/static/pythonds/SortSearch/Hashing.html) - [http://stackoverflow.com/questions/279539/best-way-to-remove-an-entry-from-a-hash-table](http://stackoverflow.com/questions/279539/best-way-to-remove-an-entry-from-a-hash-table) +- [http://stackoverflow.com/questions/9835762/find-and-list-duplicates-in-a-list](http://stackoverflow.com/questions/9835762/find-and-list-duplicates-in-a-list) +- [http://stackoverflow.com/questions/1541797/check-for-duplicates-in-a-flat-list](http://stackoverflow.com/questions/1541797/check-for-duplicates-in-a-flat-list) """ +from collections import Counter + from tabulate import tabulate -__all__ = ["HashTable", "has_duplicates", "find_duplicates"] +__all__ = ["HashTable", "has_duplicates_ignore_nones", "find_duplicates_ignore_nones"] class HashTable: def __init__(self, capacity: int = 11): - self.n = capacity - self.keys = [None] * self.n - self.values = [None] * self.n + assert isinstance(capacity, int) + self._n = capacity + self._keys = [None] * self._n + self._values = [None] * self._n - # HASH FUNCTIONS + @property + def size(self): + """Returns the number of pairs key-value in this map.""" + assert len(self._keys) == len(self._values) == self._n + return sum(k is not None for k in self._keys) + + @property + def capacity(self): + """Returns the size of the internal buffers that store the keys and the values.""" + assert len(self._keys) == len(self._values) == self._n + return len(self._keys) def hash_code(self, key, size: int) -> int: """Returns a hash code (an int) between 0 and `size` (excluded). 
@@ -57,34 +72,32 @@ def rehash(self, old_hash: int, size: int) -> int: we want to have a new hash value from the old hash value.""" return (old_hash + 1) % size - # PUT - - def put(self, key: object, value: object): - """Inserts the pair `key`-`value` in this map. + def put(self, key: object, value: object) -> None: + """Inserts the pair `key`/`value` in this map. - If `key` is `None`, a `TypeError` is raised, - because keys cannot be `None`.""" + If `key` is `None`, a `TypeError` is raised, because keys cannot be `None`.""" if key is None: raise TypeError("key cannot be None.") - assert not has_duplicates(self.keys) - a = self._put(key, value, self.n) - assert not has_duplicates(self.keys) - return a + assert not has_duplicates_ignore_nones(self._keys) + self._put(key, value, self._n) + assert not has_duplicates_ignore_nones(self._keys) - def _put(self, key, value, size): - assert not has_duplicates(self.keys), "precondition in _put" + def _put(self, key: object, value: object, size: int) -> None: + """Helper method of `self.put` and thus it's considered PRIVATE.""" + + assert not has_duplicates_ignore_nones(self._keys) hash_value = self.hash_code(key, size) # No need to allocate new space. - if self.keys[hash_value] is None: - self.keys[hash_value] = key - self.values[hash_value] = value + if self._keys[hash_value] is None: + self._keys[hash_value] = key + self._values[hash_value] = value # If self already contains key, then its value is overridden. 
- elif self.keys[hash_value] == key: - self.values[hash_value] = value + elif self._keys[hash_value] == key: + self._values[hash_value] = value # Collision: there's already a key-value pair # at the slot dedicated to this key-value pair, @@ -94,8 +107,7 @@ def _put(self, key, value, size): next_slot = self.rehash(hash_value, size) rehashed = False - while self.keys[next_slot] is not None and self.keys[ - next_slot] != key: + while self._keys[next_slot] is not None and self._keys[next_slot] != key: next_slot = self.rehash(next_slot, size) @@ -103,52 +115,48 @@ def _put(self, key, value, size): if next_slot == hash_value: rehashed = True - keys = self.keys - values = self.values + keys = self._keys + values = self._values - new_size = len(self.keys) * 2 + 1 - self.keys = [None] * new_size - self.values = [None] * new_size + new_size = len(self._keys) * 2 + 1 + self._keys = [None] * new_size + self._values = [None] * new_size - # Reashing and putting all elements again + # Rehashing and putting all elements again # Note that the following call to self._put # will never reach this statement # because there will be slots available for k in keys: - v = self._get(k, keys, values, self.n) + v = self._get(k, keys, values, self._n) self._put(k, v, new_size) self._put(key, value, new_size) - self.n = new_size + self._n = new_size # We exited the loop either because # we have found a free slot or a slot containing our key. # (and not after having re-sized the table!) 
if not rehashed: - if self.keys[next_slot] is None: - self.keys[next_slot] = key - self.values[next_slot] = value + if self._keys[next_slot] is None: + self._keys[next_slot] = key + self._values[next_slot] = value else: - assert self.keys[next_slot] == key - self.values[next_slot] = value - - if has_duplicates(self.keys): - find_duplicates(self.keys) + assert self._keys[next_slot] == key + self._values[next_slot] = value - assert not has_duplicates(self.keys), "postcondition in _put" + assert not has_duplicates_ignore_nones(self._keys) - def get(self, key): + def get(self, key: object) -> object: """Returns the value associated with `key`. - It returns `None` if there's no value associated with `key`. - If `key` is `None`, a `TypeError` is raised, - because keys cannot be None.""" + If `key` is `None`, a `TypeError` is raised, because keys cannot be None.""" if key is None: raise TypeError("key cannot be None.") - return self._get(key, self.keys, self.values, self.n) + return self._get(key, self._keys, self._values, self._n) - def _get(self, key, keys, values, size): - assert not has_duplicates(keys), "precondition in _get" + def _get(self, key: object, keys: list, values: list, size: int) -> object: + """Helper method of `self.get` and thus it's considered PRIVATE.""" + assert not has_duplicates_ignore_nones(keys) hash_value = self.hash_code(key, size) @@ -171,62 +179,53 @@ def _get(self, key, keys, values, size): if position == hash_value: stop = True - assert not has_duplicates(keys), "postcondition _get" + assert not has_duplicates_ignore_nones(keys) return data - def __getitem__(self, key): - return self.get(key) - - def __setitem__(self, key, value): - self.put(key, value) - - def delete(self, key): - """Deletes the mapping (if any) between `key` - and its corresponding associated value. - If there's no mapping, `None` is returned.""" + def delete(self, key) -> object: + """Deletes the mapping between `key` and its corresponding associated value. 
+ If there's no mapping, nothing is done.""" try: - i = self.keys.index(key) - v = self.values[i] - self.keys[i] = self.values[i] = None + i = self._keys.index(key) + v = self._values[i] + self._keys[i] = self._values[i] = None return v except ValueError: - return None + pass - @property - def size(self): - """Returns the number of pairs key-value in this map.""" - assert len(self.keys) == len(self.values) == self.n - return sum(k is not None for k in self.keys) - - @property - def capacity(self): - """Returns the size of the internal buffers that store the keys and the values.""" - assert len(self.keys) == len(self.values) == self.n - return len(self.keys) - - def show(self): - """Pretty-prints (using `tabulate.tabulate()`) this table.""" + def show(self) -> None: + """Prints this hash table in table-like format.""" c = 0 data = [] - for i in range(len(self.keys)): - if self.keys[i] is not None: + for i in range(len(self._keys)): + if self._keys[i] is not None: c += 1 - data.append([c, self.keys[i], self.values[i]]) + data.append([c, self._keys[i], self._values[i]]) print(tabulate(data, headers=["#", "Keys", "Values"], tablefmt="grid")) + def __getitem__(self, key): + return self.get(key) + + def __setitem__(self, key, value): + self.put(key, value) + def __str__(self): - return str([(k, v) - for k, v in zip(self.keys, self.values) if k is not None]) + return str([(k, v) for k, v in zip(self._keys, self._values) if k is not None]) def __repr__(self): return self.__str__() -def has_duplicates(ls): +def has_duplicates_ignore_nones(ls: list) -> bool: + """Returns `True` if `ls` does contain duplicate elements, `False` otherwise. 
+ + None items in `ls` are not considered.""" ls = [item for item in ls if item is not None] return len(ls) != len(set(ls)) -def find_duplicates(ls): - return [item for item, count in collections.Counter( - ls).items() if (count > 1 and item is not None)] +def find_duplicates_ignore_nones(ls: list) -> list: + """Returns a list with the items from `ls` which appear more than once in the same list. + + None items in `ls` are ignored in this procedure.""" + return [item for item, count in Counter(ls).items() if (count > 1 and item is not None)] diff --git a/ands/ds/Heap.py b/ands/ds/Heap.py index 60d27f77..98f6d4df 100755 --- a/ands/ds/Heap.py +++ b/ands/ds/Heap.py @@ -8,7 +8,7 @@ Created: 01/07/2015 -Updated: 05/02/2017 +Updated: 13/02/2017 # Description @@ -21,12 +21,11 @@ - Slides by prof. A. Carzaniga - Chapter 13 of [Introduction to Algorithms (3rd ed.)](https://mitpress.mit.edu/books/introduction-algorithms) by CLRS - [NotImplementedError](https://docs.python.org/3/library/exceptions.html#NotImplementedError) - +- [How do I check if an object is an instance of a given class or of a subclass of it?](http://effbot.org/pyfaq/how-do-i-check-if-an-object-is-an-instance-of-a-given-class-or-of-a-subclass-of-it.htm) """ import io import math -from collections import Iterable __all__ = ["BinaryHeap", "HeapNode", "build_pretty_binary_heap"] @@ -331,7 +330,7 @@ def is_on_even_level(self, i: int) -> bool: return int(math.log2(i + 1) % 2) == 0 def is_on_odd_level(self, i: int) -> bool: - """Returns `True` (`False`) if `self.is_on_even_level(i)` returns `False` (`True`).""" + """Returns `True` when `self.is_on_even_level(i)` returns `False`, and vice-versa.""" return not self.is_on_even_level(i) def __str__(self) -> str: @@ -342,13 +341,13 @@ def __repr__(self) -> str: @staticmethod def _create_list_of_heap_nodes(ls: list) -> list: - """Creates and returns a list of `HeapNode` - objects with the objects in `ls`. 
+ """Creates and returns a list of `HeapNode` objects with the objects in `ls`. **Time Complexity:** O(n).""" nodes = [] - for _, x in enumerate(ls): + for x in ls: # x represents also its priority. + # Check if x is either an int or a float. if isinstance(x, (int, float)): nodes.append(HeapNode(x)) else: @@ -376,8 +375,8 @@ def build_pretty_binary_heap(heap: list, total_width=36, fill=" ") -> str: To change the length of the line under the heap, you can simply change the line_length variable.""" - if not isinstance(heap, Iterable): - raise TypeError("heap must be an iterable object") + if not isinstance(heap, list): + raise TypeError("heap must be an list object") if len(heap) == 0: return "Nothing to print: heap is empty." diff --git a/tests/ds/test_HashTable.py b/tests/ds/test_HashTable.py index a90c3987..5867548c 100755 --- a/tests/ds/test_HashTable.py +++ b/tests/ds/test_HashTable.py @@ -18,7 +18,7 @@ import unittest from random import sample, randint, uniform, choice -from ands.ds.HashTable import HashTable, has_duplicates +from ands.ds.HashTable import HashTable, has_duplicates_ignore_nones def gen_rand_str(size): @@ -53,7 +53,7 @@ def find_all_indices(e, ls): p = a a = sample(a, len(a)) - self.assertFalse(has_duplicates(t.keys)) + self.assertFalse(has_duplicates_ignore_nones(t._keys)) for i, num in enumerate(a): try: @@ -61,7 +61,7 @@ def find_all_indices(e, ls): except ValueError: find_all_indices(num, a).index(t.get(num)) - self.assertFalse(has_duplicates(t.keys)) + self.assertFalse(has_duplicates_ignore_nones(t._keys)) def test_put_and_get_1(self): """Testing that errors are raised.""" @@ -88,12 +88,12 @@ def test_put_and_get_2(self, n=100): t.put(letter, i + j) self.assertEqual(t.size, len(ls)) - self.assertFalse(has_duplicates(t.keys)) + self.assertFalse(has_duplicates_ignore_nones(t._keys)) for i, letter in enumerate(ls): self.assertEqual(t.get(letter), ls.index(letter) + n) self.assertEqual(t.size, len(ls)) - self.assertFalse(has_duplicates(t.keys)) 
+ self.assertFalse(has_duplicates_ignore_nones(t._keys)) def test_put_and_get_3(self, n=100): """Testing insertion of permutations of the same items @@ -121,12 +121,12 @@ def test_put_and_get_3(self, n=100): a = sample(a, len(a)) self.assertEqual(t.size, len(a)) - self.assertFalse(has_duplicates(t.keys)) + self.assertFalse(has_duplicates_ignore_nones(t._keys)) for i, letter in enumerate(a): self.assertEqual(t.get(letter), p.index(letter) + n) self.assertEqual(t.size, len(a)) - self.assertFalse(has_duplicates(t.keys)) + self.assertFalse(has_duplicates_ignore_nones(t._keys)) def test_put_and_get_ints(self): self.put_and_get_numbers() @@ -157,12 +157,12 @@ def test_put_and_get_strings(self, n=100): a = sample(a, len(a)) self.assertEqual(t.size, len(a)) - self.assertFalse(has_duplicates(t.keys)) + self.assertFalse(has_duplicates_ignore_nones(t._keys)) for i, s in enumerate(a): self.assertEqual(t.get(s), p.index(s) + n) self.assertEqual(t.size, len(a)) - self.assertFalse(has_duplicates(t.keys)) + self.assertFalse(has_duplicates_ignore_nones(t._keys)) def test_put_and_get_non_hashable_type(self): t = HashTable() @@ -189,21 +189,21 @@ def test_delete_letters(self, n=100): t[letter] = i + j self.assertEqual(t.size, len(ls)) - self.assertFalse(has_duplicates(t.keys)) + self.assertFalse(has_duplicates_ignore_nones(t._keys)) for i, letter in enumerate(ls): self.assertEqual(t[letter], ls.index(letter) + n) self.assertEqual(t.size, len(ls)) - self.assertFalse(has_duplicates(t.keys)) + self.assertFalse(has_duplicates_ignore_nones(t._keys)) for i, letter in enumerate(ls): v = t.delete(letter) self.assertIsNotNone(v) - self.assertFalse(has_duplicates(t.keys)) + self.assertFalse(has_duplicates_ignore_nones(t._keys)) self.assertEqual(t.size, len(ls) - (i + 1)) self.assertEqual(t.size, 0) - self.assertFalse(has_duplicates(t.keys)) + self.assertFalse(has_duplicates_ignore_nones(t._keys)) def test_empty_hash_table_capacity(self): h = HashTable()