Skip to content

Commit

Permalink
Fixed a few things in HashTable and Heap
Browse files Browse the repository at this point in the history
  • Loading branch information
nbro committed Feb 13, 2017
1 parent 61712ab commit 299b96d
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 111 deletions.
177 changes: 88 additions & 89 deletions ands/ds/HashTable.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
Created: 01/06/2015
Updated: 21/02/2016
Updated: 13/02/2017
# Description
Hash table that re-sizes if no more slots are available.
The process of re-sizing roughly doubles the current capacity of the hash table each time
(the new capacity is `2 * capacity + 1`, for now).
It uses [linear probing](https://en.wikipedia.org/wiki/Linear_probing) when there's a collision.
The hash function uses both Python's built-in `hash` function and the `%` operator.
You can access and put an item in the hash table by using the same convenient notation
that is used by Python's standard `dict` class, that is:
h = HashTable()
Expand All @@ -27,21 +27,36 @@
- [http://interactivepython.org/runestone/static/pythonds/SortSearch/Hashing.html](http://interactivepython.org/runestone/static/pythonds/SortSearch/Hashing.html)
- [http://stackoverflow.com/questions/279539/best-way-to-remove-an-entry-from-a-hash-table](http://stackoverflow.com/questions/279539/best-way-to-remove-an-entry-from-a-hash-table)
- [http://stackoverflow.com/questions/9835762/find-and-list-duplicates-in-a-list](http://stackoverflow.com/questions/9835762/find-and-list-duplicates-in-a-list)
- [http://stackoverflow.com/questions/1541797/check-for-duplicates-in-a-flat-list](http://stackoverflow.com/questions/1541797/check-for-duplicates-in-a-flat-list)
"""

from collections import Counter

from tabulate import tabulate

__all__ = ["HashTable", "has_duplicates", "find_duplicates"]
__all__ = ["HashTable", "has_duplicates_ignore_nones", "find_duplicates_ignore_nones"]


class HashTable:
def __init__(self, capacity: int = 11):
self.n = capacity
self.keys = [None] * self.n
self.values = [None] * self.n
assert isinstance(capacity, int)
self._n = capacity
self._keys = [None] * self._n
self._values = [None] * self._n

# HASH FUNCTIONS
@property
def size(self):
    """Number of key/value pairs currently stored in this map.

    A slot whose key is `None` is empty and is not counted."""
    assert len(self._keys) == len(self._values) == self._n
    occupied = [k for k in self._keys if k is not None]
    return len(occupied)

@property
def capacity(self):
    """Length of the internal buffers that store the keys and the values."""
    n_slots = len(self._keys)
    # Both buffers and the cached length `_n` must always agree.
    assert n_slots == len(self._values) == self._n
    return n_slots

def hash_code(self, key, size: int) -> int:
"""Returns a hash code (an int) between 0 and `size` (excluded).
Expand All @@ -57,34 +72,32 @@ def rehash(self, old_hash: int, size: int) -> int:
we want to have a new hash value from the old hash value."""
return (old_hash + 1) % size

# PUT

def put(self, key: object, value: object) -> None:
    """Inserts the pair `key`/`value` in this map.

    The actual insertion is delegated to `self._put`, using the current
    number of slots `self._n`.

    If `key` is `None`, a `TypeError` is raised, because keys cannot be `None`
    (`None` marks an empty slot in the key buffer)."""
    if key is None:
        raise TypeError("key cannot be None.")

    # Invariant checked before and after the insertion: no key is stored twice.
    assert not has_duplicates_ignore_nones(self._keys)
    self._put(key, value, self._n)
    assert not has_duplicates_ignore_nones(self._keys)

def _put(self, key, value, size):
assert not has_duplicates(self.keys), "precondition in _put"
def _put(self, key: object, value: object, size: int) -> None:
"""Helper method of `self.put` and thus it's considered PRIVATE."""

assert not has_duplicates_ignore_nones(self._keys)

hash_value = self.hash_code(key, size)

# No need to allocate new space.
if self.keys[hash_value] is None:
self.keys[hash_value] = key
self.values[hash_value] = value
if self._keys[hash_value] is None:
self._keys[hash_value] = key
self._values[hash_value] = value

# If self already contains key, then its value is overridden.
elif self.keys[hash_value] == key:
self.values[hash_value] = value
elif self._keys[hash_value] == key:
self._values[hash_value] = value

# Collision: there's already a key-value pair
# at the slot dedicated to this key-value pair,
Expand All @@ -94,61 +107,56 @@ def _put(self, key, value, size):
next_slot = self.rehash(hash_value, size)
rehashed = False

while self.keys[next_slot] is not None and self.keys[
next_slot] != key:
while self._keys[next_slot] is not None and self._keys[next_slot] != key:

next_slot = self.rehash(next_slot, size)

# Allocate new buffer of length len(self.keys)*2 + 1
if next_slot == hash_value:
rehashed = True

keys = self.keys
values = self.values
keys = self._keys
values = self._values

new_size = len(self.keys) * 2 + 1
self.keys = [None] * new_size
self.values = [None] * new_size
new_size = len(self._keys) * 2 + 1
self._keys = [None] * new_size
self._values = [None] * new_size

# Reashing and putting all elements again
# Rehashing and putting all elements again
# Note that the following call to self._put
# will never reach this statement
# because there will be slots available
for k in keys:
v = self._get(k, keys, values, self.n)
v = self._get(k, keys, values, self._n)
self._put(k, v, new_size)

self._put(key, value, new_size)
self.n = new_size
self._n = new_size

# We exited the loop either because
# we have found a free slot or a slot containing our key.
# (and not after having re-sized the table!)
if not rehashed:
if self.keys[next_slot] is None:
self.keys[next_slot] = key
self.values[next_slot] = value
if self._keys[next_slot] is None:
self._keys[next_slot] = key
self._values[next_slot] = value
else:
assert self.keys[next_slot] == key
self.values[next_slot] = value

if has_duplicates(self.keys):
find_duplicates(self.keys)
assert self._keys[next_slot] == key
self._values[next_slot] = value

assert not has_duplicates(self.keys), "postcondition in _put"
assert not has_duplicates_ignore_nones(self._keys)

def get(self, key: object) -> object:
    """Returns the value associated with `key`.

    The lookup is delegated to `self._get` over the current key and value
    buffers. It returns `None` if there's no value associated with `key`.

    If `key` is `None`, a `TypeError` is raised, because keys cannot be `None`."""
    if key is None:
        raise TypeError("key cannot be None.")
    return self._get(key, self._keys, self._values, self._n)

def _get(self, key, keys, values, size):
assert not has_duplicates(keys), "precondition in _get"
def _get(self, key: object, keys: list, values: list, size: int) -> object:
"""Helper method of `self.get` and thus it's considered PRIVATE."""
assert not has_duplicates_ignore_nones(keys)

hash_value = self.hash_code(key, size)

Expand All @@ -171,62 +179,53 @@ def _get(self, key, keys, values, size):
if position == hash_value:
stop = True

assert not has_duplicates(keys), "postcondition _get"
assert not has_duplicates_ignore_nones(keys)
return data

def __getitem__(self, key):
return self.get(key)

def __setitem__(self, key, value):
self.put(key, value)

def delete(self, key: object) -> object:
    """Deletes the mapping between `key` and its associated value.

    Returns the value that was associated with `key`.
    If there's no mapping for `key`, nothing is done and `None` is returned.

    The key is located with a linear scan of the key buffer (`list.index`)."""
    try:
        i = self._keys.index(key)
    except ValueError:
        # `key` is not stored in this table: nothing to delete.
        return None

    v = self._values[i]
    # NOTE(review): the slot is simply emptied; if lookups stop probing at an
    # empty slot, keys that collided past this position may become
    # unreachable -- confirm against `_get`.
    self._keys[i] = self._values[i] = None
    return v

@property
def size(self):
    """Returns the number of pairs key-value in this map.

    Reads the private buffers `_keys`/`_values` and the cached length `_n`,
    like the rest of the class."""
    assert len(self._keys) == len(self._values) == self._n
    return sum(k is not None for k in self._keys)

@property
def capacity(self):
    """Returns the size of the internal buffers that store the keys and the values.

    Reads the private buffers `_keys`/`_values` and the cached length `_n`,
    like the rest of the class."""
    assert len(self._keys) == len(self._values) == self._n
    return len(self._keys)

def show(self) -> None:
    """Prints this hash table in table-like format.

    Each occupied slot becomes one row `[#, key, value]`, where `#` is a
    1-based counter over the occupied slots in buffer order.
    The table is rendered with `tabulate.tabulate` using the "grid" format."""
    occupied = ((k, v) for k, v in zip(self._keys, self._values) if k is not None)
    data = [[c, k, v] for c, (k, v) in enumerate(occupied, start=1)]
    print(tabulate(data, headers=["#", "Keys", "Values"], tablefmt="grid"))

def __getitem__(self, key):
return self.get(key)

def __setitem__(self, key, value):
self.put(key, value)

def __str__(self):
return str([(k, v)
for k, v in zip(self.keys, self.values) if k is not None])
return str([(k, v) for k, v in zip(self._keys, self._values) if k is not None])

def __repr__(self):
return self.__str__()


def has_duplicates_ignore_nones(ls: list) -> bool:
    """Returns `True` if `ls` contains duplicate elements, `False` otherwise.

    `None` items in `ls` are not considered, so any number of `None`s
    never counts as a duplicate. The remaining items must be hashable,
    because they are collected into a `set`."""
    present = [item for item in ls if item is not None]
    return len(present) != len(set(present))


def find_duplicates_ignore_nones(ls: list) -> list:
    """Returns a list with the items from `ls` which appear more than once in the same list.

    Each duplicated item is reported once. `None` items in `ls` are ignored
    in this procedure, even if `None` occurs several times."""
    return [item for item, count in Counter(ls).items() if (count > 1 and item is not None)]
17 changes: 8 additions & 9 deletions ands/ds/Heap.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
Created: 01/07/2015
Updated: 05/02/2017
Updated: 13/02/2017
# Description
Expand All @@ -21,12 +21,11 @@
- Slides by prof. A. Carzaniga
- Chapter 13 of [Introduction to Algorithms (3rd ed.)](https://mitpress.mit.edu/books/introduction-algorithms) by CLRS
- [NotImplementedError](https://docs.python.org/3/library/exceptions.html#NotImplementedError)
- [How do I check if an object is an instance of a given class or of a subclass of it?](http://effbot.org/pyfaq/how-do-i-check-if-an-object-is-an-instance-of-a-given-class-or-of-a-subclass-of-it.htm)
"""

import io
import math
from collections import Iterable

__all__ = ["BinaryHeap", "HeapNode", "build_pretty_binary_heap"]

Expand Down Expand Up @@ -331,7 +330,7 @@ def is_on_even_level(self, i: int) -> bool:
return int(math.log2(i + 1) % 2) == 0

def is_on_odd_level(self, i: int) -> bool:
    """Returns `True` when `self.is_on_even_level(i)` returns `False`, and vice-versa."""
    return not self.is_on_even_level(i)

def __str__(self) -> str:
Expand All @@ -342,13 +341,13 @@ def __repr__(self) -> str:

@staticmethod
def _create_list_of_heap_nodes(ls: list) -> list:
"""Creates and returns a list of `HeapNode`
objects with the objects in `ls`.
"""Creates and returns a list of `HeapNode` objects with the objects in `ls`.
**Time Complexity:** O(n)."""
nodes = []
for _, x in enumerate(ls):
for x in ls:
# x represents also its priority.
# Check if x is either an int or a float.
if isinstance(x, (int, float)):
nodes.append(HeapNode(x))
else:
Expand Down Expand Up @@ -376,8 +375,8 @@ def build_pretty_binary_heap(heap: list, total_width=36, fill=" ") -> str:
To change the length of the line under the heap,
you can simply change the line_length variable."""
if not isinstance(heap, Iterable):
raise TypeError("heap must be an iterable object")
if not isinstance(heap, list):
raise TypeError("heap must be an list object")
if len(heap) == 0:
return "Nothing to print: heap is empty."

Expand Down

0 comments on commit 299b96d

Please sign in to comment.