diff --git a/.gitignore b/.gitignore index 01767ca9..9e25aea0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,25 +1,26 @@ -# FILES +# Files + ands/algorithms/greedy/huffman.py ands/ds/Graph.py -WARNINGS +/ands/ds/MinPriorityQueue.py *.egg *.py[cod] *$py.class .DS_Store -# DIRECTORIES -ands/ds/TST/ -ands/algorithms/graphs -ands/algorithms/unclassified -ands/algorithms/numerical -venv/ +# Directories + +ands/algorithms/graphs/ +ands/algorithms/unclassified/ +ands/algorithms/numerical/ +/ands/algorithms/math/combinatorics/ +/ands/algorithms/math/ + +.idea/ _ignore/ +venv/ env/ *.egg-info/ __pycache__/ -.idea/ -/ands/algorithms/math/combinatorics/ -/ands/algorithms/math/ -/ands/ds/MinPriorityQueue.py diff --git a/ands/ds/TST.py b/ands/ds/TST.py index 7c8a6bb1..7b55a775 100644 --- a/ands/ds/TST.py +++ b/ands/ds/TST.py @@ -17,9 +17,7 @@ An advantage compared to hash maps is that ternary search tries support sorting, but the _keys_ of a ternary-search trie can only be _strings_, -whereas a hash map supports any kind of hashable key. - -This TST should NOT allow empty strings to be inserted. +whereas a hash map supports any kind of hashable keys. ## References @@ -87,27 +85,47 @@ def has_children(self) -> bool: class TST: - def __init__(self, root=None): - self.n = 0 # number of key/values pairs - self.root = root + """Methods or fields that start with an underscore _ are considered private, + so they should not be accessed or modified by clients of this class. + + This TST does not allow (through public methods) empty strings to be inserted. 
+ + In general the way the ternary search tree looks like + depends highly on the order of insertion of the keys, + that is, inserting the same keys but in different orders + produces internally a different structure or shape of the TST.""" + + def __init__(self): + self._n = 0 # number of key/values pairs + self._root = None def size(self): - return self.n + return self._n - def is_root(self, u): - return self.root == u + def is_empty(self): + return self._n == 0 + + def _is_root(self, u: TSTNode): + return self._root == u def insert(self, key: str, value: object): """Inserts the key-value pair into the symbol table, overwriting the old value with the new value, if the key is already in the symbol table.""" + + # Preconditions + assert self._n >= 0 and (self._root if self._n > 0 else True) + if not isinstance(key, str): raise TypeError("key must be an instance of type str.") if not key: raise ValueError("key must be a string of length >= 1.") if value is None: - raise TypeError("value cannot be None.") - self.root = self._insert(self.root, key, value, 0) + raise ValueError("value cannot be None.") + self._root = self._insert(self._root, key, value, 0) + + # Postconditions + assert self._n >= 0 and (self._root if self._n > 0 else True) def _insert(self, node: TSTNode, key: str, value: object, index: int): """Inserts key into self starting from node. @@ -130,71 +148,55 @@ def _insert(self, node: TSTNode, key: str, value: object, index: int): node.mid.parent = node else: if not node.value: - self.n += 1 + self._n += 1 node.value = value return node - def delete(self, key: str) -> TSTNode: - """Deletes and returns the value associated with key in this TST. 
- This operation does not change the structure of this TST, - but only merely makes it "forget" that there's a map with key `key`.""" + def search(self, key: str): + """Iterative alternative to `self.search_recursively`.""" if not isinstance(key, str): raise TypeError("key must be an instance of type str.") if not key: raise ValueError("key must be a string of length >= 1.") - return self._delete(self.root, key) - def _delete(self, node: TSTNode, key: str): - """Implementation based on the non-recursive implementation of _search.""" + result = self.search_recursively(key) - def _delete_fix(u): - while u and not u.has_children(): - if self.is_root(u): - assert u.parent is None - self.root = None - break - if u.is_left_child(): - u.parent.left = None - elif u.is_right_child(): - u.parent.right = None - else: - u.parent.mid = None - p = u.parent - u.parent = None - u = p + # Postcondition: self.search_recursively and self.search_iteratively + # should always produce the same output given the same input key. + assert result == self.search_iteratively(key) - if node is None: - return None - - for i in range(len(key) - 1): - while node and key[i] != node.key: - if key[i] < node.key: - node = node.left - else: - node = node.right - if node is None: # unsuccessful search - return None - else: - # arriving here only if exited from the while loop - # because the condition key[i] != node.key was false - node = node.mid - if not node or node.key != key[-1]: - return None - else: - result = node.value - node.value = None - self.n -= 1 - _delete_fix(node) - return result + return result def search_recursively(self, key: str): - """Returns the value associated with key, if key is in self, else None.""" + """Returns the value associated with key, if key is in self, else None. + + The search in a TST works as follows. + We start at the root and we compare its character with the first character of key. + - If they are the same, we follow the middle link of the root node. 
+ - If the first character of key is smaller lexicographically + than the key at the root, then we take the left link or pointer. + We do this because we know that all strings that start with characters + that are smaller lexicographically than key[0] are on its left subtree. + - If the first character of key is greater lexicographically + than the key at the root, we take similarly the right link or pointer. + We keep applying this idea at every node. + Moreover, WHEN THERE'S A MATCH, next time we compare the key + of the next node with the next character of key. + For example, if there's a match between the first node (the root) and key[0], + we follow the middle link, and the next comparison is between + the key of the specific next node and key[1], not key[0]!""" if not isinstance(key, str): raise TypeError("key must be an instance of type str.") if not key: raise ValueError("key must be a string of length >= 1.") - node = self._search_recursively(self.root, key, 0) - return node.value if node else None + + node = self._search_recursively(self._root, key, 0) + + if node is not None: + assert node.value is not None # Postcondition: values should never be None! + return node.value + else: + return None def _search_recursively(self, node: TSTNode, key: str, index: int): """Returns sub-TST corresponding to given key.""" @@ -205,21 +207,105 @@ def _search_recursively(self, node: TSTNode, key: str, index: int): return self._search_recursively(node.left, key, index) elif key[index] > node.key: return self._search_recursively(node.right, key, index) - elif index < len(key) - 1: + elif index < len(key) - 1: # This is a match, but we're not at the last character of key. return self._search_recursively(node.mid, key, index + 1) - else: + else: # This is a match and we're at the last character of key. 
+ return node - def search(self, key: str): - """Iterative alternative to `self.search_recursively`.""" + def search_iteratively(self, key: str): + """Iterative alternative to self.search_recursively. + The search starts, as the recursive version, from the root.""" if not isinstance(key, str): raise TypeError("key must be an instance of type str.") if not key: raise ValueError("key must be a string of length >= 1.") - return TST._search(self.root, key) - @staticmethod - def _search(node, key): + node = self._root + + if node is None: + return None + + # Up to the penultimate index (i.e. len(key) - 2) + # because if we reach the penultimate character and it's a match, + # then we follow the mid node (i.e. we end up in what's possibly the last node). + index = 0 + + while index < len(key) - 1: + while node and key[index] != node.key: + if key[index] < node.key: + node = node.left + else: + node = node.right + + if node is None: # Unsuccessful search. + return None + else: + # Arriving here only if exited from the while loop + # because the condition key[index] != node.key was false, + # that is key[index] == node.key, thus we follow the middle link. + node = node.mid + index += 1 + + assert index == len(key) - 1 # Postcondition: index refers to the last character of key. + + # If node is (still) not None, then we may still need to go left or right, + # and we stop when either we find a node which has the same key as the last character of key, + # or when node ends up being None, i.e. the key does not exist in this TST. + while node and key[index] != node.key: + if key[index] < node.key: + node = node.left + else: + node = node.right + + if node is None: # Unsuccessful search. + return None + else: # We exit the previous while loop because key[index] == node.key. + assert node.value is not None # NOTE(review): fires when key is only a prefix of a stored key (such nodes hold value None) - confirm. 
+ return node.value + + def contains(self, key: str): + """Returns True if the key is in self, False otherwise.""" + return self.search_recursively(key) is not None + + def delete(self, key: str) -> TSTNode: + """Deletes and returns the value associated with key in this TST. + This operation does not change the structure of this TST, + but only merely makes it "forget" that there's a map with key `key`.""" + + # Preconditions + assert self._n >= 0 and (self._root if self._n > 0 else True) + + if not isinstance(key, str): + raise TypeError("key must be an instance of type str.") + if not key: + raise ValueError("key must be a string of length >= 1.") + + d = self._delete(self._root, key) + + # Postconditions + assert self._n >= 0 and (self._root if self._n > 0 else True) + + return d + + def _delete(self, node: TSTNode, key: str): + """Implementation based on the non-recursive implementation of search_iteratively.""" + + def _delete_fix(u): + while u and not u.has_children(): + if self._is_root(u): + assert u.parent is None + self._root = None + break + if u.is_left_child(): + u.parent.left = None + elif u.is_right_child(): + u.parent.right = None + else: + u.parent.mid = None + p = u.parent + u.parent = None + u = p + if node is None: return None @@ -235,23 +321,22 @@ def _search(node, key): # arriving here only if exited from the while loop # because the condition key[i] != node.key was false node = node.mid - if not node or node.key != key[-1]: return None else: - return node.value - - def contains(self, key: str): - """Returns True if the key is in self, False otherwise.""" - return self.search_recursively(key) is not None + result = node.value + node.value = None + self._n -= 1 + _delete_fix(node) + return result def traverse(self): - return self._traverse(self.root, "") + # Assert preconditions. 
+ return self._traverse(self._root, "") def _traverse(self, node, prefix): - if node is None: + if node is None: # base case return - self._traverse(node.left, prefix) if node.value is not None: print(prefix + node.key, "=>", node.value) @@ -260,12 +345,13 @@ def _traverse(self, node, prefix): def count(self): """Counts the number of strings in self.""" - return self._count(self.root, 0) + c = self._count(self._root, 0) + assert c == self.size() # Post-condition. + return c def _count(self, node, counter): - if node is None: + if node is None: # base case return counter - counter = self._count(node.left, counter) if node.value is not None: counter += 1 diff --git a/tests/ds/test_TST.py b/tests/ds/test_TST.py new file mode 100644 index 00000000..73a9f041 --- /dev/null +++ b/tests/ds/test_TST.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + + +""" +# Meta info + +Author: Nelson Brochado +Created: 29/01/2017 +Updated: 29/01/2017 + +# Description + +Unit tests for the TST class. + +""" + +import random +import string +import unittest + +from ands.ds.TST import TST + + +class TestTST(unittest.TestCase): + def gen_rand_str(self, n): + """Generates a string of size n of printable characters.""" + return "".join(random.choice(string.printable) for _ in range(n)) + + def test_creation(self): + t = TST() + # state guaranteed at creation time + self.assertEqual(t.count(), 0) + self.assertTrue(t.is_empty()) + self.assertIsNone(t._root) + + def test_insert_key_not_string(self): + t = TST() + self.assertRaises(TypeError, t.insert, 5) + + def test_insert_key_empty_string(self): + t = TST() + self.assertRaises(ValueError, t.insert, "", 2) + + def test_insert_none_value(self): + t = TST() + self.assertRaises(ValueError, t.insert, "key", None) + + def test_insert_one(self): + t = TST() + t.insert("one", 97) + + # using just count() since count calls size() as a post-condition assertion!! 
+ self.assertEqual(t.count(), 1) + self.assertEqual(t.search("one"), 97) + + # Testing the structure of the TST remains as expected. + r = t._root + self.assertIsNone(r.left) + self.assertIsNone(r.right) + self.assertIsNone(r.parent) + self.assertIsNotNone(r.mid) + self.assertEqual(r.key, "o") + self.assertEqual(r.value, None) + + p = r + r = r.mid + self.assertIsNotNone(r) + self.assertIsNone(r.left) + self.assertIsNone(r.right) + self.assertIsNotNone(r.mid) + self.assertIs(r.parent, p) + self.assertEqual(r.key, "n") + self.assertEqual(r.value, None) + + p = r + r = r.mid + self.assertIsNotNone(r) + self.assertIsNone(r.left) + self.assertIsNone(r.right) + self.assertIsNone(r.mid) + self.assertIs(r.parent, p) + self.assertEqual(r.key, "e") + self.assertEqual(r.value, 97) + + def test_insert_some_no_update(self): + """This only tests a permutation of the keys, + but exhaustive testing is also impossible in most of the cases! + This example is based on: https://www.youtube.com/watch?v=CIGyewO7868, min. 
7.30""" + t = TST() + + t.insert("she", 2) + self.assertEqual(t.count(), 1) + self.assertEqual(t.search("she"), 2) + + t.insert("sells", 3) + self.assertEqual(t.count(), 2) + self.assertEqual(t.search("she"), 2) + self.assertEqual(t.search("sells"), 3) + + t.insert("sea", 5) + self.assertEqual(t.count(), 3) + self.assertEqual(t.search("she"), 2) + self.assertEqual(t.search("sells"), 3) + self.assertEqual(t.search("sea"), 5) + + t.insert("shells", 7) + self.assertEqual(t.count(), 4) + self.assertEqual(t.search("she"), 2) + self.assertEqual(t.search("sells"), 3) + self.assertEqual(t.search("sea"), 5) + self.assertEqual(t.search("shells"), 7) + + t.insert("by", 11) + self.assertEqual(t.count(), 5) + self.assertEqual(t.search("she"), 2) + self.assertEqual(t.search("sells"), 3) + self.assertEqual(t.search("sea"), 5) + self.assertEqual(t.search("shells"), 7) + self.assertEqual(t.search("by"), 11) + + t.insert("the", 13) + self.assertEqual(t.count(), 6) + self.assertEqual(t.search("she"), 2) + self.assertEqual(t.search("sells"), 3) + self.assertEqual(t.search("sea"), 5) + self.assertEqual(t.search("shells"), 7) + self.assertEqual(t.search("by"), 11) + self.assertEqual(t.search("the"), 13) + + def test_insert_some_with_update(self): + # Test based on example: This example is based on: https://www.youtube.com/watch?v=CIGyewO7868 + t = TST() + t.insert("she", 2) + t.insert("sells", 3) + t.insert("sea", 5) + t.insert("shells", 7) + t.insert("by", 11) + t.insert("the", 13) + + # Updating value associated with key "sea" + t.insert("sea", 17) + + self.assertEqual(t.count(), 6) # The size of the TST should not have changed! 
+ self.assertEqual(t.search("she"), 2) + self.assertEqual(t.search("sells"), 3) + self.assertEqual(t.search("sea"), 17) + self.assertEqual(t.search("shells"), 7) + self.assertEqual(t.search("by"), 11) + self.assertEqual(t.search("the"), 13) + + def test_insert_some_after_update(self): + # Test based on the example at https://www.youtube.com/watch?v=CIGyewO7868 + t = TST() + t.insert("she", 2) + t.insert("sells", 3) + t.insert("sea", 5) + t.insert("shells", 7) + t.insert("by", 11) + t.insert("the", 13) + t.insert("sea", 17) + t.insert("shore", 19) + + self.assertEqual(t.count(), 7) # 6 distinct keys plus the new key "shore"; updating "sea" adds nothing. + self.assertEqual(t.search("she"), 2) + self.assertEqual(t.search("sells"), 3) + self.assertEqual(t.search("sea"), 17) + self.assertEqual(t.search("shells"), 7) + self.assertEqual(t.search("by"), 11) + self.assertEqual(t.search("the"), 13) + self.assertEqual(t.search("shore"), 19) + + def test_insert_random_keys(self): + t = TST() + + n = random.randint(10, 100) + + random_pairs = {} + + for i in range(n): + str_size = random.randint(1, 11) + key = self.gen_rand_str(str_size) + value = random.randint(-100, 100) + random_pairs[key] = value + t.insert(key, value) + + self.assertEqual(t.count(), len(random_pairs)) + + for k, v in random_pairs.items(): + self.assertEqual(t.search(k), v) + \ No newline at end of file diff --git a/tests/ds/test_TSTNode.py b/tests/ds/test_TSTNode.py new file mode 100644 index 00000000..c76decf7 --- /dev/null +++ b/tests/ds/test_TSTNode.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + + +""" +# Meta info + +Author: Nelson Brochado +Created: 29/01/2017 +Updated: 29/01/2017 + +# Description + +Testing the TSTNode class inside TST.py. 
+ +""" + +import unittest + +from ands.ds.TST import TSTNode + + +class TestTSTNode(unittest.TestCase): + def test_create_key_not_string(self): + self.assertRaises(TypeError, TSTNode, 13) + + def test_create_key_empty_string(self): + self.assertRaises(ValueError, TSTNode, "") + + def test_create_acceptable_key(self): + self.assertIsInstance(TSTNode("unit testing"), TSTNode) + + def test_create_default(self): + u = TSTNode("default values") + self.assertEqual(u.key, "default values") + self.assertIsNone(u.value) + self.assertIsNone(u.parent) + self.assertIsNone(u.mid) + self.assertIsNone(u.left) + self.assertIsNone(u.right) + + def test_create_custom(self): + p = TSTNode("parent") + left = TSTNode("left") + mid = TSTNode("mid") + right = TSTNode("right") + u = TSTNode("u", 11, p, left, mid, right) + self.assertEqual(u.value, 11) + self.assertIs(u.parent, p) + self.assertIs(u.left, left) + self.assertIs(u.mid, mid) + self.assertIs(u.right, right) + + def test_is_left_child_no_parent(self): + u = TSTNode("u") + self.assertRaises(AttributeError, u.is_left_child) + + def test_is_left_child_false(self): + p = TSTNode("p") + u = TSTNode("u", 3, p) + self.assertFalse(u.is_left_child()) + + def test_is_left_child_true(self): + p = TSTNode("p") + u = TSTNode("u", 3, p) + p.left = u + self.assertTrue(u.is_left_child()) + + def test_is_right_child_no_parent(self): + u = TSTNode("u") + self.assertRaises(AttributeError, u.is_right_child) + + def test_is_right_child_false(self): + p = TSTNode("p") + u = TSTNode("u", 3, p) + self.assertFalse(u.is_right_child()) + + def test_is_right_child_true(self): + p = TSTNode("p") + u = TSTNode("u", 3, p) + p.right = u + self.assertTrue(u.is_right_child()) + + def test_is_mid_child_no_parent(self): + u = TSTNode("u") + self.assertRaises(AttributeError, u.is_mid_child) + + def test_is_mid_child_false(self): + p = TSTNode("p") + u = TSTNode("u", 3, p) + self.assertFalse(u.is_mid_child()) + + def test_is_mid_child_true(self): + p = TSTNode("p") 
+ u = TSTNode("u", 3, p) + p.mid = u + self.assertTrue(u.is_mid_child()) + + def test_has_children_0(self): + u = TSTNode("u") + self.assertFalse(u.has_children()) + + def test_has_children_1(self): + u = TSTNode("u", right=TSTNode("right")) + self.assertTrue(u.has_children()) + + def test_has_children_2(self): + u = TSTNode("u", mid=TSTNode("mid"), left=TSTNode("left")) + self.assertTrue(u.has_children()) + + def test_has_children_3(self): + u = TSTNode("u", mid=TSTNode("mid"), left=TSTNode("left"), right=TSTNode("right")) + self.assertTrue(u.has_children())