From 2bb1dc831f9ace1808ae69a83373cf4fb9977dcc Mon Sep 17 00:00:00 2001 From: nelson-brochado Date: Wed, 8 Mar 2017 16:05:14 +0100 Subject: [PATCH 1/2] Fixed issue by specifying public interface - Improved also robustness of data structure --- .../{DSForests.py => DisjointSetsForest.py} | 171 ++++++++++----- tests/ds/test_DSForests.py | 195 ------------------ tests/ds/test_DisjointSetsForest.py | 162 +++++++++++++++ 3 files changed, 279 insertions(+), 249 deletions(-) rename ands/ds/{DSForests.py => DisjointSetsForest.py} (59%) delete mode 100755 tests/ds/test_DSForests.py create mode 100755 tests/ds/test_DisjointSetsForest.py diff --git a/ands/ds/DSForests.py b/ands/ds/DisjointSetsForest.py similarity index 59% rename from ands/ds/DSForests.py rename to ands/ds/DisjointSetsForest.py index ac18dc41..3a76f17c 100644 --- a/ands/ds/DSForests.py +++ b/ands/ds/DisjointSetsForest.py @@ -8,7 +8,7 @@ Created: 21/02/2016 -Updated: 03/01/2016 +Updated: 08/03/2017 # Description @@ -23,7 +23,7 @@ 3. union(x, y): unions the sets where x and y are (if they do not belong already to the same set). -`DSForests` uses two heuristics that improve the performance with respect to a naive implementation. +`DisjointSetsForest` uses two heuristics that improve the performance with respect to a naive implementation. 1. Union by rank: attach the smaller tree to the root of the larger tree @@ -31,11 +31,24 @@ These two techniques complement each other: applied together, the amortized time per operation is only O( α (n)). 
+## Public interface + +- make_set(x): add x to the DisjointSetsForest +- find(x): returns the root or representative of x +- union(x, y): unites the sets where x and y reside +- print_set(x): prints the set where x resides to the standard output +- contains(x): check if x is in the DisjointSetsForest +- size: returns the number of elements added to the data structure (using make_set) +- sets: returns the number of disjoint sets currently in the data structure + +All other methods or fields are considered private and were NOT intended to be used by clients! + # TODO - Deletion operation (OPTIONAL, since it's usually not part of the interface of a disjoint-set data structure) - Pretty-print(x), for some element x in the disjoint-set data structure. - Implement the version explained [here](http://algs4.cs.princeton.edu/15uf/) +- Add complexity analysis for print_set # References @@ -45,12 +58,19 @@ - [http://stackoverflow.com/a/22945492/3924118](http://stackoverflow.com/a/22945492/3924118) - [http://stackoverflow.com/q/23055236/3924118](http://stackoverflow.com/q/23055236/3924118) - [https://www.cs.usfca.edu/~galles/JavascriptVisual/DisjointSets.html](https://www.cs.usfca.edu/~galles/JavascriptVisual/DisjointSets.html) -to visualize how disjoint-sets work. +to visualize how disjoint-sets work. """ +__all__ = ["DisjointSetsForest"] + + +class DSFNode: + """DSFNode is the node used internally by `DisjointSetsForest` + to represent nodes in the disjoint trees (or sets). + + Clients should NOT need to use this class.""" -class DSNode: def __init__(self, x, rank=0): # This attribute can contain any hashable value. self.value = x @@ -58,12 +78,12 @@ def __init__(self, x, rank=0): # The rank of node x only changes in one specific union(x, y) case: # when x is the representative of its set # and the representative of the set where y resides has the same rank as x. 
- # In the DSForests implementation below, if a situation as just described occurs, + # In the DisjointSetsForest implementation below, if a situation as just described occurs, # then the x.rank is increased by 1. self.rank = rank # Reference to the representative of the set where this node resides - # Since DSForests actually implements a tree, + # Since DisjointSetsForest actually implements a tree, # self.parent is also the root of that tree. self.parent = self @@ -73,9 +93,9 @@ def __init__(self, x, rank=0): self.next = self def is_root(self): - """A DSNode x is a root or representative of a set + """A DSFNode x is a root or representative of a set whenever its parent pointer points to himself. - Of course this is only true if x is already in a DSForests object.""" + Of course this is only true if x is already in a DisjointSetsForest object.""" return self.parent == self def __str__(self): @@ -88,18 +108,48 @@ def __repr__(self): return "(value: {0}, rank: {1}, parent: {2})".format(self.value, self.rank, self.parent) -class DSForests: - def __init__(self): - # keeps tracks of the DSNodes in this disjoint-set forests. - self.sets = {} +class DisjointSetsForest: + """Disjoint-set forests is a collection of disjoint sets. + + Two sets A and B are disjoint if they have no element in common, + or, in other words, their intersection is the empty set. + + It's called forest because the way the disjoint set data structure is implemented, + that is it's implemented by representing a forest of trees. + A disjoint-set data structure can be implemented differently. - def make_set(self, x) -> DSNode: - """Creates a set object for `x`.""" - assert x not in self.sets - self.sets[x] = DSNode(x) - return self.sets[x] + This data structure does not allow duplicates.""" - def find(self, x: DSNode) -> DSNode: + def __init__(self): + # keeps tracks of the DSNodes in this disjoint-set forests. 
+ self._sets = {} + self._n = 0 + + def make_set(self, x: object) -> None: + """Creates a set object for `x`. + + If `x` is already in self, then `LookupError` is raised.""" + if x in self._sets: + raise LookupError("x is already in self") + self._sets[x] = DSFNode(x) + self._n += 1 + assert 0 <= self.sets <= self.size + + @property + def size(self) -> int: + """Returns the number of elements in this DisjointSetsForest.""" + return len(self._sets) + + @property + def sets(self) -> int: + """Returns the number of disjoint sets in `self`.""" + return self._n + + def contains(self, x: object) -> bool: + """Returns True if x is in self, False otherwise.""" + return x in self._sets + + def _find(self, x: DSFNode) -> DSFNode: """Finds and returns the representative (or root) of `x`. It follows parent nodes until it reaches the root of the tree (set) to which `x` belongs. @@ -128,14 +178,15 @@ def find(self, x: DSNode) -> DSNode: α (n) is less than 5 for all remotely practical values of n. Thus, the amortized running time per operation is effectively a small constant.""" - assert x + assert x is not None if x.parent != x: - x.parent = self.find(x.parent) + x.parent = self._find(x.parent) return x.parent - def find_iteratively(self, x: DSNode) -> DSNode: + @staticmethod + def _find_iteratively(x: DSFNode) -> DSFNode: """This version is just an iterative alternative to the find method.""" - assert x + assert x is not None y = x @@ -143,9 +194,6 @@ def find_iteratively(self, x: DSNode) -> DSNode: while y != y.parent: y = y.parent - # post-condition - assert y == self.find(x) - # now y is the representative of x, # but we also want to do a path compression, # i.e. connect all nodes in the path from x to y directly to y. 
@@ -156,24 +204,35 @@ def find_iteratively(self, x: DSNode) -> DSNode: return y - def union(self, x, y) -> DSNode: - """"Union by rank" 2 trees (sets) into one by attaching + def find(self, x: object) -> object: + """Finds and returns the representative (or root) of `x`. + + Raises a `LookupError` if `x` does not belong to this `DisjointSetsForest`. + + **Time Complexity:** O*(α (n)).""" + if x not in self._sets: + raise LookupError("x is not in self") + x_root = self._find(self._sets[x]).value + assert x_root == DisjointSetsForest._find_iteratively(self._sets[x]).value + return x_root + + def union(self, x: object, y: object) -> object: + """"Union by rank" 2 sets into one by attaching the root of one to the root of the other. - Returns the `DSNode` object representing the representative of + + Returns the root object representing the representative of the set resulted from the union of the sets containing `x` and `y`. + It returns None if `x` and `y` are already in the same set. "Union by rank" consists of attaching the smaller tree to the root of the larger tree. Since it is the depth of the tree that affects the running time, - the tree with smaller depth gets added - under the root of the deeper tree, + the tree with smaller depth gets added under the root of the deeper tree, which only increases the depth if the depths were equal. - In the context of this algorithm, - the term _rank_ is used instead of depth, - since it stops being equal to the depth - if path compression is also used. + In the context of this algorithm, the term _rank_ is used instead of depth, + since it stops being equal to the depth if path compression is also used. The rank is an upper bound on the height of the node. @@ -188,48 +247,52 @@ def union(self, x, y) -> DSNode: α (n) is less than 5 for all remotely practical values of n. 
Thus, the amortized running time per operation is effectively a small constant.""" - assert x in self.sets and y in self.sets + if x not in self._sets: + raise LookupError("x is not in self") + if y not in self._sets: + raise LookupError("y is not in self") - # Since the original values x and y are not used afterwards, - # and what we actually need in two places of this algorithm are the corresponding DSNodes - # we set x and y to be respectively their DSNode counter-part. - x = self.sets[x] - y = self.sets[y] + x_node = self._sets[x] + y_node = self._sets[y] - x_root = self.find(x) - y_root = self.find(y) + x_root = self._find(x_node) + y_root = self._find(y_node) # x and y are already joined. if x_root == y_root: return - # Exchanging the next pointers of x and y. + # Exchanging the next pointers of x_node and y_node. # This is needed in order to print the elements of a set in O(m) time, - # where m is the size of the same set. + # where m is the size of the same set, in self.print_set. # Check here: http://stackoverflow.com/a/22945492/3924118. - x.next, y.next = y.next, x.next + x_node.next, y_node.next = y_node.next, x_node.next + + self._n -= 1 + assert 0 <= self.sets <= self.size # x and y are not in the same set, therefore we merge them. 
if x_root.rank < y_root.rank: x_root.parent = y_root - return y_root + return y_root.value else: y_root.parent = x_root if x_root.rank == y_root.rank: x_root.rank += 1 - return x_root + return x_root.value - def print_set(self, x) -> None: - assert x in self.sets + def print_set(self, x: object) -> None: + if x not in self._sets: + raise LookupError("x is not in self") - x = self.sets[x] - y = x + x_node = self._sets[x] + y = x_node - print("{0} -> {{{1}".format(x, x), end="") - while y.next != x: + print("{0} -> {{{1}".format(x_node, x_node), end="") + while y.next != x_node: print(",", y.next, end="") y = y.next print("}") def __str__(self): - return str(self.sets) + return str(self._sets) diff --git a/tests/ds/test_DSForests.py b/tests/ds/test_DSForests.py deleted file mode 100755 index ba9b226a..00000000 --- a/tests/ds/test_DSForests.py +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" -# Meta info - -Author: Nelson Brochado - -Created: 22/02/16 - -Updated: 03/01/17 - -# Description - -Tests for the DSForests class and associated classes. - -# Note - -Since find_iteratively internally asserts that its result is equal to find, -in these tests I'm using find_iteratively -to not need to write tests for both find and find_iteratively. 
-""" - -import unittest -from random import randint - -from ands.ds.DSForests import DSForests, DSNode - - -class TestDSNode(unittest.TestCase): - def test_creation(self): - n = DSNode(7) - self.assertTrue(n.is_root()) - self.assertEqual(n.parent, n) - self.assertEqual(n.next, n) - self.assertEqual(n.rank, 0) - self.assertEqual(n.value, 7) - - def test_creation_custom_rank(self): - n = DSNode(9, 101) - self.assertEqual(n.rank, 101) - - def test_repr(self): - n = DSNode(31) - self.assertEqual("(value: 31, rank: 0, parent: self)", repr(n)) - n.parent = "null" - self.assertEqual("(value: 31, rank: 0, parent: null)", repr(n)) - - def test_str(self): - n = DSNode(39) - self.assertEqual("39", str(n)) - - -class TestDSForests(unittest.TestCase): - def test_empty_creation(self): - DSForests() - - def test_make_set_one(self): - ds = DSForests() - s = ds.make_set(3) - self.assertEqual(len(ds.sets), 1) - self.assertEqual(s.value, 3) - self.assertEqual(s.rank, 0) - self.assertEqual(s.parent, s) - self.assertEqual(s.next, s) - - def test_make_set_many(self): - ds = DSForests() - - n = randint(5, 11) - for elem in range(n): - a = ds.make_set(elem) - self.assertEqual(a.value, elem) - self.assertEqual(a.rank, 0) - self.assertEqual(a.parent, a) - self.assertEqual(a.next, a) - - self.assertEqual(len(ds.sets), n) - - def test_find_one(self): - ds = DSForests() - a = ds.make_set(12) - self.assertEqual(ds.find_iteratively(a), a) - - def test_find_two(self): - ds = DSForests() - a = ds.make_set(-11) - b = ds.make_set(13) - self.assertEqual(ds.find_iteratively(a), a) - self.assertEqual(ds.find_iteratively(b), b) - - def test_union_same_element(self): - ds = DSForests() - - ds.make_set(51) - u = ds.union(51, 51) - - self.assertIsNone(u) - - def test_union_same_set(self): - ds = DSForests() - - ds.make_set(17) - ds.make_set(19) - ds.union(17, 19) - u = ds.union(17, 19) - - self.assertIsNone(u) - - def test_union_else(self): - # it also tests the if statement inside the else - ds = 
DSForests() - a = ds.make_set(19) - b = ds.make_set(23) - u = ds.union(19, 23) - - self.assertEqual(u, a) - - self.assertEqual(a.next, b) - self.assertEqual(b.next, a) - - self.assertEqual(ds.find_iteratively(a), a) - self.assertEqual(a.value, 19) - self.assertEqual(a.rank, 1) - self.assertEqual(a.parent, a) - - self.assertEqual(ds.find_iteratively(b), a) - self.assertEqual(b.value, 23) - self.assertEqual(b.rank, 0) - self.assertEqual(b.parent, a) - - def test_union_if(self): - ds = DSForests() - - a = ds.make_set(12) - b = ds.make_set(14) - ds.union(12, 14) - c = ds.make_set(28) - u2 = ds.union(28, 12) - - self.assertEqual(a.next, c) - self.assertEqual(b.next, a) - self.assertEqual(c.next, b) - - self.assertEqual(u2, a) - - self.assertEqual(ds.find_iteratively(c), a) - self.assertEqual(c.value, 28) - self.assertEqual(c.rank, 0) - self.assertEqual(c.parent, a) - self.assertEqual(a.value, 12) - self.assertEqual(a.rank, 1) - self.assertEqual(a.parent, a) - - def test_print_set(self): - # This is not really a test with assertions, but just a visual one. - # This example was actually from an exercise of an assignment - # that I had during the course "Algorithms and Data Structures 2" at USI. 
- ds = DSForests() - - print() - - for i in range(1, 17): - ds.make_set(i) - # ds.print_set(i) - - # print("--------------------------------------") - - for i in range(1, 16, 2): - ds.union(i, i + 1) - # ds.print_set(i) - - # print("--------------------------------------") - - for i in range(2, 15, 4): - ds.union(i, i + 2) - # ds.print_set(i) - - # print("--------------------------------------") - - ds.union(4, 7) - # ds.print_set(5) - # ds.print_set(11) - # ds.print_set(16) - - # print("--------------------------------------") - - ds.union(10, 16) - # ds.print_set(8) - # ds.print_set(13) - - # print("--------------------------------------") - - ds.union(8, 13) - ds.print_set(3) diff --git a/tests/ds/test_DisjointSetsForest.py b/tests/ds/test_DisjointSetsForest.py new file mode 100755 index 00000000..5430f68c --- /dev/null +++ b/tests/ds/test_DisjointSetsForest.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +# Meta info + +Author: Nelson Brochado + +Created: 22/02/2016 + +Updated: 08/03/2017 + +# Description + +Tests for the DisjointSetsForest class and associated classes. 
+""" + +import unittest +from random import randint, choice + +from ands.ds.DisjointSetsForest import DisjointSetsForest, DSFNode + + +class TestDSFNode(unittest.TestCase): + def test_creation(self): + n = DSFNode(7) + self.assertTrue(n.is_root()) + self.assertEqual(n.parent, n) + self.assertEqual(n.next, n) + self.assertEqual(n.rank, 0) + self.assertEqual(n.value, 7) + + def test_creation_custom_rank(self): + n = DSFNode(9, 101) + self.assertEqual(n.rank, 101) + + def test_repr(self): + n = DSFNode(31) + self.assertEqual("(value: 31, rank: 0, parent: self)", repr(n)) + n.parent = "null" + self.assertEqual("(value: 31, rank: 0, parent: null)", repr(n)) + + def test_str(self): + n = DSFNode(39) + self.assertEqual("39", str(n)) + + +class TestDSForests(unittest.TestCase): + def setUp(self): + self.d = DisjointSetsForest() + + def test_make_set_elem_already_exits(self): + self.d.make_set(3) + self.assertRaises(LookupError, self.d.make_set, 3) + + def test_make_set_one(self): + self.assertIsNone(self.d.make_set(3)) + self.assertEqual(self.d.size, 1) + self.assertEqual(self.d.sets, 1) + self.assertEqual(self.d.find(3), 3) + + def test_make_set_many(self): + n = randint(5, 11) + + for elem in range(n): + self.d.make_set(elem) + self.assertEqual(self.d.find(elem), elem) + + self.assertEqual(self.d.size, n) + self.assertEqual(self.d.sets, n) + + def test_contains(self): + self.assertFalse(self.d.contains(3)) + self.d.make_set(3) + self.assertTrue(self.d.contains(3)) + + def test_find_one_when_does_not_exist(self): + self.assertRaises(LookupError, self.d.find, 7) + + def test_find_one(self): + self.d.make_set(5) + self.assertEqual(self.d.find(5), 5) + + def test_find_two(self): + self.d.make_set(-11) + self.d.make_set(13) + self.assertEqual(self.d.find(-11), -11) + self.assertEqual(self.d.find(13), 13) + + def test_union_elements_do_not_exist(self): + self.d.make_set(7) + self.assertRaises(LookupError, self.d.union, 5, 7) + self.assertRaises(LookupError, self.d.union, 7, 5) 
+ self.assertRaises(LookupError, self.d.union, 11, 5) + + def test_union_same_element(self): + self.d.make_set(51) + self.assertIsNone(self.d.union(51, 51)) + self.assertEqual(self.d.size, 1) + self.assertEqual(self.d.sets, 1) + + def test_union(self): + self.d.make_set(51) + self.d.make_set(53) + self.assertEqual(self.d.sets, 2) + self.assertIsNotNone(self.d.union(51, 53)) + self.assertEqual(self.d.size, 2) + self.assertEqual(self.d.sets, 1) + + def test_union_when_already_in_same_set(self): + self.d.make_set(17) + self.d.make_set(19) + self.d.union(17, 19) + self.assertIsNone(self.d.union(17, 19)) + self.assertEqual(self.d.size, 2) + self.assertEqual(self.d.sets, 1) + + def test_sequence_of_make_set_find_and_union(self): + + n = randint(43, 101) + ls = [] + + for _ in range(n): + + x = randint(-33, 77) + while self.d.contains(x): + x = randint(-33, 77) + ls.append(x) + + self.d.make_set(x) + + # While there's more than one set do a few unions + while self.d.sets > 1: + x = choice(ls) + y = choice(ls) + self.d.union(x, y) + + # Assert that all elements are still in the ds + for elem in ls: + self.assertIsNotNone(self.d.find(elem)) + self.assertTrue(self.d.contains(elem)) + + self.assertEqual(self.d.size, n) + + def test_print_set_when_elem_not_exist(self): + self.assertRaises(LookupError, self.d.print_set, 3) + + def test_print_set(self): + for i in range(1, 17): + self.d.make_set(i) + + for i in range(1, 16, 2): + self.d.union(i, i + 1) + + for i in range(2, 15, 4): + self.d.union(i, i + 2) + + self.d.union(4, 7) + self.d.union(10, 16) + self.d.union(8, 13) + + self.d.print_set(3) From 55d4ce253fa4e05cf58e5c8e9ff050867dd7f0d9 Mon Sep 17 00:00:00 2001 From: nelson-brochado Date: Wed, 8 Mar 2017 16:18:09 +0100 Subject: [PATCH 2/2] Created an abstract class from which DisjointSetsForest derive since DS can be implemented in different ways --- ands/ds/DisjointSets.py | 42 +++++++++++++++++++++++++++++++++++ ands/ds/DisjointSetsForest.py | 14 +++++++----- 2 files 
changed, 50 insertions(+), 6 deletions(-) create mode 100644 ands/ds/DisjointSets.py diff --git a/ands/ds/DisjointSets.py b/ands/ds/DisjointSets.py new file mode 100644 index 00000000..b74dccd7 --- /dev/null +++ b/ands/ds/DisjointSets.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + + +""" +# Meta info + +Author: Nelson Brochado + +Created: 08/03/2017 + +Updated: 08/03/2017 + +# Description + +Module which contains the abstract class from which DisjointSetsForest derives. + +The reason to have this abstract class is because +a disjoint-sets data structure can possibly be implemented in different ways. +""" + +from abc import ABCMeta, abstractmethod + +__all__ = ["DisjointSets"] + + +class DisjointSets(metaclass=ABCMeta): + """Abstract class from which DisjointSetsForest derives. + + A DisjointSets data structure is sometimes also called DisjointSet, UnionFind or MergeSet""" + + @abstractmethod + def make_set(self, x: object) -> None: + pass + + @abstractmethod + def find(self, x: object) -> object: + pass + + @abstractmethod + def union(self, x: object, y: object) -> object: + pass diff --git a/ands/ds/DisjointSetsForest.py b/ands/ds/DisjointSetsForest.py index 3a76f17c..ff393418 100644 --- a/ands/ds/DisjointSetsForest.py +++ b/ands/ds/DisjointSetsForest.py @@ -62,6 +62,8 @@ """ +from ands.ds.DisjointSets import DisjointSets + __all__ = ["DisjointSetsForest"] @@ -108,7 +110,7 @@ def __repr__(self): return "(value: {0}, rank: {1}, parent: {2})".format(self.value, self.rank, self.parent) -class DisjointSetsForest: +class DisjointSetsForest(DisjointSets): """Disjoint-set forests is a collection of disjoint sets. Two sets A and B are disjoint if they have no element in common, @@ -129,7 +131,7 @@ def make_set(self, x: object) -> None: """Creates a set object for `x`. 
If `x` is already in self, then `LookupError` is raised.""" - if x in self._sets: + if self.contains(x): raise LookupError("x is already in self") self._sets[x] = DSFNode(x) self._n += 1 @@ -210,7 +212,7 @@ def find(self, x: object) -> object: Raises a `LookupError` if `x` does not belong to this `DisjointSetsForest`. **Time Complexity:** O*(α (n)).""" - if x not in self._sets: + if not self.contains(x): raise LookupError("x is not in self") x_root = self._find(self._sets[x]).value assert x_root == DisjointSetsForest._find_iteratively(self._sets[x]).value @@ -247,9 +249,9 @@ def union(self, x: object, y: object) -> object: α (n) is less than 5 for all remotely practical values of n. Thus, the amortized running time per operation is effectively a small constant.""" - if x not in self._sets: + if not self.contains(x): raise LookupError("x is not in self") - if y not in self._sets: + if not self.contains(y): raise LookupError("y is not in self") x_node = self._sets[x] @@ -282,7 +284,7 @@ def union(self, x: object, y: object) -> object: return x_root.value def print_set(self, x: object) -> None: - if x not in self._sets: + if not self.contains(x): raise LookupError("x is not in self") x_node = self._sets[x]