Update docs for main module and rm random.py

Regarding the deletion of the random.py file: this is a fix for issue #364. The random.py file was included to address the reproducibility of Markov chains: the idea was to set the global random seed to 2018 at every point where the random module is imported internally within the gerrychain package. However, this approach also causes the trees that are generated by the tree.py file to be fixed if the user does not set the random seed after the import of the gerrychain package. For example, the import pattern `import random; random.seed(0); import gerrychain; print(random.random()); print(random.random())` will output 0.5331579307274593 and 0.02768951210200299, as opposed to the expected 0.8444218515250481 and 0.7579544029403025, because importing gerrychain actually forces the random seed to be 2018 rather than the expected 0. This can often cause issues in Jupyter notebooks, where the user is not aware that the random seed has been forcibly set to 2018 after the import of gerrychain. Instead, it is best to allow the user to set the random seed themselves, and to not forcibly set the random seed within the gerrychain package, since that can affect the execution of other packages and can cause the chain to hang when the 2018 seed does not produce a valid tree. This issue does not appear if we remove the random.py file and instead use the random module from the standard library within the tree.py and accept.py files. This is because of how Python handles successive imports of the same module. Consider the following snippet: `import random; random.seed(0); import random; print(random.random()); print(random.random())`. This will output 0.8444218515250481 and 0.7579544029403025, as expected. This is because the random module is only loaded once, after which its name is placed in the internal list of imported modules. 
Subsequent imports of the random module within the same Python session will simply retrieve the module from that list and will not re-execute the code contained within the module. Thus, the random seed is only set once and is not reset when the random module is imported again. In terms of reproducibility, this means that the user will be required to set the random seed themselves if they want to reproduce the same chain, but this is a relatively standard expectation, and will be required when we move the package over to a Rust backend in the future.
mggg · Jan 12, 2024 · dff63b5 · cdonnay · Jan 12, 2024 · cdonnay
1 parent aa02ee4
commit dff63b5
Show file tree

Hide file tree

Showing 16 changed files with 436 additions and 145 deletions.
diff --git a/.gitignore b/.gitignore
@@ -63,7 +63,7 @@ target/
 
 # pyenv python configuration file
 .python-version
-
+.venv
 junit.xml
 
 # crapple

diff --git a/docs/conf.py b/docs/conf.py
@@ -80,7 +80,7 @@
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-language = None
+language = 'en'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.

diff --git a/gerrychain/accept.py b/gerrychain/accept.py
@@ -1,4 +1,13 @@
-from .random import random
+"""
+This module provides the main acceptance function used in ReCom Markov chains.
+
+Dependencies:
+- random: For random number generation for probabilistic acceptance.
+
+Last Updated: 11 Jan 2024
+"""
+
+import random
 from gerrychain.partition import Partition
 
 
@@ -7,12 +16,15 @@ def always_accept(partition: Partition) -> bool:
 
 
 def cut_edge_accept(partition: Partition) -> bool:
-    """Always accepts the flip if the number of cut_edges increases.
+    """
+    Always accepts the flip if the number of cut_edges increases.
     Otherwise, uses the Metropolis criterion to decide.
 
     :param partition: The current partition to accept a flip from.
-    :return: True if accepted, False to remain in place
+    :type partition: Partition
 
+    :return: True if accepted, False to remain in place
+    :rtype: bool
     """
     bound = 1.0
 

diff --git a/gerrychain/chain.py b/gerrychain/chain.py
@@ -1,3 +1,26 @@
+"""
+This module provides the MarkovChain class, which is designed to facilitate the creation
+and iteration of Markov chains in the context of political redistricting and gerrymandering
+analysis. It allows for the exploration of different districting plans based on specified
+constraints and acceptance criteria.
+
+Key Components:
+- MarkovChain: The main class used for creating and iterating over Markov chain states.
+- Validator: A helper class for validating proposed states in the Markov chain.
+    see :class:`~gerrychain.constraints.Validator` for more details.
+
+Usage:
+The primary use of this module is to create an instance of MarkovChain with appropriate
+parameters like proposal function, constraints, acceptance function, and initial state,
+and then to iterate through the states of the Markov chain, yielding a new proposal
+at each step.
+
+Dependencies:
+- typing: Used for type hints.
+
+Last Updated: 11 Jan 2024
+"""
+
 from .constraints import Validator
 from typing import Union, Iterable, Callable, Optional
 
@@ -7,8 +30,11 @@
 
 class MarkovChain:
     """
-    MarkovChain is an iterator that allows the user to iterate over the states
-    of a Markov chain run.
+    MarkovChain is a class that creates an iterator for iterating over the states
+    of a Markov chain run in a gerrymandering analysis context.
+
+    It allows for the generation of a sequence of partitions (states) of a political
+    districting plan, where each partition represents a possible state in the Markov chain.
 
     Example usage:
 
@@ -30,15 +56,23 @@ def __init__(
     ) -> None:
         """
         :param proposal: Function proposing the next state from the current state.
+        :type proposal: Callable
         :param constraints: A function with signature ``Partition -> bool`` determining whether
             the proposed next state is valid (passes all binary constraints). Usually
             this is a :class:`~gerrychain.constraints.Validator` class instance.
+        :type constraints: Union[Iterable[Callable], Validator, Iterable[Bounds], Callable]
         :param accept: Function accepting or rejecting the proposed state. In the most basic
             use case, this always returns ``True``. But if the user wanted to use a
             Metropolis-Hastings acceptance rule, this is where you would implement it.
+        :type accept: Callable
         :param initial_state: Initial :class:`gerrychain.partition.Partition` class.
+        :type initial_state: Optional[Partition]
         :param total_steps: Number of steps to run.
+        :type total_steps: int
 
+        :return: None
+
+        :raises ValueError: If the initial_state is not valid according to the constraints.
         """
         if callable(constraints):
             is_valid = Validator([constraints])
@@ -65,11 +99,34 @@ def __init__(
         self.state = initial_state
 
     def __iter__(self) -> 'MarkovChain':
+        """
+        Resets the Markov chain iterator.
+
+        This method is called when an iterator is required for a container. It sets the
+        counter to 0 and resets the state to the initial state.
+
+        :return: Returns itself as an iterator object.
+        :rtype: MarkovChain
+        """
         self.counter = 0
         self.state = self.initial_state
         return self
 
     def __next__(self) -> Optional[Partition]:
+        """
+        Advances the Markov chain to the next state.
+
+        This method is called to get the next item in the iteration.
+        It proposes the next state and moves to it if that state is
+        valid according to the constraints and if accepted by the
+        acceptance function. If the total number of steps has been
+        reached, it raises a StopIteration exception.
+
+        :return: The next state of the Markov chain.
+        :rtype: Optional[Partition]
+
+        :raises StopIteration: If the total number of steps has been reached.
+        """
         if self.counter == 0:
             self.counter += 1
             return self.state
@@ -88,12 +145,26 @@ def __next__(self) -> Optional[Partition]:
         raise StopIteration
 
     def __len__(self) -> int:
+        """
+        Returns the total number of steps in the Markov chain.
+
+        :return: The total number of steps in the Markov chain.
+        :rtype: int
+        """
         return self.total_steps
 
     def __repr__(self) -> str:
         return "<MarkovChain [{} steps]>".format(len(self))
 
     def with_progress_bar(self):
+        """
+        Wraps the Markov chain in a tqdm progress bar.
+
+        Useful for long-running Markov chains where you want to keep track
+        of the progress. Requires the `tqdm` package to be installed.
+
+        :return: A tqdm-wrapped Markov chain.
+        """
         from tqdm.auto import tqdm
 
         return tqdm(self)
diff --git a/gerrychain/constraints/contiguity.py b/gerrychain/constraints/contiguity.py
@@ -3,7 +3,7 @@
 
 import networkx as nx
 
-from ..random import random
+import random
 from .bounds import SelfConfiguringLowerBound
 
 

diff --git a/gerrychain/grid.py b/gerrychain/grid.py
@@ -1,7 +1,18 @@
-import math
+"""
+This module provides a Grid class used for creating and manipulating grid partitions.
+It's part of the GerryChain suite, designed to facilitate experiments with redistricting
+plans without the need for extensive data processing. This module relies on NetworkX for
+graph operations and integrates with GerryChain's Partition class.
+
+Dependencies:
+- math: For math.floor() function.
+- networkx: For graph operations using the graph structure in
+    :class:`~gerrychain.graph.Graph`.
+- typing: Used for type hints.
+"""
 
+import math
 import networkx
-
 from gerrychain.partition import Partition
 from gerrychain.graph import Graph
 from gerrychain.updaters import (
@@ -14,8 +25,7 @@
     perimeter,
 )
 from gerrychain.metrics import polsby_popper
-
-from typing import Callable, Dict, Optional, Tuple
+from typing import Callable, Dict, Optional, Tuple, Any
 
 
 class Grid(Partition):
@@ -54,15 +64,35 @@ def __init__(
         flips: Optional[Dict[Tuple[int, int], int]] = None,
     ) -> None:
         """
-        :param dimensions: tuple (m,n) of the desired dimensions of the grid.
-        :param with_diagonals: (optional, defaults to False) whether to include diagonals
-            as edges of the graph (i.e., whether to use 'queen' adjacency rather than
-            'rook' adjacency).
-        :param assignment: (optional) dict matching nodes to their districts. If not
-            provided, partitions the grid into 4 quarters of roughly equal size.
-        :param updaters: (optional) dict matching names of attributes of the Partition
-            to functions that compute their values. If not provided, the Grid
-            configures the cut_edges updater for convenience.
+        If the updaters are not specified, the default updaters are used, which are as follows::
+
+            default_updaters = {
+                "cut_edges": cut_edges,
+                "population": Tally("population"),
+                "perimeter": perimeter,
+                "exterior_boundaries": exterior_boundaries,
+                "interior_boundaries": interior_boundaries,
+                "boundary_nodes": boundary_nodes,
+                "area": Tally("area", alias="area"),
+                "polsby_popper": polsby_popper,
+                "cut_edges_by_part": cut_edges_by_part,
+            }
+
+
+        :param dimensions: The grid dimensions (rows, columns), defaults to None.
+        :type dimensions: Tuple[int, int], optional
+        :param with_diagonals: If True, includes diagonal connections, defaults to False.
+        :type with_diagonals: bool, optional
+        :param assignment: Node-to-district assignments, defaults to None.
+        :type assignment: Dict, optional
+        :param updaters: Custom updater functions, defaults to None.
+        :type updaters: Dict[str, Callable], optional
+        :param parent: Parent Grid object for inheritance, defaults to None.
+        :type parent: Grid, optional
+        :param flips: Node flips for partition changes, defaults to None.
+        :type flips: Dict[Tuple[int, int], int], optional
+
+        :raises Exception: If neither dimensions nor parent is provided.
         """
         if dimensions:
             self.dimensions = dimensions
@@ -100,12 +130,32 @@ def as_list_of_lists(self):
         Returns the grid as a list of lists (like a matrix), where the (i,j)th
         entry is the assigned district of the node in position (i,j) on the
         grid.
+
+        :return: List of lists representing the grid.
+        :rtype: List[List[int]]
         """
         m, n = self.dimensions
         return [[self.assignment.mapping[(i, j)] for i in range(m)] for j in range(n)]
 
 
-def create_grid_graph(dimensions: Tuple[int, int], with_diagonals: bool) -> Graph:
+def create_grid_graph(
+    dimensions: Tuple[int, int],
+    with_diagonals: bool
+) -> Graph:
+    """
+    Creates a grid graph with the specified dimensions.
+    Optionally includes diagonal connections between nodes.
+
+    :param dimensions: The grid dimensions (rows, columns).
+    :type dimensions: Tuple[int, int]
+    :param with_diagonals: If True, includes diagonal connections.
+    :type with_diagonals: bool
+
+    :return: A grid graph.
+    :rtype: Graph
+
+    :raises ValueError: If the dimensions are not a tuple of length 2.
+    """
     if len(dimensions) != 2:
         raise ValueError("Expected two dimensions.")
     m, n = dimensions
@@ -133,12 +183,43 @@ def create_grid_graph(dimensions: Tuple[int, int], with_diagonals: bool) -> Grap
     return graph
 
 
-def give_constant_attribute(graph, attribute, value):
+def give_constant_attribute(
+    graph: Graph,
+    attribute: Any,
+    value: Any
+) -> None:
+    """
+    Sets the specified attribute to the specified value for all nodes in the graph.
+
+    :param graph: The graph to modify.
+    :type graph: Graph
+    :param attribute: The attribute to set.
+    :type attribute: Any
+    :param value: The value to set the attribute to.
+    :type value: Any
+
+    :return: None
+    """
     for node in graph.nodes:
         graph.nodes[node][attribute] = value
 
 
-def tag_boundary_nodes(graph: Graph, dimensions: Tuple[int, int]) -> None:
+def tag_boundary_nodes(
+    graph: Graph,
+    dimensions: Tuple[int, int]
+) -> None:
+    """
+    Adds the boolean attribute ``boundary_node`` to each node in the graph.
+    If the node is on the boundary of the grid, that node also gets the attribute
+    ``boundary_perim`` which is determined by the function :func:`get_boundary_perim`.
+
+    :param graph: The graph to modify.
+    :type graph: Graph
+    :param dimensions: The dimensions of the grid.
+    :type dimensions: Tuple[int, int]
+
+    :return: None
+    """
     m, n = dimensions
     for node in graph.nodes:
         if node[0] in [0, m - 1] or node[1] in [0, n - 1]:
@@ -148,7 +229,23 @@ def tag_boundary_nodes(graph: Graph, dimensions: Tuple[int, int]) -> None:
             graph.nodes[node]["boundary_node"] = False
 
 
-def get_boundary_perim(node: Tuple[int, int], dimensions: Tuple[int, int]) -> int:
+def get_boundary_perim(
+    node: Tuple[int, int],
+    dimensions: Tuple[int, int]
+) -> int:
+    """
+    Determines the boundary perimeter of a node on the grid.
+    The boundary perimeter is the number of sides of the node that
+    are on the boundary of the grid.
+
+    :param node: The node to check.
+    :type node: Tuple[int, int]
+    :param dimensions: The dimensions of the grid.
+    :type dimensions: Tuple[int, int]
+
+    :return: The boundary perimeter of the node.
+    :rtype: int
+    """
     m, n = dimensions
     if node in [(0, 0), (m - 1, 0), (0, n - 1), (m - 1, n - 1)]:
         return 2
@@ -158,7 +255,7 @@ def get_boundary_perim(node: Tuple[int, int], dimensions: Tuple[int, int]) -> in
         return 0
 
 
-def color_half(node, threshold=5):
+def color_half(node: Tuple[int, int], threshold: int = 5) -> int:
     x = node[0]
     return 0 if x <= threshold else 1
 
@@ -168,19 +265,3 @@ def color_quadrants(node: Tuple[int, int], thresholds: Tuple[int, int]) -> int:
     x_color = 0 if x < thresholds[0] else 1
     y_color = 0 if y < thresholds[1] else 2
     return x_color + y_color
-
-
-def grid_size(parition):
-    """ This is a hardcoded population function
-    for the grid class"""
-
-    L = parition.as_list_of_lists()
-    permit = [3, 4, 5]
-
-    sizes = [0, 0, 0, 0]
-
-    for i in range(len(L)):
-        for j in range(len(L[0])):
-            sizes[L[i][j]] += 1
-
-    return all(x in permit for x in sizes)