From 44fb2e77b0bff24cc2c27d8ff3d3122802cbfd9b Mon Sep 17 00:00:00 2001 From: "Parker J. Rule" Date: Fri, 3 Dec 2021 22:46:34 -0500 Subject: [PATCH 1/5] [WIP] COI preservation updater --- evaltools/evaluation/coi.py | 102 ++++++++++++++++++++++++++++++++++++ setup.py | 3 +- tests/test_coi.py | 48 +++++++++++++++++ 3 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 evaltools/evaluation/coi.py create mode 100644 tests/test_coi.py diff --git a/evaltools/evaluation/coi.py b/evaltools/evaluation/coi.py new file mode 100644 index 00000000..39d6bbf6 --- /dev/null +++ b/evaltools/evaluation/coi.py @@ -0,0 +1,102 @@ +"""Community of interest (COI) preservation scores.""" +from typing import Dict, Sequence, Any, Callable +from scipy.sparse import csr_matrix +from gerrychain import Partition +import numpy as np + + +def block_level_coi_preservation( + unit_blocks: Dict[Any, Sequence[Any]], + coi_blocks: Dict[Any, Sequence[Any]], + block_pops: Dict[Any, float], + thresholds: Sequence[float], + partial_districts: bool = False +) -> Callable[[Partition], Dict[float, float]]: + """Makes a COI preservation score function. + + We assume that dual graph units and communities of interest can both be + represented (ideally losslessly) with a smaller common unit + (typically Census blocks). For a given partitition :math:`P`, inclusion + threshold :math:`t` and communities of interest :math:`C_1, \dots, C_n`, + we compute the preservation score + .. math:: f(P, t) = \sum_{i=1}^n (\max(f_1(C_i, P, t),f_2(C_i, P, t)) + where: + * :math:`f_1(C_i, P, t) = 1` when :math:`100t\%%` of the population + of community of interest :math:`C_i` is inside of one district in + :math:`P` (0 otherwise). Intuitively, :math:`f_1` captures how much + a community of interest is split across districts. When the typical + COI population is much smaller than the typical district population, + it is relatively easier to satisfy this criterion. + * :math:`f_2(C_i, P, t) = 1` when :math:`100t\%%` of the population + of some district in :math:`P` is inside of :math:`C_i` (0 otherwise). + Intuitively, :math:`f_2` captures how districts are split across + communities of interest (though this notion is less easy to interpret + than :math:`f_1`). When the typical COI population is much larger + than the typical district population, it is relatively easier to + satisfy this criterion. + + When `partial_districts` is `True`, we use an alternative formula for + :math:`f2`. Specifically, :math:`f_2'(C_i, P, t)` is the number of + districts :math:`100t\%%` contained in :math:`C_i` divided by the + population of :math:`C_i` (in ideal districts). + + COI-unit intersection populations are precomputed, so generating the + score function may be slow for large dual graphs and/or large collections + of COIs. + + :param unit_blocks: A mapping from dual graph units to the blocks + contained in each unit. THe key must be the same as the nodes + in the dual graph. + :param coi_blocks: A mapping from COIs to the blocks contained in + each COI. + :param block_pops: The block populations. + :param thresholds: The threshold values to use (ranging in [0, 1]). + :param partial_districts: If `True`, an alternative (non-integer-valued) + formula is used to compute COI preservation scores. + :return: An updater that computes the COI preservation score for a + partition for each threshold in `thresholds`. + """ + if min(thresholds) <= 0.5: + raise ValueError('Minimum inclusion threshold must be >50%.') + + # We precompute a sparse COI-unit intersection matrix. + node_ordering = {k: idx for idx, k in enumerate(unit_blocks.keys())} + unit_coi_inter_pops = np.zeros((len(coi_blocks), len(unit_blocks))) + for unit_idx, (unit, blocks_in_unit) in enumerate(unit_blocks.items()): + for coi_idx, (coi, blocks_in_coi) in enumerate(coi_blocks.items()): + unit_coi_inter_pops[coi_idx, unit_idx] = sum( + block_pops[b] for b in blocks_in_coi & blocks_in_unit) + unit_coi_inter_pops = csr_matrix(unit_coi_inter_pops) + + coi_pops = np.array( + [sum(block_pops[b] for b in blocks) for blocks in coi_blocks.values()]) + unit_pops = np.array([ + sum(block_pops[b] for b in blocks) + for vtd, blocks in unit_blocks.items() + ]) + total_pop = unit_pops.sum() + + def score_fn(partition: Partition) -> Dict[float, float]: + # Convert the assignment to a matrix encoding. + dist_ordering = { + dist: idx + for idx, dist in enumerate(partition.parts.keys()) + } + dist_mat = np.zeros((len(unit_blocks), len(dist_ordering))) + for node, dist in partition.assignment.items(): + dist_mat[node_ordering[node], dist_ordering[dist]] = 1 + + coi_dist_pops = unit_coi_inter_pops @ dist_mat + max_district_pop_in_coi = np.max(coi_dist_pops, axis=1) + score_by_threshold = {} + ideal_dist_pop = total_pop / dist_mat.shape[1] + for threshold in thresholds: + if not partial_districts: + score_by_threshold[threshold] = np.logical_or( + max_district_pop_in_coi >= threshold * ideal_dist_pop, + max_district_pop_in_coi >= threshold * coi_pops).sum() + else: + raise ValueError('Partial score not implemented yet.') + return score_by_threshold + + return score_fn diff --git a/setup.py b/setup.py index e4f5707f..79ed880e 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,8 @@ requirements = [ "pandas", "scipy", "networkx", "geopandas", "shapely", "matplotlib", - "gerrychain", "sortedcontainers", "gurobipy", "jsonlines", "opencv-python" + "gerrychain", "sortedcontainers", "gurobipy", "jsonlines", + "opencv-python", "scipy" ] setup( diff --git a/tests/test_coi.py b/tests/test_coi.py new file mode 100644 index 00000000..4ae85ec8 --- /dev/null +++ b/tests/test_coi.py @@ -0,0 +1,48 @@ +from evaltools.evaluation import block_level_coi_preservation +from gerrychain.grid import Grid + + +def test_block_level_coi_preservation_small_cois(): + n = 10 + grid = Grid((n, n)) # default plan is 4 squares + + def coord_to_blocks(x, y): + return {(2 * n * (2 * x)) + (2 * y), (2 * n * (2 * x)) + (2 * y) + 1, + (2 * n * (2 * x + 1)) + (2 * y), + (2 * n * (2 * x + 1)) + (2 * y) + 1} + + # Let the "blocks" be the 20x20 grid. + unit_blocks = {(x, y): coord_to_blocks(x, y) for x, y in grid.graph.nodes} + block_pops = {b: 1 / 4 for b in range(20**2)} + + # As a contrived example, let the COIs be squares of size 3 + # tiling a 9x9 grid contained within the 10x10 grid. + coi_blocks = { + idx: set.union(*(coord_to_blocks(x + 3 * (idx // 3), y + 3 * (idx % 3)) + for x in range(3) for y in range(3))) + for idx in range(9) + } + + thresholds = [0.6, 0.75, 1.] + # dist 1 is [0, 4] x [0, 4] (inclusive) + # dist 2 is [0, 4] x [5, 9] + # dist 3 is [5, 9] x [0, 4] + # dist 4 is [5, 9] x [5, 9] + # => [60% threshold, 75% threshold, 100% threshold] + # [1, 1, 1] COI 0 is [0, 2] x [0, 2] -> all pop in dist 1 + # [1, 0, 0] COI 1 is [0, 2] x [3, 5] -> 2/3 pop in dist 1, 1/3 pop in dist 2 + # [1, 1, 1] COI 2 is [0, 2] x [6, 8] -> all pop in dist 2 + # [1, 0, 0] COI 3 is [3, 5] x [0, 2] -> 2/3 pop in dist 1, 1/3 pop in dist 3 + # [0, 0, 0] COI 4 is [3, 5] x [3, 5] -> + # [3, 4] x [3, 4] in dist 1 -> 4/9 pop in dist 1 + # [3, 4] x [5, 5] in dist 2 -> 2/9 pop in dist 2 + # [5, 5] x [3, 4] in dist 3 -> 2/9 pop in dist 3 + # [5, 5] x [5, 5] in dist 4 -> 1/9 pop in dist 4 + # [1, 0, 0] COI 5 is [3, 5] x [6, 8] -> 2/3 pop in dist 2, 1/3 pop in dist 4 + # [1, 1, 1] COI 6 is [6, 8] x [0, 2] -> all pop in dist 3 + # [1, 0, 0] COI 7 is [6, 8] x [3, 5] -> 2/3 pop in dist 3, 1/3 pop in dist 4 + # [1, 1, 1] COI 8 is [6, 8] x [6, 8] -> all pop in dist 4 + expected_scores = {0.6: 8, 0.75: 4, 1.0: 4} + coi_score_fn = block_level_coi_preservation(unit_blocks, coi_blocks, + block_pops, thresholds) + assert coi_score_fn(grid) == expected_scores From b8ed6904763b5d43bcf8533ec1f6967ffec5356a Mon Sep 17 00:00:00 2001 From: "Parker J. Rule" Date: Sat, 4 Dec 2021 16:24:31 -0500 Subject: [PATCH 2/5] Refine COI preservation score * Implements partial districts variant * Adds another test (for the case where COI size > district size) * Refines documentation --- evaltools/evaluation/coi.py | 34 +++++++++------ tests/test_coi.py | 86 ++++++++++++++++++++++++++++++++++++- 2 files changed, 105 insertions(+), 15 deletions(-) diff --git a/evaltools/evaluation/coi.py b/evaltools/evaluation/coi.py index 39d6bbf6..4507b336 100644 --- a/evaltools/evaluation/coi.py +++ b/evaltools/evaluation/coi.py @@ -28,11 +28,11 @@ def block_level_coi_preservation( COI population is much smaller than the typical district population, it is relatively easier to satisfy this criterion. * :math:`f_2(C_i, P, t) = 1` when :math:`100t\%%` of the population - of some district in :math:`P` is inside of :math:`C_i` (0 otherwise). - Intuitively, :math:`f_2` captures how districts are split across - communities of interest (though this notion is less easy to interpret - than :math:`f_1`). When the typical COI population is much larger - than the typical district population, it is relatively easier to + of some (ideally sized) district in :math:`P` is inside of :math:`C_i` + (0 otherwise). Intuitively, :math:`f_2` captures how districts are + split across communities of interest (though this notion is less easy + to interpret than :math:`f_1`). When the typical COI population is + much larger than the typical district population, it is easier to satisfy this criterion. When `partial_districts` is `True`, we use an alternative formula for @@ -56,9 +56,6 @@ def block_level_coi_preservation( :return: An updater that computes the COI preservation score for a partition for each threshold in `thresholds`. """ - if min(thresholds) <= 0.5: - raise ValueError('Minimum inclusion threshold must be >50%.') - # We precompute a sparse COI-unit intersection matrix. node_ordering = {k: idx for idx, k in enumerate(unit_blocks.keys())} unit_coi_inter_pops = np.zeros((len(coi_blocks), len(unit_blocks))) @@ -90,13 +87,24 @@ def score_fn(partition: Partition) -> Dict[float, float]: max_district_pop_in_coi = np.max(coi_dist_pops, axis=1) score_by_threshold = {} ideal_dist_pop = total_pop / dist_mat.shape[1] + coi_ideal_districts = coi_pops / ideal_dist_pop + for threshold in thresholds: - if not partial_districts: - score_by_threshold[threshold] = np.logical_or( - max_district_pop_in_coi >= threshold * ideal_dist_pop, - max_district_pop_in_coi >= threshold * coi_pops).sum() + # f_1: At least (100 * threshold)% of the population + # of the COI is contained within one district. + f1_scores = max_district_pop_in_coi >= threshold * coi_pops + if partial_districts: + # f_2: Number of districts (100 % threshold)% contained in + # in C_i, divided by the number of ideal districts in C_i. + over_threshold = (coi_dist_pops >= + threshold * ideal_dist_pop).sum(axis=1) + f2_scores = over_threshold / coi_ideal_districts else: - raise ValueError('Partial score not implemented yet.') + # f_2: At least (100 * threshold)% of the population of + # an ideal district is contained within the COI. + f2_scores = max_district_pop_in_coi >= threshold * ideal_dist_pop + score_by_threshold[threshold] = np.maximum(f1_scores, + f2_scores).sum() return score_by_threshold return score_fn diff --git a/tests/test_coi.py b/tests/test_coi.py index 4ae85ec8..aeed1fea 100644 --- a/tests/test_coi.py +++ b/tests/test_coi.py @@ -15,8 +15,8 @@ def coord_to_blocks(x, y): unit_blocks = {(x, y): coord_to_blocks(x, y) for x, y in grid.graph.nodes} block_pops = {b: 1 / 4 for b in range(20**2)} - # As a contrived example, let the COIs be squares of size 3 - # tiling a 9x9 grid contained within the 10x10 grid. + # Let the COIs be squares of size 3 tiling a 9x9 grid contained + # within the 10x10 grid. coi_blocks = { idx: set.union(*(coord_to_blocks(x + 3 * (idx // 3), y + 3 * (idx % 3)) for x in range(3) for y in range(3))) @@ -46,3 +46,85 @@ def coord_to_blocks(x, y): coi_score_fn = block_level_coi_preservation(unit_blocks, coi_blocks, block_pops, thresholds) assert coi_score_fn(grid) == expected_scores + + +def test_block_level_coi_preservation_large_cois(): + n = 10 + grid = Grid((n, n)) # default plan is 4 squares + + def coord_to_blocks(x, y): + return {(2 * n * (2 * x)) + (2 * y), (2 * n * (2 * x)) + (2 * y) + 1, + (2 * n * (2 * x + 1)) + (2 * y), + (2 * n * (2 * x + 1)) + (2 * y) + 1} + + # Let the "blocks" be the 20x20 grid. + unit_blocks = {(x, y): coord_to_blocks(x, y) for x, y in grid.graph.nodes} + block_pops = {b: 1 / 4 for b in range(20**2)} + + # Let the COIs be three 10x3 horizonal strips. + coi_blocks = { + idx: set.union(*(coord_to_blocks(x, y + 3 * idx) for x in range(10) + for y in range(3))) + for idx in range(3) + } + + thresholds = [0.5, 0.6, 0.75, 1] + # dist 1 is [0, 4] x [0, 4] (inclusive) + # dist 2 is [0, 4] x [5, 9] + # dist 3 is [5, 9] x [0, 4] + # dist 4 is [5, 9] x [5, 9] + # + # f_1 (COI containment within district): + # COI 0 is [0, 9] x [0, 2] -> 1/2 in dist 1, 1/2 in dist 2 + # COI 1 is [0, 9] x [3, 5] -> + # [0, 4] x [3, 4] -> 1/3 in dist 1 + # [5, 9] x [3, 4] -> 1/3 in dist 2 + # [0, 4] x [5, 5] -> 1/6 in dist 3 + # [5, 9] x [5, 5] -> 1/6 in dist 4 + # COI 2 is [0, 9] x [6, 8] -> 1/2 in dist 3, 1/2 in dist 4 + # Thus, f_1 contributes at most 2 points with a threshold of + # ≤50% and 0 points otherwise. + + # f_2 (district containment within COI): + # COI 0 is [0, 9] x [0, 2] -> + # [0, 4] x [0, 2] -> contains 60% of dist 1 pop + # [5, 9] x [0, 2] -> contains 60% of dist 2 pop + # COI 1 is [0, 9] x [3, 5] -> + # [0, 4] x [3, 4] -> contains 40% of dist 1 pop + # [5, 9] x [3, 4] -> contains 40% of dist 2 pop + # [0, 4] x [5, 5] -> contains 20% of dist 3 pop + # [5, 9] x [5, 5] -> contains 20% of dist 4 pop + # COI 2 is [0, 9] x [6, 8] -> + # [0, 4] x [6, 8] -> contains 60% of dist 3 pop + # [5, 9] x [6, 8] -> contains 60% of dist 4 pop + # Thus, f_2 contributes at most 2 points with a threshold of + # ≤60% and 0 points otherwise. + expected_scores = {0.5: 2, 0.6: 2, 0.75: 0, 1.0: 0} + score_fn = block_level_coi_preservation(unit_blocks=unit_blocks, + coi_blocks=coi_blocks, + block_pops=block_pops, + thresholds=thresholds, + partial_districts=False) + assert score_fn(grid) == expected_scores + + # For the alternative version of f_2, we have + # 2 districts ≤60% contained in COI 0 / 1.2 districts' pop in COI 0 + # => 2/1.2 for COI 0 when threshold ≤60%, 0 otherwise + # ...and similarly for COI 2. + expected_scores_partial_dists = { + 0.5: 4 / 1.2, + 0.6: 4 / 1.2, + 0.75: 0, + 1.0: 0 + } + score_fn_partial_dists = block_level_coi_preservation( + unit_blocks=unit_blocks, + coi_blocks=coi_blocks, + block_pops=block_pops, + thresholds=thresholds, + partial_districts=True) + scores_partial_dists = score_fn_partial_dists(grid) + assert scores_partial_dists.keys() == expected_scores_partial_dists.keys() + assert all( + abs(expected - scores_partial_dists[t]) < 1e-10 + for t, expected in expected_scores_partial_dists.items()) From ce58ac848e690ba482b882014ad84718f5c7398a Mon Sep 17 00:00:00 2001 From: "Parker J. Rule" Date: Sat, 4 Dec 2021 16:31:35 -0500 Subject: [PATCH 3/5] Tighten type annotations for unit_blocks and coi_blocks --- evaltools/evaluation/coi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evaltools/evaluation/coi.py b/evaltools/evaluation/coi.py index 4507b336..bc2c8db4 100644 --- a/evaltools/evaluation/coi.py +++ b/evaltools/evaluation/coi.py @@ -6,8 +6,8 @@ def block_level_coi_preservation( - unit_blocks: Dict[Any, Sequence[Any]], - coi_blocks: Dict[Any, Sequence[Any]], + unit_blocks: Dict[Any, set], + coi_blocks: Dict[Any, set], block_pops: Dict[Any, float], thresholds: Sequence[float], partial_districts: bool = False From e37881980543d81012ca5b9f03d7a2bb99222b98 Mon Sep 17 00:00:00 2001 From: "Parker J. Rule" Date: Fri, 10 Dec 2021 15:20:54 -0500 Subject: [PATCH 4/5] Update module for block_level_coi_processing --- evaltools/evaluation/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/evaltools/evaluation/__init__.py b/evaltools/evaluation/__init__.py index af7ff93a..4b617328 100644 --- a/evaltools/evaluation/__init__.py +++ b/evaltools/evaluation/__init__.py @@ -6,6 +6,7 @@ from .splits import splits, pieces from .population import deviations, unassigned_population from .contiguity import unassigned_units, contiguous +from .coi import block_level_coi_preservation __all__ = [ "splits", @@ -13,5 +14,6 @@ "deviations", "unassigned_population", "unassigned_units", - "contiguous" + "contiguous", + "block_level_coi_preservation" ] From 6211b4ec56f6cbef80d4be37f11615dacf9d9812 Mon Sep 17 00:00:00 2001 From: "Parker J. Rule" Date: Fri, 10 Dec 2021 16:08:33 -0500 Subject: [PATCH 5/5] Cleanup: replace .items() with .values() --- evaltools/evaluation/coi.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/evaltools/evaluation/coi.py b/evaltools/evaluation/coi.py index bc2c8db4..92eb1fec 100644 --- a/evaltools/evaluation/coi.py +++ b/evaltools/evaluation/coi.py @@ -68,8 +68,7 @@ def block_level_coi_preservation( coi_pops = np.array( [sum(block_pops[b] for b in blocks) for blocks in coi_blocks.values()]) unit_pops = np.array([ - sum(block_pops[b] for b in blocks) - for vtd, blocks in unit_blocks.items() + sum(block_pops[b] for b in blocks) for blocks in unit_blocks.values() ]) total_pop = unit_pops.sum()