Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

COI preservation updater #9

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion evaltools/evaluation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
from .splits import splits, pieces
from .population import deviations, unassigned_population
from .contiguity import unassigned_units, contiguous
from .coi import block_level_coi_preservation

# Public names re-exported by this package. Each entry needs its own trailing
# comma: two adjacent string literals are silently concatenated by Python,
# which would drop "contiguous" from the export list.
__all__ = [
    "splits",
    "pieces",
    "deviations",
    "unassigned_population",
    "unassigned_units",
    "contiguous",
    "block_level_coi_preservation",
]
109 changes: 109 additions & 0 deletions evaltools/evaluation/coi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""Community of interest (COI) preservation scores."""
from typing import Dict, Sequence, Any, Callable
from scipy.sparse import csr_matrix
from gerrychain import Partition
import numpy as np


def block_level_coi_preservation(
        unit_blocks: Dict[Any, set],
        coi_blocks: Dict[Any, set],
        block_pops: Dict[Any, float],
        thresholds: Sequence[float],
        partial_districts: bool = False
) -> Callable[[Partition], Dict[float, float]]:
    r"""Makes a COI preservation score function.

    We assume that dual graph units and communities of interest can both be
    represented (ideally losslessly) with a smaller common unit
    (typically Census blocks). For a given partition :math:`P`, inclusion
    threshold :math:`t` and communities of interest :math:`C_1, \dots, C_n`,
    we compute the preservation score

    .. math:: f(P, t) = \sum_{i=1}^n \max(f_1(C_i, P, t), f_2(C_i, P, t))

    where:

    * :math:`f_1(C_i, P, t) = 1` when :math:`100t\%` of the population
      of community of interest :math:`C_i` is inside of one district in
      :math:`P` (0 otherwise). Intuitively, :math:`f_1` captures how much
      a community of interest is split across districts. When the typical
      COI population is much smaller than the typical district population,
      it is relatively easier to satisfy this criterion.
    * :math:`f_2(C_i, P, t) = 1` when :math:`100t\%` of the population
      of some (ideally sized) district in :math:`P` is inside of :math:`C_i`
      (0 otherwise). Intuitively, :math:`f_2` captures how districts are
      split across communities of interest (though this notion is less easy
      to interpret than :math:`f_1`). When the typical COI population is
      much larger than the typical district population, it is easier to
      satisfy this criterion.

    When `partial_districts` is `True`, we use an alternative formula for
    :math:`f_2`. Specifically, :math:`f_2'(C_i, P, t)` is the number of
    districts :math:`100t\%` contained in :math:`C_i` divided by the
    population of :math:`C_i` (in ideal districts). A COI with zero
    population contains no ideal districts, so its :math:`f_2'` is defined
    as 0 instead of the undefined 0/0.

    The ideal district population is the total population of all units in
    `unit_blocks` divided by the number of districts in the partition.

    COI-unit intersection populations are precomputed, so generating the
    score function may be slow for large dual graphs and/or large collections
    of COIs.

    :param unit_blocks: A mapping from dual graph units to the blocks
        contained in each unit. The keys must be the same as the nodes
        in the dual graph.
    :param coi_blocks: A mapping from COIs to the blocks contained in
        each COI.
    :param block_pops: The block populations.
    :param thresholds: The threshold values to use (ranging in [0, 1]).
        The values are copied when the score function is created.
    :param partial_districts: If `True`, an alternative (non-integer-valued)
        formula is used to compute COI preservation scores.
    :return: An updater that computes the COI preservation score for a
        partition for each threshold in `thresholds`.
    :raises KeyError: If a block of a unit or COI is missing from
        `block_pops`, or (when the updater is called) if the partition
        assigns a node that is not a key of `unit_blocks`.
    """
    # Snapshot the thresholds so that later mutation of the caller's sequence
    # (or a one-shot iterable) cannot change what the updater computes.
    thresholds = tuple(thresholds)

    # We precompute a sparse COI-unit intersection matrix
    # (rows: COIs, columns: units, both in dict iteration order).
    node_ordering = {unit: idx for idx, unit in enumerate(unit_blocks.keys())}
    unit_coi_inter_pops = np.zeros((len(coi_blocks), len(unit_blocks)))
    for unit_idx, blocks_in_unit in enumerate(unit_blocks.values()):
        for coi_idx, blocks_in_coi in enumerate(coi_blocks.values()):
            unit_coi_inter_pops[coi_idx, unit_idx] = sum(
                block_pops[b] for b in blocks_in_coi & blocks_in_unit)
    unit_coi_inter_pops = csr_matrix(unit_coi_inter_pops)

    coi_pops = np.array(
        [sum(block_pops[b] for b in blocks) for blocks in coi_blocks.values()])
    unit_pops = np.array([
        sum(block_pops[b] for b in blocks) for blocks in unit_blocks.values()
    ])
    total_pop = unit_pops.sum()

    def score_fn(partition: Partition) -> Dict[float, float]:
        # Convert the assignment to a 0/1 (unit x district) matrix encoding.
        dist_ordering = {
            dist: idx
            for idx, dist in enumerate(partition.parts.keys())
        }
        dist_mat = np.zeros((len(unit_blocks), len(dist_ordering)))
        for node, dist in partition.assignment.items():
            dist_mat[node_ordering[node], dist_ordering[dist]] = 1

        # Entry (i, j) is the population of COI i that lies in district j.
        coi_dist_pops = unit_coi_inter_pops @ dist_mat
        max_district_pop_in_coi = np.max(coi_dist_pops, axis=1)
        ideal_dist_pop = total_pop / dist_mat.shape[1]
        coi_ideal_districts = coi_pops / ideal_dist_pop

        score_by_threshold = {}
        for threshold in thresholds:
            # f_1: At least (100 * threshold)% of the population
            # of the COI is contained within one district.
            f1_scores = max_district_pop_in_coi >= threshold * coi_pops
            if partial_districts:
                # f_2': Number of districts (100 * threshold)% contained
                # in C_i, divided by the number of ideal districts in C_i.
                over_threshold = (coi_dist_pops >=
                                  threshold * ideal_dist_pop).sum(axis=1)
                # Zero-population COIs are left at 0 rather than dividing
                # by zero, which would turn the whole score into NaN/inf.
                f2_scores = np.divide(
                    over_threshold,
                    coi_ideal_districts,
                    out=np.zeros(len(coi_pops)),
                    where=coi_ideal_districts > 0)
            else:
                # f_2: At least (100 * threshold)% of the population of
                # an ideal district is contained within the COI.
                f2_scores = (max_district_pop_in_coi >=
                             threshold * ideal_dist_pop)
            # Return plain Python floats, as the annotation promises.
            score_by_threshold[threshold] = float(
                np.maximum(f1_scores, f2_scores).sum())
        return score_by_threshold

    return score_fn
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

# Package dependencies, one entry per package. "scipy" (imported by
# evaltools/evaluation/coi.py) is already listed, so it is not repeated.
# Every entry is comma-separated: adjacent string literals would otherwise
# be concatenated into a single bogus package name.
requirements = [
    "pandas", "scipy", "networkx", "geopandas", "shapely", "matplotlib",
    "gerrychain", "sortedcontainers", "gurobipy", "jsonlines",
    "opencv-python",
]

setup(
Expand Down
130 changes: 130 additions & 0 deletions tests/test_coi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from evaltools.evaluation import block_level_coi_preservation
from gerrychain.grid import Grid


def test_block_level_coi_preservation_small_cois():
    """COIs much smaller than a district are scored through f_1."""
    side = 10
    plan = Grid((side, side))  # default plan is 4 squares
    blocks_per_row = 2 * side

    def blocks_of(x, y):
        # Each grid unit covers a 2x2 patch of the 20x20 "block" grid;
        # blocks are numbered row-major.
        return {
            blocks_per_row * (2 * x + dx) + (2 * y + dy)
            for dx in (0, 1)
            for dy in (0, 1)
        }

    unit_blocks = {(x, y): blocks_of(x, y) for (x, y) in plan.graph.nodes}
    # Four blocks of population 1/4 give every unit a population of 1.
    block_pops = dict.fromkeys(range(blocks_per_row**2), 1 / 4)

    # Nine 3x3 COIs tile the 9x9 corner of the 10x10 grid.
    coi_blocks = {}
    for idx in range(9):
        x0, y0 = 3 * (idx // 3), 3 * (idx % 3)
        members = set()
        for dx in range(3):
            for dy in range(3):
                members |= blocks_of(x0 + dx, y0 + dy)
        coi_blocks[idx] = members

    thresholds = [0.6, 0.75, 1.]
    # The districts are the four 5x5 quadrants, split at coordinate 4|5.
    # Largest share of each COI inside a single district:
    #   COIs 0, 2, 6, 8 (corner squares)   -> 1    passes 60%, 75%, 100%
    #   COIs 1, 3, 5, 7 (straddle 1 split) -> 2/3  passes 60% only
    #   COI 4 (straddles both splits)      -> 4/9  passes nothing
    # A COI holds 9 people against a district of 25, so f_2 never fires.
    expected_scores = {0.6: 8, 0.75: 4, 1.0: 4}

    coi_score_fn = block_level_coi_preservation(
        unit_blocks=unit_blocks,
        coi_blocks=coi_blocks,
        block_pops=block_pops,
        thresholds=thresholds,
    )
    assert coi_score_fn(plan) == expected_scores


def test_block_level_coi_preservation_large_cois():
    """COIs larger than a district are scored through f_2 (both variants)."""
    side = 10
    plan = Grid((side, side))  # default plan is 4 squares
    blocks_per_row = 2 * side

    def blocks_of(x, y):
        # Each grid unit covers a 2x2 patch of the 20x20 "block" grid;
        # blocks are numbered row-major.
        return {
            blocks_per_row * (2 * x + dx) + (2 * y + dy)
            for dx in (0, 1)
            for dy in (0, 1)
        }

    unit_blocks = {(x, y): blocks_of(x, y) for (x, y) in plan.graph.nodes}
    # Four blocks of population 1/4 give every unit a population of 1.
    block_pops = dict.fromkeys(range(blocks_per_row**2), 1 / 4)

    # Three 10x3 strips: strip k covers x in [0, 9], y in [3k, 3k + 2].
    coi_blocks = {}
    for strip in range(3):
        members = set()
        for x in range(10):
            for y in range(3):
                members |= blocks_of(x, y + 3 * strip)
        coi_blocks[strip] = members

    thresholds = [0.5, 0.6, 0.75, 1]
    # The districts are the four 5x5 quadrants (25 people each); every strip
    # holds 30 people.
    #
    # f_1 (share of the COI inside one district):
    #   strip 0 (y in [0, 2]) -> 1/2 in each of the two y <= 4 quadrants
    #   strip 1 (y in [3, 5]) -> 1/3, 1/3 (y <= 4) and 1/6, 1/6 (y >= 5)
    #   strip 2 (y in [6, 8]) -> 1/2 in each of the two y >= 5 quadrants
    #   => strips 0 and 2 pass for thresholds <= 50%.
    #
    # f_2 (share of an ideal district inside the COI):
    #   strip 0 -> 15/25 = 60% of each y <= 4 quadrant
    #   strip 1 -> 10/25 = 40% (y <= 4) and 5/25 = 20% (y >= 5)
    #   strip 2 -> 15/25 = 60% of each y >= 5 quadrant
    #   => strips 0 and 2 pass for thresholds <= 60%.
    expected_scores = {0.5: 2, 0.6: 2, 0.75: 0, 1.0: 0}
    score_fn = block_level_coi_preservation(
        unit_blocks=unit_blocks,
        coi_blocks=coi_blocks,
        block_pops=block_pops,
        thresholds=thresholds,
        partial_districts=False,
    )
    assert score_fn(plan) == expected_scores

    # Alternative f_2: strips 0 and 2 each have 2 districts that are at
    # least 60% contained, and each strip is worth 30/25 = 1.2 ideal
    # districts, so each scores 2/1.2 for thresholds <= 60% and 0 above.
    per_strip = 2 / 1.2
    expected_scores_partial_dists = {
        0.5: 2 * per_strip,
        0.6: 2 * per_strip,
        0.75: 0,
        1.0: 0,
    }
    score_fn_partial_dists = block_level_coi_preservation(
        unit_blocks=unit_blocks,
        coi_blocks=coi_blocks,
        block_pops=block_pops,
        thresholds=thresholds,
        partial_districts=True,
    )
    scores_partial_dists = score_fn_partial_dists(plan)
    assert scores_partial_dists.keys() == expected_scores_partial_dists.keys()
    for t, expected in expected_scores_partial_dists.items():
        assert abs(expected - scores_partial_dists[t]) < 1e-10