# Running the gerrychain to find a map without holes on congressional districts

@authors: vcle, bpuhani

In [1]:
import io
import random
from contextlib import redirect_stdout

import maup
import pandas as pd
from shapely.ops import unary_union

import utilities as util
import matplotlib.pyplot as plt
import geopandas as gpd
import numpy as np
from gerrychain import Graph, Partition, proposals, updaters, constraints, accept, MarkovChain, Election
from gerrychain.tree import bipartition_tree
from gerrychain.updaters import cut_edges, Tally
from gerrychain.proposals import recom, propose_random_flip
from gerrychain.accept import always_accept
from functools import partial
from gerrychain.metrics import efficiency_gap  # get the efficiency gap directly from gerrychain

## Loading the needed data.
For this notebook to work, we assume that you have already run the following notebooks:
* `0_IL_import_and_explore_data.ipynb`
* `B_2_IL_clean_maup_with_congress.ipynb`
* `B_4_IL_find_map_without_holes_vest20.ipynb`

In [2]:
# The GeoDataFrame and the graph are both loaded from the same shapefile.
il_shapefile_path = "il_data/IL_congress_without_holes.shp"
il_df: gpd.GeoDataFrame = util.load_shapefile(il_shapefile_path)
il_graph: Graph = util.load_graph(il_shapefile_path)

Loading shapefile from il_data/IL_congress_without_holes.shp...
Shapefile data loaded from cache.
Loading shapefile graph from il_data/IL_congress_without_holes.shp...
Shapefile data loaded from cache.


In [21]:
# Placeholders for the partition reached at each 5_000-step mark; every name is
# reassigned further down by the matching run / checkpoint cell.
# NOTE(review): presumably these exist so the checkpoint cells can be executed
# (loading cached data) even when a run cell was skipped — confirm against util.checkpoint.
(
    partition_at_5_000,
    partition_at_10_000,
    partition_at_15_000,
    partition_at_20_000,
    partition_at_25_000,
    partition_at_30_000,
    partition_at_35_000,
    partition_at_40_000,
) = (None,) * 8

## Set up the updaters

In [4]:
def has_holes(partition, district) -> bool:
    """Tell whether the merged geometry of ``district`` has at least one interior ring.

    Args:
        partition: Partition whose graph nodes carry a ``"geometry"`` attribute.
        district: Key into ``partition.parts`` naming the district to inspect.

    Returns:
        ``True`` if the (repaired) union of the district's node geometries is a
        Polygon or MultiPolygon with an interior ring. ``False`` otherwise,
        including for any other geometry type, which is reported via ``print``.
    """
    node_geometries = [partition.graph.nodes[node]["geometry"]
                       for node in partition.parts[district]]

    # Union all node geometries, then apply buffer(0) as an attempt to repair
    # an invalid result.
    repaired = unary_union(node_geometries).buffer(0)
    kind = repaired.geom_type

    # Normalise both supported geometry types to "a collection of polygons".
    if kind == "Polygon":
        polygons = [repaired]
    elif kind == "MultiPolygon":
        polygons = repaired.geoms
    else:
        print(f"Not a polygon geometry: {kind}")
        return False  # Anything that is not polygonal is ignored.

    # A hole shows up as an interior ring on any of the polygons.
    return any(len(polygon.interiors) > 0 for polygon in polygons)

In [5]:
il_updaters = dict(
    # Sum of the "TOTPOP" column per district.
    total_population=Tally("TOTPOP", alias="total_population"),
    # hisp_population=Tally("HISP", alias="hisp_population"),  # not needed apparently
    cut_edges=cut_edges,
    # One 0/1 flag per district (in the order of partition.parts): 1 if the district has holes.
    district_has_holes=lambda partition: [
        int(has_holes(partition, part)) for part in partition.parts
    ],
)

In [6]:
# Each entry: (election name, Democratic vote column, Republican vote column).
elections = [
    Election(name, {"Dem": dem_column, "Rep": rep_column})
    for name, dem_column, rep_column in (
        ("PRE20", "G20PRED", "G20PRER"),
        ("USS20", "G20USSD", "G20USSR"),
    )
]

In [7]:
# adding the elections to the updaters
election_updaters = {election.name: election for election in elections}
il_updaters.update(election_updaters)

In [8]:
# Set up the initial partition object
initial_partition = Partition(
    il_graph,
    assignment="district",  # use the "district" column because this is the new one without holes.
    updaters=il_updaters,
)

In [9]:
# Define the ideal population
ideal_population = sum(initial_partition["total_population"].values()) / len(initial_partition)
print("Nr of districts:", len(initial_partition))
print("Ideal population:", ideal_population)

Nr of districts: 17
Ideal population: 753676.9411764706


In [10]:
# Define the recom proposal
proposal = partial(
    recom,
    pop_col="TOTPOP",
    pop_target=ideal_population,
    epsilon=0.02,
    method=partial(
        bipartition_tree,
        max_attempts=100,
        allow_pair_reselection=True
    )
)

In [11]:
# define the lists that are needed to track the changes
list_of_nr_of_cut_edges = []

list_of_dem_won_districts_pre20 = []
list_of_dem_won_districts_uss20 = []

list_of_eg_pre20 = []
list_of_eg_uss20 = []

list_of_dem_percents_pre20 = []
list_of_dem_percents_uss20 = []

In [12]:
# create a checkpoint for all the lists in one big dictionary
checkpoint_dict = {
    "list_of_nr_of_cut_edges": list_of_nr_of_cut_edges,
    "list_of_dem_won_districts_pre20": list_of_dem_won_districts_pre20,
    "list_of_dem_won_districts_uss20": list_of_dem_won_districts_uss20,
    "list_of_eg_pre20": list_of_eg_pre20,
    "list_of_eg_uss20": list_of_eg_uss20,
    "list_of_dem_percents_pre20": list_of_dem_percents_pre20,
    "list_of_dem_percents_uss20": list_of_dem_percents_uss20
}

In [13]:
def run_the_chain(nr_of_total_steps: int, start_partition: Partition, offset: int = 0) -> Partition:
    """Run one segment of the chain and record its metrics in ``checkpoint_dict``.

    The segment covers the steps from ``offset`` up to ``nr_of_total_steps`` of the
    overall run. For every recorded partition one entry is appended to each list
    in the module-level ``checkpoint_dict``.

    Args:
        nr_of_total_steps: Step mark of the overall run at which this segment ends.
        start_partition: Partition the segment starts from.
        offset: Number of steps of the overall run that were already done before
            this segment. ``0`` (the default) means a fresh run.

    Returns:
        The last partition produced by the chain, or ``start_partition`` if the
        segment contains no new step.

    Raises:
        ValueError: If ``offset`` is negative or larger than ``nr_of_total_steps``.
    """
    if not 0 <= offset <= nr_of_total_steps:
        raise ValueError(
            f"offset must be between 0 and nr_of_total_steps ({nr_of_total_steps}), got {offset}"
        )

    steps_in_segment = nr_of_total_steps - offset
    # A MarkovChain yields its initial state as the first element. When resuming,
    # that state is the final partition of the previous segment and was already
    # recorded there, so the chain runs one extra step and the first state is skipped.
    # The progress bar therefore counts one more step than the segment records.
    resuming = offset > 0

    # Set up the chain
    chain = MarkovChain(
        proposal=proposal,
        # All bounds are derived from initial_partition (not start_partition) so
        # that every segment of the run is held to the same limits.
        constraints=[
            # Compactness constraint: at most twice the cut edges of the initial partition
            constraints.UpperBound(lambda p: len(p["cut_edges"]), 2 * len(initial_partition["cut_edges"])),
            # Population constraint
            constraints.within_percent_of_ideal_population(initial_partition, 0.02, "total_population"),
            # No-holes constraint: every per-district flag must be 0 (lower and upper bound
            # are both 0, i.e. no district may have a hole)
            constraints.Bounds(lambda p: p["district_has_holes"], (0, 0))
        ],
        accept=always_accept,
        initial_state=start_partition,
        total_steps=steps_in_segment + 1 if resuming else steps_in_segment
    )
    last_partition: Partition = start_partition

    for (i, partition) in enumerate(chain.with_progress_bar()):
        if resuming and i == 0:
            # Already recorded as the last state of the previous segment.
            continue

        last_partition = partition

        # Calculate and append the efficiency gap values for each election to checkpoint_dict
        checkpoint_dict["list_of_eg_pre20"].append(efficiency_gap(partition["PRE20"]))
        checkpoint_dict["list_of_eg_uss20"].append(efficiency_gap(partition["USS20"]))

        # Append the sorted percentages of Democratic votes for each election to checkpoint_dict
        checkpoint_dict["list_of_dem_percents_pre20"].append(sorted(partition["PRE20"].percents("Dem")))
        checkpoint_dict["list_of_dem_percents_uss20"].append(sorted(partition["USS20"].percents("Dem")))

        # Append the number of districts won by the Democratic Party for each election to checkpoint_dict
        checkpoint_dict["list_of_dem_won_districts_pre20"].append(partition["PRE20"].wins("Dem"))
        checkpoint_dict["list_of_dem_won_districts_uss20"].append(partition["USS20"].wins("Dem"))

        # Append the number of cut edges for this partition to checkpoint_dict
        checkpoint_dict["list_of_nr_of_cut_edges"].append(len(partition["cut_edges"]))

    return last_partition

## Run the first 5_000 steps

In [14]:
# First segment: start from the initial partition and run up to the 5_000 mark.
partition_at_5_000 = run_the_chain(
    nr_of_total_steps=5_000,
    start_partition=initial_partition,
)

### Saving the progress for the first 5_000 steps

In [15]:
# Checkpoint the tracked metrics for the 5_000 mark.
# NOTE(review): the helper appears to save on first use and load from the cache
# afterwards — confirm against util.checkpoint.
checkpoint_dict = util.checkpoint(
    "IL_plot_results_5_000",
    checkpoint_dict,
)

Checkpoint: IL_plot_results_5_000
Data loaded from cache.


In [16]:
# Checkpoint the partition of the 5_000 mark; the helper's result is used as the assignment.
assignment_at_5_000 = util.checkpoint("IL_Gerrychain_step_5_000", partition_at_5_000)
# Rebuild the partition from that assignment on the loaded graph and updaters.
partition_at_5_000 = Partition(graph=il_graph, updaters=il_updaters, assignment=assignment_at_5_000)

Checkpoint: IL_Gerrychain_step_5_000
Data loaded from cache.


## Run the steps from 5_000 to 10_000

In [18]:
# Continue from the 5_000 mark up to the 10_000 mark.
partition_at_10_000 = run_the_chain(
    nr_of_total_steps=10_000,
    start_partition=partition_at_5_000,
    offset=5_000,
)

  0%|          | 0/5000 [00:00<?, ?it/s]

### Save the progress for the 10_000 steps mark

In [19]:
# Checkpoint the partition of the 10_000 mark; the helper's result is used as the assignment.
assignment_at_10_000 = util.checkpoint("IL_Gerrychain_step_10_000", partition_at_10_000)
# Rebuild the partition from that assignment on the loaded graph and updaters.
partition_at_10_000 = Partition(graph=il_graph, updaters=il_updaters, assignment=assignment_at_10_000)

Checkpoint: IL_Gerrychain_step_10_000
Saving data...
Data saved successfully to checkpoints/IL_Gerrychain_step_10_000.pkl.


In [20]:
# Checkpoint the tracked metrics for the 10_000 mark.
# NOTE(review): the helper appears to save on first use and load from the cache
# afterwards — confirm against util.checkpoint.
checkpoint_dict = util.checkpoint(
    "IL_plot_results_10_000",
    checkpoint_dict,
)

Checkpoint: IL_plot_results_10_000
Saving data...
Data saved successfully to checkpoints/IL_plot_results_10_000.pkl.


## Run the steps from 10_000 to 15_000

In [21]:
# Continue from the 10_000 mark up to the 15_000 mark.
partition_at_15_000 = run_the_chain(
    nr_of_total_steps=15_000,
    start_partition=partition_at_10_000,
    offset=10_000,
)

  0%|          | 0/5000 [00:00<?, ?it/s]

### Save the progress for the 15_000 steps mark

In [14]:
# Checkpoint the partition of the 15_000 mark; the helper's result is used as the assignment.
assignment_at_15_000 = util.checkpoint("IL_Gerrychain_step_15_000", partition_at_15_000)
# Rebuild the partition from that assignment on the loaded graph and updaters.
partition_at_15_000 = Partition(graph=il_graph, updaters=il_updaters, assignment=assignment_at_15_000)

Checkpoint: IL_Gerrychain_step_15_000
Data loaded from cache.


In [15]:
# Checkpoint the tracked metrics for the 15_000 mark.
# NOTE(review): the helper appears to save on first use and load from the cache
# afterwards — confirm against util.checkpoint.
checkpoint_dict = util.checkpoint(
    "IL_plot_results_15_000",
    checkpoint_dict,
)

Checkpoint: IL_plot_results_15_000
Data loaded from cache.


## Run the steps from 15_000 to 20_000

In [17]:
# Continue from the 15_000 mark up to the 20_000 mark.
partition_at_20_000 = run_the_chain(
    nr_of_total_steps=20_000,
    start_partition=partition_at_15_000,
    offset=15_000,
)

  0%|          | 0/5000 [00:00<?, ?it/s]

### Save the progress for the 20_000 steps mark

In [18]:
# Checkpoint the partition of the 20_000 mark; the helper's result is used as the assignment.
assignment_at_20_000 = util.checkpoint("IL_Gerrychain_step_20_000", partition_at_20_000)
# Rebuild the partition from that assignment on the loaded graph and updaters.
partition_at_20_000 = Partition(graph=il_graph, updaters=il_updaters, assignment=assignment_at_20_000)

Checkpoint: IL_Gerrychain_step_20_000
Saving data...
Data saved successfully to checkpoints/IL_Gerrychain_step_20_000.pkl.


In [19]:
# Checkpoint the tracked metrics for the 20_000 mark.
# NOTE(review): the helper appears to save on first use and load from the cache
# afterwards — confirm against util.checkpoint.
checkpoint_dict = util.checkpoint(
    "IL_plot_results_20_000",
    checkpoint_dict,
)

Checkpoint: IL_plot_results_20_000
Saving data...
Data saved successfully to checkpoints/IL_plot_results_20_000.pkl.


## Run the steps from 20_000 to 25_000

In [20]:
# Continue from the 20_000 mark up to the 25_000 mark.
partition_at_25_000 = run_the_chain(
    nr_of_total_steps=25_000,
    start_partition=partition_at_20_000,
    offset=20_000,
)

  0%|          | 0/5000 [00:00<?, ?it/s]

### Save the progress for the 25_000 steps mark

In [14]:
# Checkpoint the partition of the 25_000 mark; the helper's result is used as the assignment.
assignment_at_25_000 = util.checkpoint("IL_Gerrychain_step_25_000", partition_at_25_000)
# Rebuild the partition from that assignment on the loaded graph and updaters.
partition_at_25_000 = Partition(graph=il_graph, updaters=il_updaters, assignment=assignment_at_25_000)

Checkpoint: IL_Gerrychain_step_25_000
Data loaded from cache.


In [15]:
# Checkpoint the tracked metrics for the 25_000 mark.
# NOTE(review): the helper appears to save on first use and load from the cache
# afterwards — confirm against util.checkpoint.
checkpoint_dict = util.checkpoint(
    "IL_plot_results_25_000",
    checkpoint_dict,
)

Checkpoint: IL_plot_results_25_000
Data loaded from cache.


## Run the steps from 25_000 to 30_000

In [17]:
# Continue from the 25_000 mark up to the 30_000 mark.
partition_at_30_000 = run_the_chain(
    nr_of_total_steps=30_000,
    start_partition=partition_at_25_000,
    offset=25_000,
)

  0%|          | 0/5000 [00:00<?, ?it/s]

### Save the progress for the 30_000 steps mark

In [18]:
# Checkpoint the partition of the 30_000 mark; the helper's result is used as the assignment.
assignment_at_30_000 = util.checkpoint("IL_Gerrychain_step_30_000", partition_at_30_000)
# Rebuild the partition from that assignment on the loaded graph and updaters.
partition_at_30_000 = Partition(graph=il_graph, updaters=il_updaters, assignment=assignment_at_30_000)

Checkpoint: IL_Gerrychain_step_30_000
Saving data...
Data saved successfully to checkpoints/IL_Gerrychain_step_30_000.pkl.


In [19]:
# Checkpoint the tracked metrics for the 30_000 mark.
# NOTE(review): the helper appears to save on first use and load from the cache
# afterwards — confirm against util.checkpoint.
checkpoint_dict = util.checkpoint(
    "IL_plot_results_30_000",
    checkpoint_dict,
)

Checkpoint: IL_plot_results_30_000
Saving data...
Data saved successfully to checkpoints/IL_plot_results_30_000.pkl.


## Run the steps from 30_000 to 35_000

In [20]:
# Continue from the 30_000 mark up to the 35_000 mark.
partition_at_35_000 = run_the_chain(
    nr_of_total_steps=35_000,
    start_partition=partition_at_30_000,
    offset=30_000,
)

  0%|          | 0/5000 [00:00<?, ?it/s]

### Save the progress for the 35_000 steps mark

In [14]:
# Checkpoint the partition of the 35_000 mark; the helper's result is used as the assignment.
assignment_at_35_000 = util.checkpoint("IL_Gerrychain_step_35_000", partition_at_35_000)
# Rebuild the partition from that assignment on the loaded graph and updaters.
partition_at_35_000 = Partition(graph=il_graph, updaters=il_updaters, assignment=assignment_at_35_000)

Checkpoint: IL_Gerrychain_step_35_000
Data loaded from cache.


In [15]:
# Checkpoint the tracked metrics for the 35_000 mark.
# NOTE(review): the helper appears to save on first use and load from the cache
# afterwards — confirm against util.checkpoint.
checkpoint_dict = util.checkpoint(
    "IL_plot_results_35_000",
    checkpoint_dict,
)

Checkpoint: IL_plot_results_35_000
Data loaded from cache.


## Run the steps from 35_000 to 40_000

In [18]:
# Continue from the 35_000 mark up to the 40_000 mark.
partition_at_40_000 = run_the_chain(
    nr_of_total_steps=40_000,
    start_partition=partition_at_35_000,
    offset=35_000,
)

  0%|          | 0/5000 [00:00<?, ?it/s]

### Save the progress for the 40_000 steps mark

In [24]:
# Checkpoint the partition of the 40_000 mark; the helper's result is used as the assignment.
assignment_at_40_000 = util.checkpoint("IL_Gerrychain_step_40_000", partition_at_40_000)
# Rebuild the partition from that assignment on the loaded graph and updaters.
partition_at_40_000 = Partition(graph=il_graph, updaters=il_updaters, assignment=assignment_at_40_000)

Checkpoint: IL_Gerrychain_step_40_000
Data loaded from cache.


In [20]:
# Checkpoint the tracked metrics for the 40_000 mark.
# NOTE(review): the helper appears to save on first use and load from the cache
# afterwards — confirm against util.checkpoint.
checkpoint_dict = util.checkpoint(
    "IL_plot_results_40_000",
    checkpoint_dict,
)

Checkpoint: IL_plot_results_40_000
Saving data...
Data saved successfully to checkpoints/IL_plot_results_40_000.pkl.
