In [1]:
import matplotlib.pyplot as plt
import geopandas as gpd
import numpy as np
from gerrychain import Graph, Partition, proposals, updaters, constraints, accept, MarkovChain, Election
from gerrychain.tree import bipartition_tree
from gerrychain.updaters import cut_edges, Tally
from gerrychain.proposals import recom
from gerrychain.accept import always_accept
from functools import partial
import utilities

In [2]:
or_graph = Graph.from_file('or_data/OR.shp')
or_df = gpd.read_file("or_data/OR.shp")

In [3]:
# Set up Election updater
election = Election("PRE20", {"Dem": "G20PRED", "Rep": "G20PRER"})

In [4]:
# Set up the initial partition object
initial_partition = Partition(
    or_graph,
    assignment="SEND",  # as per assignment
    updaters={
        # setup updaters, that get updated per run of the chain
        "total_population": Tally("TOTPOP", alias="total_population"),
        "hisp_population": Tally("HISP", alias="hisp_population"),
        "cut_edges": cut_edges,
        "PRE20": election
    }
)

In [5]:
# Define the ideal population
ideal_population = sum(initial_partition["total_population"].values()) / len(initial_partition)
print(len(initial_partition))
print(ideal_population)

30
141241.86666666667


In [6]:
# Define the recom proposal
proposal = partial(
    recom,
    pop_col="TOTPOP",
    pop_target=ideal_population,
    epsilon=0.02, # changed from 0.02 to 0.1 because the initial partition was not within 2% of the ideal population
    method = partial(
        bipartition_tree,
        max_attempts=100,
        allow_pair_reselection=True  # <-- This is the only change
    )
)

In [7]:
def calculate_nr_of_majority_hisp_pop(partition: Partition) -> int:
    """Calculates the number of districts with majority hispanic population"""
    hisp_population = partition["hisp_population"]
    total_population = partition["total_population"]
    majority_hisp_pop = 0
    for i in hisp_population.keys():
        if hisp_population[i] > (0.5 * total_population[i]):
            majority_hisp_pop += 1
    return majority_hisp_pop

In [8]:
def draw_histogram_of_nr_of_cut_edges(_list_of_nr_of_cut_edges: [], nr_of_total_steps: int) -> None:
    """Draws the histogram for the number of cut edges"""
    plt.hist(_list_of_nr_of_cut_edges)
    plt.title(f"{nr_of_total_steps} steps - cut edges")
    plt.xlabel("Number of cut edges")
    plt.ylabel("Frequency")
    plt.savefig(f"images/OR_{nr_of_total_steps}_histogram_of_nr_of_cut_edges.png")
    plt.show()
    pass

In [9]:
def draw_histogram_of_nr_of_majority_hisp_pop(_list_of_majority_hisp_pop: [], nr_of_total_steps: int) -> None:
    """Draws the histogram for the number of districts with majority hispanic population"""
    plt.hist(_list_of_majority_hisp_pop)
    plt.title(f"{nr_of_total_steps} steps - districts with majority hispanic population")
    plt.xlabel("Number of districts with majority hispanic population")
    plt.ylabel("Frequency")
    plt.savefig(f"images/OR_{nr_of_total_steps}_histogram_of_nr_of_majority_hisp_pop.png")
    plt.show()
    pass

In [10]:
def draw_histogram_of_nr_of_dem_won_districts(_list_of_dem_won_districts: [], nr_of_total_steps: int) -> None:
    """Draws the histogram for the number of districts won by the Democratic party"""
    plt.hist(_list_of_dem_won_districts)
    plt.title(f"{nr_of_total_steps} steps - districts won by the Democratic party")
    plt.xlabel("Number of districts won by the Democratic party")
    plt.ylabel("Frequency")
    plt.savefig(f"images/OR_{nr_of_total_steps}_histogram_of_nr_of_dem_won_districts.png")
    plt.show()
    pass

In [11]:
# define the three lists that are needed to track the changes
list_of_nr_of_cut_edges = []
list_of_majority_hisp_pop = []
list_of_dem_won_districts = []

In [12]:
def run_the_chain(nr_of_total_steps: int, start_partition: Partition, offset: int = 0) -> Partition:
    """Runs the chain for the specified number of steps. Returns the last partition"""

    # Set up the chain
    chain = MarkovChain(
        proposal=proposal,
        constraints=[
            # Compactness constraint
            constraints.UpperBound(lambda p: len(p["cut_edges"]), 2 * len(initial_partition["cut_edges"])),
            # Population constraint
            # had to change the percentage to 0.1, because the initial partition was not within 2% of the ideal population
            constraints.within_percent_of_ideal_population(initial_partition, 0.1, "total_population")
        ],
        accept=always_accept,
        initial_state=start_partition,
        total_steps=nr_of_total_steps
    )
    last_partition: Partition = start_partition

    for (i, partition) in enumerate(chain.with_progress_bar()):
        # append the number of cut edges for this proposal to the list
        list_of_nr_of_cut_edges.append(len(partition.cut_edges))
        # append the number of majority hispanic population for this proposal to the list
        list_of_majority_hisp_pop.append(calculate_nr_of_majority_hisp_pop(partition))
        # append the number of districts won by the Democratic party for this proposal to the list
        list_of_dem_won_districts.append(partition["PRE20"].wins("Dem"))

        # draw every 1000 steps the diagram
        if i % 1000 == 0:
            print(f"Drawing diagram for step {i + offset}")
            utilities.render_oregon_partition(or_df, partition, f"OR_Gerrychain_step_{i + offset}", show=False)

        # store the last partition
        last_partition = partition

    draw_histogram_of_nr_of_cut_edges(list_of_nr_of_cut_edges, nr_of_total_steps + offset)
    draw_histogram_of_nr_of_majority_hisp_pop(list_of_majority_hisp_pop, nr_of_total_steps + offset)
    draw_histogram_of_nr_of_dem_won_districts(list_of_dem_won_districts, nr_of_total_steps + offset)

    utilities.render_oregon_partition(or_df, last_partition, f"OR_Gerrychain_step_{nr_of_total_steps + offset}", show=False)
    return last_partition

In [None]:
partition_at_20_000 = run_the_chain(20_000, initial_partition)

  0%|          | 0/20000 [00:00<?, ?it/s]

Drawing diagram for step 0
Drawing diagram for step 1000
Drawing diagram for step 2000
Drawing diagram for step 3000


## Checkpoint for 20_000 steps

In [16]:
assignment_at_20_000 = utilities.checkpoint("OR_Gerrychain_step_20_000", partition_at_20_000.assignment)
partition_at_20_000 = Partition(
    graph=or_graph,
    assignment=assignment_at_20_000,
    updaters={
        "total_population": Tally("TOTPOP", alias="total_population"),
        "hisp_population": Tally("HISP", alias="hisp_population"),
        "cut_edges": cut_edges,
        "PRE20": election
    }
)

Checkpoint: OR_Gerrychain_step_20_000
Data loaded from cache.


In [17]:
partition_at_40_000 = run_the_chain(40_000, partition_at_20_000, offset=20_000)

  0%|          | 0/40000 [00:00<?, ?it/s]

Drawing diagram for step 20000


KeyboardInterrupt: 

## Checkpoint for 40_000 steps

In [None]:
assignment_at_40_000 = utilities.checkpoint("OR_Gerrychain_step_20_000", partition_at_40_000.assignment)
partition_at_40_000 = Partition(
    graph=or_graph,
    assignment=assignment_at_40_000,
    updaters={
        "total_population": Tally("TOTPOP", alias="total_population"),
        "hisp_population": Tally("HISP", alias="hisp_population"),
        "cut_edges": cut_edges,
        "PRE20": election
    }
)