# Data analysis 

## Read in data

In [8]:
import pandas as pd
import geopandas as gpd
# Load the four Dayton input tables from parquet files in the working directory.
# Election results; later cells read its pres_20_dem / pres_20_rep columns.
dayton_elections = pd.read_parquet("Dayton_elections.parquet")
# Population counts; later cells read its tot_pop_20 / white_pop_20 columns.
dayton_population = pd.read_parquet("Dayton_pop.parquet")
# Read through geopandas rather than pandas — presumably carries precinct
# geometries; it is not referenced in the cells visible here.
dayton_precincts = gpd.read_parquet("Dayton_precincts.parquet")
# NOTE(review): presumably voting-age population; loaded but not referenced in
# the cells visible here — confirm whether it is still needed.
dayton_vap = pd.read_parquet("Dayton_vap.parquet")

## Use data to infer parameters

In [30]:
# White share of the 2020 population for every row of dayton_population, in
# percent, computed column-wise instead of with a Python loop over .iloc.
total_pop_20 = dayton_population['tot_pop_20']
# A row whose total is 0 becomes NaN here (instead of raising
# ZeroDivisionError or producing inf); .mean() below skips NaN by default.
w_percent_by_row = (
    dayton_population['white_pop_20'] / total_pop_20.where(total_pop_20 != 0) * 100
)

dayton_population['w_percent'] = w_percent_by_row
# Kept as a plain list so any code expecting the list form still works.
w_percents = w_percent_by_row.tolist()

# NOTE(review): this is the unweighted mean of the per-row percentages, not
# (total White population / total population). The two differ when rows have
# different population sizes — confirm which one the bloc sizes should use.
# float() keeps the result a built-in float; the last printed digits may differ
# slightly from a left-to-right Python sum because the summation order differs.
average_w_percent = float(w_percent_by_row.mean())

In [152]:
# Democratic share of the two-party (Democratic + Republican) 2020 presidential
# vote for every row of dayton_elections, in percent, computed column-wise
# instead of with a Python loop over .iloc.
dem_votes_20 = dayton_elections['pres_20_dem']
two_party_votes_20 = dem_votes_20 + dayton_elections['pres_20_rep']
# A row with no two-party votes becomes NaN here (instead of raising
# ZeroDivisionError or producing inf); .mean() below skips NaN by default.
d_percent_by_row = dem_votes_20 / two_party_votes_20.where(two_party_votes_20 != 0) * 100

# Kept as a plain list so any code expecting the list form still works.
d_percents = d_percent_by_row.tolist()

# NOTE(review): unweighted mean of the per-row shares, not
# (total Democratic votes / total two-party votes) — confirm which is intended.
# float() keeps the result a built-in float; the last printed digits may differ
# slightly from a left-to-right Python sum because the summation order differs.
average_d_percent = float(d_percent_by_row.mean())

In [154]:
# Display the mean of the per-row White population percentages (in percent).
average_w_percent

41.114500623857374

In [156]:
# Display the mean of the per-row Democratic two-party vote shares (in percent).
average_d_percent

73.73250728641607

# Run Election

This will simulate an election for council. 

In [231]:
from gerrychain import Graph
import jsonlines as jl
import votekit.elections as elec
from votekit import PreferenceProfile
import votekit.ballot_generator as bg
from tqdm.notebook import tqdm

First, I use baseline parameters: the voter blocs are sized to be representative of the population (using the average White population share computed above), but both blocs are given identical cohesion and alpha values, so bloc membership does not otherwise change how anyone votes. The slate of candidates is based on the 2021 primary ballot.

In [234]:
# Baseline scenario parameters.

# Bloc sizes as fractions of the electorate: "W" takes the average White
# population share, "C" takes the remainder.
bloc_voter_prop = {"W": average_w_percent/100, "C": 1-average_w_percent/100}

# Presumably outer key = voter bloc, inner key = slate (confirm against the
# VoteKit docs). Both blocs get the same 0.7 / 0.3 split between the slates.
cohesion_parameters = {bloc: {"W": 0.7, "C": 0.3} for bloc in ("W", "C")}

# Every bloc/slate pair uses alpha = 1.
alphas = {bloc: {"W": 1, "C": 1} for bloc in ("W", "C")}

# Four "W" candidates and three "C" candidates.
slate_to_candidates = {
    "W": ["W1", "W2", "W3", "W4"],
    "C": ["C1", "C2", "C3"],
}

In [236]:
# Monte Carlo run: 3000 independent simulated elections (the loop bound below
# is 3000), each written as one JSON line holding that election's winners.
with jl.open('election_results_defaults.jsonl', 'w') as results_out:
    for _run in tqdm(range(3000)):
        # The generator is deliberately built inside the loop. Presumably
        # from_params re-draws preference intervals from `alphas` on each call,
        # so do not hoist it without checking the VoteKit docs.
        generator = bg.slate_PlackettLuce.from_params(
            slate_to_candidates=slate_to_candidates,
            bloc_voter_prop=bloc_voter_prop,
            cohesion_parameters=cohesion_parameters,
            alphas=alphas,
        )
        profile = generator.generate_profile(number_of_ballots=10000)
        # STV election over the generated ballots, electing 4 representatives.
        election = elec.STV(profile, m=4)

        # get_elected() yields groups of winners; flatten them into one list,
        # keeping the order in which the groups and their members come out.
        winners = []
        for winner_set in election.get_elected():
            winners.extend(winner_set)
        results_out.write({"winners": winners})

  0%|          | 0/3000 [00:00<?, ?it/s]

In [237]:
# Read the baseline simulation output back in: lines=True parses the file as
# JSON Lines, one record (one simulated election's winners) per row.
e_r = pd.read_json('election_results_defaults.jsonl', lines = True)

The idea behind this was just to see how the committee elections would look if Dayton elected all 4 representatives in the same year. 

One idea for changing the inputs is to look at turnout. By looking at the pictures that Max has, we see that the western side of Dayton, which has a predominantly POC population, tends to vote Democratic, while the eastern side, which is predominantly white, tends to vote Republican. So we re-size the voter blocs using the two-party vote share instead of population: the "C" bloc gets the average Democratic share and the "W" bloc gets the remainder.

In [239]:
# Turnout scenario parameters: only the bloc sizes differ from the baseline.

# Bloc sizes as fractions of the electorate: "C" takes the average Democratic
# two-party vote share, "W" takes the remainder.
bloc_voter_prop = {"W": 1-average_d_percent/100, "C": average_d_percent/100}

# Presumably outer key = voter bloc, inner key = slate (confirm against the
# VoteKit docs). Both blocs get the same 0.7 / 0.3 split between the slates.
cohesion_parameters = {bloc: {"W": 0.7, "C": 0.3} for bloc in ("W", "C")}

# Every bloc/slate pair uses alpha = 1.
alphas = {bloc: {"W": 1, "C": 1} for bloc in ("W", "C")}

# Four "W" candidates and three "C" candidates.
slate_to_candidates = {
    "W": ["W1", "W2", "W3", "W4"],
    "C": ["C1", "C2", "C3"],
}

In [240]:
# Monte Carlo run: 3000 independent simulated elections (the loop bound below
# is 3000), each written as one JSON line holding that election's winners.
with jl.open('election_results_turnout.jsonl', 'w') as results_out:
    for _run in tqdm(range(3000)):
        # The generator is deliberately built inside the loop. Presumably
        # from_params re-draws preference intervals from `alphas` on each call,
        # so do not hoist it without checking the VoteKit docs.
        generator = bg.slate_PlackettLuce.from_params(
            slate_to_candidates=slate_to_candidates,
            bloc_voter_prop=bloc_voter_prop,
            cohesion_parameters=cohesion_parameters,
            alphas=alphas,
        )
        profile = generator.generate_profile(number_of_ballots=10000)
        # STV election over the generated ballots, electing 4 representatives.
        election = elec.STV(profile, m=4)

        # get_elected() yields groups of winners; flatten them into one list,
        # keeping the order in which the groups and their members come out.
        winners = []
        for winner_set in election.get_elected():
            winners.extend(winner_set)
        results_out.write({"winners": winners})

  0%|          | 0/3000 [00:00<?, ?it/s]

In [241]:
# Read the turnout-scenario output back in: lines=True parses the file as
# JSON Lines, one record (one simulated election's winners) per row.
e_r_t = pd.read_json('election_results_turnout.jsonl', lines=True)

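Now, we note that there seems to be a high preference for incumbents. To reflect this, we change our alpha values: for both blocs, the alpha for the "W" slate is lowered from 1 to 0.3, while the alpha for the "C" slate stays at 1. The bloc sizes go back to the population-based shares.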

In [243]:
# Incumbent scenario parameters: only the "W"-slate alphas differ from the
# baseline.

# Bloc sizes as fractions of the electorate: "W" takes the average White
# population share, "C" takes the remainder.
bloc_voter_prop = {"W": average_w_percent/100, "C": 1-average_w_percent/100}

# Presumably outer key = voter bloc, inner key = slate (confirm against the
# VoteKit docs). Both blocs get the same 0.7 / 0.3 split between the slates.
cohesion_parameters = {bloc: {"W": 0.7, "C": 0.3} for bloc in ("W", "C")}

# Both blocs use alpha = .3 for the "W" slate and alpha = 1 for the "C" slate.
alphas = {bloc: {"W": .3, "C": 1} for bloc in ("W", "C")}

# Four "W" candidates and three "C" candidates.
slate_to_candidates = {
    "W": ["W1", "W2", "W3", "W4"],
    "C": ["C1", "C2", "C3"],
}

In [244]:
# Monte Carlo run: 3000 independent simulated elections (the loop bound below
# is 3000), each written as one JSON line holding that election's winners.
with jl.open('election_results_incumbents.jsonl', 'w') as results_out:
    for _run in tqdm(range(3000)):
        # The generator is deliberately built inside the loop. Presumably
        # from_params re-draws preference intervals from `alphas` on each call,
        # so do not hoist it without checking the VoteKit docs.
        generator = bg.slate_PlackettLuce.from_params(
            slate_to_candidates=slate_to_candidates,
            bloc_voter_prop=bloc_voter_prop,
            cohesion_parameters=cohesion_parameters,
            alphas=alphas,
        )
        profile = generator.generate_profile(number_of_ballots=10000)
        # STV election over the generated ballots, electing 4 representatives.
        election = elec.STV(profile, m=4)

        # get_elected() yields groups of winners; flatten them into one list,
        # keeping the order in which the groups and their members come out.
        winners = []
        for winner_set in election.get_elected():
            winners.extend(winner_set)
        results_out.write({"winners": winners})

  0%|          | 0/3000 [00:00<?, ?it/s]

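Now, let us look at changing the cohesion: we flip that of the "C" group, so that it favors the "C" slate (0.7) over the "W" slate (0.3). The incumbent alpha values from the previous scenario are kept.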

In [246]:
# Cohesion scenario parameters: the "C" bloc's cohesion is flipped and the
# "W"-slate alphas are .3.

# Bloc sizes as fractions of the electorate: "W" takes the average White
# population share, "C" takes the remainder.
bloc_voter_prop = {"W": average_w_percent/100, "C": 1-average_w_percent/100}

# Presumably outer key = voter bloc, inner key = slate (confirm against the
# VoteKit docs). Each bloc now gives 0.7 to its own slate and 0.3 to the other.
cohesion_parameters = {
    "W": {"W": 0.7, "C": 0.3},
    "C": {"W": 0.3, "C": 0.7},
}

# Both blocs use alpha = .3 for the "W" slate and alpha = 1 for the "C" slate.
alphas = {bloc: {"W": .3, "C": 1} for bloc in ("W", "C")}

# Four "W" candidates and three "C" candidates.
slate_to_candidates = {
    "W": ["W1", "W2", "W3", "W4"],
    "C": ["C1", "C2", "C3"],
}

In [247]:
# Monte Carlo run: 3000 independent simulated elections (the loop bound below
# is 3000), each written as one JSON line holding that election's winners.
with jl.open('election_results_cohesion.jsonl', 'w') as results_out:
    for _run in tqdm(range(3000)):
        # The generator is deliberately built inside the loop. Presumably
        # from_params re-draws preference intervals from `alphas` on each call,
        # so do not hoist it without checking the VoteKit docs.
        generator = bg.slate_PlackettLuce.from_params(
            slate_to_candidates=slate_to_candidates,
            bloc_voter_prop=bloc_voter_prop,
            cohesion_parameters=cohesion_parameters,
            alphas=alphas,
        )
        profile = generator.generate_profile(number_of_ballots=10000)
        # STV election over the generated ballots, electing 4 representatives.
        election = elec.STV(profile, m=4)

        # get_elected() yields groups of winners; flatten them into one list,
        # keeping the order in which the groups and their members come out.
        winners = []
        for winner_set in election.get_elected():
            winners.extend(winner_set)
        results_out.write({"winners": winners})

  0%|          | 0/3000 [00:00<?, ?it/s]

The last thing that we will look at is the setting that Dayton actually has: only 2 representatives are elected at a time. The primaries narrow the field down to 4 candidates, hence the change in the number of candidates. Notice that this balances the candidate slate between the two groups, with 2 candidates each.

In [249]:
# Smaller-election scenario parameters: baseline bloc sizes, cohesion and
# alphas, but only two candidates per slate.

# Bloc sizes as fractions of the electorate: "W" takes the average White
# population share, "C" takes the remainder.
bloc_voter_prop = {"W": average_w_percent/100, "C": 1-average_w_percent/100}

# Presumably outer key = voter bloc, inner key = slate (confirm against the
# VoteKit docs). Both blocs get the same 0.7 / 0.3 split between the slates.
cohesion_parameters = {bloc: {"W": 0.7, "C": 0.3} for bloc in ("W", "C")}

# Every bloc/slate pair uses alpha = 1.
alphas = {bloc: {"W": 1, "C": 1} for bloc in ("W", "C")}

# Two candidates on each slate.
slate_to_candidates = {
    "W": ["W1", "W2"],
    "C": ["C1", "C2"],
}

In [250]:
# Monte Carlo run: 3000 independent simulated elections (the loop bound below
# is 3000), each written as one JSON line holding that election's winners.
with jl.open('election_results_smaller.jsonl', 'w') as results_out:
    for _run in tqdm(range(3000)):
        # The generator is deliberately built inside the loop. Presumably
        # from_params re-draws preference intervals from `alphas` on each call,
        # so do not hoist it without checking the VoteKit docs.
        generator = bg.slate_PlackettLuce.from_params(
            slate_to_candidates=slate_to_candidates,
            bloc_voter_prop=bloc_voter_prop,
            cohesion_parameters=cohesion_parameters,
            alphas=alphas,
        )
        profile = generator.generate_profile(number_of_ballots=10000)
        # STV election over the generated ballots, electing 2 representatives.
        election = elec.STV(profile, m=2)

        # get_elected() yields groups of winners; flatten them into one list,
        # keeping the order in which the groups and their members come out.
        winners = []
        for winner_set in election.get_elected():
            winners.extend(winner_set)
        results_out.write({"winners": winners})

  0%|          | 0/3000 [00:00<?, ?it/s]

Then, for the fun of it all, I wanted to combine all of the edits I made to see how well the result reflects the "reality" of the elections.

In [252]:
# Combined scenario parameters: vote-share-based bloc sizes, flipped "C"
# cohesion, .3 alphas for the "W" slate, and two candidates per slate.

# Bloc sizes as fractions of the electorate: "C" takes the average Democratic
# two-party vote share, "W" takes the remainder.
bloc_voter_prop = {"W": 1-average_d_percent/100, "C": average_d_percent/100}

# Presumably outer key = voter bloc, inner key = slate (confirm against the
# VoteKit docs). Each bloc gives 0.7 to its own slate and 0.3 to the other.
cohesion_parameters = {
    "W": {"W": 0.7, "C": 0.3},
    "C": {"W": 0.3, "C": 0.7},
}

# Both blocs use alpha = .3 for the "W" slate and alpha = 1 for the "C" slate.
alphas = {bloc: {"W": .3, "C": 1} for bloc in ("W", "C")}

# Two candidates on each slate.
slate_to_candidates = {
    "W": ["W1", "W2"],
    "C": ["C1", "C2"],
}

In [253]:
# Monte Carlo run: 3000 independent simulated elections, each written as one
# JSON line holding that election's winners.
with jl.open('election_results_combination.jsonl', 'w') as results_out:
    for _run in tqdm(range(3000)):
        # The generator is deliberately built inside the loop. Presumably
        # from_params re-draws preference intervals from `alphas` on each call,
        # so do not hoist it without checking the VoteKit docs.
        generator = bg.slate_PlackettLuce.from_params(
            slate_to_candidates=slate_to_candidates,
            bloc_voter_prop=bloc_voter_prop,
            cohesion_parameters=cohesion_parameters,
            alphas=alphas,
        )
        profile = generator.generate_profile(number_of_ballots=10000)
        # STV election over the generated ballots, electing 2 representatives.
        election = elec.STV(profile, m=2)

        # get_elected() yields groups of winners; flatten them into one list,
        # keeping the order in which the groups and their members come out.
        winners = []
        for winner_set in election.get_elected():
            winners.extend(winner_set)
        results_out.write({"winners": winners})

  0%|          | 0/3000 [00:00<?, ?it/s]

Initial tiebreak was unsuccessful, performing random tiebreak
