**Note, the cycle computation below is hard-coded as four-year cycles, for fast-tracking tests**

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
import gc
from shapely import wkt

In [None]:
# Suppress every Python warning for the remainder of the session.
import warnings
warnings.simplefilter("ignore")

In [None]:
# Show every column when a DataFrame is displayed (no truncation).
pd.options.display.max_columns = None

In [None]:
# President: 2016 (Trump), 2020 (Biden), 2024 (Trump)
# Governor: 2018 (Whitmer), 2022 (Whitmer)
# Secretary of State: 2018 (Benson), 2022 (Benson)
# Attorney General: 2018 (Nessel), 2022 (Nessel)
# U.S. Senate: 2014 (Peters), 2018 (Stabenow), 2020 (Peters), 2024 (Slotkin)
# U.S. House: every cycle
# State Senate: 2014, 2018, 2022
# State House: every cycle
# State Board of Education: 2014, 2016, 2018, 2020, 2022, 2024 (every even-numbered year)
# University of Michigan Board of Regents: 2014, 2016, 2018, 2020, 2022, 2024 (every two years)
# Michigan State University Board of Trustees: 2014, 2016, 2018, 2020, 2022, 2024 (every two years)
# Wayne State University Board of Governors: 2014, 2016, 2018, 2020, 2022, 2024 (every two years)
# Straight Party – 2020 and later (OpenElections data only)

# OFFICES = ['U.S. House', 'State House']
# YEARS = ['2014', '2016', '2018', '2020', '2022', '2024']

# OFFICES = ['U.S. House']
# YEARS = ['2014', '2016', '2018', '2020', '2022', '2024']

# Active selection: the offices to process and the election years in which
# those offices appear on the ballot, listed oldest first.
OFFICES = ["U.S. Senate"]
YEARS = ["2014", "2018", "2020", "2024"]

# OFFICES = ['State Senate']
# YEARS = ['2014', '2018', '2022']

# OFFICES = ['President']
# YEARS = ['2016', '2020', '2024'] # this will compute changes for 2024, using the previous two cycles

# OFFICES = ['Governor', 'Secretary of State', 'Attorney General']
# YEARS = ['2018', '2022']

In [None]:
# Export the tabular vote columns of every (year, office) precinct file.
#
# Each input CSV is read, trimmed and written in a single pass, so the output
# for one office can never be overwritten by another office's data (which
# happens when frames are cached per year only and OFFICES has several
# entries). The geometry column is not parsed because it is never written out.
vote_columns = [
    'standardized_id_num',
    'dem_votes', 'rep_votes', 'oth_votes',
    'dem_share', 'rep_share', 'oth_share',
    'registered_voters', 'turnout_pct',
]

for year in YEARS:
    for office in OFFICES:
        office_slug = office.replace('.', '').replace(' ', '_')

        df_precinct_vote = pd.read_csv(f'data/generated_data/df_01_election_{year}_{office_slug}.csv')
        df = df_precinct_vote[vote_columns]

        # Rows without a precinct identifier cannot be joined across cycles
        # later on, so they are dropped here.
        df = df[df["standardized_id_num"].notna() & (df["standardized_id_num"] != "")]
        df.to_csv(f'data/generated_data/df_02_vote_changes_{year}_{office_slug}.csv', index=False)

        # Release the frames before loading the next file.
        del df_precinct_vote, df

gc.collect()

### Add vote shifts

#### Calculate *past* cycle outcomes
Use the two elections preceding the current one to gauge historical changes.

In [None]:
# Backward-looking vote shifts.
#
# For every election from the third entry of YEARS onward, compare the two
# elections that precede it *in YEARS* and attach those differences to the
# current election's rows. Looking the earlier cycles up by position (rather
# than subtracting a fixed number of years) works for two-year, four-year and
# irregular schedules such as U.S. Senate (2014, 2018, 2020, 2024).
# Assumes YEARS is sorted chronologically.

metric_columns = [
    'dem_share', 'rep_share', 'oth_share',
    'dem_votes', 'rep_votes', 'oth_votes',
    'registered_voters', 'turnout_pct',
]


def _read_vote_table(year, office_slug, suffix=''):
    """Load one df_02_vote_changes CSV with a normalized precinct id.

    Args:
        year: Election year used in the file name.
        office_slug: Office name as it appears in the file name.
        suffix: Text appended to every metric column name ('' keeps the
            original names).

    Returns:
        DataFrame whose 'standardized_id_num' column holds 13-character,
        left-zero-padded strings.
    """
    table = pd.read_csv(
        f"data/generated_data/df_02_vote_changes_{year}_{office_slug}.csv",
        # Read ids as text so every cycle shares one dtype for the merge key.
        dtype={'standardized_id_num': str},
    )
    table['standardized_id_num'] = table['standardized_id_num'].str.zfill(13)
    if suffix:
        table = table.rename(columns={col: col + suffix for col in metric_columns})
    return table


for cycle_idx, year in enumerate(YEARS):
    # No previous changes to compute until the third dataset: two earlier
    # cycles are required so that only already-observed data is used.
    if cycle_idx < 2:
        continue

    prev_year = YEARS[cycle_idx - 1]       # most recent earlier election
    prev_prev_year = YEARS[cycle_idx - 2]  # the election before that

    for office in OFFICES:
        office_slug = office.replace('.', '').replace(' ', '_')

        df_curr = _read_vote_table(year, office_slug)
        df_prev = _read_vote_table(prev_year, office_slug, suffix='_prev')
        df_prev_prev = _read_vote_table(prev_prev_year, office_slug, suffix='_prev_prev')

        # Left-join each earlier cycle onto the current precincts separately,
        # so a precinct missing from one earlier cycle still keeps the values
        # of the other one.
        df_all_merged = pd.merge(df_curr, df_prev, on="standardized_id_num", how="left")
        df_all_merged = pd.merge(df_all_merged, df_prev_prev, on="standardized_id_num", how="left")

        # Share, vote-count, registration and turnout changes.
        # NOTE(review): the difference is (older cycle - newer cycle), so a
        # positive value means the metric *fell* between the two earlier
        # elections. Kept as-is because downstream consumers may rely on this
        # sign convention - confirm before flipping.
        for metric in metric_columns:
            df_all_merged[f"{metric}_change"] = (
                df_all_merged[f"{metric}_prev_prev"] - df_all_merged[f"{metric}_prev"]
            )

        # Save as parquet for efficiency
        df_all_merged.to_parquet(f"data/generated_data/df_02_vote_changes_calc_{year}_{office_slug}.parquet", index=False)

        # Free memory
        del df_prev_prev, df_prev, df_curr, df_all_merged
        gc.collect()

In [None]:
# Attach the historical change columns to each cycle's precinct geometries
# and write one GeoJSON per (year, office) for the subsequent notebooks.
change_fields = (
    'dem_share_change', 'rep_share_change', 'oth_share_change',
    'dem_votes_change', 'rep_votes_change', 'oth_votes_change',
    'registered_voters_change', 'turnout_pct_change',
)
carried_fields = [
    'standardized_id_num',
    'dem_share_prev', 'rep_share_prev', 'oth_share_prev',
    *change_fields,
]

for year in YEARS:
    for office in OFFICES:
        office_slug = office.replace('.', '').replace(' ', '_')
        source_path = f'data/generated_data/df_01_election_{year}_{office_slug}.geojson'
        target_stem = f'data/generated_data/df_02_vote_changes_calc_{year}_{office_slug}'

        df_precinct_original = gpd.read_file(source_path, driver='GeoJSON')

        if year <= YEARS[1]:
            # First and second cycle: no pair of earlier elections exists,
            # so every change column is filled with zero.
            for field in change_fields:
                df_precinct_original[field] = 0.
            df_precinct_original.to_file(target_stem + '.geojson', driver='GeoJSON')
            continue

        # Later cycles: pull the precomputed shifts and key them by the
        # 13-character zero-padded precinct id.
        df_precinct_change = pd.read_parquet(target_stem + '.parquet')
        df_precinct_change = df_precinct_change[carried_fields]
        padded_ids = df_precinct_change['standardized_id_num'].astype(str).str.zfill(13)
        df_precinct_change['standardized_id_num'] = padded_ids

        df_precinct_results = pd.merge(
            df_precinct_original,
            df_precinct_change,
            on='standardized_id_num',
            how='left',
        )

        # THIS GOES TO THE "NORMAL" DATA FOLDER FOR SUBSEQUENT NOTEBOOKS.
        df_precinct_results.to_file(target_stem + '.geojson', driver='GeoJSON')

In [None]:
# Spot-check one randomly chosen row of the last merged cycle.
df_precinct_results.sample(n=1)