In [1]:
import geopandas as gp
import pandas as pd

# 2020 Presidential Democratic & Republican Vote Share Aggregated to 2022 Legislative Districts

## Background:
- Received a request for the Democratic share of the 2020 presidential vote in each of the new (2022) congressional and state legislative districts

## Approach:
- Combine all of the block-level VEST disaggregated 2020 general election results
- Filter down to the votes for just Biden and Trump
- Join to the national BAF
- Aggregate into new districts
- Compute each district's two-party vote shares (Biden and Trump votes, each divided by their combined total)

## Links to datasets used:
[2022 National Block Assignment File](https://redistrictingdatahub.org/dataset/national-block-assignment-file-for-2022-state-legislative-and-congressional-districts/)

Block-level 2020 election results (multiple links)


For a full 'raw-from-source' file, contact info@redistrictingdatahub.org

### Load and Clean Block-Level Votes and District Assignments

In [2]:
# Note: Created this file by combining our disaggregated 2020 VEST files
# GEOID20 is read as text in both files so the block IDs never pass through a
# numeric dtype on the way in.
national_block_election = pd.read_csv(
    "./raw-from-source/national_block_election.csv",
    dtype={"GEOID20": str},
)
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so a column cannot end up holding a mix of parsed
# numbers and raw strings (the situation pandas reports with a DtypeWarning).
# NOTE(review): the stray read_csv line printed under this cell looks like
# that warning -- confirm it disappears after a fresh run.
national_baf = pd.read_csv(
    "./raw-from-source/national_baf_boundary/national_baf_boundary.csv",
    dtype={"GEOID20": str},
    low_memory=False,
)

# Left-pad the IDs with zeros to 15 characters so both frames use the same
# key format when they are joined on GEOID20.
national_baf["GEOID20"] = national_baf["GEOID20"].astype(str).str.zfill(15)
national_block_election["GEOID20"] = national_block_election["GEOID20"].astype(str).str.zfill(15)

  national_baf = pd.read_csv("./raw-from-source/national_baf_boundary/national_baf_boundary.csv")


### Join Files Together, Check Merge

In [3]:
# Outer-join the block assignments (left) with the block-level votes (right)
# on the block ID. indicator=True adds a "_merge" column recording whether
# each row was found in both inputs or only one of them.
combined = national_baf.merge(
    national_block_election,
    on="GEOID20",
    how="outer",
    indicator=True,
)
# Tally the join outcomes; any "left_only" / "right_only" rows are blocks
# present in just one of the two files.
combined["_merge"].value_counts()

both          8126956
left_only           0
right_only          0
Name: _merge, dtype: int64

### Aggregate to Districts, Filter Out "Unassigned Areas", Create Vote Shares

In [5]:
# Label every block with a "<STATE>-<CONG>" district identifier.
cong_label = combined["CONG"].astype(str)
combined["CONG-ID"] = combined["STATE"] + "-" + cong_label

# Sum the block-level count columns within each congressional district, then
# turn the group keys back into ordinary columns.
cong_group_keys = ["STATE", "CONG", "CONG-ID"]
cong_sum_columns = ["P0010001", "VAP_MOD", "G20PRERTRU", "G20PREDBID"]
combined_cong = (
    combined
    .groupby(cong_group_keys)[cong_sum_columns]
    .sum()
    .reset_index()
)

In [7]:
# Label every block with a "<STATE>-<SLDL>" district identifier.
sldl_label = combined["SLDL"].astype(str)
combined["SLDL-ID"] = combined["STATE"] + "-" + sldl_label

# Sum the block-level count columns within each SLDL district, then turn the
# group keys back into ordinary columns.
sldl_group_keys = ["STATE", "SLDL", "SLDL-ID"]
sldl_sum_columns = ["P0010001", "VAP_MOD", "G20PRERTRU", "G20PREDBID"]
combined_SLDL = (
    combined
    .groupby(sldl_group_keys)[sldl_sum_columns]
    .sum()
    .reset_index()
)

In [8]:
# Label every block with a "<STATE>-<SLDU>" district identifier.
sldu_label = combined["SLDU"].astype(str)
combined["SLDU-ID"] = combined["STATE"] + "-" + sldu_label

# Sum the block-level count columns within each SLDU district, then turn the
# group keys back into ordinary columns.
sldu_group_keys = ["STATE", "SLDU", "SLDU-ID"]
sldu_sum_columns = ["P0010001", "VAP_MOD", "G20PRERTRU", "G20PREDBID"]
combined_SLDU = (
    combined
    .groupby(sldu_group_keys)[sldu_sum_columns]
    .sum()
    .reset_index()
)

In [9]:
# Drop the aggregated rows whose district ID contains a placeholder marker
# ("NO VALUE", or "NO SLDL" / "NO SLDU" for the state legislative chambers).
# The markers are matched as literal substrings (regex=False).
#
# Each filtered result is taken as an explicit .copy(): a boolean-mask
# selection can otherwise be treated by pandas as a slice of the unfiltered
# frame, and adding columns to it afterwards raises SettingWithCopyWarning.
sldl_unassigned = (
    combined_SLDL["SLDL-ID"].str.contains("NO VALUE", regex=False)
    | combined_SLDL["SLDL-ID"].str.contains("NO SLDL", regex=False)
)
combined_SLDL = combined_SLDL[~sldl_unassigned].copy()

sldu_unassigned = (
    combined_SLDU["SLDU-ID"].str.contains("NO VALUE", regex=False)
    | combined_SLDU["SLDU-ID"].str.contains("NO SLDU", regex=False)
)
combined_SLDU = combined_SLDU[~sldu_unassigned].copy()

cong_unassigned = combined_cong["CONG-ID"].str.contains("NO VALUE", regex=False)
combined_cong = combined_cong[~cong_unassigned].copy()

In [10]:
def add_two_party_shares(district_totals):
    """Return a copy of ``district_totals`` with two-party vote shares added.

    Parameters
    ----------
    district_totals : pandas.DataFrame
        Must contain the ``G20PREDBID`` and ``G20PRERTRU`` count columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with two extra columns, ``DEM_pct`` and ``REP_pct``: each
        candidate's votes divided by the sum of the two columns, rounded to
        three decimals. Despite the ``_pct`` suffix these are fractions, not
        values scaled to 100. Rows where both counts are zero get NaN (0 / 0).
    """
    two_party_votes = district_totals["G20PRERTRU"] + district_totals["G20PREDBID"]
    # .assign builds a new frame rather than writing into the input, so this
    # is safe to call on a filtered slice without a SettingWithCopyWarning.
    return district_totals.assign(
        DEM_pct=(district_totals["G20PREDBID"] / two_party_votes).round(3),
        REP_pct=(district_totals["G20PRERTRU"] / two_party_votes).round(3),
    )


combined_SLDL = add_two_party_shares(combined_SLDL)
combined_SLDU = add_two_party_shares(combined_SLDU)
combined_cong = add_two_party_shares(combined_cong)


### Export to csv

In [12]:
# Write one CSV per district type into the working directory, without the
# DataFrame index. Dict order is insertion order, so the files are written
# congressional first, then SLDU, then SLDL.
results_by_path = {
    './2022_cong_results.csv': combined_cong,
    './2022_sldu_results.csv': combined_SLDU,
    './2022_sldl_results.csv': combined_SLDL,
}
for csv_path, district_results in results_by_path.items():
    district_results.to_csv(csv_path, index=False)