In [1]:
import geopandas as gp
import pandas as pd
import os

# 2020 Presidential Democratic & Republican Vote Share Aggregated to 2022 Legislative Districts

## Background:
- Received a request wanting % Democratic vote share on the new legislative districts

## Approach:
- Combine all of the block-level VEST disaggregated 2020 general election results
- Filter down to the votes for just Biden and Trump
- Join to the national BAF
- Aggregate to new districts
- Create the vote share percentages

## Links to datasets used:
[2022 National Block Assignment File](https://redistrictingdatahub.org/dataset/national-block-assignment-file-for-2022-state-legislative-and-congressional-districts/)

Block-level 2020 election results (multiple links)

For a full 'raw-from-source' file, contact info@redistrictingdatahub.org

### Load and Clean Block-Level Votes and District Assignments

In [2]:
# Note: Created this file by combining our disaggregated 2020 VEST files
national_block_election = pd.read_csv("./raw-from-source/national_block_election.csv")
national_baf = pd.read_csv("./raw-from-source/national_baf_boundary/national_baf_boundary.csv")

national_baf["GEOID20"] = national_baf["GEOID20"].astype(str).str.zfill(15)
national_block_election["GEOID20"] = national_block_election["GEOID20"].astype(str).str.zfill(15)

  national_baf = pd.read_csv("./raw-from-source/national_baf_boundary/national_baf_boundary.csv")


### Join Files Together, Check Merge

In [3]:
combined = pd.merge(national_baf, national_block_election, how = "outer", on = "GEOID20", indicator = True)
combined["_merge"].value_counts()

both          8126956
left_only           0
right_only          0
Name: _merge, dtype: int64

### Aggregate to Districts, Filter Out "Unassigned Areas", Create Vote Shares

In [4]:
combined["CONG-ID"] = combined["STATE"] + "-" + combined["CONG"].astype(str)
combined_cong = combined.groupby(['STATE',"CONG","CONG-ID"])[['P0010001', 'VAP_MOD', 'G20PRERTRU', 'G20PREDBID']].sum()
combined_cong.reset_index(inplace = True, drop = False)

In [5]:
combined["SLDL-ID"] = combined["STATE"] + "-" + combined["SLDL"].astype(str)
combined_SLDL = combined.groupby(['STATE',"SLDL","SLDL-ID"])[['P0010001', 'VAP_MOD', 'G20PRERTRU', 'G20PREDBID']].sum()
combined_SLDL.reset_index(inplace = True, drop = False)

In [6]:
combined["SLDU-ID"] = combined["STATE"] + "-" + combined["SLDU"].astype(str)
combined_SLDU = combined.groupby(['STATE',"SLDU","SLDU-ID"])[['P0010001', 'VAP_MOD', 'G20PRERTRU', 'G20PREDBID']].sum()
combined_SLDU.reset_index(inplace = True, drop = False)

In [7]:
combined_SLDL = combined_SLDL[~(combined_SLDL["SLDL-ID"].str.contains("NO VALUE"))&~(combined_SLDL["SLDL-ID"].str.contains("NO SLDL"))]
combined_SLDU = combined_SLDU[~(combined_SLDU["SLDU-ID"].str.contains("NO VALUE"))&~(combined_SLDU["SLDU-ID"].str.contains("NO SLDU"))]
combined_cong = combined_cong[~combined_cong["CONG-ID"].str.contains("NO VALUE")]

In [8]:
combined_SLDL["REP_pct"] = round(combined_SLDL["G20PRERTRU"] / (combined_SLDL["G20PRERTRU"] + combined_SLDL["G20PREDBID"]),3)
combined_SLDL["DEM_pct"] = round(combined_SLDL["G20PREDBID"] / (combined_SLDL["G20PRERTRU"] + combined_SLDL["G20PREDBID"]),3) 

combined_SLDU["REP_pct"] = round(combined_SLDU["G20PRERTRU"] / (combined_SLDU["G20PRERTRU"] + combined_SLDU["G20PREDBID"]),3)
combined_SLDU["DEM_pct"] = round(combined_SLDU["G20PREDBID"] / (combined_SLDU["G20PRERTRU"] + combined_SLDU["G20PREDBID"]),3)

combined_cong["REP_pct"] = round(combined_cong["G20PRERTRU"] / (combined_cong["G20PRERTRU"] + combined_cong["G20PREDBID"]),3)
combined_cong["DEM_pct"] = round(combined_cong["G20PREDBID"] / (combined_cong["G20PRERTRU"] + combined_cong["G20PREDBID"]),3)


### Export to csv

In [9]:
if not os.path.exists("./national_20_pres_dem_rep_vote_share_22_leg_boundary"):
    os.mkdir("./national_20_pres_dem_rep_vote_share_22_leg_boundary")

combined_cong.to_csv('./national_20_pres_dem_rep_vote_share_22_leg_boundary/2022_cong_results.csv', index = False)
combined_SLDU.to_csv('./national_20_pres_dem_rep_vote_share_22_leg_boundary/2022_sldu_results.csv', index = False)
combined_SLDL.to_csv('./national_20_pres_dem_rep_vote_share_22_leg_boundary/2022_sldl_results.csv', index = False)

In [11]:
fields_dict = {'STATE':"State Abbreviation",
               'SLDL':"State Legislative District (Lower House) Name or Number",
               'SLDU':"State Legislative District (Upper House) Name or Number",
               'CONG':"Congressional District Name or Number",
               'SLDL-ID':"Unique State Legislative District (Lower House) ID",
               'SLDU-ID':"Unique State Legislative District (Upper House) ID",
               'CONG-ID':"Unique Congressional District ID",
               'P0010001':"Total Population from 2020 Census",
               'VAP_MOD':"Total Voting Age Population minus incarcerated adult population",
               'G20PRERTRU':"Total 2020 Trump Votes",
               'G20PREDBID':"Total 2020 Biden Votes",
               'DEM_pct':"Pct 2020 Biden Votes",
               'REP_pct':"Total 2020 Trump Votes"}

In [12]:
### Create README

title = "2020 Presidential Democratic & Republican Vote Share Aggregated to 2022 Legislative Districts"
retrieval_date = "01/17/23"
fields_dict = fields_dict
github_link = "https://github.com/nonpartisan-redistricting-datahub/Processing-Requests/tree/main/Dem_Rep_20_Pres_Results_22_Leg_Districts_01_13_23"
file_folder = "./national_20_pres_dem_rep_vote_share_22_leg_boundary/"

In [13]:
def full_readme_text(title, retrieval_date, fields_dict, github_link):

#First section of README
    readme_p1 = '''{title}\n
## RDH Date Retrieval
{retrieval_date}

## Sources
ADD SOURCE DATA HERE

## Fields:
'''.format(title = title, retrieval_date = retrieval_date)

#Second section of README
    fields_table = pd.DataFrame.from_dict(fields_dict.items())
    fields_table.columns = ["Field Name", "Description"]
    readme_p2 = fields_table.to_string(formatters={'Description':'{{:<{}s}}'.format(fields_table['Description'].str.len().max()).format, 'Field Name':'{{:<{}s}}'.format(fields_table['Field Name'].str.len().max()).format}, index=False, justify = "left")

#Third section of README
    readme_p3 = '''\n
## Processing Steps
Visit the RDH GitHub and the processing script for this code [here]({github_link})

Please direct questions related to processing this dataset to info@redistrictingdatahub.org.
'''.format(github_link=github_link)
    
    full_readme = str(readme_p1)+str(readme_p2)+str(readme_p3)
    return full_readme



In [14]:
with open(file_folder+"README.txt", 'w') as tf:
        tf.write(full_readme_text(title, retrieval_date, fields_dict, github_link))