### A small script to calculate what share of the total US vote is covered by the counties used in our analysis.

In [1]:
import pandas as pd

In [2]:
# Read the county-level election results.
# county_fips is kept as text so it can be matched against string FIPS codes.
elections = pd.read_csv(
    "county_2020_elections.csv",
    dtype={"county_fips": str},
)
elections.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,diff,per_gop,per_dem,per_point_diff
0,Alabama,1001,Autauga County,19838,7503,27770,12335,0.714368,0.270184,0.444184
1,Alabama,1003,Baldwin County,83544,24578,109679,58966,0.761714,0.22409,0.537623
2,Alabama,1005,Barbour County,5622,4816,10518,806,0.534512,0.457882,0.076631
3,Alabama,1007,Bibb County,7525,1986,9595,5539,0.784263,0.206983,0.57728
4,Alabama,1009,Blount County,24711,2640,27588,22071,0.895716,0.095694,0.800022


In [3]:
# Load the selected county FIPS codes (one code per line).
with open("list_of_counties.txt", "r") as f:
    # Strip whitespace on both sides and drop blank lines: a stray empty line
    # (e.g. at the end of the file) would otherwise become an '' entry that
    # matches no county and inflates len(selected_fips).
    selected_fips = [code for code in (line.strip() for line in f) if code]

print(selected_fips[:5])

['12095', '26163', '40109', '12086', '13121']


In [4]:
# Sum the votes cast across every county in the elections table
total_voters = elections["total_votes"].sum()

print(f"Total US voters: {total_voters:,}")

Total US voters: 158,433,557


In [5]:
# Keep only the rows whose FIPS code appears in our selected-county list
is_selected = elections["county_fips"].isin(selected_fips)
subset_elections = elections[is_selected]

# Share of all votes cast that falls within the selected counties
selected_votes = subset_elections["total_votes"].sum()
propotion_of_country_covered = selected_votes / total_voters
print(f"Percent coverage: {propotion_of_country_covered:.0%}")

Percent coverage: 64%


In [6]:
# Sanity checks: every selected FIPS code must match exactly one election row.
# The diagnostics below are only used in the failure messages.
matched_fips = list(subset_elections["county_fips"])
unmatched_fips = sorted(set(selected_fips) - set(matched_fips))
duplicated_selected = sorted({code for code in selected_fips if selected_fips.count(code) > 1})

# Check the number of counties matches
assert len(subset_elections) == len(selected_fips), (
    f"Matched {len(subset_elections)} election rows for {len(selected_fips)} selected FIPS codes; "
    f"codes with no matching row: {unmatched_fips}; "
    f"codes listed more than once: {duplicated_selected}"
)

# Check the actual county fips are the same
assert sorted(matched_fips) == sorted(selected_fips), (
    "Selected FIPS codes differ from the matched election rows; "
    f"codes with no matching row: {unmatched_fips}; "
    f"codes listed more than once: {duplicated_selected}"
)