# Merge 2020 presidential election results with COVID-19 vaccination rates

### Import Python tools

In [1]:
# Black auto-formatting for JupyterLab code cells.
# One-time setup if the extension is missing: %pip install nb_black
%load_ext lab_black

In [2]:
import pandas as pd

---

### Read elections data cleaned in 04 notebook

In [3]:
# County-level 2020 presidential results. Note: county_fips appears to load
# without leading zeros, which is why it is cast to str and zero-padded below.
df = pd.read_json("../data/elections/counties_election_results_2020.json")

In [4]:
# Preview the first rows to confirm the columns and the county_fips format.
df.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,margin_dem,margin_gop,pct_dem,pct_gop,dem_diff,gop_diff,winner
0,Alabama,1001,Autauga,19838,7503,27770,-12335,12335,27,71,-44,44,gop
1,Alabama,1003,Baldwin,83544,24578,109679,-58966,58966,22,76,-54,54,gop
2,Alabama,1005,Barbour,5622,4816,10518,-806,806,46,53,-7,7,gop
3,Alabama,1007,Bibb,7525,1986,9595,-5539,5539,21,78,-57,57,gop
4,Alabama,1009,Blount,24711,2640,27588,-22071,22071,10,90,-80,80,gop


In [5]:
# County FIPS codes are five characters wide; cast to text and restore any
# leading zeros so the column can be joined against the CDC `fips` strings.
fips_text = df["county_fips"].astype(str)
df["county_fips"] = fips_text.str.zfill(5)

### Get vax data from CDC

In [6]:
# Cleaned-up version of this CDC table, limited to the most recent week.
# vax_pct = percent fully vaccinated
# https://data.cdc.gov/Vaccinations/COVID-19-Vaccinations-in-the-United-States-County/8xkx-amqh

In [7]:
# County vaccination totals from the CDC extract.
# Pin `fips` to str: read_json's dtype inference can turn an all-digit column
# into integers, which would strip leading zeros (e.g. "01001" -> 1001) and
# silently break the string join against county_fips further down.
vax_counties = pd.read_json(
    "../data/vaccinations/usa-vaccination-county-totals.json",
    dtype={"fips": str},
)

In [8]:
# Preview the CDC table; `fips` is the key used for the merge below.
vax_counties.head()

Unnamed: 0,fips,county_name,state_name,vax_pct
0,41061,Union County,OR,43.2
1,29093,Iron County,MO,33.9
2,38091,Steele County,ND,41.3
3,18085,Kosciusko County,IN,36.8
4,36075,Oswego County,NY,53.0


In [9]:
# Confirm `fips` is a string (object) column, matching the zero-padded
# string county_fips on the election side of the join.
vax_counties.dtypes

fips            object
county_name     object
state_name      object
vax_pct        float64
dtype: object

### Merge vax with election results

In [10]:
# Attach each county's vaccination rate to its election row. A left join
# keeps every election county, with NaN vax_pct where the CDC has no match.
vax_rates = vax_counties[["fips", "vax_pct"]]
merge = pd.merge(
    df, vax_rates, how="left", left_on="county_fips", right_on="fips"
)

In [11]:
# `fips` duplicates `county_fips` after the join, so drop it.
# Reassign instead of mutating in place, and ignore a missing column, so
# re-running this cell is a no-op rather than a KeyError.
merge = merge.drop(columns=["fips"], errors="ignore")

In [12]:
# Row count after the left join. It should equal the number of election rows
# provided `fips` is unique in vax_counties -- TODO confirm.
len(merge)

3152

### What does the merged file look like? 

In [13]:
# Preview the merged frame: election columns plus the new vax_pct column.
merge.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,margin_dem,margin_gop,pct_dem,pct_gop,dem_diff,gop_diff,winner,vax_pct
0,Alabama,1001,Autauga,19838,7503,27770,-12335,12335,27,71,-44,44,gop,34.7
1,Alabama,1003,Baldwin,83544,24578,109679,-58966,58966,22,76,-54,54,gop,43.4
2,Alabama,1005,Barbour,5622,4816,10518,-806,806,46,53,-7,7,gop,36.6
3,Alabama,1007,Bibb,7525,1986,9595,-5539,5539,21,78,-57,57,gop,30.2
4,Alabama,1009,Blount,24711,2640,27588,-22071,22071,10,90,-80,80,gop,27.2


---

In [14]:
# Narrow view for the correlation check: county labels plus the GOP vote
# share and the vaccination rate.
merge_slim = merge.loc[:, ["state_name", "county_name", "pct_gop", "vax_pct"]]

In [15]:
# Pearson correlation between GOP vote share and vaccination rate.
# Select the numeric columns explicitly: merge_slim also holds the string
# columns state_name/county_name, and pandas >= 2.0 raises on non-numeric
# columns in DataFrame.corr instead of silently dropping them.
merge_slim[["pct_gop", "vax_pct"]].corr(method="pearson")

Unnamed: 0,pct_gop,vax_pct
pct_gop,1.0,-0.480022
vax_pct,-0.480022,1.0


---

### Difference between counties won and votes won

In [16]:
# Number of counties carried by each party.
merge["winner"].value_counts()

gop    2595
dem     557
Name: winner, dtype: int64

### Aggregate by winner

In [17]:
# Total GOP votes across all counties.
merge["votes_gop"].sum()

74208670

In [18]:
# Total Democratic votes across all counties.
merge["votes_dem"].sum()

81265196

In [19]:
# Average county turnout (total votes cast) by winning party, rounded to
# whole votes.
merge.groupby("winner")["total_votes"].mean().round(0).reset_index()

Unnamed: 0,winner,total_votes
0,dem,169433.0
1,gop,24685.0


---

### Export

In [20]:
# Write the merged table as CSV, without the positional index column.
merge.to_csv(path_or_buf="../data/elections/elections_vax_merge.csv", index=False)

In [21]:
# Write the same table as a pretty-printed JSON array, one object per county.
merge.to_json(
    path_or_buf="../data/elections/elections_vax_merge.json",
    orient="records",
    indent=4,
)