# Vaccine Hesitancy for COVID-19: County and local estimates

In [1]:
# Load the lab_black IPython extension so code cells are auto-formatted with Black.
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat
import glob
import os

In [3]:
# Notebook-wide display configuration.

# Register the "latimes" theme shipped by altair_latimes and make it the active one.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# Show up to 50 columns and 1,000 rows whenever a DataFrame is displayed.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

# Remove Altair's row-count limit so the full county table can be charted.
# Kept as the last expression so the cell still echoes the active transformer.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get the data from the CDC portal

In [4]:
# Source: CDC "Vaccine Hesitancy for COVID-19: County and local estimates" dataset page
# https://data.cdc.gov/Vaccinations/Vaccine-Hesitancy-for-COVID-19-County-and-local-es/q9mh-h2tw

In [5]:
# Direct CSV download endpoint for CDC dataset q9mh-h2tw.
url = "https://data.cdc.gov/api/views/q9mh-h2tw/rows.csv?accessType=DOWNLOAD"

In [6]:
# Download the CSV over the network and parse it into a DataFrame.
# The contents reflect whatever the CDC endpoint serves at run time.
df = pd.read_csv(url)

### Clean up the headers

In [7]:
# Normalize the CDC headers: trim whitespace, lowercase, then apply literal
# (non-regex) substitutions in a fixed order so the names become snake_case-like.
header_names = df.columns.str.strip().str.lower()
for target, substitute in ((" ", "_"), (":", ""), ("/", "_"), (",", "_")):
    header_names = header_names.str.replace(target, substitute, regex=False)
df.columns = header_names

In [8]:
# Drop the three boundary/point columns, which the analysis below does not use.
# Rebinding `df` with errors="ignore" (rather than mutating in place) keeps this
# cell idempotent: re-running it no longer raises KeyError once the columns are gone.
df = df.drop(
    columns=["county_boundary", "state_boundary", "geographical_point"],
    errors="ignore",
)

In [9]:
# Convert FIPS codes to strings left-padded with zeros to five characters, so
# they can be compared with the string keys used in the joins further down.
# NOTE(review): assumes the column has no missing values — astype(str) would
# turn NaN into the text "nan" before padding.
df["fips_code"] = df["fips_code"].astype(str).str.zfill(5)

In [10]:
# Preview the first five rows to check the cleaned headers.
df.head()

Unnamed: 0,fips_code,county_name,state,estimated_hesitant,estimated_strongly_hesitant,social_vulnerability_index_(svi),svi_category,ability_to_handle_a_covid-19_outbreak_(cvac),cvac_category,percent_adults_fully_vaccinated_against_covid-19,percent_hispanic,percent_non-hispanic_american_indian_alaska_native,percent_non-hispanic_asian,percent_non-hispanic_black,percent_non-hispanic_native_hawaiian_pacific_islander,percent_non-hispanic_white,state_code
0,1123,"Tallapoosa County, Alabama",ALABAMA,0.23,0.12,0.89,Very High Vulnerability,0.64,High Vulnerability,0.161,0.0242,0.0022,0.0036,0.2697,0.0,0.6887,AL
1,1121,"Talladega County, Alabama",ALABAMA,0.23,0.11,0.87,Very High Vulnerability,0.84,Very High Vulnerability,0.133,0.0229,0.0043,0.0061,0.3237,0.0003,0.6263,AL
2,1131,"Wilcox County, Alabama",ALABAMA,0.23,0.11,0.93,Very High Vulnerability,0.94,Very High Vulnerability,0.228,0.0053,0.0009,0.0003,0.6938,0.0,0.2684,AL
3,1129,"Washington County, Alabama",ALABAMA,0.23,0.11,0.73,High Vulnerability,0.82,Very High Vulnerability,0.192,0.0146,0.0731,0.0025,0.2354,0.0,0.6495,AL
4,1133,"Winston County, Alabama",ALABAMA,0.22,0.11,0.7,High Vulnerability,0.8,High Vulnerability,0.085,0.0315,0.0034,0.0016,0.0073,0.0005,0.937,AL


### Merge with election results

In [11]:
# Load the 2020 election results file from the neighbouring ../elections folder.
# county_fips is forced to str so it can be joined against the string fips_code.
# NOTE(review): the join assumes these values are zero-padded the same way as
# fips_code — confirm against the JSON file.
df_election = pd.read_json(
    "../elections/data/election_results_2020.json", dtype={"county_fips": str}
)

In [12]:
# Preview the first five rows of the election results.
df_election.head()

Unnamed: 0,state_name,county_fips,county_name,votes_gop,votes_dem,total_votes,diff,per_gop,per_dem,per_point_diff,winner
0,Alabama,1001,Autauga County,19838,7503,27770,12335,0.714368,0.270184,0.444184,Trump
1,Alabama,1003,Baldwin County,83544,24578,109679,58966,0.761714,0.22409,0.537623,Trump
2,Alabama,1005,Barbour County,5622,4816,10518,806,0.534512,0.457882,0.076631,Trump
3,Alabama,1007,Bibb County,7525,1986,9595,5539,0.784263,0.206983,0.57728,Trump
4,Alabama,1009,Blount County,24711,2640,27588,22071,0.895716,0.095694,0.800022,Trump


In [13]:
# Left-join the election results onto the hesitancy table by county FIPS, so
# every CDC row is kept even when no election record matches. Both tables carry
# a county_name column; the election copy (suffixed "_y") is discarded.
merge = df.merge(
    df_election,
    how="left",
    left_on="fips_code",
    right_on="county_fips",
)
merge = merge.drop(columns=["county_name_y"])

In [14]:
# Inspect the column names and dtypes produced by the join.
merge.dtypes

fips_code                                                 object
county_name_x                                             object
state                                                     object
estimated_hesitant                                       float64
estimated_strongly_hesitant                              float64
social_vulnerability_index_(svi)                         float64
svi_category                                              object
ability_to_handle_a_covid-19_outbreak_(cvac)             float64
cvac_category                                             object
percent_adults_fully_vaccinated_against_covid-19         float64
percent_hispanic                                         float64
percent_non-hispanic_american_indian_alaska_native       float64
percent_non-hispanic_asian                               float64
percent_non-hispanic_black                               float64
percent_non-hispanic_native_hawaiian_pacific_islander    float64
percent_non-hispanic_whit

In [15]:
# The join left the CDC county name suffixed with "_x"; restore its plain name.
merge = merge.rename(columns={"county_name_x": "county_name"})

In [16]:
# Narrow the merged table to the county identifiers, the two measures being
# compared (hesitancy and GOP vote share) and the winner label.
corr_columns = ["fips_code", "county_name", "estimated_hesitant", "per_gop", "winner"]
merge_corr = merge.loc[:, corr_columns]

In [17]:
# Pearson correlation between estimated hesitancy and GOP vote share, limited
# to counties whose winner is "Trump".
# The two numeric columns are selected explicitly because merge_corr also holds
# string columns (fips_code, county_name, winner): pandas >= 2.0 raises on
# non-numeric columns in .corr() instead of silently dropping them.
merge_corr.loc[
    merge_corr["winner"] == "Trump", ["estimated_hesitant", "per_gop"]
].corr(method="pearson")

Unnamed: 0,estimated_hesitant,per_gop
estimated_hesitant,1.0,0.252561
per_gop,0.252561,1.0


In [18]:
# Scatter plot of every row in `merge`: GOP vote share on x, estimated
# hesitancy on y, points colored by the county's winner.
trump_share_x = alt.X(
    "per_gop", title="% Trump", axis=alt.Axis(format="%", tickCount=4)
)
hesitancy_y = alt.Y(
    "estimated_hesitant", title="% hesitant", axis=alt.Axis(format="%", tickCount=4)
)
winner_color = alt.Color(
    "winner", scale=alt.Scale(domain=["Trump", "Biden"], range=["red", "blue"])
)

alt.Chart(merge).mark_circle(size=40).encode(
    x=trump_share_x, y=hesitancy_y, color=winner_color
).properties(
    title="Relationship between Trump vote and vaccine hesitancy", width=800, height=500
)

### Just California

In [19]:
# Keep only the California rows, ordered from the highest to the lowest
# estimated share of strongly hesitant residents.
is_california = merge["state"].eq("CALIFORNIA")
ca_merge = merge.loc[is_california].sort_values(
    by="estimated_strongly_hesitant", ascending=False
)

In [20]:
# Same scatter as the national chart but for California rows only, with each
# point additionally sized by the county's total votes.
ca_trump_share_x = alt.X(
    "per_gop", title="% Trump", axis=alt.Axis(format="%", tickCount=6)
)
ca_hesitancy_y = alt.Y(
    "estimated_hesitant", title="% hesitant", axis=alt.Axis(format="%", tickCount=4)
)
ca_winner_color = alt.Color(
    "winner", scale=alt.Scale(domain=["Trump", "Biden"], range=["red", "blue"])
)
ca_vote_size = alt.Size("total_votes")

alt.Chart(ca_merge).mark_circle(size=50).encode(
    x=ca_trump_share_x, y=ca_hesitancy_y, color=ca_winner_color, size=ca_vote_size
).properties(
    title="Relationship between Trump vote and vaccine hesitancy", width=800, height=500
)

### Least hesitant in CA

In [21]:
# First five rows of an ascending sort: the California counties with the
# lowest estimated hesitancy.
ca_merge.sort_values(by="estimated_hesitant").head(5)

Unnamed: 0,fips_code,county_name,state,estimated_hesitant,estimated_strongly_hesitant,social_vulnerability_index_(svi),svi_category,ability_to_handle_a_covid-19_outbreak_(cvac),cvac_category,percent_adults_fully_vaccinated_against_covid-19,percent_hispanic,percent_non-hispanic_american_indian_alaska_native,percent_non-hispanic_asian,percent_non-hispanic_black,percent_non-hispanic_native_hawaiian_pacific_islander,percent_non-hispanic_white,state_code,state_name,county_fips,votes_gop,votes_dem,total_votes,diff,per_gop,per_dem,per_point_diff,winner
422,6075,"San Francisco County, California",CALIFORNIA,0.07,0.03,0.39,Low Vulnerability,0.52,Moderate Vulnerability,0.237,0.1524,0.0019,0.3407,0.05,0.0034,0.4051,CA,California,6075,56417.0,378156.0,443458.0,-321739.0,0.127221,0.852744,0.725523,Biden
410,6041,"Marin County, California",CALIFORNIA,0.08,0.03,0.27,Low Vulnerability,0.31,Low Vulnerability,0.333,0.1598,0.0017,0.0581,0.0213,0.001,0.7115,CA,California,6041,24612.0,128288.0,155801.0,-103676.0,0.157971,0.823409,0.665439,Biden
442,6081,"San Mateo County, California",CALIFORNIA,0.08,0.03,0.26,Low Vulnerability,0.4,Moderate Vulnerability,0.264,0.244,0.0015,0.2831,0.0218,0.0132,0.3924,CA,California,6081,75563.0,291410.0,374138.0,-215847.0,0.201966,0.778884,0.576918,Biden
524,6085,"Santa Clara County, California",CALIFORNIA,0.08,0.03,0.36,Low Vulnerability,0.42,Moderate Vulnerability,0.216,0.2547,0.0017,0.3628,0.0235,0.0032,0.3154,CA,California,6085,214612.0,617967.0,850522.0,-403355.0,0.25233,0.726574,0.474244,Biden
500,6013,"Contra Costa County, California",CALIFORNIA,0.09,0.04,0.41,Moderate Vulnerability,0.47,Moderate Vulnerability,0.261,0.2559,0.002,0.1648,0.084,0.0046,0.4383,CA,California,6013,152877.0,416386.0,581230.0,-263509.0,0.263023,0.716388,0.453364,Biden


### Most hesitant in CA

In [22]:
# Last five rows of an ascending sort: the California counties with the
# highest estimated hesitancy, with the most hesitant listed last.
ca_merge.sort_values(by="estimated_hesitant").tail(5)

Unnamed: 0,fips_code,county_name,state,estimated_hesitant,estimated_strongly_hesitant,social_vulnerability_index_(svi),svi_category,ability_to_handle_a_covid-19_outbreak_(cvac),cvac_category,percent_adults_fully_vaccinated_against_covid-19,percent_hispanic,percent_non-hispanic_american_indian_alaska_native,percent_non-hispanic_asian,percent_non-hispanic_black,percent_non-hispanic_native_hawaiian_pacific_islander,percent_non-hispanic_white,state_code,state_name,county_fips,votes_gop,votes_dem,total_votes,diff,per_gop,per_dem,per_point_diff,winner
529,6063,"Plumas County, California",CALIFORNIA,0.15,0.06,0.27,Low Vulnerability,0.54,Moderate Vulnerability,,0.0895,0.0144,0.0114,0.0076,0.0046,0.8314,CA,California,6063,6445.0,4561.0,11256.0,1884.0,0.572584,0.405206,0.167377,Trump
416,6049,"Modoc County, California",CALIFORNIA,0.15,0.06,0.75,High Vulnerability,0.78,High Vulnerability,,0.1449,0.0418,0.0111,0.0153,0.001,0.7769,CA,California,6049,3109.0,1150.0,4338.0,1959.0,0.71669,0.265099,0.451591,Trump
534,6093,"Siskiyou County, California",CALIFORNIA,0.15,0.06,0.72,High Vulnerability,0.68,High Vulnerability,0.215,0.1258,0.032,0.016,0.0157,0.0029,0.761,CA,California,6093,13290.0,9593.0,23450.0,3697.0,0.566738,0.409083,0.157655,Trump
465,6035,"Lassen County, California",CALIFORNIA,0.15,0.06,0.6,Moderate Vulnerability,0.7,High Vulnerability,0.131,0.1896,0.0274,0.0139,0.0848,0.008,0.6543,CA,California,6035,8970.0,2799.0,11985.0,6171.0,0.748436,0.233542,0.514894,Trump
403,6031,"Kings County, California",CALIFORNIA,0.16,0.06,0.97,Very High Vulnerability,0.75,High Vulnerability,0.108,0.5449,0.0085,0.0369,0.0583,0.0016,0.3221,CA,California,6031,24072.0,18699.0,43858.0,5373.0,0.548862,0.426353,0.122509,Trump


---

In [23]:
# Load the county shapes from the local boundaries file with geopandas.
counties = gpd.read_file("input/counties/usa_counties_clean_simplified.json")

In [24]:
# Apply the same header normalization used for the CDC table: trim, lowercase,
# then literal (non-regex) substitutions in a fixed order.
county_headers = counties.columns.str.strip().str.lower()
for target, substitute in ((" ", "_"), (":", ""), ("/", "_"), (",", "_")):
    county_headers = county_headers.str.replace(target, substitute, regex=False)
counties.columns = county_headers

In [25]:
# Preview the first five county shapes and their attribute columns.
counties.head()

Unnamed: 0,statefp,countyfp,countyns,geoid,name,shape_area,geometry
0,15,3,365281,15003,Honolulu,0.138087,"POLYGON ((-157.91418 21.63521, -157.98643 21.6..."
1,41,3,1155126,41003,Benton,0.198723,"POLYGON ((-123.14903 44.72022, -123.17499 44.7..."
2,41,11,1135848,41011,Coos,0.471548,"POLYGON ((-123.81155 42.78884, -123.81150 42.7..."
3,6,45,277287,6045,Mendocino,0.935848,"POLYGON ((-123.54446 40.00192, -123.59440 40.0..."
4,41,41,1135856,41041,Lincoln,0.294925,"POLYGON ((-123.72466 45.04443, -123.74415 45.0..."


In [26]:
# Attach the hesitancy/election columns to each county shape. This is an inner
# join (the default), matching the shapes' geoid to the table's fips_code, so
# only counties present on both sides are kept.
merge_geo = counties.merge(
    merge, how="inner", left_on="geoid", right_on="fips_code"
)

In [27]:
# Row count after the inner join, as a check on how many counties matched.
len(merge_geo)

3142

In [28]:
# Quick visual check of the joined county geometries.
merge_geo.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x129265c10>

In [29]:
# Write the joined county layer out as GeoJSON.
# The target folder is created first so the export also works on a fresh
# checkout where output/ does not exist yet.
os.makedirs("output", exist_ok=True)
merge_geo.to_file("output/vaccine_hesitancy_geo.geojson", driver="GeoJSON")

---

## Exports 

In [30]:
# Export the seven least hesitant California counties.
# head(7) is explicit because a bare head() returns only five rows, while the
# file name and the companion most-hesitant export both use seven.
# NOTE(review): the file name "seven_hesitant_hesitant.csv" looks like a typo
# for "seven_least_hesitant.csv"; it is left unchanged here in case downstream
# consumers reference the existing path.
os.makedirs("output/hesitancy", exist_ok=True)
ca_merge.sort_values("estimated_hesitant", ascending=True).head(7).to_csv(
    "output/hesitancy/seven_hesitant_hesitant.csv", index=False
)

In [31]:
# Export the seven most hesitant California counties: the last seven rows of an
# ascending sort, so the most hesitant county is the final row of the file.
# The target folder is created first so the export works on a fresh checkout.
os.makedirs("output/hesitancy", exist_ok=True)
ca_merge.sort_values("estimated_hesitant", ascending=True).tail(7).to_csv(
    "output/hesitancy/seven_most_hesitant.csv", index=False
)