# Coronavirus vaccine administration by county

### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
from pathlib import Path

import pandas as pd
import geopandas as gpd
import altair as alt

In [3]:
# Raise pandas' display limits so wide and long tables are shown untruncated
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)
# Lift Altair's row limit on chart datasets
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get the county-level data from CDC

In [4]:
# Pulled from here: https://covid.cdc.gov/covid-data-tracker/#county-view

In [5]:
# CDC COVID Data Tracker endpoint backing the county view
url = (
    "https://covid.cdc.gov/covid-data-tracker/COVIDData/getAjaxData"
    "?id=vaccination_county_condensed_data"
)

In [6]:
# Fetch the endpoint's JSON response; the per-county records arrive nested
# under the "vaccination_county_condensed_data" key and are unpacked below
df_nested = pd.read_json(url)

### Extract the nested data

In [7]:
# Flatten the nested per-county records into a table with one column per field
df = pd.json_normalize(df_nested["vaccination_county_condensed_data"])

In [8]:
# Preview the first five rows to confirm the records flattened into columns
df.head()

Unnamed: 0,Date,FIPS,StateName,StateAbbr,County,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Series_Complete_Yes,Series_Complete_Pop_Pct,Completeness_pct,Census2019_12PlusPop,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Administered_Dose1_Recip,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_65Plus,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65PlusPop_Pct,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI,SVI_CTGY,series_complete_pop_pct_UR_Equity,series_complete_12PlusPop_pct_UR_Equity,series_complete_18PlusPop_pct_UR_Equity,series_complete_65PlusPop_pct_UR_Equity,metro_status
0,2021-12-06,1001,Alabama,AL,Autauga County,19595.0,45.7,5872.0,65.8,20864.0,37.3,92.7,47574.0,20838.0,43.8,26087.0,25843.0,24261.0,7034.0,46.7,54.3,56.5,78.8,6,7,7,7,B,2.0,3.0,3.0,3.0,Metro
1,2021-12-06,1003,Alabama,AL,Baldwin County,98911.0,56.3,37266.0,79.6,103802.0,46.5,92.7,192649.0,103701.0,53.8,131573.0,130741.0,124402.0,46064.0,58.9,67.9,70.8,98.4,3,4,4,4,A,3.0,4.0,4.0,4.0,Metro
2,2021-12-06,1005,Alabama,AL,Barbour County,9670.0,49.3,3375.0,69.4,10129.0,41.0,92.7,21404.0,10124.0,47.3,12438.0,12392.0,11808.0,3902.0,50.4,57.9,60.2,80.3,15,15,15,15,D,7.0,7.0,7.0,7.0,Non-metro
3,2021-12-06,1007,Alabama,AL,Bibb County,6841.0,38.4,2226.0,59.6,7183.0,32.1,92.7,19480.0,7178.0,36.8,8796.0,8771.0,8364.0,2500.0,39.3,45.0,46.9,67.0,10,10,10,11,C,2.0,2.0,2.0,3.0,Metro
4,2021-12-06,1009,Alabama,AL,Blount County,16061.0,36.0,5637.0,52.1,16777.0,29.0,92.7,49234.0,16775.0,34.1,20273.0,20198.0,19292.0,6489.0,35.1,41.0,43.3,60.0,5,6,6,7,B,1.0,2.0,2.0,3.0,Metro


### Clean up headers, dates and extra spaces around strings

In [9]:
# Lowercase every header so later cells can reference columns consistently
df.columns = df.columns.map(str.lower)

In [10]:
# Trim leading/trailing whitespace from every text (object-dtype) column
df_obj = df.select_dtypes(include=["object"])
df[df_obj.columns] = df_obj.apply(lambda col: col.str.strip())

In [11]:
# Parse the date strings into timestamps, then store the month number as text
df["date"] = pd.to_datetime(df["date"])
df["month"] = df["date"].dt.month.astype(str)

In [12]:
# Check column types after the cleanup; `date` should now be datetime64
df.dtypes

date                                       datetime64[ns]
fips                                               object
statename                                          object
stateabbr                                          object
county                                             object
series_complete_18plus                            float64
series_complete_18pluspop_pct                     float64
series_complete_65plus                            float64
series_complete_65pluspop_pct                     float64
series_complete_yes                               float64
series_complete_pop_pct                           float64
completeness_pct                                  float64
census2019_12pluspop                              float64
series_complete_12plus                            float64
series_complete_12pluspop_pct                     float64
administered_dose1_recip                          float64
administered_dose1_recip_12plus                   float64
administered_d

### Filter the data frame to just a specific state

In [13]:
# Keep only the California rows (matched on the two-letter state abbreviation)
ca = df.loc[df["stateabbr"].eq("CA")]

In [14]:
# Ten California counties with the highest share of residents fully vaccinated
ca.sort_values(by="series_complete_pop_pct", ascending=False)[
    ["date", "fips", "county", "statename", "series_complete_pop_pct"]
].head(10)

Unnamed: 0,date,fips,county,statename,series_complete_pop_pct
207,2021-12-06,6041,Marin County,California,80.9
229,2021-12-06,6085,Santa Clara County,California,78.8
224,2021-12-06,6075,San Francisco County,California,77.8
227,2021-12-06,6081,San Mateo County,California,76.1
193,2021-12-06,6013,Contra Costa County,California,74.4
187,2021-12-06,6001,Alameda County,California,74.0
199,2021-12-06,6025,Imperial County,California,73.6
230,2021-12-06,6087,Santa Cruz County,California,71.8
235,2021-12-06,6097,Sonoma County,California,71.2
214,2021-12-06,6055,Napa County,California,70.6


### Which counties in the U.S. have the highest vaccination rates?

In [15]:
# Ten counties nationwide with the highest share of residents fully vaccinated
df.sort_values(by="series_complete_pop_pct", ascending=False)[
    ["date", "fips", "county", "statename", "series_complete_pop_pct"]
].head(10)

Unnamed: 0,date,fips,county,statename,series_complete_pop_pct
2318,2021-12-06,72013,Arecibo Muno,Puerto Rico,99.9
413,2021-12-06,13053,Chattahoochee County,Georgia,99.9
71,2021-12-06,2060,Bristol Bay Borough,Alaska,95.8
109,2021-12-06,4023,Santa Cruz County,Arizona,95.3
2336,2021-12-06,72049,Culebra Muno,Puerto Rico,94.3
301,2021-12-06,8111,San Juan County,Colorado,88.9
2995,2021-12-06,51740,Portsmouth City,Virginia,86.0
2353,2021-12-06,72081,Lares Muno,Puerto Rico,84.8
3217,2021-12-06,56039,Teton County,Wyoming,84.5
97,2021-12-06,4001,Apache County,Arizona,84.1


---

### Export to csv

In [16]:
# Create the export folder if it is missing: to_csv raises OSError when the
# parent directory does not exist (e.g. on a fresh checkout)
Path("output").mkdir(parents=True, exist_ok=True)
# Write the cleaned county table without the positional index column
df.to_csv("output/vaccinations_by_county.csv", index=False)