# Coronavirus vaccine administration by county

### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
from pathlib import Path

import pandas as pd
import geopandas as gpd
import altair as alt

In [3]:
# Let pandas display up to 50 columns and 1000 rows, and turn off
# Altair's max-rows check on chart data.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get the county-level data from CDC

In [4]:
# Pulled from here: https://covid.cdc.gov/covid-data-tracker/#county-view

In [5]:
# CDC COVID Data Tracker endpoint queried for the county-level vaccination data.
url = (
    "https://covid.cdc.gov/covid-data-tracker/COVIDData/getAjaxData?id=vaccination_county_condensed_data"
)

In [6]:
# Fetch the JSON response; the county records are still nested at this point.
df_nested = pd.read_json(path_or_buf=url)

### Extract the nested data

In [7]:
# Flatten the nested records into a table with one column per field.
df = pd.json_normalize(
    data=df_nested["vaccination_county_condensed_data"],
)

In [17]:
# Number of rows in the flattened frame
df.shape[0]

3279

### Clean up headers, dates and extra spaces around strings

In [9]:
# Normalize every column header to lowercase.
df.columns = [column_name.lower() for column_name in df.columns]

In [10]:
# Trim leading/trailing whitespace from the string values in object-dtype columns.
# Only real `str` values are stripped and everything else is passed through as is:
# the `.str.strip()` accessor would replace every non-string value in a mixed
# column (numbers, booleans, ...) with NaN and silently drop data.
df_obj = df.select_dtypes(["object"])
df[df_obj.columns] = df_obj.apply(
    lambda column: column.map(
        lambda value: value.strip() if isinstance(value, str) else value
    )
)

In [11]:
# Parse the date column, then store its month number as a string.
df["date"] = pd.to_datetime(df["date"])
df["month"] = df["date"].dt.month.astype(str)

In [12]:
# Inspect the column types after the header, string and date cleanup.
df.dtypes

date                                       datetime64[ns]
fips                                               object
statename                                          object
stateabbr                                          object
county                                             object
series_complete_5plus                             float64
series_complete_5pluspop_pct                      float64
series_complete_18plus                            float64
series_complete_18pluspop_pct                     float64
series_complete_65plus                            float64
series_complete_65pluspop_pct                     float64
series_complete_yes                               float64
series_complete_pop_pct                           float64
completeness_pct                                  float64
census2019_12pluspop                              float64
series_complete_12plus                            float64
series_complete_12pluspop_pct                     float64
administered_d

### Filter the data frame to just a specific state

In [13]:
# Keep only the rows whose state abbreviation is "CA".
ca = df.loc[df["stateabbr"].eq("CA")]

In [14]:
# First 10 California rows ranked by `series_complete_pop_pct`, highest first
(
    ca.loc[:, ["date", "fips", "county", "statename", "series_complete_pop_pct"]]
    .sort_values(by="series_complete_pop_pct", ascending=False)
    .iloc[:10]
)

Unnamed: 0,date,fips,county,statename,series_complete_pop_pct
208,2022-01-27,6041,Marin County,California,86.9
230,2022-01-27,6085,Santa Clara County,California,83.9
200,2022-01-27,6025,Imperial County,California,82.6
225,2022-01-27,6075,San Francisco County,California,81.8
228,2022-01-27,6081,San Mateo County,California,81.4
188,2022-01-27,6001,Alameda County,California,79.5
194,2022-01-27,6013,Contra Costa County,California,79.3
231,2022-01-27,6087,Santa Cruz County,California,76.2
236,2022-01-27,6097,Sonoma County,California,75.7
215,2022-01-27,6055,Napa County,California,74.7


### Which counties in the U.S. have the highest vaccination rates?

In [15]:
# First 10 rows across all states ranked by `series_complete_pop_pct`, highest first
(
    df.loc[:, ["date", "fips", "county", "statename", "series_complete_pop_pct"]]
    .sort_values(by="series_complete_pop_pct", ascending=False)
    .iloc[:10]
)

Unnamed: 0,date,fips,county,statename,series_complete_pop_pct
2720,2022-01-27,48235,Irion County,Texas,95.0
2337,2022-01-27,72049,Culebra Muni,Puerto Rico,95.0
71,2022-01-27,2060,Bristol Bay Borough,Alaska,95.0
414,2022-01-27,13053,Chattahoochee County,Georgia,95.0
110,2022-01-27,4023,Santa Cruz County,Arizona,95.0
2791,2022-01-27,48377,Presidio County,Texas,95.0
2319,2022-01-27,72013,Arecibo Muni,Puerto Rico,95.0
98,2022-01-27,4001,Apache County,Arizona,92.2
2354,2022-01-27,72081,Lares Muni,Puerto Rico,90.1
302,2022-01-27,8111,San Juan County,Colorado,90.1


---

### Export to CSV

In [16]:
# Write the cleaned county table to disk without the index column.
# The destination folder is created first: `to_csv` does not create missing
# directories, so the export would fail on a fresh checkout without `output/`.
output_path = Path("output/vaccinations_by_county.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)