# Coronavirus vaccine administration by county

### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
from pathlib import Path

import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat

In [3]:
# Register the "latimes" Altair theme and make it the active one.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# Raise the pandas display limits so wider and longer frames are not truncated.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

# Lift Altair's row limit; kept as the last expression so its return value
# remains the cell's displayed output.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get the county-level data from the CDC

In [4]:
# Pulled from here: https://covid.cdc.gov/covid-data-tracker/#county-view

In [5]:
# Endpoint for the CDC COVID Data Tracker's condensed county vaccination data.
url = (
    "https://covid.cdc.gov/covid-data-tracker/COVIDData/getAjaxData"
    "?id=vaccination_county_condensed_data"
)

In [6]:
# Fetch the JSON payload from `url` into a DataFrame (requires network access).
df_nested = pd.read_json(url)

### Extract the nested data

In [7]:
# Pull the nested records out of the payload column and flatten them into a table.
county_records = df_nested["vaccination_county_condensed_data"]
df = pd.json_normalize(county_records)

In [8]:
# Preview the first rows of the flattened table.
df.head()

Unnamed: 0,Date,FIPS,StateName,StateAbbr,County,Series_Complete_18Plus,Series_Complete_18PlusPop_Pct,Series_Complete_65Plus,Series_Complete_65PlusPop_Pct,Series_Complete_Yes,Series_Complete_Pop_Pct,Completeness_pct
0,2021-03-27,1001,Alabama,AL,Autauga,4725.0,11.0,2530.0,28.4,4727.0,8.5,93.5
1,2021-03-27,1003,Alabama,AL,Baldwin,29931.0,17.0,19597.0,41.8,29941.0,13.4,93.5
2,2021-03-27,1005,Alabama,AL,Barbour,2416.0,12.3,1641.0,33.8,2416.0,9.8,93.5
3,2021-03-27,1007,Alabama,AL,Bibb,2020.0,11.3,1260.0,33.8,2021.0,9.0,93.5
4,2021-03-27,1009,Alabama,AL,Blount,4177.0,9.4,2642.0,24.4,4179.0,7.2,93.5


### Clean up headers, dates and extra spaces around strings

In [9]:
# Normalize every column label to lower case.
df.columns = df.columns.map(str.lower)

In [10]:
# Trim leading/trailing whitespace from every object-typed (text) column.
df_obj = df.select_dtypes(include="object")
for text_col in df_obj.columns:
    df[text_col] = df_obj[text_col].str.strip()

In [11]:
# Parse the report date, then derive the calendar month as a string label.
df["date"] = pd.to_datetime(df["date"])
df["month"] = df["date"].dt.month.astype(str)

In [12]:
# Check column types after cleanup: `date` should be datetime64 and `month` a string (object).
df.dtypes

date                             datetime64[ns]
fips                                     object
statename                                object
stateabbr                                object
county                                   object
series_complete_18plus                  float64
series_complete_18pluspop_pct           float64
series_complete_65plus                  float64
series_complete_65pluspop_pct           float64
series_complete_yes                     float64
series_complete_pop_pct                 float64
completeness_pct                        float64
month                                    object
dtype: object

### Filter the data frame to just a specific state

In [13]:
# Keep only the rows whose state abbreviation is California's.
ca = df.loc[df["stateabbr"].eq("CA")]

In [14]:
# Ten California counties with the highest share of population fully vaccinated.
(
    ca.loc[:, ["date", "fips", "county", "statename", "series_complete_pop_pct"]]
    .sort_values(by="series_complete_pop_pct", ascending=False)
    .iloc[:10]
)

Unnamed: 0,date,fips,county,statename,series_complete_pop_pct
206,2021-03-27,6041,Marin,California,24.6
213,2021-03-27,6055,Napa,California,21.2
208,2021-03-27,6045,Mendocino,California,21.0
234,2021-03-27,6097,Sonoma,California,20.4
226,2021-03-27,6081,San Mateo,California,19.6
192,2021-03-27,6013,Contra Costa,California,19.2
240,2021-03-27,6109,Tuolumne,California,19.1
223,2021-03-27,6075,San Francisco,California,19.0
229,2021-03-27,6087,Santa Cruz,California,18.7
216,2021-03-27,6061,Placer,California,18.4


### Which counties in the U.S. have the highest vaccination rates?

In [15]:
# Ten counties nationwide with the highest share of population fully vaccinated.
(
    df.loc[:, ["date", "fips", "county", "statename", "series_complete_pop_pct"]]
    .sort_values(by="series_complete_pop_pct", ascending=False)
    .iloc[:10]
)

Unnamed: 0,date,fips,county,statename,series_complete_pop_pct
412,2021-03-27,13053,Chattahoochee,Georgia,69.6
94,2021-03-27,2282,Yakutat,Alaska,48.0
90,2021-03-27,2230,Skagway,Alaska,46.8
87,2021-03-27,2195,Petersburg,Alaska,45.3
89,2021-03-27,2220,Sitka,Alaska,44.4
75,2021-03-27,2100,Haines,Alaska,42.7
84,2021-03-27,2180,Nome,Alaska,41.3
76,2021-03-27,2105,Hoonah-Angoon,Alaska,41.1
1615,2021-03-27,30035,Glacier,Montana,40.2
95,2021-03-27,2290,Yukon-Koyukuk,Alaska,39.5


---

### Export to csv

In [16]:
# Write the cleaned county table to disk without the index column.
# The output folder is created first because pandas raises OSError when the
# target directory is missing, which would break a run on a fresh checkout.
output_path = Path("output/vaccinations_by_county.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)