# Coronavirus vaccine administration by county

### Load Python tools

In [1]:
# Load the lab_black IPython extension (presumably Black auto-formatting for
# the cells that follow, going by the extension's name).
%load_ext lab_black

In [2]:
from pathlib import Path

import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat

In [3]:
# Widen pandas' display limits so wide/long frames are not truncated.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

# Register the "latimes" theme supplied by altair_latimes and make it active.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# Lift Altair's row limit; kept as the last expression so the cell still
# echoes the transformer registry as its output.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get the county-level data from CDC

In [4]:
# Source: the CDC COVID Data Tracker county view, https://covid.cdc.gov/covid-data-tracker/#county-view

In [5]:
# CDC COVID Data Tracker AJAX endpoint; the `id` query parameter selects the
# condensed county-level vaccination data set.
url = "https://covid.cdc.gov/covid-data-tracker/COVIDData/getAjaxData?id=vaccination_county_condensed_data"

In [6]:
# Fetch the endpoint (requires network access) and parse the JSON response.
# The county records arrive nested inside the
# "vaccination_county_condensed_data" column and are unpacked in the next section.
df_nested = pd.read_json(url)

### Extract the nested data

In [7]:
# Flatten the nested county records into a tabular frame:
# one row per record, one column per field.
df = pd.json_normalize(df_nested["vaccination_county_condensed_data"])

In [16]:
# Preview the first rows of the flattened frame.
# NOTE(review): this cell carries execution count 16, i.e. it was last run
# after the rest of the notebook. The saved output therefore shows the frame
# *after* cleanup (lowercased headers, derived "month" column); a fresh
# Restart & Run All will display the raw, pre-cleanup columns here instead.
df.head()

Unnamed: 0,date,fips,statename,stateabbr,county,series_complete_18plus,series_complete_18pluspop_pct,series_complete_65plus,series_complete_65pluspop_pct,series_complete_yes,series_complete_pop_pct,completeness_pct,month
0,2021-03-26,1001,Alabama,AL,Autauga,4638.0,10.8,2491.0,27.9,4639.0,8.3,93.5,3
1,2021-03-26,1003,Alabama,AL,Baldwin,29522.0,16.8,19428.0,41.5,29532.0,13.2,93.5,3
2,2021-03-26,1005,Alabama,AL,Barbour,2323.0,11.8,1583.0,32.6,2323.0,9.4,93.5,3
3,2021-03-26,1007,Alabama,AL,Bibb,1994.0,11.2,1246.0,33.4,1995.0,8.9,93.5,3
4,2021-03-26,1009,Alabama,AL,Blount,4133.0,9.3,2618.0,24.2,4135.0,7.2,93.5,3


### Clean up headers, dates and extra spaces around strings

In [8]:
# Lowercase every column header so later cells can reference columns
# with consistent lowercase names.
df.columns = df.columns.str.lower()

In [9]:
def _strip_text(column):
    """Return ``column`` with leading/trailing whitespace removed from each string."""
    return column.str.strip()


# Only object-dtype (text) columns are touched; the cleaned values are
# written back into the same columns of ``df``.
df_obj = df.select_dtypes(include=["object"])
df[df_obj.columns] = df_obj.apply(_strip_text)

In [10]:
# Parse the date strings into real timestamps ...
df["date"] = pd.to_datetime(df["date"])
# ... then derive the calendar month and store it as a string label.
df["month"] = df["date"].dt.month.astype(str)

In [11]:
# Check the conversions above: "date" should now be datetime64[ns] and
# "month" should be object (string).
df.dtypes

date                             datetime64[ns]
fips                                     object
statename                                object
stateabbr                                object
county                                   object
series_complete_18plus                  float64
series_complete_18pluspop_pct           float64
series_complete_65plus                  float64
series_complete_65pluspop_pct           float64
series_complete_yes                     float64
series_complete_pop_pct                 float64
completeness_pct                        float64
month                                    object
dtype: object

### Filter the data frame to just a specific state

In [12]:
# Keep only California's counties. The explicit .copy() makes `ca` an
# independent frame, so any later column assignment on it neither triggers
# SettingWithCopyWarning nor leaves doubt about whether `df` was modified.
ca = df.loc[df["stateabbr"] == "CA"].copy()

In [13]:
# Ten California counties with the largest share of residents fully
# vaccinated: rank first, then trim to the columns worth showing.
(
    ca.sort_values(by="series_complete_pop_pct", ascending=False)
    .loc[:, ["date", "fips", "county", "series_complete_pop_pct"]]
    .head(10)
)

Unnamed: 0,date,fips,county,series_complete_pop_pct
206,2021-03-26,6041,Marin,23.7
213,2021-03-26,6055,Napa,20.6
208,2021-03-26,6045,Mendocino,20.3
234,2021-03-26,6097,Sonoma,19.9
226,2021-03-26,6081,San Mateo,18.8
240,2021-03-26,6109,Tuolumne,18.7
192,2021-03-26,6013,Contra Costa,18.5
223,2021-03-26,6075,San Francisco,18.3
229,2021-03-26,6087,Santa Cruz,18.2
216,2021-03-26,6061,Placer,17.9


### Which counties in the U.S. have the highest vaccination rates?

In [14]:
# Ten counties nationwide with the largest share of residents fully
# vaccinated: rank first, then trim to the columns worth showing.
(
    df.sort_values(by="series_complete_pop_pct", ascending=False)
    .loc[:, ["date", "fips", "county", "statename", "series_complete_pop_pct"]]
    .head(10)
)

Unnamed: 0,date,fips,county,statename,series_complete_pop_pct
412,2021-03-26,13053,Chattahoochee,Georgia,67.0
94,2021-03-26,2282,Yakutat,Alaska,48.0
90,2021-03-26,2230,Skagway,Alaska,46.8
87,2021-03-26,2195,Petersburg,Alaska,45.2
89,2021-03-26,2220,Sitka,Alaska,44.3
75,2021-03-26,2100,Haines,Alaska,42.7
76,2021-03-26,2105,Hoonah-Angoon,Alaska,41.1
84,2021-03-26,2180,Nome,Alaska,40.6
95,2021-03-26,2290,Yukon-Koyukuk,Alaska,39.3
1600,2021-03-26,30005,Blaine,Montana,38.9


---

### Export to csv

In [15]:
# Write the cleaned national frame to disk without the index column.
# to_csv does not create missing directories, so make sure "output/" exists
# first; otherwise a fresh checkout fails on this final cell.
output_path = Path("output/vaccinations_by_county.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)