In [None]:
"""Calculate the percentage of people with an additional vaccine dose out of the total eligible population.

Eligible = received a second Pfizer/Moderna dose at least 5 months ago, or a J&J dose at least 2 months ago (months are approximated as 30 days).

To see more on booster eligibility as of 2/3/2022: 
    https://www.cdc.gov/coronavirus/2019-ncov/vaccines/booster-shot.html


For any further/future use, replace dataframe slicing with some kind of Pandas rolling/groupby function.
"""

import pandas as pd
from datetime import timedelta

# Route DataFrame.plot() through plotly and switch plotly into notebook mode
# so figures render inline in the notebook.
import plotly.offline as py

pd.options.plotting.backend = "plotly"
py.init_notebook_mode(connected=True)


In [None]:
# Download the full CSV export of data.cdc.gov view "unsk-b7fc" into a DataFrame.
# Requires network access; every later cell derives from this raw frame.
CDC_CSV_URL = "https://data.cdc.gov/api/views/unsk-b7fc/rows.csv?accessType=DOWNLOAD"
fetched = pd.read_csv(CDC_CSV_URL)

In [None]:
# Keep only the source columns this analysis uses, give them short snake_case
# names, and parse the report date into a datetime column.

cols = {
    "Location": "state",
    "Date": "dt",
    "Administered_Dose1_Recip": "initiated",
    "Series_Complete_12Plus": "completed_12_plus",
    "Administered_Janssen": "jj_series",
    "Additional_Doses": "additional_dose",
}

# Select in the mapping's key order, then apply the mapping as the rename.
renamed = fetched.loc[:, list(cols)].rename(columns=cols)
data = renamed.assign(dt=pd.to_datetime(renamed["dt"]))

## Current Date

In [None]:
# Find first day of eligibility (5 months back for Pfizer/Moderna, 2 months for J&J)
# Assume all months are 30 days for now
#
# The cutoffs are measured back from the most recent report date in `data`, not
# from the wall clock: the boosted counts used for the ratio are taken at
# data["dt"].max(), so both sides of the ratio share one reference date and the
# result does not depend on the day the notebook happens to be run.

latest_report = data["dt"].max().floor('d')
window = latest_report - timedelta(days=5*30)
window_jj = latest_report - timedelta(days=2*30)

In [None]:
# Booster-eligible population per state, combining both vaccine types:
#   - people who had completed their series as of `window` (5 months back), plus
#   - J&J doses administered as of `window_jj` (2 months back).
# Rows are matched on the exact report date; a cutoff date with no report in
# `data` yields an empty Series and therefore NaN after the addition.
# NOTE(review): if Series_Complete_12Plus already counts single-dose J&J
# recipients, they are counted twice here — confirm against the CDC data dictionary.

two_dose_eligible = data.loc[data["dt"] == window].set_index("state")["completed_12_plus"]
eligible_jj = data.loc[data["dt"] == window_jj].set_index("state")["jj_series"]  # all JJ series are >= 18 years

# Index-aligned (per-state) sum of the two groups
eligible = two_dose_eligible.add(eligible_jj).rename("eligible")

In [None]:
# Additional-dose counts per state, taken from the most recent report date in the data

latest_dt = data["dt"].max()
current_additional = (
    data.loc[data["dt"] == latest_dt, ["state", "additional_dose", "dt"]]
    .set_index("state")
)

In [None]:
# Left-join the eligible counts onto the latest boosted counts (both indexed by
# state) and compute boosted / eligible. Despite the column name, the value is
# a 0-1 fraction, not multiplied by 100.

combined = current_additional.join(eligible).assign(
    percent_up_to_date=lambda df: df["additional_dose"] / df["eligible"]
)
# To rank states: combined.sort_values("percent_up_to_date", ascending=False)

## Timeseries

In [None]:
## Timeseries of the boosted share of the eligible population, per state and day.
## Same calculation as the "Current Date" steps above, but done for every date at
## once with date-shifted merges instead of re-slicing the dataframe per day, and
## wrapped in a function so it does not overwrite `window`, `eligible`,
## `current_additional`, `combined`, ... from the cells above.

def percent_boosted_timeseries(frame, start, end, two_dose_wait_days=5 * 30, jj_wait_days=2 * 30):
    """Compute additional_dose / eligible for every state on every day in [start, end].

    Parameters
    ----------
    frame : DataFrame with columns "state", "dt" (datetime), "completed_12_plus",
        "jj_series" and "additional_dose".
    start, end : anything accepted by pd.date_range; both endpoints are included.
    two_dose_wait_days : days that must have passed since series completion
        (default 5 * 30, i.e. five 30-day months).
    jj_wait_days : days that must have passed since a J&J dose
        (default 2 * 30, i.e. two 30-day months).

    Returns
    -------
    DataFrame with columns "state", "additional_dose", "dt", "eligible" and
    "percent_up_to_date" (a 0-1 fraction), ordered by date and carrying a fresh
    integer index. "eligible" is NaN where `frame` has no report exactly
    `two_dose_wait_days` / `jj_wait_days` before the row's date.
    """
    # Moving each report forward by the waiting period makes "the count N days
    # ago" land on the report date it has to be compared with, so a plain
    # equality merge on (state, dt) replaces the per-day lookups.
    two_dose = frame[["state", "dt", "completed_12_plus"]].assign(
        dt=lambda df: df["dt"] + pd.Timedelta(days=two_dose_wait_days)
    )
    jj = frame[["state", "dt", "jj_series"]].assign(
        dt=lambda df: df["dt"] + pd.Timedelta(days=jj_wait_days)
    )

    # Only reports that fall exactly on one of the requested days are kept.
    # The stable sort orders them by date while preserving the source row order
    # within each day.
    requested_days = pd.date_range(start=start, end=end)
    boosted = (
        frame.loc[frame["dt"].isin(requested_days), ["state", "additional_dose", "dt"]]
        .sort_values("dt", kind="mergesort")
    )

    # Left merges keep every boosted row; a missing counterpart shows up as NaN.
    merged = (
        boosted
        .merge(two_dose, on=["state", "dt"], how="left")
        .merge(jj, on=["state", "dt"], how="left")
    )
    merged["eligible"] = merged["completed_12_plus"] + merged["jj_series"]
    merged["percent_up_to_date"] = merged["additional_dose"] / merged["eligible"]

    result_columns = ["state", "additional_dose", "dt", "eligible", "percent_up_to_date"]
    return merged[result_columns].reset_index(drop=True)


out = percent_boosted_timeseries(data, start="2021-11-01", end="2022-02-02")

In [None]:
# Plot the boosted share of the eligible population over time for rows where state == "CA"
ca_timeseries = out[out["state"] == "CA"]
ca_timeseries.plot(x="dt", y=["percent_up_to_date"])

In [None]:
# Display the combined timeseries dataframe built above
out