# COVID-19 Growth By State (US)
> Growth and death rates of COVID-19 for the US by State.

- author: Avy Faingezicht, Rob Zinkov, Pratap Vardhan & Hamel Husain
- categories: [growth, US, states]
- image: images/covid-growth-states.png
- permalink: /growth-us-states/

In [1]:
#hide
# Resident population per jurisdiction, keyed by two-letter postal code.
#
# Source: US Census Bureau, NST-EST2019-01 (Table 1. Annual Estimates of the
# Resident Population for the United States, Regions, States, and Puerto Rico:
# April 1, 2010 to July 1, 2019)
# https://www.census.gov/newsroom/press-kits/2019/national-state-estimates.html
#
# NOTE(review): GU, VI, AS and MP are not named in the table title cited above;
# the provenance of those four figures should be confirmed.
populations = {
    "AL": 4_903_185,
    "AK": 731_545,
    "AZ": 7_278_717,
    "AR": 3_017_804,
    "CA": 39_512_223,
    "CO": 5_758_736,
    "CT": 3_565_287,
    "DE": 973_764,
    "DC": 705_749,
    "FL": 21_477_737,
    "GA": 10_617_423,
    "HI": 1_415_872,
    "ID": 1_787_065,
    "IL": 12_671_821,
    "IN": 6_732_219,
    "IA": 3_155_070,
    "KS": 2_913_314,
    "KY": 4_467_673,
    "LA": 4_648_794,
    "ME": 1_344_212,
    "MD": 6_045_680,
    "MA": 6_892_503,
    "MI": 9_986_857,
    "MN": 5_639_632,
    "MS": 2_976_149,
    "MO": 6_137_428,
    "MT": 1_068_778,
    "NE": 1_934_408,
    "NV": 3_080_156,
    "NH": 1_359_711,
    "NJ": 8_882_190,
    "NM": 2_096_829,
    "NY": 19_453_561,
    "NC": 10_488_084,
    "ND": 762_062,
    "OH": 11_689_100,
    "OK": 3_956_971,
    "OR": 4_217_737,
    "PA": 12_801_989,
    "RI": 1_059_361,
    "SC": 5_148_714,
    "SD": 884_659,
    "TN": 6_829_174,
    "TX": 28_995_881,
    "UT": 3_205_958,
    "VT": 623_989,
    "VA": 8_535_519,
    "WA": 7_614_893,
    "WV": 1_792_147,
    "WI": 5_822_434,
    "WY": 578_759,
    "PR": 3_193_694,
    "GU": 165_718,
    "VI": 104_914,
    "AS": 55_641,
    "MP": 55_194,
}

In [2]:
#hide
%matplotlib inline
import math
import requests
import pandas as pd
import numpy as np
import altair as alt
from IPython.display import HTML
import matplotlib.pyplot as plt

states_url = "https://covidtracking.com/api/states/daily"
us_url = "https://covidtracking.com/api/us/daily"
case_threshold = 100
REQUEST_TIMEOUT_SECONDS = 30


def _fetch_daily(url, columns):
    """Download one daily endpoint and return it as a date-sorted DataFrame.

    Parameters
    ----------
    url : str
        Endpoint returning a JSON list of daily records with a ``date`` field
        formatted as ``YYYYMMDD``.
    columns : list of str
        Columns to keep, in order.

    Returns
    -------
    pandas.DataFrame
        The selected columns, with ``date`` parsed to datetime, sorted by date.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status, instead of failing later
        with an opaque JSON/column error.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    frame = pd.DataFrame(response.json())
    frame['date'] = pd.to_datetime(frame.date, format="%Y%m%d")
    return frame[columns].sort_values('date')


states_df = _fetch_daily(states_url, ['date', 'state', 'positive', 'death'])

# Fix Hawaii
# https://www.hawaiinewsnow.com/2020/03/24/state-officials-report-first-coronavirus-related-death-hawaii/
states_df.loc[(states_df.date == '2020-03-24') & (states_df.state == 'HI'), 'death'] = 0
states_df.loc[(states_df.date == '2020-03-25') & (states_df.state == 'HI'), 'death'] = 0

# Per-state series of positive counts above `case_threshold`, re-indexed from 0.
# States with fewer than two such observations are left out.
cols = {}
for state in states_df.state.unique():
    over_threshold = (states_df.state == state) & (states_df.positive > case_threshold)
    cases = states_df.loc[over_threshold, 'positive'].reset_index(drop=True)
    if len(cases) > 1:
        cols[state] = cases

us_df = _fetch_daily(us_url, ['date', 'positive', 'death'])
us_df['state'] = 'US'
cols['US'] = us_df.positive.reset_index(drop=True)

# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result.
states_df = pd.concat([states_df, us_df], ignore_index=True)

# `populations` has no 'US' key, which previously left every national
# per-million figure as NaN. Use the sum of all listed jurisdictions as the
# national denominator; an explicit 'US' entry in `populations` still wins.
population_lookup = {'US': sum(populations.values()), **populations}

states_df['population'] = states_df['state'].map(population_lookup)
states_df['deaths_per_m'] = states_df.death / (states_df.population / 1_000_000)
states_df['cases_per_m'] = states_df.positive / (states_df.population / 1_000_000)


In [3]:
#hide
SINCE_CASES_NUM = 100
MIN_CASES = 200


def _days_since_first(frame, column, threshold):
    """Return, per row, the number of days since its state first reached a threshold.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``state``, ``date`` and ``column``.
    column : str
        Name of the cumulative count column to test.
    threshold : number
        A row counts as "reached" when ``frame[column] >= threshold``.

    Returns
    -------
    pandas.Series
        Day offsets aligned with ``frame``; negative before the threshold date
        and NaN for states that never reach it. (The previous ``idxmax``-based
        version silently used the state's first row as the reference date in
        that case.)
    """
    reached = frame.loc[frame[column].ge(threshold)]
    first_date = reached.groupby('state')['date'].min()
    return (frame['date'] - frame['state'].map(first_date)).dt.days


# Keep every state that has at least one row with MIN_CASES or more positives.
st = states_df[states_df['positive'].ge(MIN_CASES)].sort_values(by='positive', ascending=False)
states = st['state'].values
df = states_df[states_df['state'].isin(states)].copy()

df['Days since 100 cases'] = _days_since_first(df, 'positive', SINCE_CASES_NUM)
df['Days since first death'] = _days_since_first(df, 'death', 1)

df = df.rename(columns={"positive": "Confirmed Cases", "state": "State", "death": "Confirmed Deaths"})

# Rows at or after the reference date. NaN offsets fail `.ge(0)` and are dropped,
# so the offset column can be cast back to integers (later cells pass its
# maximum to `range`).
dfc = df[df['Days since 100 cases'].ge(0)].astype({'Days since 100 cases': 'int64'})
dfd = df[df['Days since first death'].ge(0)].astype({'Days since first death': 'int64'})

In [4]:
#hide

def _add_days_since_rate(frame, rate_column, threshold, label):
    """Append a per-row day offset from the state's first date at or above a rate.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``State``, ``date`` and ``rate_column``.
    rate_column : str
        Per-million rate column to compare against ``threshold``.
    threshold : number
        A row qualifies when ``frame[rate_column] >= threshold``.
    label : str
        Name of the new day-offset column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns plus ``date_y`` (the state's
        first qualifying date) and ``label``. ``label`` is NaN for states with
        no qualifying row.
    """
    # Drop the outputs of a previous run so re-executing the cell does not
    # collide on `date_y` during the merge.
    frame = frame.drop(columns=['date_y', label], errors='ignore')
    first_dates = (frame.loc[frame[rate_column] >= threshold]
                        .groupby('State', as_index=False)['date']
                        .min())
    joined = (frame.merge(first_dates, how='left', on='State')
                   .rename(columns={'date_x': 'date'}))
    # Vectorised replacement for the former row-wise apply.
    joined[label] = (joined['date'] - joined['date_y']).dt.days
    return joined


# calculate days since 10 cases per million
dfc = _add_days_since_rate(dfc, 'cases_per_m', 10, 'Days since 10 cases per million')

# calculate days since 1 death per million
dfd = _add_days_since_rate(dfd, 'deaths_per_m', 1, 'Days since 1 death per million')

In [5]:
#hide
baseline_states = ['CA', 'WA', 'NY']
max_date = dfc['date'].max()
color_domain = list(states_df.state.unique())

def make_since_chart(highlight_states=(), baseline_states=baseline_states):
    """Build the log-scale chart of confirmed cases against days since 100 cases.

    Parameters
    ----------
    highlight_states : sequence of str, optional
        State codes to pre-select in the legend in addition to the baseline.
    baseline_states : sequence of str, optional
        State codes that are always pre-selected.

    Returns
    -------
    A layered Altair chart: a dashed 33%-daily-growth reference line with its
    label, one line per state from ``dfc``, and a state-code text label on each
    state's row for the latest date in ``dfc``.
    """
    highlight_states = list(highlight_states)
    baseline_states = list(baseline_states)
    # Computed here rather than read from the module-level `max_date`, which a
    # later cell rebinds to the deaths data.
    # NOTE(review): `Timestamp.timestamp()` treats a naive date as UTC; confirm
    # the label filter behaves as intended for viewers in other timezones.
    latest_date = dfc['date'].max()
    selection = alt.selection_multi(fields=['State'], bind='legend',
                                    init=[{'State': x} for x in highlight_states + baseline_states])

    base = alt.Chart(dfc, width=550).encode(
        x='Days since 100 cases:Q',
        y=alt.Y('Confirmed Cases:Q', scale=alt.Scale(type='log'), title="Confirmed Cases (log scale)"),
        color=alt.Color(
            'State:N',
            scale=alt.Scale(scheme="category20b"),
            legend=alt.Legend(columns=len(color_domain)//18+1, symbolLimit=len(color_domain))),
        tooltip=list(dfc),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.05))
    )
    # int() keeps `range` working even if the column was stored as float.
    max_day = int(dfc['Days since 100 cases'].max())
    # Reference curve: 100 cases growing 33% per day.
    ref = pd.DataFrame([[x, 100*1.33**x] for x in range(max_day+1)], columns=['Days since 100 cases', 'Confirmed Cases'])
    base_ref = alt.Chart(ref).encode(x='Days since 100 cases:Q', y='Confirmed Cases:Q')
    return (
        base_ref.mark_line(color='black', opacity=.5, strokeDash=[3,3]) +
        base_ref.transform_filter(
            alt.datum['Days since 100 cases'] >= max_day
        ).mark_text(dy=-6, align='right', fontSize=12, fontWeight='bold', text='33% Daily Growth') +
        base.mark_line(point=True).add_selection(selection) +
        base.transform_filter(
            alt.datum['date'] >= int(latest_date.timestamp() * 1000)
        ).mark_text(dy=-8, dx=-20, align='right', fontWeight='bold').encode(text='State:N')
    ).properties(
        title=f"Compare Cases For The {', '.join(highlight_states)} with {', '.join(baseline_states)}"
    ).configure_title(fontSize=18).configure_legend(labelFontSize=13, titleFontSize=15).configure_axis(
        labelFontSize=13,
        titleFontSize=15
    )

_Comparisons with California (CA), Washington (WA) & New York (NY) are shown by default._

> Tip: Click (Shift+ for multiple) on states in the legend to filter the visualizations below. Click outside the legend to highlight all states.


# Total Cases

In [6]:
#hide_input
# Render the total-cases chart with 'US' pre-selected alongside the default baseline states.
make_since_chart(['US'])

In [7]:
#hide
def make_since_chart_percapita(highlight_states=(), baseline_states=baseline_states):
    """Build the log-scale chart of cases per million against days since 10 cases per million.

    Parameters
    ----------
    highlight_states : sequence of str, optional
        State codes to pre-select in the legend in addition to the baseline.
    baseline_states : sequence of str, optional
        State codes that are always pre-selected.

    Returns
    -------
    A layered Altair chart: one line per state, drawn from the rows of ``dfc``
    whose 'Days since 10 cases per million' is non-negative, plus a state-code
    text label on each state's row for the latest date in ``dfc``.
    """
    highlight_states = list(highlight_states)
    baseline_states = list(baseline_states)
    # Computed here rather than read from the module-level `max_date`, which a
    # later cell rebinds to the deaths data.
    # NOTE(review): `Timestamp.timestamp()` treats a naive date as UTC; confirm
    # the label filter behaves as intended for viewers in other timezones.
    latest_date = dfc['date'].max()
    selection = alt.selection_multi(fields=['State'], bind='legend',
                                    init=[{'State': x} for x in highlight_states + baseline_states])

    base = alt.Chart(dfc.loc[lambda x: x['Days since 10 cases per million'] >= 0], width=550).encode(
        x='Days since 10 cases per million:Q',
        y=alt.Y('cases_per_m:Q', scale=alt.Scale(type='log'), title="Confirmed Cases Per Million (log scale)"),
        color=alt.Color(
            'State:N',
            scale=alt.Scale(scheme="category20b"),
            legend=alt.Legend(columns=len(color_domain)//18+1, symbolLimit=len(color_domain))),
        tooltip=list(dfc),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.05))
    )
    # No growth reference line is drawn on this chart; the unused reference
    # frame that used to be built here has been removed.
    return (
        base.mark_line(point=True).add_selection(selection) +
        base.transform_filter(
            alt.datum['date'] >= int(latest_date.timestamp() * 1000)
        ).mark_text(dy=-8, align='right', fontWeight='bold').encode(text='State:N')
    ).properties(
        title=f"Compare Cases For The {', '.join(highlight_states)} with {', '.join(baseline_states)} Per Capita"
    ).configure_title(fontSize=18).configure_legend(labelFontSize=13, titleFontSize=15).configure_axis(
        labelFontSize=13,
        titleFontSize=15
    )

> Warning: The number of cases per capita can be heavily biased depending on a state's testing procedures and how widely the population is tested.  Read with caution.

In [8]:
#hide_input
# Render the cases-per-million chart with 'US' pre-selected alongside the default baseline states.
make_since_chart_percapita(['US'])

# Total Deaths

In [9]:
#hide
baseline_states = ['CA', 'WA', 'NY']
max_date = dfd['date'].max()
color_domain = list(states_df.state.unique())

def make_death_chart(highlight_states=(), baseline_states=baseline_states):
    """Build the log-scale chart of confirmed deaths against days since first death.

    Parameters
    ----------
    highlight_states : sequence of str, optional
        State codes to pre-select in the legend in addition to the baseline.
    baseline_states : sequence of str, optional
        State codes that are always pre-selected.

    Returns
    -------
    A layered Altair chart: a dashed 33%-daily-growth reference line with its
    label, one line per state from ``dfd``, and a state-code text label on each
    state's row for the latest date in ``dfd``.
    """
    highlight_states = list(highlight_states)
    baseline_states = list(baseline_states)
    # Computed from the plotted frame so the label filter does not depend on
    # which cell last assigned the module-level `max_date`.
    # NOTE(review): `Timestamp.timestamp()` treats a naive date as UTC; confirm
    # the label filter behaves as intended for viewers in other timezones.
    latest_date = dfd['date'].max()
    selection = alt.selection_multi(fields=['State'], bind='legend',
                                    init=[{'State': x} for x in highlight_states + baseline_states])

    base = alt.Chart(dfd, width=550).encode(
        x='Days since first death:Q',
        y=alt.Y('Confirmed Deaths:Q', scale=alt.Scale(type='log'), title="Confirmed Deaths (log scale)"),
        color=alt.Color(
            'State:N',
            scale=alt.Scale(scheme="category20b"),
            legend=alt.Legend(columns=len(color_domain)//18+1, symbolLimit=len(color_domain))),
        # Tooltip fields come from the plotted frame, as in the cases charts.
        tooltip=list(dfd),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.05))
    )
    # int() keeps `range` working even if the column was stored as float.
    max_day = int(dfd['Days since first death'].max())
    # Reference curve: 20 deaths growing 33% per day.
    ref = pd.DataFrame([[x, 20*1.33**x] for x in range(max_day+1)], columns=['Days since first death', 'Confirmed Deaths'])
    base_ref = alt.Chart(ref).encode(x='Days since first death:Q', y='Confirmed Deaths:Q')
    return (
        base_ref.mark_line(color='black', opacity=.5, strokeDash=[3,3]) +
        base_ref.transform_filter(
            alt.datum['Days since first death'] >= max_day
        ).mark_text(dy=-6, dx=-100, align='left', fontSize=12, fontWeight='bold', text='33% Daily Growth') +
        base.mark_line(point=True).add_selection(selection) +
        base.transform_filter(
            alt.datum['date'] >= int(latest_date.timestamp() * 1000)
        ).mark_text(dy=-8, align='left', fontWeight='bold').encode(text='State:N')
    ).properties(
        title=f"Compare Deaths For The {', '.join(highlight_states)} with {', '.join(baseline_states)}"
    ).configure_title(fontSize=18).configure_legend(labelFontSize=13, titleFontSize=15).configure_axis(
        labelFontSize=13,
        titleFontSize=15
    )

In [10]:
#hide_input
# Render the total-deaths chart with 'US' pre-selected alongside the default baseline states.
make_death_chart(['US'])

In [11]:
#hide
baseline_states = ['CA', 'WA', 'NY']
max_date = dfd['date'].max()
color_domain = list(states_df.state.unique())

def make_death_chart_per_capita(highlight_states=(), baseline_states=baseline_states):
    """Build the log-scale chart of deaths per million against days since 1 death per million.

    Parameters
    ----------
    highlight_states : sequence of str, optional
        State codes to pre-select in the legend in addition to the baseline.
    baseline_states : sequence of str, optional
        State codes that are always pre-selected.

    Returns
    -------
    A layered Altair chart: one line per state, drawn from the rows of ``dfd``
    with at least 1 death per million, plus a state-code text label on each
    state's row for the latest date in ``dfd``.
    """
    highlight_states = list(highlight_states)
    baseline_states = list(baseline_states)
    # Computed from the source frame so the label filter does not depend on
    # which cell last assigned the module-level `max_date`.
    # NOTE(review): `Timestamp.timestamp()` treats a naive date as UTC; confirm
    # the label filter behaves as intended for viewers in other timezones.
    latest_date = dfd['date'].max()
    selection = alt.selection_multi(fields=['State'], bind='legend',
                                    init=[{'State': x} for x in highlight_states + baseline_states])

    base = alt.Chart(dfd.loc[lambda x: x.deaths_per_m >= 1], width=550).encode(
        x='Days since 1 death per million:Q',
        y=alt.Y('deaths_per_m:Q', scale=alt.Scale(type='log'), title="Confirmed Deaths Per Million (log scale)"),
        color=alt.Color(
            'State:N',
            scale=alt.Scale(scheme="category20b"),
            legend=alt.Legend(columns=len(color_domain)//18+1, symbolLimit=len(color_domain))),
        # Tooltip fields come from `dfd` so the x-axis field is included.
        tooltip=list(dfd),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.05))
    )
    # No growth reference line is drawn on this chart; the unused reference
    # frame that used to be built here has been removed.
    return (
        base.mark_line(point=True).add_selection(selection) +
        base.transform_filter(
            alt.datum['date'] >= int(latest_date.timestamp() * 1000)
        ).mark_text(dy=-8, align='right', fontWeight='bold').encode(text='State:N')
    ).properties(
        title=f"Compare Deaths For The {', '.join(highlight_states)} With {', '.join(baseline_states)} per Capita"
    ).configure_title(fontSize=18).configure_legend(labelFontSize=13, titleFontSize=15).configure_axis(
        labelFontSize=13,
        titleFontSize=15
    )

In [12]:
#hide_input
# Render the deaths-per-million chart with 'US' pre-selected alongside the default baseline states.
make_death_chart_per_capita(['US'])

This visualization was made by [Avy Faingezicht](https://twitter.com/avyfain), [Rob Zinkov](https://twitter.com/zaxtax) and [Pratap Vardhan](https://twitter.com/PratapVardhan)[^1].  Assistance provided by [Hamel Husain](https://twitter.com/HamelHusain).

[^1]:  Data sourced from ["The COVID Tracking Project"](https://covidtracking.com/).  Updated hourly by [GitHub Actions](https://github.com/features/actions).