In [1]:
import pandas as pd
import datetime as dt
from datetime import date
import numpy as np

In [2]:
# read COVID data
us = pd.read_csv('data/us_total.csv') # read data

us['2_wk_deaths'] = us.groupby('Province_State')['new_deaths'].rolling(14).mean().reset_index(0, drop=True) #calculate 2 week moving average
us = us[['Province_State', 'Date', 'new_deaths_avg', 'Population', '2_wk_deaths']] # select state, date, population, daily deaths (7 and 14 day rolling avg)

# read oxford data
oxford = pd.read_csv('data/oxford.csv')

#merge dataframes
df = pd.merge(us, oxford, left_on = ('Province_State', 'Date'), right_on = ('RegionName', 'Date'), left_index=False, right_index=False)
df['Population'] = df['Population'].str.replace(',', '').astype(float) # convert string to float

#normalize death data for population
df['deaths_per_100k'] = df['new_deaths_avg']/df['Population']*100000 # new deaths per 100k
df['2_wk_deaths_per_100k'] = df['2_wk_deaths']/df['Population']*100000 # new deaths per 100k

df.tail()

Unnamed: 0.1,Province_State,Date,new_deaths_avg,Population,2_wk_deaths,Unnamed: 0,RegionName,C1_School closing,C1_Flag,C2_Workplace closing,...,H6_Facial Coverings,H6_Flag,M1_Wildcard,StringencyIndex,StringencyLegacyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex,deaths_per_100k,2_wk_deaths_per_100k
14173,Wyoming,2020-10-21,0.571429,578759.0,0.571429,16194,Wyoming,1.0,1.0,1.0,...,2.0,1.0,,40.74,49.52,45.24,48.61,25.0,0.098733,0.098733
14174,Wyoming,2020-10-22,1.571429,578759.0,1.0,16195,Wyoming,1.0,1.0,1.0,...,2.0,1.0,,40.74,49.52,45.24,48.61,25.0,0.271517,0.172783
14175,Wyoming,2020-10-23,1.571429,578759.0,1.0,16196,Wyoming,,,,...,,,,,,,,,0.271517,0.172783
14176,Wyoming,2020-10-24,1.571429,578759.0,1.0,16197,Wyoming,,,,...,,,,,,,,,0.271517,0.172783
14177,Wyoming,2020-10-25,1.571429,578759.0,1.0,16198,Wyoming,,,,...,,,,,,,,,0.271517,0.172783


In [3]:
tmp = pd.DataFrame() # create temporary df
tmp['Province_State'] = df['Province_State'].unique() # create column with state names

tmp = pd.merge(tmp, df.loc[df.groupby('Province_State')['new_deaths_avg'].idxmax()][['Province_State', 'Date']], how = 'inner', left_on = 'Province_State', right_on = 'Province_State') # add column with date of peak deaths
tmp.rename(columns={'Date': 'Peak_Date'}, inplace=True) # rename columns
tmp['days_since_peak'] = (pd.Timestamp.now().floor('d') - pd.to_datetime(tmp['Peak_Date'])).astype('timedelta64[D]') # calculate days since peak death
tmp = pd.merge(tmp, df[df['Date'] == '2020-10-25'][['Province_State', '2_wk_deaths_per_100k']], how = 'inner', left_on = 'Province_State', right_on = 'Province_State')
#replace date with 'today.strftime("%Y-%m-%d")' for live code
tmp = pd.merge(tmp, df.groupby(['Province_State']).last()['ContainmentHealthIndex'], how = 'left', left_on = 'Province_State', right_on = 'Province_State')
#need to include a check to make sure the latest value is recent enough
tmp

Unnamed: 0,Province_State,Peak_Date,days_since_peak,2_wk_deaths_per_100k,ContainmentHealthIndex
0,Alabama,2020-07-27,104.0,0.294269,45.14
1,Alaska,2020-09-27,42.0,0.078113,56.6
2,Arizona,2020-07-21,110.0,0.112853,40.62
3,Arkansas,2020-09-15,54.0,0.575158,46.53
4,California,2020-08-10,90.0,0.140824,64.93
5,Colorado,2020-04-24,198.0,0.136439,51.74
6,Connecticut,2020-04-26,196.0,0.094162,65.28
7,Delaware,2020-06-23,138.0,0.198053,55.56
8,Florida,2020-08-05,95.0,0.354187,30.21
9,Georgia,2020-08-13,87.0,0.26439,48.26


In [4]:
# create conditions list
conditions = [
    (tmp['days_since_peak'] >= 90) & (tmp['2_wk_deaths_per_100k'] < 0.02) & (tmp['ContainmentHealthIndex'] >= 80), # elimination
    (tmp['days_since_peak'] >= 60) & (tmp['2_wk_deaths_per_100k'] < 0.05) & (tmp['ContainmentHealthIndex'] >= 60), # suppression
    (tmp['days_since_peak'] >= 30) & (tmp['2_wk_deaths_per_100k'] < 0.4) & (tmp['ContainmentHealthIndex'] >= 50) # mitigation
]

# assign values based on conditions
values = ['Elimination', 'Suppression', 'Mitigation']

# label each state
tmp['Category'] = np.select(conditions, values, default = 'Uncontrolled Spread')
tmp.sort_values(by = 'Category')
tmp

Unnamed: 0,Province_State,Peak_Date,days_since_peak,2_wk_deaths_per_100k,ContainmentHealthIndex,Category
0,Alabama,2020-07-27,104.0,0.294269,45.14,Uncontrolled Spread
1,Alaska,2020-09-27,42.0,0.078113,56.6,Mitigation
2,Arizona,2020-07-21,110.0,0.112853,40.62,Uncontrolled Spread
3,Arkansas,2020-09-15,54.0,0.575158,46.53,Uncontrolled Spread
4,California,2020-08-10,90.0,0.140824,64.93,Mitigation
5,Colorado,2020-04-24,198.0,0.136439,51.74,Mitigation
6,Connecticut,2020-04-26,196.0,0.094162,65.28,Mitigation
7,Delaware,2020-06-23,138.0,0.198053,55.56,Mitigation
8,Florida,2020-08-05,95.0,0.354187,30.21,Uncontrolled Spread
9,Georgia,2020-08-13,87.0,0.26439,48.26,Uncontrolled Spread
