## Purpose
State-by-state comparisons of numbers of new cases to assess progress toward [CDC's guidelines for reopening](https://www.cdc.gov/coronavirus/2019-ncov/downloads/php/CDC-Activities-Initiatives-for-COVID-19-Response.pdf).

In [None]:
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import scipy
import scipy.interpolate
from scipy.interpolate import CubicSpline
import matplotlib.figure as fg # figure control
import matplotlib.pyplot as plt # plotting
import matplotlib.ticker as plticker  #ticker control
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Print the full path of every file found under /kaggle/input so the
# dataset locations used by the cells below can be checked at a glance.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

In [None]:
# Load the worldwide testing dataset; every later cell derives its
# frames from `df1`.
df1 = pd.read_csv('/kaggle/input/covid19testing/tested_worldwide.csv', sep=',')
df1.dataframeName = 'tested_worldwide.csv'

# Row and column counts, kept for ad-hoc inspection.
nRow = len(df1.index)
nCol = len(df1.columns)

# Show the available column names as the cell output.
df1.columns

These are the states/territories included in the US data:

In [None]:
# Two-letter abbreviations for the 50 states plus the District of Columbia,
# keyed by the full name used in the Province_State column.
short_states = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
    'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE',
    'District of Columbia': 'DC', 'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI',
    'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA',
    'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME',
    'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN',
    'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT', 'Nebraska': 'NE',
    'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ', 'New Mexico': 'NM',
    'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH',
    'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI',
    'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX',
    'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA',
    'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY',
}
short_states_t = tuple(short_states.values())

# Every Province_State value reported for the United States, sorted by name.
all_states = np.sort(df1[df1.Country_Region=='United States'].Province_State.unique())

# Keep only the entries named in `short_states`. Selecting by name instead of
# by hard-coded positions keeps the result correct if the dataset adds,
# removes or renames a non-state entry.
# NOTE(review): the previous positional delete ([2,3,13,38,43,51]) presumably
# removed territories/aggregate rows -- confirm the result still has 51 names.
is_state = np.array([name in short_states for name in all_states], dtype=bool)
raw_states = all_states[is_state]
raw_states

## Raw Positive Tests
We'll start by examining the raw data -- the number of positive tests reported each day in each state. You'll see dips and spikes that are mostly driven by changes in testing capacity or reporting methodology.

In [None]:
# Small-multiples grid: one panel per state showing the raw number of
# daily positive tests.
fig, axs = plt.subplots(9,6,gridspec_kw={'hspace': 0.4, 'wspace': 0.3},figsize=(20, 20))

n_states = len(raw_states)
for i, ax in enumerate(fig.axes):
    # The 9x6 grid has more panels than there are states; hide the spares.
    if i >= n_states:
        ax.set_visible(False)
        continue
    state = df1[df1.Province_State==raw_states[i]]
    ax.plot(state.Date, state.daily_positive,'-')
    ax.set_title(raw_states[i])
    # Date labels are unreadable at this panel size, so hide the x axis.
    ax.get_xaxis().set_visible(False)

# Only positive tests are plotted, so the title says exactly that.
fig.suptitle('Daily Positive Tests By State')

plt.show()

## Rolling Test-Positivity Rates
Next, calculate daily test-positivity rates, and average those rates over a 7-day window.

In [None]:
# Daily test-positivity rate (percent) for every US row.
us = df1[df1.Country_Region=='United States']

# Build the cleaned rate as a standalone Series and attach it once.
# Chained `inplace=True` calls on `us.daily_rate` are not guaranteed to write
# back to the frame, and a bare `.clip(...)` returns a new Series that would
# be thrown away.
daily_rate = us.daily_positive / us.daily_tested * 100
daily_rate = (
    daily_rate
    .replace([np.inf, -np.inf], np.nan)  # x/0 days carry no usable rate
    .clip(lower=0)                       # negative corrections -> 0
    .fillna(0)                           # missing or undefined rate -> 0
)
us = us.assign(daily_rate=daily_rate)

# Single-state sample used by the per-capita cell that follows.
state = us[us.Province_State=='Alaska']

In [None]:
# Add population data for per capita testing
popdata = pd.read_csv('/kaggle/input/nstest2019alldata/nst-est2019-alldata.csv', delimiter=',')

# Look up the population of the state that `state` actually holds, so the
# tests (numerator) and the population (denominator) refer to the same place.
# A hard-coded 'Alabama' lookup here would not match the state selected in
# the previous cell.
state_name = state.Province_State.iloc[0]
state_population = popdata.loc[popdata.NAME == state_name, 'POPESTIMATE2019'].values[0]

# 7-day rolling mean of daily tests; the first 6 rows are NaN.
daily_tested_rolling = state.daily_tested.rolling(window=7).mean()

# Population broadcast to the same index so the division is element-wise.
pop = pd.Series(state_population, index=daily_tested_rolling.index)

# Tests per 100,000 residents.
tests_per_capita = daily_tested_rolling.divide(pop)*100000
tests_per_capita.tail(10)

In [None]:
# Small-multiples grid of the daily test-positivity rate, one panel per
# state, all panels sharing a common y axis.
fig, axs = plt.subplots(
    9, 6,
    figsize=(20, 20),
    sharey=True,
    gridspec_kw={'hspace': 0.4, 'wspace': 0.3},
)

for i, ax in enumerate(axs.flat):
    # 54 panels but only 51 states: blank out the trailing panels.
    if i >= 51:
        ax.set_visible(False)
        continue

    state = us.loc[us.Province_State == raw_states[i]]
    ax.set_title(raw_states[i])
    ax.plot(state.Date, state.daily_rate, '-')
    # No room for date labels at this size.
    ax.xaxis.set_visible(False)

fig.suptitle('Daily Test-Positive Rate By State')

plt.show()

# Unpacking the "Downward Trajectory" Criterion for New Positive Cases (Select States)
Let's examine the data from a few states to illustrate the criteria. Based on [page 28 paragraph 4](https://www.cdc.gov/coronavirus/2019-ncov/downloads/php/CDC-Activities-Initiatives-for-COVID-19-Response.pdf), a "downward trajectory" for new positive cases means that, given a 3-day rolling average with cubic-spline smoothing applied (I haven't applied the spline smoothing here):
1. today's number must be smaller than 14 days ago, and
2. within the last 14 days, you can't have any 5 consecutive days with increasing numbers of cases (positive difference in new cases).

In [None]:
# Build one DataFrame holding the states examined in detail below
# (MD, DC, VA, UT, AR), keeping only days that have a daily_tested value.
# pd.concat is used because DataFrame.append no longer exists in pandas 2.x.
selected_states = ['Maryland', 'District of Columbia', 'Virginia', 'Utah', 'Arkansas']
mdv = pd.concat(
    [df1[df1.Province_State == name].dropna(subset=['daily_tested'])
     for name in selected_states]
)

# 3-day rolling mean of new positives, computed per state so that a window
# never mixes the tail of one state with the head of the next.
mdv = mdv.assign(
    daily_positive_rolling=mdv.groupby('Province_State')['daily_positive']
    .transform(lambda s: s.rolling(3).mean())
)
# Day-over-day change in the smoothed positives.
mdv = mdv.assign(daily_positive_diff=mdv.groupby('Province_State')['daily_positive_rolling'].diff())

# Daily test-positivity rate in percent. Days with zero tests give inf/NaN;
# both are mapped to 0. The result is assigned back explicitly instead of
# relying on chained inplace calls.
daily_rate = (mdv.daily_positive / mdv.daily_tested * 100).replace([np.inf, -np.inf], 0).fillna(0)
mdv = mdv.assign(daily_rate=daily_rate)
mdv = mdv.reset_index(drop=True)

# 3-day rolling mean of the rate, again per state.
mdv = mdv.assign(
    daily_rate_rolling=mdv.groupby('Province_State')['daily_rate']
    .transform(lambda s: s.rolling(3).mean())
)
# Calculate difference in daily test positivity rate
mdv = mdv.assign(daily_rate_diff=mdv.groupby('Province_State')['daily_rate_rolling'].diff())

# Shorten date representation ('2020-05-01' -> '05-01'); literal replacement.
mdv['Date'] = mdv.Date.str.replace('2020-', '', regex=False)

mdv_grouped = mdv.groupby('Province_State')
# DC rows from position 70 onward, kept for the cubic-spline experiment
# (the spline smoothing itself is not applied yet).
d = mdv.loc[mdv_grouped.groups['District of Columbia']].iloc[70:]
mdv

In [None]:
#daily_pos_rolling_spline.__call__([d.Date[t0:].index.values])

In [None]:
# Four-panel detail view (smoothed positives, their daily change, smoothed
# positivity rate, its daily change) for a few selected states.

# Full state name -> abbreviation used in panel titles. A local mapping is
# used so the notebook-wide `short_states` dict is not overwritten.
selected = {'Arkansas': 'AR', 'District of Columbia': 'DC', 'Utah': 'UT'}
states = list(selected)

fig, state_plots = plt.subplots(len(states), 4, sharey='col',
                                gridspec_kw={'hspace': 0.3, 'wspace': 0.3},
                                figsize=(20, 10), squeeze=False)

# Only the last 14 days are shown.
t0 = -14


def _set_date_ticks(ax, base=3.0):
    """Put a tick on every `base`-th date of `ax`.

    A new locator is created per axis: a Locator keeps a reference to the
    axis it is attached to, so one instance must not be shared.
    """
    ax.xaxis.set_major_locator(plticker.MultipleLocator(base=base))


def _annotate_value(ax, date, value, offset=0):
    """Write int(value) next to the point (date, value); skip missing values."""
    if pd.notna(value) and np.isfinite(value):
        ax.annotate(int(value), xy=(date, value + offset))


for p, name in enumerate(states):
    state = mdv.loc[mdv_grouped.groups[name]]
    recent = state.iloc[t0:]
    abbrev = selected[name]

    # Smoothed daily positives, labelled at the first and last day shown.
    daily_pos = state_plots[p][0]
    daily_pos.set_title('Daily Total Positive Tests ('+abbrev+')')
    daily_pos.plot(recent.Date, recent.daily_positive_rolling)
    _annotate_value(daily_pos, recent.Date.iloc[0], recent.daily_positive_rolling.iloc[0], offset=6)
    _annotate_value(daily_pos, recent.Date.iloc[-1], recent.daily_positive_rolling.iloc[-1], offset=6)
    _set_date_ticks(daily_pos)

    # Day-over-day change in smoothed positives; zero line for reference.
    daily_pos_diff = state_plots[p][1]
    daily_pos_diff.set_title('Daily Positive Diff')
    daily_pos_diff.axhline()
    daily_pos_diff.plot(recent.Date, recent.daily_positive_diff, '+-')
    _set_date_ticks(daily_pos_diff)

    # Smoothed positivity rate with a dashed reference line at 5%.
    daily_rate = state_plots[p][2]
    daily_rate.axhline(y=5, linestyle='--')
    daily_rate.set_title('Daily Positive Test Rate ('+abbrev+')')
    daily_rate.plot(recent.Date, recent.daily_rate_rolling)
    _set_date_ticks(daily_rate)

    # Day-over-day change in the smoothed rate.
    daily_rate_diff = state_plots[p][3]
    daily_rate_diff.axhline()
    daily_rate_diff.set_ylim(-30, 30)
    daily_rate_diff.set_title('Difference in Daily Positive Test Rate ('+abbrev+')')
    daily_rate_diff.plot(recent.Date, recent.daily_rate_diff, '+-')
    _set_date_ticks(daily_rate_diff)

plt.show()

# All States' Downward Trajectory for New Positive Cases
Let's look at all states, just the positive tests and daily positive diff:

In [None]:
# All US rows from df1 that have a daily_tested value.
us = df1[df1.Country_Region=='United States'].dropna(subset=['daily_tested'])

# 3-day rolling mean of new positives, computed per state. The leading NaNs
# of each state become 0 and the mean is truncated to an integer.
rolling_positive = us.groupby('Province_State')['daily_positive'].transform(
    lambda s: s.rolling(3).mean()
)
us = us.assign(daily_positive_rolling=rolling_positive.fillna(0.0).astype(int))

# Day-over-day change in the smoothed positives.
us = us.assign(daily_positive_diff=us.groupby('Province_State')['daily_positive_rolling'].diff())

# Daily test-positivity rate in percent; inf (zero tests) and NaN become 0.
# Assigned back explicitly rather than through chained inplace calls.
daily_rate = (us.daily_positive*100/us.daily_tested).replace([np.inf, -np.inf], 0).fillna(0)
us = us.assign(daily_rate=daily_rate)
us = us.reset_index(drop=True)

# 3-day pooled positivity rate: sum of positives over sum of tests. Both
# sums are taken per state so a window never spans two different states.
by_state = us.groupby('Province_State')
positive_3d = by_state['daily_positive'].transform(lambda s: s.rolling(3).sum())
tested_3d = by_state['daily_tested'].transform(lambda s: s.rolling(3).sum())
us = us.assign(daily_rate_rolling=100*positive_3d/tested_3d)
# Calculate difference in daily test positivity rate
us = us.assign(daily_rate_diff=us.groupby('Province_State')['daily_rate_rolling'].diff())

# Calculate Daily Deaths as the per-state difference of the `death` column.
us = us.assign(daily_deaths=us.groupby('Province_State')['death'].diff())

# Shorten date representation ('2020-05-01' -> '05-01'); literal replacement.
us['Date'] = us.Date.str.replace('2020-', '', regex=False)

us_grouped = us.groupby('Province_State')
# Show one state as a spot check.
us.loc[us_grouped.groups['Maryland']]

In [None]:
# Number of states kept in `raw_states` (one subplot row each in the next cell).
raw_states.shape[0]

In [None]:
# One row of four panels per state: positives since row t00, positives over
# the last 14 days, positivity rate over the last 14 days, and daily deaths
# over the last 14 days.
n_states = len(raw_states)
fig, state_plots = plt.subplots(n_states, 4,
                                gridspec_kw={'hspace': 0.3, 'wspace': 0.3},
                                figsize=(20, 100), squeeze=False)

# Last 14 days
t0 = -14
# Long t00: first row shown in the long-range panel
t00 = 28


def _annotate_value(ax, date, value, offset=0):
    """Write int(value) next to the point (date, value); skip missing values."""
    if pd.notna(value) and np.isfinite(value):
        ax.annotate(int(value), xy=(date, value + offset))


def _plot_recent_trend(ax, dates, values, title, offset=0):
    """Plot `values` against `dates` and mark whether the period ended lower.

    A purple line joins the first and last points when the last value is
    below the first; both end points are labelled with their integer value.
    """
    ax.set_title(title)
    ax.plot(dates, values)
    if len(values) > 0:
        start, today = values.iloc[0], values.iloc[-1]
        if today < start:
            ax.plot([dates.iloc[0], dates.iloc[-1]], [start, today], c='purple')
        _annotate_value(ax, dates.iloc[0], start, offset)
        _annotate_value(ax, dates.iloc[-1], today, offset)
    ax.axis('off')


for p, name in enumerate(raw_states):
    state = us.loc[us_grouped.groups[name]]
    recent = state.iloc[t0:]

    # Long-range view of the smoothed positives.
    daily_pos_long = state_plots[p][0]
    daily_pos_long.set_title('Daily Positive Tests since April('+name+')')
    daily_pos_long.plot(state.Date.iloc[t00:], state.daily_positive_rolling.iloc[t00:])
    daily_pos_long.axis('off')

    # Smoothed positives, last 14 days (cubic-spline smoothing not applied).
    _plot_recent_trend(state_plots[p][1], recent.Date, recent.daily_positive_rolling,
                       'Daily Positive Tests (Last 14 Days)', offset=5)

    # Rolling positivity rate on a fixed 0-20 scale with a dashed line at 5.
    daily_rate = state_plots[p][2]
    daily_rate.set_ylim(0, 20)
    daily_rate.axhline(y=5, linestyle='--', c='purple')
    daily_rate.set_title('Daily Positive Test Rate (Last 14 Days)')
    daily_rate.plot(recent.Date, recent.daily_rate_rolling, '+-')
    daily_rate.axis('off')

    # Daily deaths, last 14 days.
    _plot_recent_trend(state_plots[p][3], recent.Date, recent.daily_deaths,
                       'Daily Number of Deaths (Last 14 Days)')


plt.show()