In [1]:
# This notebook looks at covid-19 data from the JHU website
# the data can be downloaded by running the following command
#
#     git clone https://github.com/CSSEGISandData/COVID-19.git
#
# Written by Tony Qian (5 April 2020)

In [2]:
# Triple comments '###' invite user action 
#      e.g. date range, states, smoothing intervals
#
# download pandas with 
#     pip3 install pandas
#
# paths assume the notebook runs from the directory that contains the git repository
#     modify as necessary

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib notebook

In [4]:
# Per-location snapshot: read one daily report from the local clone of the
# JHU repository. Directory and file name are joined by plain concatenation,
# so `path` must keep its trailing slash.
path = 'COVID-19/csse_covid_19_data/csse_covid_19_daily_reports/'
file = '04-04-2020.csv'

data = pd.read_csv(f'{path}{file}')

In [5]:
# Time-series table: read the confirmed-cases file for the US from the local
# clone. Directory and file name are joined by plain concatenation, so
# `path2` must keep its trailing slash.
path2 = 'COVID-19/csse_covid_19_data/csse_covid_19_time_series/'
file2 = 'time_series_covid19_confirmed_US.csv'

data2 = pd.read_csv(f'{path2}{file2}')

In [6]:
### Choose Date Range ###
# Dates are the column labels of the table, written as M/D/YY strings;
# the earliest available one is '1/22/20'.
start = '3/1/20'
end   = '4/4/20'

# Combine county-level rows into one row per state.
# numeric_only=True keeps any non-numeric columns out of the sum: pandas >= 2.0
# no longer drops them silently and would otherwise try to "add" their values
# (string concatenation, or a TypeError on mixed/missing entries).
df0 = data2.groupby('Province_State').sum(numeric_only=True)

# Label-based column slice; both `start` and `end` are included.
df1 = df0.loc[:, start:end]
df1

Unnamed: 0_level_0,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,...,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20
Province_State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alabama,0,0,0,0,0,0,0,0,0,0,...,517,587,694,825,899,987,1060,1233,1495,1614
Alaska,0,0,0,0,0,0,0,0,0,0,...,56,58,85,102,114,119,132,143,157,171
American Samoa,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Arizona,1,1,1,1,1,2,4,4,4,6,...,508,665,773,919,1157,1289,1530,1715,1937,2187
Arkansas,0,0,0,0,0,0,0,0,0,0,...,335,381,409,426,473,523,584,643,704,743
California,12,21,25,35,51,59,81,95,101,144,...,3899,4657,5095,5852,7138,8210,9399,10773,12004,12837
Colorado,0,0,0,0,0,3,7,7,7,14,...,1430,1433,1740,2307,2311,2966,2982,3342,3742,4188
Connecticut,0,0,0,0,0,0,0,0,0,1,...,1012,1291,1524,1993,2571,3128,3557,3824,4914,5276
Delaware,0,0,0,0,0,0,0,0,0,0,...,130,163,214,232,264,319,368,393,450,593
Diamond Princess,0,0,0,0,0,0,0,0,0,0,...,49,49,49,49,49,49,49,49,49,49


In [29]:
### Choose any states ###
# The list is kept in alphabetical order; the same list is used later to
# label the curves, so its order fixes the row order of df2.
states = ['New Jersey','New York', 'Florida', 'California']
states.sort()

df2 = df1.loc[states]

In [43]:
# Draw the selected states over time twice, each in its own 8x4 figure:
# first with a linear y axis, then with a logarithmic one.
for use_log in (False, True):
    df2.T.plot(style='.',figsize=(8,4))

    plt.ylabel('count')
    if use_log:
        plt.yscale('log')
    plt.title('Case Count')
    plt.grid()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [33]:
# Raw 2-D array of the selected counts: one row per selected state,
# one column per date in the chosen window.
arr = df2.to_numpy()
N = arr.shape[0]   # number of rows (selected states)

# Column labels of the same window, used as x values in the plots below.
dates = df2.columns

In [34]:
# configures x axis for plotting dates
def set_dates(dates,M):
    """Place date ticks on the current axes.

    dates : sequence of x values; every entry becomes a minor tick.
    M     : stride; every M-th entry of `dates` becomes a major tick.

    Also calls autofmt_xdate() on the current figure and switches on the
    grid for the major ticks. Returns None.
    """
    figure = plt.gcf()
    figure.autofmt_xdate()
    axes = plt.gca()

    major_ticks = dates[::M]
    axes.set_xticks(dates, minor=True)
    axes.set_xticks(major_ticks)

    # grid lines only at the major ticks
    axes.grid(which='major')

In [35]:
# smooths (derivatives) by averaging n points to the left and right (2n + 1 total)
def smooth(arr,n):
    """Centered moving average of a 1-D sequence.

    Parameters
    ----------
    arr : 1-D sequence of numbers (anything supporting len(), indexing and slicing)
    n   : non-negative int, half-width of the window; each interior point is
          replaced by the mean of itself and its n neighbours on either side.

    Returns
    -------
    numpy.ndarray with the same length as `arr`. The first n and the last n
    points do not have a full window and are copied through unchanged.
    With n == 0 every point is its own window, so the values are unchanged.

    Raises
    ------
    ValueError if n is negative.
    """
    if n < 0:
        raise ValueError("smoothing half-width n must be non-negative")

    N = len(arr)
    out = []
    for j in range(N):
        if j < n or j + n >= N:
            # too close to either edge for a full window: keep the raw value
            out.append(arr[j])
        else:
            # the slice end is exclusive, hence the +1 to include arr[j+n]
            out.append(np.mean(arr[j-n:j+n+1]))

    return np.array(out)

In [36]:
# First derivative = difference between consecutive dates. Differencing
# shortens the date axis by one, so the first date is dropped. After the
# transpose, axis 0 runs over dates and axis 1 over states.
first_deriv = np.diff(arr, axis=1).T
dates1 = dates[1:]

### set first derivative smoothing interval
M = 2
# smooth each state's series separately; the result has one row per state
smooth_first_deriv = np.array([smooth(first_deriv[:, col], M) for col in range(N)])

In [45]:
# Two figures of the day-over-day increase per state.
#   1st (linear y axis): smoothed series with markers, plus the unsmoothed
#       series as a thin dashed line in the same colour.
#   2nd (log y axis): smoothed series only.
for log_scale in (False, True):
    plt.figure(figsize=(8,5))
    for k in range(N):
        plt.plot(dates1, smooth_first_deriv[k],'C%i.--'%k, label=states[k])
        if not log_scale:
            plt.plot(dates1, first_deriv[:,k],'C%i--'%k, lw='0.5')

    plt.legend()
    if log_scale:
        plt.yscale('log')
    plt.title('rate of cases')
    plt.ylabel('new cases per day')
    set_dates(dates1,5)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [38]:
# Second derivative = difference between consecutive values of the smoothed
# first derivative. The last M rows are dropped before differencing and the
# difference consumes one more, so the date axis loses (1+M) entries.
arr2 = smooth_first_deriv.T[:-M]
second_deriv = np.diff(arr2, axis=0)
dates2 = dates1[1:-M]

### set second derivative smoothing interval
M2 = 3
# smooth each state's series separately; the result has one row per state
smooth_second_deriv = np.array([smooth(second_deriv[:, col], M2) for col in range(N)])

In [46]:
# Change in the daily increase per state: thin dashed line = unsmoothed,
# markers = smoothed. The last M2 smoothed points are left off the plot.
plt.figure(figsize=(8,5))

for k in range(N):
    raw_series = second_deriv[:,k]
    smoothed_series = smooth_second_deriv[k,:-M2]

    plt.plot(dates2, raw_series,'C%i--'%k, lw='0.5')
    plt.plot(dates2[:-M2], smoothed_series,'C%i.--'%k, label=states[k])

plt.legend()
plt.title('rate of new cases')
plt.ylabel('change in new cases per day')

set_dates(dates2,5)

<IPython.core.display.Javascript object>

In [40]:
# Smoothed second derivative divided by the smoothed first derivative.
# The slice 1:-M trims smooth_first_deriv to the dates in dates2.
# NOTE(review): the +1 presumably guards against division by zero on days
# with no increase - confirm.
denominator = smooth_first_deriv[:,1:-M] + 1
norm2 = smooth_second_deriv / denominator

### set normalized second derivative smoothing interval
M3 = 2
# smooth each state's row separately
smooth_norm2 = np.array([smooth(norm2[k], M3) for k in range(N)])

In [47]:
# Smoothed normalized second derivative per state. The first 5 entries of
# dates2 are skipped on both the x values and the plotted series.
shown_dates = dates2[5:]

plt.figure(figsize=(8,5))
for k in range(N):
    plt.plot(shown_dates, smooth_norm2[k,5:], 'C%i.--'%k, label=states[k])

plt.title('percentage of new cases')
plt.ylabel('change in new cases / new cases')

plt.legend()

set_dates(shown_dates,5)

<IPython.core.display.Javascript object>