In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import requests
import pymc3 as pm
import pandas as pd
import numpy as np
import theano
import theano.tensor as tt

from matplotlib import pyplot as plt
from matplotlib import dates as mdates
from matplotlib import ticker

from datetime import date
from datetime import datetime

from IPython.display import clear_output

%config InlineBackend.figure_format = 'retina'

In [3]:
# New data source: covidtracking.com API (v1), daily state-level CSV.
url = "https://covidtracking.com/api/v1/states/daily.csv"


In [5]:
# Load the daily table indexed by (state, date) and sort the index; later
# cells select rows by state label and by date range on this MultiIndex.
# The dropped codes are presumably the US territories - TODO confirm.
excluded_regions = ['MP', 'GU', 'AS', 'PR', 'VI']
states = (
    pd.read_csv(url, parse_dates=['date'], index_col=['state', 'date'])
    .sort_index()
    .drop(excluded_regions)
)

In [6]:
# Preview the first rows of the loaded (state, date)-indexed frame.
states.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,...,hospitalized,total,totalTestResults,posNeg,fips,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease
state,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AK,2020-03-06,0.0,8.0,1.0,,,,,,,,...,,9,8,8,2,,,,,
AK,2020-03-07,0.0,12.0,2.0,,,,,,,,...,,14,12,12,2,0.0,0.0,4.0,0.0,4.0
AK,2020-03-08,0.0,14.0,6.0,,,,,,,,...,,20,14,14,2,0.0,0.0,2.0,0.0,2.0
AK,2020-03-09,0.0,23.0,9.0,,,,,,,,...,,32,23,23,2,0.0,0.0,9.0,0.0,9.0
AK,2020-03-10,0.0,23.0,9.0,,,,,,,,...,,32,23,23,2,0.0,0.0,0.0,0.0,0.0


In [7]:
# List the columns available in the loaded frame.
states.columns

Index(['positive', 'negative', 'pending', 'hospitalizedCurrently',
       'hospitalizedCumulative', 'inIcuCurrently', 'inIcuCumulative',
       'onVentilatorCurrently', 'onVentilatorCumulative', 'recovered',
       'dataQualityGrade', 'lastUpdateEt', 'hash', 'dateChecked', 'death',
       'hospitalized', 'total', 'totalTestResults', 'posNeg', 'fips',
       'deathIncrease', 'hospitalizedIncrease', 'negativeIncrease',
       'positiveIncrease', 'totalTestResultsIncrease'],
      dtype='object')

In [None]:
## The new data source provides many more columns to work with

In [8]:
# Errors in Covidtracking.com
# Hand-entered replacements for the cumulative 'positive' count, one entry per
# (state, date) row, applied in the order listed.
# NOTE(review): the integrity-check output further down still reports a -177
# day-over-day diff for MA on 2020-04-20 - confirm the MA value below.
positive_fixes = [
    (('WA', '2020-04-21'), 12512),
    (('WA', '2020-04-22'), 12753),
    (('WA', '2020-04-23'), 12753 + 190),
    (('VA', '2020-04-22'), 10266),
    (('VA', '2020-04-23'), 10988),
    (('PA', '2020-04-22'), 35684),
    (('PA', '2020-04-23'), 37053),
    (('MA', '2020-04-20'), 39643),
    (('CT', '2020-04-18'), 17550),
    (('CT', '2020-04-19'), 17962),
    (('HI', '2020-04-22'), 586),
    (('RI', '2020-03-07'), 3),
]

for row_key, corrected_total in positive_fixes:
    states.loc[row_key, 'positive'] = corrected_total

In [9]:
## Integrity check - make sure that all the states have current data

In [10]:
# Naive datetime for 00:00:00 of the current local date.
today = datetime(*date.today().timetuple()[:3])

In [14]:
# Most recent date present in the index for each state.
dates_by_state = states.reset_index('date')['date']
last_updated = dates_by_state.groupby(level='state').max()
# NOTE(review): despite its name, this mask is True for states whose latest
# row is *older* than today's midnight, i.e. the states that are NOT current.
is_current = last_updated.lt(today)


In [15]:
# Report every state whose most recent row predates `today`.
# Note: `is_current` is True for the states that are behind (it is computed as
# `last_updated < today`), so any True entry means a state has not updated.
# An explicit condition replaces the assert/except pair so the warning still
# fires when Python runs with -O, which strips assert statements.
if is_current.any():
    print("Not all states have updated")
    display(last_updated[is_current])

In [16]:
# Ensure the cumulative positive count never decreases within a state,
# i.e. every day-over-day diff is >= 0 (zero is allowed here).
for state, grp in states.groupby('state'):
    # The first row of each state has no predecessor, so its NaN diff is dropped.
    new_cases = grp.positive.diff().dropna()
    is_non_negative = new_cases.ge(0)

    # Plain conditional instead of assert/except so the warning is not lost
    # when assertions are disabled (python -O).
    if not is_non_negative.all():
        print(f"Warning: {state} has date with negative case counts")
        display(new_cases[~is_non_negative])

# Let's make sure that states have added cases
# This is a hard stop: over the fixed 2020-04-22..2020-04-23 window, no state
# may show a zero change in its cumulative positive count.
idx = pd.IndexSlice
recent_diffs = (
    states.loc[idx[:, '2020-04-22':'2020-04-23'], 'positive']
    .groupby('state')
    .diff()
    .dropna()
)
assert not recent_diffs.eq(0).any(), (
    "At least one state shows no change in 'positive' between "
    "2020-04-22 and 2020-04-23"
)



state  date      
MA     2020-04-20   -177.0
Name: positive, dtype: float64



state  date      
MT     2020-05-05   -1.0
Name: positive, dtype: float64



state  date      
OH     2020-04-27   -264.0
Name: positive, dtype: float64

In [23]:
# Show all rows for MA, which the negative-diff check flagged on 2020-04-20.
states.loc[idx['MA', :]]

Unnamed: 0_level_0,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,onVentilatorCumulative,recovered,...,hospitalized,total,totalTestResults,posNeg,fips,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-03-12,8.0,,,,,,,,,,...,,8,8,8,25,,,,,
2020-03-13,23.0,,,,,,,,,,...,,23,23,23,25,0.0,0.0,0.0,15.0,15.0
2020-03-14,38.0,,,,,,,,,,...,,38,38,38,25,0.0,0.0,0.0,15.0,15.0
2020-03-15,64.0,584.0,,,,,,,,,...,,648,648,648,25,0.0,0.0,584.0,26.0,610.0
2020-03-16,97.0,1199.0,,,,,,,,,...,,1296,1296,1296,25,0.0,0.0,615.0,33.0,648.0
2020-03-17,118.0,1633.0,,,,,,,,,...,,1751,1751,1751,25,0.0,0.0,434.0,21.0,455.0
2020-03-18,156.0,2115.0,,,,,,,,,...,,2271,2271,2271,25,2.0,0.0,482.0,38.0,520.0
2020-03-19,229.0,2918.0,,,,,,,,,...,,3147,3147,3147,25,1.0,0.0,803.0,73.0,876.0
2020-03-20,314.0,3795.0,,,,,,,,,...,,4109,4109,4109,25,2.0,0.0,877.0,85.0,962.0
2020-03-21,426.0,4799.0,,,61.0,,,,,,...,61.0,5225,5225,5225,25,2.0,61.0,1004.0,112.0,1116.0
