In [1]:
# Specify names for dataset (input)
# This CSV file is read with pandas in the data cell below; its "date" column
# is parsed as YYYY-MM-DD and its "diagnosed" column supplies the counts.

dataname = "us_md_montgomery.csv"

In [2]:
# Choose whether to plot cases, deaths, or both (as column titles)
# NOTE(review): `columns` is not referenced in the cells visible here; the
# data cell hard-codes key = "diagnosed". Confirm before relying on it.

columns = ["diagnosed", "killed"]

In [3]:
# Specify which model to use: "exp" for exponential, "log" for logistic

models = {"diagnosed": "log",
          "killed": "log"}

# LaTeX label templates for str.format(): the fitted parameters come first
# (indices 0..n-1), followed by their uncertainties in the same order.
# Backslashes in front of LaTeX commands are doubled so that "\p", "\e" and
# "\m" are not read as (invalid) escape sequences, while "\n" stays a real
# line break. The resulting string values are unchanged.
equations = {"exp": "$f(t) = a (1 + b)^t$\n$a = {0:.4f} \\pm {2:.5f}$\n$b = {1:.4f} \\pm {3:.6f}$",
             "log": "$f(t) = c / (1 + \\exp((b - t)/a))$\n$a = {0:.3f} \\pm {3:.4f}$\n$b = {1:.2f} \\mp {4:.4f}$\n$c = {2:.0f}. \\pm {5:.2f}$"}

In [4]:
# Everything else is details

import chart_studio.plotly as py
from datetime import date, timedelta
import numpy as np
import pandas as pd
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from scipy.optimize import curve_fit
from scipy.stats import describe, chisquare, t
from time import strptime

# Initialise plotly's notebook mode before the figure is displayed with
# iplot() at the end of the notebook.
init_notebook_mode(connected=True)

In [5]:
# Define the model equation and its Jacobian

def f_exp(t, a, b):
    """Exponential growth law, $f(t) = a (1 + b)^t$.

    Parameters
    ----------
    t : scalar or array
        Time at which to evaluate the model.
    a : scalar
        Value of the model at $t = 0$.
    b : scalar
        Growth rate per unit of time.

    Returns
    -------
    Model value(s), with the same shape as ``t``.
    """
    growth_factor = 1 + b
    return a * growth_factor ** t

def df_exp(t, a, b):
    """Jacobian of ``f_exp`` with respect to the parameters p = (a, b).

    For an array ``t`` of length n the result has shape (n, 2): column 0 is
    df/da = (1 + b)^t and column 1 is df/db = a t (1 + b)^(t - 1).
    """
    base = 1 + b
    d_da = base ** t
    # Written as a t (1 + b)^t / (1 + b), i.e. a t (1 + b)^(t - 1)
    d_db = (a * t * base ** t) / base
    return np.array([d_da, d_db]).T

def f_log(t, a, b, c):
    """Logistic growth law, $f(t) = c / (\\exp((b - t)/a) + 1)$.

    Parameters
    ----------
    t : scalar or array
        Time at which to evaluate the model.
    a : scalar
        Time scale dividing the distance from the midpoint.
    b : scalar
        Midpoint: the model equals c / 2 at t = b.
    c : scalar
        Value the model approaches where the exponential term vanishes.

    Returns
    -------
    Model value(s), with the same shape as ``t``.
    """
    decay = np.exp((b - t) / a)
    return c / (decay + 1)

def df_log(t, a, b, c):
    """Jacobian of ``f_log`` with respect to the parameters p = (a, b, c).

    For an array ``t`` of length n the result has shape (n, 3), with the
    columns holding df/da, df/db and df/dc in that order.
    """
    expo = np.exp((b - t) / a)   # shared exponential term
    denom_sq = (expo + 1) ** 2

    d_da = c * (b - t) * expo / (a ** 2 * denom_sq)
    d_db = -c * expo / (a * denom_sq)
    d_dc = 1 / (expo + 1)
    return np.array([d_da, d_db, d_dc]).T

In [6]:
def sigfig(x, n):
    """Round ``x`` to ``n`` significant figures.

    Source: https://github.com/corriander/python-sigfig

    Parameters
    ----------
    x : float or array of floats
        Value(s) to round.
    n : int (or anything ``int()`` accepts)
        Number of significant figures to keep.

    Returns
    -------
    numpy.float64 for scalar input, numpy.ndarray for array input.
    Zero is returned as 0.0 instead of nan: log10(0) is -inf, which would
    otherwise turn the rescaling into 0/0.
    """
    n = int(n)

    values = np.asarray(x, dtype=float)
    nonzero = values != 0
    # Substitute a harmless placeholder for zeros so log10 stays finite;
    # those entries are overwritten with 0.0 below.
    safe = np.where(nonzero, values, 1.0)

    e = np.floor(np.log10(np.abs(safe)) - n + 1)  # exponent, 10 ** e
    scale = 10.0 ** e
    rounded = np.around(safe / scale) * scale     # shift, round and revert

    result = np.where(nonzero, rounded, 0.0)
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return result if result.ndim else result[()]

In [7]:
# Data: load the counts and express every record's date as days since the
# first record.

covid = pd.read_csv(dataname)
ndays = len(covid)
key = "diagnosed"
first_fit = 10 # fitting with fewer than 10 data points is meaningless

# Parse each "YYYY-MM-DD" entry once; the first three fields of the parsed
# struct_time are (year, month, day). The first row defines day 0.
ordinals = np.array([date(*strptime(stamp, "%Y-%m-%d")[:3]).toordinal()
                     for stamp in covid["date"]])
first_day = date.fromordinal(int(ordinals[0]))
start = first_day.toordinal()

allPeople = np.array(covid[key])
allDays = np.zeros_like(allPeople)   # same shape and dtype as the counts
allDays[:] = ordinals - start

# Raw string: "\m" is not a valid escape sequence in an ordinary literal.
points = go.Scatter(x=allDays, y=allPeople, mode="markers", name=r"$\mathrm{Diagnosed}$")

# Initialize figure: axes are sized from the data, with a margin of one day
# on the x axis and 500 people on the y axis.

x_axis = go.layout.XAxis(range=[0, 1 + np.amax(allDays)],
                         title="Days since 5 March 2020")
y_axis = go.layout.YAxis(range=[0, 500 + np.amax(allPeople)],
                         title="Number of People")

layout = go.Layout(
    title={"text": "COVID-19 in Montgomery County, Maryland, USA", "x": 0.5},
    xaxis=x_axis,
    yaxis=y_axis,
    plot_bgcolor="rgb(234, 234, 242)",
    showlegend=True,
)

# The raw data points are the first (index 0) trace of the figure.
fig = go.Figure(data=[points], layout=layout)

# For every day of record (dor), fit a model to the records up to and
# including that day and add three hidden traces to the figure: the best-fit
# curve, the upper band and the lower band.

logistic_from = 20  # records before this index are fit with the exponential law

for dor in range(first_fit, ndays, 1):
    model = "exp" if dor < logistic_from else "log"
    f, df = (f_exp, df_exp) if model == "exp" else (f_log, df_log)

    t = allDays[:dor+1]
    y = allPeople[:dor+1]

    # Levenberg-Marquardt Least-Squares Fit
    #
    # Note that sigma represents the relative error associated with each data
    # point. By default, `curve_fit` will assume an array of ones (constant
    # values imply no difference in error), which is probably incorrect: given
    # sparsity of testing, there's considerable uncertainty, and the earlier
    # numbers may be lower than the truth to a greater extent than the later
    # numbers. Quantifying this error by some non-trivial means for each
    # datapoint would produce much more realistic uncertainty bands in the
    # final plots.
    p, pcov = curve_fit(f, t, y, sigma=None, method="lm", jac=df, maxfev=1000)
    perr = np.sqrt(np.diag(pcov))

    # Reduced chi-square goodness of fit
    ## https://en.wikipedia.org/wiki/Reduced_chi-squared_statistic
    # The Pearson statistic sum((obs - exp)^2 / exp) is computed directly:
    # scipy.stats.chisquare rejects inputs whose observed and expected totals
    # differ, which is the normal situation for a fitted curve.

    ndof = len(y) - len(p) - 1
    y_fit = f(t, *p)
    chisq = np.sum((y - y_fit) ** 2 / y_fit)
    chi_sq_red = chisq / ndof

    # Best-fit curve, drawn over the full range of days in the data set

    t_hat = np.linspace(0, allDays[-1], 50)
    y_hat = f(t_hat, *p)
    fig.add_trace(go.Scatter(x=t_hat, y=y_hat, fill=None, visible=False,
                             name=r"$\mathrm{Day}\ %2d,\  \chi^2_{\nu}=%5.2f$" % (dor, chi_sq_red)))

    # Confidence band: shift every parameter by one standard error.
    # In the logistic law (for positive a) a larger midpoint b lowers the
    # curve, so its error enters with the opposite sign. This must follow the
    # model actually fitted in this iteration, not the configured default.

    sign = np.ones_like(perr)
    if model == "log":
        sign[1] = -1

    upr_p = p + sign * perr
    lwr_p = p - sign * perr

    # The band is only drawn up to the last day included in this fit
    t_hat = np.linspace(0, t[-1], 50)
    upper = f(t_hat, *upr_p)
    lower = f(t_hat, *lwr_p)

    fig.add_trace(go.Scatter(x=t_hat, y=upper, fill=None, name=r"$y+\sigma$", visible=False))
    fig.add_trace(go.Scatter(x=t_hat, y=lower, fill="tonexty", name=r"$y-\sigma$", visible=False))
    

# Create and add slider: one step per fitted day. Each step shows the data
# points (trace 0) together with that day's fit curve and its two band traces.
# The steps are appended one by one so the slider contains no empty
# placeholder entries.

steps = []

for i in range(1, len(fig.data), 3):
    day_of_record = first_fit + (i - 1) // 3   # row index this fit ended on
    # Use the recorded day offset of that row, so the label stays correct
    # even if the data set skips calendar days.
    date_of_record = first_day + timedelta(days=int(allDays[day_of_record]))
    date_str = "{0:02d}/{1:02d}".format(date_of_record.month, date_of_record.day)

    visible = [False] * len(fig.data)
    visible[0] = True       # Toggle data trace to "visible"
    for offset in range(3): # Toggle model trace and both band traces
        visible[i + offset] = True

    steps.append(dict(
        method="update",
        args=[{"visible": visible},
              {"title": "Curve fit to COVID-19 data in Montgomery County, MD: 2020/" + date_str}],
        label=date_str
    ))

# Show the most recent fit initially so the figure matches the active step.
if steps:
    for trace, shown in zip(fig.data, steps[-1]["args"][0]["visible"]):
        trace.visible = shown

slider = go.layout.Slider(dict(
    active=max(len(steps) - 1, 0),
    currentvalue={"prefix": "Simple regression on 2020/"},
    pad={"t": 50},
    steps=steps)
)

fig.update_layout(sliders=[slider])

iplot(fig, filename="COVID19-curve-your-county", show_link=False)