In [1]:
import os
import datetime
import random
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.io import output_notebook, reset_output
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, Band
from bokeh.embed import components

import numpy as np

In [2]:
# Configure Bokeh to render plots inline in the notebook.
output_notebook()

In [3]:
import math
import scipy.optimize as optim
import pandas as pd
import statsmodels.api as sm


In [4]:
def output_graphs(script, div, text,
                 home_page_dir = '/home/henry/projects/covid19/home_page/'):
    """Write the three home-page fragments into ``home_page_dir``.

    ``script`` goes to a file named ``script``, ``div`` to ``div`` and
    ``text`` to ``text.txt``. Existing files are overwritten. Returns None.
    """
    fragments = (
        ('script', script),
        ('div', div),
        ('text.txt', text),
    )
    for file_name, content in fragments:
        destination = os.path.join(home_page_dir, file_name)
        with open(destination, 'w') as handle:
            handle.write(content)

In [5]:
def exp_func(x, initial, ratio):
    """Exponential curve ``initial * ratio ** (x - 1)``, evaluated with ``np.power``.

    ``x`` may be a scalar or a NumPy array; the value at ``x == 1`` is ``initial``.
    """
    exponent = x - 1
    growth = np.power(ratio, exponent)
    return initial * growth

In [6]:
def sse(y, y_hat):
    """Sum of squared errors between ``y`` and ``y_hat``.

    Both arguments are indexed by position over ``range(len(y))``.
    """
    return sum((y[idx] - y_hat[idx]) ** 2 for idx in range(len(y)))

def sst(y):
    """Total sum of squares: squared deviations of each value in ``y`` from ``np.mean(y)``."""
    center = np.mean(y)
    return sum((center - value) ** 2 for value in y)



In [7]:
def resample(l):
    """Bootstrap draw: return ``len(l)`` items picked from ``l`` with replacement."""
    return [random.choice(l) for _ in range(len(l))]
def get_ratios(x, y, num_iter = 100):
    """Bootstrap the exponential fit of ``y`` against ``x``.

    For each of ``num_iter`` iterations the (x, y) pairs are resampled with
    replacement, sorted by x, and fitted with ``exp_func`` via
    ``scipy.optimize.curve_fit``. Iterations whose fit raises ``RuntimeError``
    are skipped.

    Returns
    -------
    ratios : list
        Fitted ``ratio`` parameter of every successful iteration.
    low, high : list
        Per-point 5th and 95th percentiles of the fitted curves, each
        evaluated on the original ``x`` values.
    """
    pairs = list(zip(x, y))
    ratios = []
    fitted_curves = []
    for _ in range(num_iter):
        draw = sorted(resample(pairs), key=lambda pair: pair[0])
        xs = np.array([pair[0] for pair in draw])
        ys = np.array([pair[1] for pair in draw])
        try:
            params, _cov = optim.curve_fit(f=exp_func, xdata=xs, ydata=ys)
        except RuntimeError:
            # The optimiser did not converge on this resample; drop it.
            continue
        # Evaluate the fitted curve on the original x grid, not the resampled one.
        curve = [exp_func(initial=params[0], ratio=params[1], x=x_value) for x_value in x]
        fitted_curves.append(curve)
        ratios.append(params[1])

    # Transpose so each entry holds every bootstrap value for one x position.
    columns = list(zip(*fitted_curves))
    high = [np.percentile(column, 95) for column in columns]
    low = [np.percentile(column, 5) for column in columns]
    return ratios, low, high



In [8]:
def make_trend_line(df, plot_width = 350, plot_height = 350, title = '', days_back = 14, window = 1):
    """Fit an exponential trend to recent ``new_cases`` and plot it with a bootstrap band.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``date`` and ``new_cases`` columns. The cutoff date is
        taken positionally (``days_back`` rows from the end), so rows are
        assumed to be in ascending date order -- TODO confirm against callers.
    plot_width, plot_height : int
        Size passed to the Bokeh figure.
    title : str
        Prefix of the figure title; the fitted ratio and its 5th-95th
        percentile bootstrap range are appended.
    days_back : int
        Number of trailing rows used to pick the cutoff date.
    window : int
        Rolling-mean window applied to ``new_cases`` before fitting.

    Returns
    -------
    tuple
        ``(initial, ratio, p_value, figure)`` where ``initial`` and ``ratio``
        are the fitted ``exp_func`` parameters.
    """
    last_date = df['date'].tolist()[-1 * days_back]
    df_trend = df[(df['date']>=  last_date)]

    # Select values and dates with the SAME mask. The rolling mean is NaN for
    # the first ``window - 1`` rows and for any window touching a missing
    # value; masking both keeps bars, fitted line and band aligned.
    rolled = df_trend['new_cases'].rolling(window).mean()
    valid = rolled.notna().to_numpy()
    nums = rolled.loc[valid].tolist()
    labels = df_trend['date'].loc[valid].tolist()
    labels = [datetime.datetime(x.year, x.month, x.day) for x in labels]

    X = range(len(nums))
    popt, _ = optim.curve_fit(f = exp_func, xdata =np.array(X), ydata = np.array(nums) )
    y_hat = [exp_func(initial = popt[0], ratio = popt[1], x = x_val) for x_val in X]

    # Goodness of fit, 1 - SSE/SST; the printed label says "r".
    sse_ = sse(nums, y_hat)
    sst_ = sst(nums)
    r = 1 - sse_/sst_
    print('r is {r}'.format(r = r))

    ratios, y_hat_low, y_hat_high = get_ratios(x = X, y=nums, num_iter = 100)
    # NOTE(review): this is the share of bootstrap ratios >= 1 -- confirm that
    # is the intended meaning of "p_value".
    p_value = 1- len([ratio for ratio in ratios if ratio < 1])/len(ratios)
    lower_slope = np.percentile(ratios, 5)
    upper_slope = np.percentile(ratios, 95)

    p = figure(x_axis_type = 'datetime', title = '{title} {p} ({p1}-{p2})'.format(
        title = title, p = round(popt[1],2), p1 = round(lower_slope,2), p2 = round(upper_slope,2)),
                 plot_width = plot_width , plot_height = plot_height, y_range = None)
    p.vbar(x=labels, top=nums, line_width = 5, width = .9)
    p.line(x = labels, y = y_hat)
    # Shaded 5th-95th percentile band of the bootstrap curves, drawn under the bars.
    source = ColumnDataSource({'x':labels, 'upper': y_hat_high, 'lower':y_hat_low})
    band = Band(base='x', lower='lower', upper='upper', source=source, 
            level='underlay', fill_alpha=1.0, line_width=1, line_color='white', fill_color = '#ff9999')
    p.add_layout(band)
    return popt[0], popt[1], p_value, p


In [9]:
# Call output_notebook() again before rendering the grid in this cell.
# The reset_output() call is left commented out.
#reset_output()
output_notebook()
def do_wash(days_back = 14, window = 1):
    """Build a three-panel Bokeh grid: King County, Washington state and non-King.

    Each panel is the figure returned by ``make_trend_line``; the fitted
    parameters and p-value it also returns are discarded. ``days_back`` and
    ``window`` are forwarded unchanged to every panel.
    """
    trend_options = {'days_back': days_back, 'window': window}
    panels = []

    # King County rows from the county-level file.
    county_df = pd.read_csv('data/seven_day_county.csv')
    county_df['date'] = pd.to_datetime(county_df['date'])
    in_washington = county_df['state'] == 'Washington'
    in_king = county_df['county'] == 'King'
    _, _, _, king_fig = make_trend_line(county_df[in_washington & in_king],
                                        title='King', **trend_options)
    panels.append(king_fig)

    # Statewide rows; the 'cases' column is used as 'new_cases'.
    state_df = pd.read_csv('data/states.csv')
    state_df = state_df.assign(new_cases=state_df['cases'])
    state_df['date'] = pd.to_datetime(state_df['date'])
    washington_df = state_df[state_df['state'] == 'Washington']
    _, _, _, state_fig = make_trend_line(washington_df, title='Washington',
                                         **trend_options)
    panels.append(state_fig)

    # Non-King file, used without filtering; 'cases' again becomes 'new_cases'.
    rest_df = pd.read_csv('data/non_king.csv')
    rest_df['date'] = pd.to_datetime(rest_df['date'])
    rest_df = rest_df.assign(new_cases=rest_df['cases'])
    _, _, _, rest_fig = make_trend_line(rest_df, title='Non King',
                                        **trend_options)
    panels.append(rest_fig)

    return gridplot(panels, ncols=3)
# Build the three-panel grid from the last 14 rows with a rolling window of 1
# (a one-row mean, i.e. no smoothing) and display it inline.
grid = do_wash(days_back = 14, window = 1)
show(grid)

r is 0.4141446955816511
r is 0.44281752597601975
r is 0.3020675005096707


In [10]:
# Export the grid as embeddable script/div fragments and write them, together
# with the summary text, to the home-page directory.
script, div = components(grid)
# Typo fix in the published text: "doubing" -> "doubling".
text = """(2020-11-08)\nWe are doubling our infections every 7 days."""
output_graphs(script, div, text = text)

In [11]:
def resample(l):
    """Return a bootstrap sample of ``l``: ``len(l)`` random picks with replacement."""
    # NOTE: this redefines the identical ``resample`` from an earlier cell.
    size = len(l)
    return [random.choice(l) for _ in range(size)]
def repeat_resample(sample_a, sample_b, num_iter = 1000):
    """Bootstrap the difference in means between two samples.

    Each of the ``num_iter`` iterations resamples ``sample_a`` and then
    ``sample_b`` with replacement and records ``mean(a) - mean(b)``.
    Returns the list of recorded differences.
    """
    def one_difference():
        # Draw a before b so the random stream is consumed in a fixed order.
        boot_a = resample(sample_a)
        boot_b = resample(sample_b)
        return np.mean(boot_a) - np.mean(boot_b)

    return [one_difference() for _ in range(num_iter)]

In [12]:
def linear_vs_exp(df, plot_width = 350, plot_height = 350, title = '', days_back = 14):
    """Compare an exponential and a linear fit of recent ``new_cases`` and print the results.

    Uses the rows of ``df`` dated on or after the date found ``days_back``
    rows from the end. Prints, in order: the OLS slope and intercept; the
    summed squared residuals of the exponential and linear fits; the slope
    p-value rounded to 2 places; all OLS p-values; and a bootstrap value
    comparing the two sets of squared residuals. Returns None.

    ``plot_width``, ``plot_height`` and ``title`` are accepted but unused.
    """
    cutoff = df['date'].tolist()[-1 * days_back]
    df_trend = df[df['date'] >= cutoff]
    y = df_trend['new_cases'].rolling(1).mean().tolist()
    steps = range(len(y))
    # Kept from the original; the frame with the added column is not read again.
    df_trend = df_trend.assign(x = steps)

    # Exponential fit and its squared residuals.
    params, _cov = optim.curve_fit(f = exp_func, xdata = np.array(steps), ydata = np.array(y))
    exp_fit = [exp_func(initial = params[0], ratio = params[1], x = step) for step in steps]
    ssr_curve = [(fitted - observed) ** 2 for fitted, observed in zip(exp_fit, y)]

    # Linear (OLS) fit with an intercept and its squared residuals.
    design = sm.add_constant([(step,) for step in steps])
    result = sm.OLS(y, design).fit()
    intercept = result.params[0]
    slope = result.params[1]
    lin_fit = [step * slope + intercept for step in steps]
    print(slope, intercept)
    ssr_lin = [(fitted - observed) ** 2 for fitted, observed in zip(lin_fit, y)]
    print(sum(ssr_curve), sum(ssr_lin))
    print(round(result.pvalues[1],2))
    print(result.pvalues)

    # Bootstrap mean(exp residuals) - mean(linear residuals).
    differences = repeat_resample(ssr_curve, ssr_lin, num_iter = 1000)
    positive = [diff for diff in differences if diff > 0]
    p_value = 1 - len(positive)/len(differences)
    print(p_value)
   
# Load the county-level table and parse its date column.
DF = pd.read_csv('data/seven_day_county.csv')
DF['date'] = pd.to_datetime(DF['date'])

# Run the linear-vs-exponential comparison on King County, Washington, using a 28-row lookback.
df_king = DF[(DF['state'] == 'Washington') & (DF['county'] == 'King')]
linear_vs_exp(df_king, days_back = 28)


12.768199233716475 79.0221674876847
199047.04750512665 226280.51094690745
0.0
[2.96362130e-02 3.62723769e-06]
0.636


In [13]:
# Doubling time for a per-step growth ratio of 1.11.
# Growth factor after 14 steps: 1.11^14 (about 4.31).
x = np.power(1.11, 14)
# To go the other way, solve 4.31 = 1.11^x for x:
#   log(4.31) = log(1.11^x)
#   log(4.31) = x * log(1.11)
#   x = log(4.31) / log(1.11)
print(x)
# Check: this recovers the exponent 14 used above. The value is not displayed
# because it is not the last expression in the cell.
math.log(4.31044)/math.log(1.11)
# So the doubling time is log(2) / log(1.11) steps:
math.log(2)/math.log(1.11)

4.3104409804844


6.641884618417903