In [1]:
import os
import datetime
import random
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.io import output_notebook
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, Band
import numpy as np

In [2]:
# Configure Bokeh so that later show() calls render inline in this notebook.
output_notebook()

In [3]:
import math
import scipy.optimize as optim
import pandas as pd

In [4]:
def output_graphs(script, div, text,
                  home_page_dir='/home/henry/projects/covid19/home_page/'):
    """Write three strings to fixed file names inside ``home_page_dir``.

    ``script`` is written to the file ``script``, ``div`` to ``div`` and
    ``text`` to ``text.txt``.  Each file is opened in ``'w'`` mode, so any
    existing content is replaced.  Nothing is returned.
    """
    targets = (
        ('script', script),
        ('div', div),
        ('text.txt', text),
    )
    # Same three writes, in the same order, expressed as one loop.
    for file_name, content in targets:
        destination = os.path.join(home_page_dir, file_name)
        with open(destination, 'w') as handle:
            handle.write(content)

In [8]:
def exp_func(x, initial, ratio):
    """Evaluate the geometric curve ``initial * ratio ** (x - 1)``.

    ``x`` comes first so the function can be handed to
    ``scipy.optimize.curve_fit`` as the model, as the fitting code in this
    notebook does.  Because the exponent is ``x - 1``, ``initial`` is the
    curve's value at ``x == 1``.  The power is taken with ``np.power``, so
    ``x`` may be a scalar or a NumPy array.
    """
    exponent = x - 1
    growth = np.power(ratio, exponent)
    return initial * growth

In [5]:
def error_opt(X, y, y_hat, num_iter = 100):
    """Build a 5th-95th percentile band around an ``exp_func`` fit.

    In each of ``num_iter`` rounds the residuals ``y[i] - y_hat[i]`` are
    randomly permuted with ``random.shuffle``, added onto ``y``, and
    ``exp_func`` is re-fitted to the perturbed values with
    ``scipy.optimize.curve_fit``.  The 5th and 95th percentiles of the fitted
    ``initial`` and ``ratio`` parameters are then used to evaluate a lower
    and an upper curve at every point of ``X``.

    Parameters
    ----------
    X : sequence of numbers
        x positions of the observations (converted with ``np.array``).
    y : indexable sequence of numbers
        Observed values, one per element of ``X``.
    y_hat : indexable sequence of numbers
        Fitted values for ``y``; must have the same length as ``y``.
    num_iter : int
        Number of shuffle-and-refit rounds.

    Returns
    -------
    tuple(list, list)
        ``(y_hat_low, y_hat_high)``: the curve evaluated at each ``x`` in
        ``X`` with the 5th-percentile and the 95th-percentile parameters.

    Raises
    ------
    ValueError
        If ``y`` and ``y_hat`` differ in length.
    """
    if len(y) != len(y_hat):
        raise ValueError('y and y_hat must have the same length')

    # Loop-invariant inputs are prepared once instead of on every round.
    x_values = np.array(X)
    residuals = [y[pos] - y_hat[pos] for pos in range(len(y))]

    initials = []
    ratios = []
    for _ in range(num_iter):
        # Shuffle a fresh copy so every round starts from the same ordering;
        # this keeps the random-number consumption identical for a given seed.
        shuffled = list(residuals)
        random.shuffle(shuffled)
        # NOTE(review): the shuffled residuals are added to the observations
        # ``y``; a conventional residual bootstrap would add them to ``y_hat``.
        # Behaviour is kept as-is -- confirm which was intended.
        y_new = [y[pos] + shuffled[pos] for pos in range(len(y))]
        popt, _pcov = optim.curve_fit(f = exp_func, xdata = x_values, ydata = np.array(y_new))
        initials.append(popt[0])
        ratios.append(popt[1])

    # Percentiles are computed once rather than once per element of X.
    low_initial, high_initial = np.percentile(initials, [5, 95])
    low_ratio, high_ratio = np.percentile(ratios, [5, 95])
    y_hat_low = [exp_func(initial = low_initial, ratio = low_ratio, x = x) for x in X]
    y_hat_high = [exp_func(initial = high_initial, ratio = high_ratio, x = x) for x in X]
    return y_hat_low, y_hat_high

In [14]:
def resample_two_samples(sample1, sample2, num_iterations = 100):
    """Permutation-resample the means of two pooled samples.

    The two samples are pooled into one list.  On each of ``num_iterations``
    rounds the pool is shuffled in place with ``random.shuffle`` and split
    back into a first part of ``len(sample1)`` elements and a second part
    holding the rest; the mean of each part is recorded.

    Parameters
    ----------
    sample1, sample2 : sequence of numbers
        Any sized iterables (list, tuple, NumPy array, ...).  They are copied
        into a new list, so the inputs themselves are never reordered.
    num_iterations : int
        Number of shuffle rounds.

    Returns
    -------
    tuple(list, list)
        ``(sum1, sum2)``: per-round means of the first and second part.
    """
    # list(...) makes the pooling a true concatenation for any sequence type;
    # ``+`` on NumPy arrays would add element-wise and a pooled tuple could
    # not be shuffled in place.
    pooled = list(sample1) + list(sample2)
    split_at = len(sample1)
    means_first = []
    means_second = []
    for _ in range(num_iterations):
        random.shuffle(pooled)
        means_first.append(np.mean(pooled[:split_at]))
        means_second.append(np.mean(pooled[split_at:]))
    return means_first, means_second

def combine_resamples(sample1, sample2, resample1, resample2):
    """Combine two sets of resampled means into one array of differences.

    For round ``i`` the result is
    ``(mean(sample1) - resample1[i]) - (mean(sample2) - resample2[i])``.

    Parameters
    ----------
    sample1, sample2 : sequence of numbers
        The original samples; only their means are used.
    resample1, resample2 : sequence of numbers
        Resampled means, one entry per round (same length as each other).

    Returns
    -------
    numpy.ndarray
        One combined difference per round.
    """
    # Each sample mean is computed once instead of once per resample entry.
    mean1 = np.mean(sample1)
    mean2 = np.mean(sample2)
    offsets1 = mean1 - np.array(resample1)
    offsets2 = mean2 - np.array(resample2)
    return offsets1 - offsets2

In [10]:
def get_p_value(y, y_hat):
    """Return the share of resampling rounds with a positive combined difference.

    Squared errors of ``y_hat`` against ``y`` are compared with the squared
    errors of a constant prediction equal to ``mean(y)``.  Both error lists
    go through ``resample_two_samples`` and ``combine_resamples``; the result
    is the fraction of combined values that are strictly greater than zero.
    """
    fit_sq_errors = diff(y, y_hat)
    # Baseline model: predict the mean of y everywhere.
    baseline = [np.mean(y)] * len(y)
    null_sq_errors = diff(y, baseline)
    resampled_fit, resampled_null = resample_two_samples(fit_sq_errors, null_sq_errors)
    combined = combine_resamples(fit_sq_errors, null_sq_errors,
                                 resampled_fit, resampled_null)
    positives = sum(1 for value in combined if value > 0)
    return positives / len(combined)

In [12]:
def diff(x1, x2):
    """Return the list of squared differences ``(x1[i] - x2[i]) ** 2``.

    Both arguments must have the same length (checked with ``assert``) and
    are accessed by integer index.
    """
    assert len(x1) == len(x2)
    return [(x1[pos] - x2[pos]) ** 2 for pos in range(len(x1))]

In [6]:
def make_trend_line(df, plot_width = 350, plot_height = 350, title = '', num_days = 14):
    """Fit ``exp_func`` to the most recent rows of ``df`` and plot the result.

    The window starts at the date found ``num_days`` rows from the end of
    ``df['date']`` and keeps every row whose date is on or after it.  The
    ``new_cases`` values in that window are fitted with
    ``scipy.optimize.curve_fit`` against ``x = 0, 1, 2, ...``.  The figure
    shows the values as bars, the fitted curve as a line and the band from
    ``error_opt`` underneath.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``date`` column whose values expose ``.year``,
        ``.month`` and ``.day``, and a numeric ``new_cases`` column.
    plot_width, plot_height : int
        Forwarded to ``bokeh.plotting.figure``.
    title : str
        Plot title; the fitted ratio rounded to 2 decimals is appended.
    num_days : int
        Number of rows counted back from the end to find the window's first
        date.  Defaults to 14.

    Returns
    -------
    tuple
        ``(initial, ratio, p_value, figure)``.  ``initial`` and ``ratio`` are
        the fitted ``exp_func`` parameters.  Because ``exp_func`` uses the
        exponent ``x - 1`` and ``x`` starts at 0, ``initial`` is the fitted
        value at the second row of the window.  ``p_value`` comes from
        ``get_p_value``.

    Raises
    ------
    ValueError
        If ``num_days`` is smaller than 1.
    IndexError
        If ``df`` has fewer than ``num_days`` rows.
    """
    if num_days < 1:
        raise ValueError('num_days must be at least 1')

    first_date = df['date'].iloc[-num_days]
    df_trend = df[df['date'] >= first_date]
    # A rolling window of 1 applies no smoothing; widen it here to average
    # over several days.  NOTE(review): presumably a leftover knob -- confirm.
    nums = df_trend['new_cases'].rolling(1).mean().tolist()
    X = range(len(nums))

    # Rebuild the dates as plain datetime objects (midnight) for the x axis.
    labels = [datetime.datetime(d.year, d.month, d.day)
              for d in df_trend['date'].tolist()]

    popt, pcov = optim.curve_fit(f = exp_func, xdata = np.array(X), ydata = np.array(nums))
    y_hat = [exp_func(initial = popt[0], ratio = popt[1], x = x) for x in X]
    p_value = get_p_value(nums, y_hat)
    y_hat_low, y_hat_high = error_opt(X, nums, y_hat)

    # NOTE(review): Bokeh 3.0 removed plot_width/plot_height in favour of
    # width/height; the keywords are kept unchanged here -- confirm the
    # installed Bokeh version before upgrading.
    p = figure(x_axis_type = 'datetime',
               title = '{title} {p}'.format(title = title, p = round(popt[1], 2)),
               plot_width = plot_width, plot_height = plot_height, y_range = None)
    p.vbar(x = labels, top = nums, line_width = 5, width = .9)
    p.line(x = labels, y = y_hat)

    source = ColumnDataSource({'x': labels, 'upper': y_hat_high, 'lower': y_hat_low})
    band = Band(base = 'x', lower = 'lower', upper = 'upper', source = source,
                level = 'underlay', fill_alpha = 1.0, line_width = 1,
                line_color = 'white', fill_color = '#ff9999')
    p.add_layout(band)
    return popt[0], popt[1], p_value, p

In [15]:
def do_wash():
    """Plot trend lines for King County, Washington State and non-King data.

    Three CSV files under ``data/`` are read in turn.  For each one the
    ``date`` column is parsed with ``pd.to_datetime``, the frame is prepared
    as described below, and ``make_trend_line`` produces a figure.  The three
    figures are shown side by side in a three-column grid.

    * ``data/seven_day_county.csv`` -- rows with state ``Washington`` and
      county ``King``; uses the file's own ``new_cases`` column.
    * ``data/states.csv`` -- rows with state ``Washington``; ``new_cases`` is
      a copy of ``cases``.
    * ``data/non_king.csv`` -- all rows; ``new_cases`` is a copy of ``cases``.
    """
    # (title, csv path, copy `cases` into `new_cases`?, optional row mask)
    regions = [
        ('King', 'data/seven_day_county.csv', False,
         lambda frame: (frame['state'] == 'Washington') & (frame['county'] == 'King')),
        ('Washington', 'data/states.csv', True,
         lambda frame: frame['state'] == 'Washington'),
        ('Non King', 'data/non_king.csv', True, None),
    ]

    plots = []
    for title, csv_path, copy_cases, row_mask in regions:
        frame = pd.read_csv(csv_path)
        frame['date'] = pd.to_datetime(frame['date'])
        if copy_cases:
            # NOTE(review): `new_cases` is taken verbatim from `cases`; if that
            # column holds cumulative totals the fit is on totals -- confirm
            # against the CSV contents.
            frame = frame.assign(new_cases = frame['cases'])
        if row_mask is not None:
            frame = frame[row_mask(frame)]
        # Only the figure is needed here; the fit parameters are discarded.
        _initial, _ratio, _p_value, plot = make_trend_line(frame, title = title)
        plots.append(plot)

    grid = gridplot(plots, ncols = 3)
    show(grid)
# Run the analysis: reads the CSVs under data/ and displays the plot grid.
do_wash()