In [1]:
import datetime
import os


In [2]:
import pandas as pd
import statsmodels.api as sm
import scipy.optimize as optim
import numpy as np

In [3]:
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.io import output_notebook
from bokeh.io import reset_output
from bokeh.layouts import gridplot
from bokeh.models import Arrow, NormalHead, OpenHead, VeeHead
from bokeh.models import Label
from bokeh.models import Span
from bokeh.embed import components

In [4]:
def exp_func(x, initial, ratio):
    """Evaluate a geometric growth curve at ``x``.

    The curve takes the value ``initial`` at ``x == 1`` and is multiplied
    by ``ratio`` for every unit step in ``x``:
    ``initial * ratio ** (x - 1)``.

    ``x`` may be a scalar or a NumPy array; the power is computed with
    ``np.power`` so array inputs are handled element-wise.
    """
    exponent = x - 1
    growth = np.power(ratio, exponent)
    return initial * growth

In [5]:
def get_data(path = 'data/state_poisson.csv'):
    """Load a per-state case table and return it sorted by date.

    Parameters
    ----------
    path : str
        CSV file to read. Defaults to ``data/state_poisson.csv``, the file
        this notebook has always used, so existing ``get_data()`` calls
        behave as before.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with the ``date`` column converted to datetimes
        and the rows ordered by ascending ``date``.

    Raises
    ------
    KeyError
        If the file has no ``date`` column.
    """
    df = pd.read_csv(path)
    # Parse explicitly so malformed dates fail loudly instead of staying strings.
    df['date'] = pd.to_datetime(df['date'])
    return df.sort_values(by = 'date')

In [6]:
def get_data_states(path = 'data/states.csv'):
    """Load a per-state case table and return it sorted by date.

    Parameters
    ----------
    path : str
        CSV file to read. Defaults to ``data/states.csv``, the file this
        notebook has always used, so existing ``get_data_states()`` calls
        behave as before.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with the ``date`` column converted to datetimes
        and the rows ordered by ascending ``date``.

    Raises
    ------
    KeyError
        If the file has no ``date`` column.
    """
    df = pd.read_csv(path)
    # Parse explicitly so malformed dates fail loudly instead of staying strings.
    df['date'] = pd.to_datetime(df['date'])
    return df.sort_values(by = 'date')

In [7]:
def get_linear(y):
    """Fit an ordinary-least-squares line through ``y`` versus its position.

    The regressor is simply ``0, 1, ..., len(y) - 1``; a constant column is
    added in front of it so the first fitted parameter is the intercept.

    Returns
    -------
    tuple
        ``(intercept, slope, slope_p_value)`` taken from the fitted model's
        ``params`` and ``pvalues``.
    """
    # One single-element row per observation: the 2-D layout the model expects.
    positions = [(idx,) for idx in range(len(y))]
    design = sm.add_constant(positions)
    fit = sm.OLS(y, design).fit()
    intercept = fit.params[0]
    slope = fit.params[1]
    slope_p_value = fit.pvalues[1]
    return intercept, slope, slope_p_value

In [8]:
# Inline notebook rendering is intentionally left disabled in this cell;
# uncomment the next line to send Bokeh output to the notebook.
#output_notebook()
# Clear whatever Bokeh output mode was configured earlier in the session.
reset_output()

In [9]:
def make_dt_graph(dates, y, dates2, y2, title = None,  plot_height = 350, plot_width = 350, p = None):
    """Draw two time series on one datetime-axis plot and return the plot.

    Parameters
    ----------
    dates, y
        x/y values of the first series, drawn with line width 2.
    dates2, y2
        x/y values of the second series, drawn as a thin, half-transparent
        red line.
    title, plot_height, plot_width
        Used only when a new figure has to be created.
    p
        Existing plot to draw on. When it is falsy (the default ``None``)
        a new figure with a datetime x-axis is created.

    Returns
    -------
    The plot that was drawn on (either ``p`` or the newly created figure).
    """
    if p:
        plot = p
    else:
        plot = figure(x_axis_type = 'datetime', title = title,
                      plot_width = plot_width, plot_height = plot_height)
    plot.line(x = dates, y = y, line_width = 2)
    plot.line(x = dates2, y = y2, color = 'red', line_width = .5, alpha = .5)
    return plot

def make_state_graph(df, df2, state):
    """Plot the ``cases`` series of one state from two data frames.

    Rows whose ``state`` column equals ``state`` are selected from both
    frames. ``df`` supplies the first (thicker) line and ``df2`` the second
    (thin red) line of the plot produced by ``make_dt_graph``; the state
    name is used as the plot title.
    """
    primary_rows = df[df['state'] == state]
    overlay_rows = df2[df2['state'] == state]
    return make_dt_graph(
        title = state,
        dates = primary_rows['date'], y = primary_rows['cases'],
        dates2 = overlay_rows['date'], y2 = overlay_rows['cases'],
    )

def make_state_graph_pop(df, df2, state):
    """Plot the ``cases`` series of one state from two data frames.

    Rows whose ``state`` column equals ``state`` are selected from both
    frames and handed to ``make_dt_graph`` with the state name as title.

    NOTE(review): this currently does exactly what ``make_state_graph``
    does. The ``_pop`` suffix suggests a population-adjusted variant was
    intended -- confirm before relying on a difference between the two.
    """
    primary_rows = df[df['state'] == state]
    overlay_rows = df2[df2['state'] == state]
    return make_dt_graph(
        title = state,
        dates = primary_rows['date'], y = primary_rows['cases'],
        dates2 = overlay_rows['date'], y2 = overlay_rows['cases'],
    )

In [10]:
# Clear any previously configured Bokeh output mode before building the grid.
reset_output()
def all_states():
    """Build one two-series plot per state.

    Both case tables are loaded, the distinct values of the ``state``
    column of the ``get_data()`` table are sorted, and a plot is created
    for each via ``make_state_graph``.

    Returns
    -------
    list
        The plots, in sorted order of state name.
    """
    poisson_df = get_data()
    states_df = get_data_states()
    state_names = sorted(set(poisson_df['state'].tolist()))
    return [make_state_graph(df = poisson_df, df2 = states_df, state = name)
            for name in state_names]
# Build every state's plot, lay the plots out three per row, and display the grid.
grids = all_states()
grid = gridplot(grids, ncols = 3)
show(grid)

In [11]:
def get_rates_rolling(start = datetime.datetime(2020,3,1),
                     end =  datetime.datetime(2020,9,1)):
    """Yield rolling ``(window_start, window_end)`` date pairs.

    Each window spans 13 days (``window_end = window_start + 13 days``) and
    consecutive windows start one day apart. At least one window is always
    produced, and generation stops right after the first window whose end
    lies strictly after ``end``.

    NOTE(review): because the stop check happens after the yield, the last
    window produced ends past ``end`` -- confirm that this is intended.
    """
    window_length = datetime.timedelta(days = 13)
    one_day = datetime.timedelta(days = 1)
    window_start = start
    while True:
        window_end = window_start + window_length
        yield window_start, window_end
        if window_end > end:
            return
        window_start = window_start + one_day

In [12]:
def get_range(l, range_l = 14):
    """Yield every full-length sliding window over ``l``.

    Windows start at positions ``0, 1, 2, ...`` and each contains exactly
    ``range_l`` consecutive items. Generation stops as soon as a window
    would run past the end of ``l``, so a sequence shorter than ``range_l``
    produces no windows at all.

    Parameters
    ----------
    l : sequence
        Sliceable sequence (the notebook passes plain lists).
    range_l : int
        Window size; must be at least 1.

    Yields
    ------
    list
        A new list holding the items of the current window.

    Raises
    ------
    ValueError
        If ``range_l`` is smaller than 1. Raised when iteration starts,
        because this is a generator.
    """
    if range_l < 1:
        # A zero-length window carries no data and would only hand empty
        # samples to the downstream fits.
        raise ValueError('range_l must be a positive integer, got {!r}'.format(range_l))
    # The last valid start is len(l) - range_l; the range is empty when l is too short.
    for start in range(len(l) - range_l + 1):
        yield list(l[start:start + range_l])

In [13]:
def get_ranges(l1, l2, l3, range_l = 14):
    """Yield aligned full-length sliding windows over three sequences.

    The three inputs are windowed in lock-step: the k-th result holds items
    ``k .. k + range_l - 1`` of each input. Generation stops as soon as a
    window would run past the end, so inputs shorter than ``range_l``
    produce no windows.

    Parameters
    ----------
    l1, l2, l3 : sequence
        Sliceable sequences of equal length (the notebook passes lists).
    range_l : int
        Window size; must be at least 1.

    Yields
    ------
    tuple of list
        ``(window_1, window_2, window_3)``, each a new list.

    Raises
    ------
    AssertionError
        If the three inputs do not all have the same length.
    ValueError
        If ``range_l`` is smaller than 1.

    Both errors are raised when iteration starts, because this is a
    generator.
    """
    # Raised explicitly (not via ``assert``) so the check still runs under
    # ``python -O`` while callers keep seeing the same exception type.
    if len(l1) != len(l2):
        raise AssertionError('l1 and l2 differ in length: {} != {}'.format(len(l1), len(l2)))
    if len(l2) != len(l3):
        raise AssertionError('l2 and l3 differ in length: {} != {}'.format(len(l2), len(l3)))
    if range_l < 1:
        # A zero-length window carries no data and would only hand empty
        # samples to the downstream fits.
        raise ValueError('range_l must be a positive integer, got {!r}'.format(range_l))
    # The last valid start is len(l1) - range_l; the range is empty when inputs are too short.
    for start in range(len(l1) - range_l + 1):
        stop = start + range_l
        yield list(l1[start:stop]), list(l2[start:stop]), list(l3[start:stop])


In [14]:
def _get_info(dates, cases, cases_poisson, len_range = 14):
    """Fit linear and exponential trends on every sliding window.

    The three inputs are windowed together with ``get_ranges``; for each
    window both case series are fitted with ``get_linear`` and
    ``get_exp_fit``.

    Parameters
    ----------
    dates, cases, cases_poisson : list
        Equal-length, position-aligned sequences.
    len_range : int
        Number of observations per window.

    Returns
    -------
    list of dict
        One record per window with keys ``date`` (last date of the
        window), ``intercept``, ``slope``, ``p``, ``exp_r`` for ``cases``
        and ``intercept_poisson``, ``slope_poisson``, ``p_poisson``,
        ``exp_r_poisson`` for ``cases_poisson``.
    """
    final = []
    windows = get_ranges(dates, cases, cases_poisson, range_l = len_range)
    for dates_, cases_, cases_poisson_ in windows:
        inte, slope, p = get_linear(cases_)
        inte_p, slope_p, p_p = get_linear(cases_poisson_)
        exp_r = get_exp_fit(cases_)
        exp_r_p = get_exp_fit(cases_poisson_)
        final.append({
            'date': dates_[-1],
            'intercept': inte,
            'slope': slope,
            'p': p,
            'exp_r': exp_r,
            'intercept_poisson': inte_p,
            'slope_poisson': slope_p,
            # Fixed: this used to store slope_p, i.e. the slope a second
            # time, instead of the slope's p-value.
            'p_poisson': p_p,
            'exp_r_poisson': exp_r_p,
        })
    return final

In [15]:
def calc_diff_slopes(info):
    """Compare the two fits stored in each window record.

    The previous body indexed each record as a pair of tuples
    (``i[0][2]`` ...) although the records are dicts, so it raised
    ``KeyError`` on any non-empty input. It now reads the dict keys
    directly and emits exactly one result per input record.

    Parameters
    ----------
    info : iterable of dict
        Records with at least the keys ``date``, ``slope``,
        ``slope_poisson``, ``exp_r`` and ``exp_r_poisson``.

    Returns
    -------
    list of dict
        One record per input with keys:

        ``date``                the input record's date;
        ``slope_diff``          ``slope - slope_poisson``;
        ``same_slope_sign``     True when the two slopes do not have
                                opposite signs (their product is >= 0);
        ``exp_r_diff``          ``exp_r - exp_r_poisson``;
        ``same_exp_direction``  True when both rates are below 1 or both
                                are above 1 (a rate of exactly 1 counts
                                as a disagreement).
    """
    final = []
    for i in info:
        slope, slope_p = i['slope'], i['slope_poisson']
        exp_r, exp_r_p = i['exp_r'], i['exp_r_poisson']
        final.append({
            'date': i['date'],
            # regular slope - poisson slope
            'slope_diff': slope - slope_p,
            'same_slope_sign': slope * slope_p >= 0,
            'exp_r_diff': exp_r - exp_r_p,
            'same_exp_direction': (exp_r < 1 and exp_r_p < 1) or (exp_r > 1 and exp_r_p > 1),
        })
    return final

In [16]:
def get_exp_fit(y):
    """Fit ``exp_func`` to ``y`` and return the fitted growth ratio.

    The observations are fitted against their positions
    ``0, 1, ..., len(y) - 1`` with ``scipy.optimize.curve_fit``. Only the
    second fitted parameter (``ratio`` in ``exp_func``) is returned; the
    fitted ``initial`` value and the covariance matrix are discarded.
    """
    positions = np.array(range(len(y)))
    observed = np.array(y)
    fitted_params, _covariance = optim.curve_fit(f = exp_func, xdata = positions, ydata = observed)
    return fitted_params[1]

In [17]:
def get_comparison(state, df_poisson = None, df = None, len_range = 9,
                   start_date = datetime.datetime(2020, 5, 1),
                   end_date = datetime.datetime(2020, 9, 1)):
    """Compare windowed trend fits of the two case series for one state.

    Parameters
    ----------
    state : str
        Value of the ``state`` column to select.
    df_poisson : pandas.DataFrame, optional
        Table supplying the second ("poisson") series; loaded with
        ``get_data()`` when omitted.
    df : pandas.DataFrame, optional
        Table supplying the dates and the first series; loaded with
        ``get_data_states()`` when omitted.
    len_range : int
        Number of observations per sliding window.
    start_date, end_date : datetime.datetime
        Exclusive bounds on the ``date`` column. The defaults are the
        window that used to be hard-coded here.

    Returns
    -------
    The result of ``calc_diff_slopes`` applied to the per-window fits.

    Notes
    -----
    The two series are paired by position, so both tables are expected to
    hold the same dates for ``state`` inside the window.
    """
    # ``if not df`` raised "truth value of a DataFrame is ambiguous" for any
    # supplied frame and also overwrote a supplied df_poisson; test each
    # argument against None independently instead.
    if df_poisson is None:
        df_poisson = get_data()
    if df is None:
        df = get_data_states()
    df_state_p = df_poisson[(df_poisson['state'] == state) &
                            (df_poisson['date'] > start_date) &
                            (df_poisson['date'] < end_date)
                           ]
    df_state = df[(df['state'] == state) &
                  (df['date'] > start_date) &
                  (df['date'] < end_date)
                 ]
    dates = df_state['date'].tolist()
    cases = df_state['cases'].tolist()
    cases_poisson = df_state_p['cases'].tolist()
    return calc_diff_slopes(_get_info(dates, cases, cases_poisson, len_range))

def comparison_with_graph(state, start_date = datetime.datetime(2020, 3, 1),
                    end_date = datetime.datetime(2020, 9, 1),
                         date_range = 14):
    """Plot both case series for a state and mark windows where they disagree.

    Bokeh notebook output is enabled, both tables are loaded, and rows for
    ``state`` with ``start_date < date < end_date`` are selected. The
    ``get_data_states()`` series is drawn as a line with the
    ``get_data()`` ("poisson") series in red on top. For every sliding
    window of ``date_range`` observations the fitted exponential rates of
    the two series are compared; unless both are below 1 or both are above
    1, the window's last date is marked with a black circle on the first
    series and a green circle on the second, and a line is printed.

    Returns
    -------
    The Bokeh figure.

    NOTE(review): the two series are paired by position, which assumes both
    tables hold the same dates for ``state`` -- confirm against the data.
    """
    output_notebook()
    poisson_all = get_data()
    reported_all = get_data_states()

    poisson_mask = ((poisson_all['state'] == state) &
                    (poisson_all['date'] > start_date) &
                    (poisson_all['date'] < end_date))
    poisson_state = poisson_all[poisson_mask]
    reported_mask = ((reported_all['state'] == state) &
                     (reported_all['date'] > start_date) &
                     (reported_all['date'] < end_date))
    reported_state = reported_all[reported_mask]

    dates = reported_state['date'].tolist()
    cases = reported_state['cases'].tolist()
    cases_poisson = poisson_state['cases'].tolist()

    # date -> (reported value, poisson value), used to place the markers
    cases_dict = {day: (cases[pos], cases_poisson[pos])
                  for pos, day in enumerate(dates)}

    p = figure(x_axis_type = 'datetime')
    p.line(x = dates, y = cases)
    p.line(x = dates, y = cases_poisson, color = 'red')

    for window in _get_info(dates, cases, cases_poisson, len_range = date_range):
        rate = window['exp_r']
        rate_poisson = window['exp_r_poisson']
        both_shrinking = rate < 1 and rate_poisson < 1
        both_growing = rate > 1 and rate_poisson > 1
        if both_shrinking or both_growing:
            # The two fits agree on the direction; nothing to flag.
            continue
        reported_point, poisson_point = cases_dict[window['date']]
        # marker on the reported series
        p.circle(x = window['date'], y = reported_point, color = 'black')
        # marker on the poisson series
        p.circle(x = window['date'], y = poisson_point, color = 'green')
        print('date is {d} possion rate is {p} and date rate is {r}'.format(
            d = window['date'], p = rate_poisson, r = rate))
    return p

# Render the Washington comparison using 7-observation windows.
show(comparison_with_graph('Washington', date_range = 7))


date is 2020-03-28 00:00:00 possion rate is 0.9962718542681136 and date rate is 1.1948351794943028
date is 2020-03-31 00:00:00 possion rate is 1.0218180811591138 and date rate is 0.9588644713295196
date is 2020-04-01 00:00:00 possion rate is 1.0107160895428318 and date rate is 0.8450408415888329
date is 2020-04-02 00:00:00 possion rate is 0.9894779802826784 and date rate is 1.0395949100117101
date is 2020-04-03 00:00:00 possion rate is 0.9659681424730753 and date rate is 1.034607963779776
date is 2020-04-04 00:00:00 possion rate is 0.9467399469598575 and date rate is 1.0798219043547468
date is 2020-04-05 00:00:00 possion rate is 0.9349892918976794 and date rate is 1.1094375267929413
date is 2020-04-06 00:00:00 possion rate is 0.9284472195821287 and date rate is 1.048690035590123
date is 2020-04-13 00:00:00 possion rate is 1.0093022616109162 and date rate is 0.8971626528495593
date is 2020-04-14 00:00:00 possion rate is 1.0325799989221713 and date rate is 0.8252447000800314
date is 2020

In [18]:
from scipy.stats import poisson

# Print the Poisson probability mass P(X = i) for i = 0..13 with mean 5.
for i in range(14):
    p = poisson.pmf(i, 5, loc = 0)
    print(i, p)


0 0.006737946999085467
1 0.03368973499542734
2 0.08422433748856832
3 0.1403738958142805
4 0.17546736976785063
5 0.17546736976785068
6 0.1462228081398754
7 0.10444486295705395
8 0.06527803934815865
9 0.036265577415643714
10 0.018132788707821854
11 0.008242176685373562
12 0.0034342402855723226
13 0.0013208616482970467
