In [1]:
import numpy as np
import random
import datetime
import math

In [2]:
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.io import output_notebook
from bokeh.layouts import gridplot
from bokeh.models import Arrow, NormalHead, OpenHead, VeeHead
from bokeh.models import Label
from bokeh.models import Span

In [3]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
import pandas as pd

In [4]:
# Configure Bokeh so that later show() calls render inline in the notebook.
output_notebook()

In [5]:
def get_states_data(path = 'data/states.csv'):
    """Load the per-state table from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to ``'data/states.csv'``,
            which is resolved relative to the current working directory.

    Returns:
        pandas.DataFrame: The file's contents with the ``date`` column
        converted by ``pd.to_datetime``.

    Raises:
        KeyError: If the file has no ``date`` column.
    """
    df = pd.read_csv(path)
    df['date'] = pd.to_datetime(df['date'])
    return df


In [6]:
def my_log(x, base = 10):
    """Return the logarithm of ``x`` in the given base, or NaN if ``x <= 0``.

    Args:
        x: Real number to take the logarithm of.
        base: Logarithm base, 10 by default.

    Returns:
        float: ``log_base(x)`` for positive ``x``; ``math.nan`` when ``x`` is
        zero or negative, where the logarithm is undefined.

    Raises:
        AssertionError: If ``math.log`` rejects the arguments (for example a
            non-positive ``base``). The exception type matches the previous
            ``assert False``, but is raised explicitly so it still fires when
            Python runs with assertions disabled (``-O``).
    """
    if x <= 0:
        return math.nan
    # math.log(x, base) is computed as log(x) / log(base), which is inexact
    # even for exact powers (math.log(1000, 10) == 2.9999999999999996).
    # The dedicated functions do not have that rounding problem.
    if base == 10:
        return math.log10(x)
    if base == 2:
        return math.log2(x)
    try:
        return math.log(x, base)
    except ValueError as err:
        raise AssertionError(
            'cannot compute log of {x!r} in base {base!r}'.format(x = x, base = base)
        ) from err

In [7]:
def incidents_over_time_bar(df,  window= 3, plot_height = 250,
             plot_width = 250, title = None, line_width = 2, y_range = None,
                           x_range=None, alpha = .1, use_log = False):
    """Draw rolling-mean deaths and cases as overlaid bars on a datetime axis.

    Args:
        df: DataFrame with ``date``, ``deaths`` and ``cases`` columns.
        window: Size of the rolling window used to average both series.
        plot_height: Passed to ``figure`` as ``plot_height``.
        plot_width: Passed to ``figure`` as ``plot_width``.
        title: Figure title.
        line_width: Outline width used for both sets of bars.
        y_range: Passed through to ``figure``.
        x_range: Passed through to ``figure``.
        alpha: Alpha of the green ``cases`` bars (the ``deaths`` bars keep the
            default styling).
        use_log: When true, bar heights are ``my_log`` (base 10) of the rolling
            means; non-positive or missing means become NaN.

    Returns:
        The figure created by ``figure(...)`` with two ``vbar`` glyphs:
        deaths first, then cases on top.

    Notes:
        An empty ``df`` now yields a figure with no bars instead of raising
        ``IndexError``. The y-axis label states when values are log10.
    """
    labels = df['date'].tolist()
    # Date-like values are rebuilt as midnight datetime.datetime objects, so any
    # time-of-day component is dropped. `labels and` keeps an empty frame from
    # failing on labels[0].
    if labels and isinstance(labels[0], datetime.date):
        labels = [datetime.datetime(x.year, x.month, x.day) for x in labels]
    dea = _rolling_mean_series(df, 'deaths', window, use_log)
    cas = _rolling_mean_series(df, 'cases', window, use_log)
    # NOTE(review): newer Bokeh releases renamed plot_width/plot_height to
    # width/height -- confirm against the installed Bokeh version.
    p = figure(x_axis_type = 'datetime', title = title,
               plot_width = plot_width, plot_height = plot_height,
               y_range = y_range, x_range = x_range)
    # NOTE(review): vbar width is in x-axis data units; on a datetime axis .9 is
    # presumably a sub-millisecond span, so the visible thickness would come
    # from line_width -- confirm this is intended.
    p.vbar(x=labels, top=dea, line_width = line_width, width = .9)
    p.vbar(x=labels, top=cas, line_width = line_width, width = .9, color= 'green',
           alpha = alpha)
    # The plotted heights are log10 values in log mode, so the label says so.
    p.yaxis.axis_label = 'log10(cases/day)' if use_log else 'cases/day'
    return p


def _rolling_mean_series(df, column, window, use_log):
    """Rolling mean of ``df[column]``; a list of ``my_log`` values if ``use_log``."""
    smoothed = df[column].rolling(window).mean()
    if use_log:
        return [my_log(value) for value in smoothed]
    return smoothed


In [8]:
def main(state):
    """Build the log-scale incidents bar chart for a single state.

    Args:
        state: Value matched against the ``state`` column of the table
            returned by ``get_states_data``; it is also used as the title.

    Returns:
        The figure produced by ``incidents_over_time_bar`` with
        ``window = 1``, ``alpha = .3`` and ``use_log = True``.
    """
    df_states = get_states_data()
    df_state = df_states[df_states['state'] == state]
    return incidents_over_time_bar(df_state, alpha = .3, window = 1,
                                   title = state, use_log = True)


In [9]:
def all_states():
    """Build one log-scale incidents bar chart per state.

    The table from ``get_states_data`` is partitioned once with ``groupby``
    rather than re-filtering the whole frame for every state. Groups come
    back in sorted key order, so the charts are ordered by state name.
    Rows whose ``state`` is missing are skipped by ``groupby``.

    Returns:
        list: The figures returned by ``incidents_over_time_bar``, one per
        distinct ``state`` value, each titled with that value.
    """
    df_states = get_states_data()
    return [
        incidents_over_time_bar(df_state, alpha = .3, window = 1,
                                title = state, use_log = True)
        for state, df_state in df_states.groupby('state', sort = True)
    ]
# Build the chart for every state and display them in a grid four columns wide.
g = all_states()
grid = gridplot(g, ncols = 4)
show(grid)

In [10]:
def selected_states(states = ("Connecticut", 'Florida', 'Washington'), use_log = True):
    """Build larger (450x450) incidents bar charts for a handful of states.

    The title suffix and the plotted scale both come from ``use_log``.
    Previously the title always read "Linear" while log values were plotted.
    The default keeps the log data that was actually drawn and labels it
    "Log".

    Args:
        states: Iterable of values matched against the ``state`` column.
        use_log: Plot log10 values and title the chart "<state> Log" when
            true; plot raw values titled "<state> Linear" otherwise.

    Returns:
        list: One figure per entry of ``states``, in the given order, each
        built by ``incidents_over_time_bar`` with ``window = 3`` and
        ``alpha = .3``.
    """
    df_states = get_states_data()
    scale = 'Log' if use_log else 'Linear'
    ps = []
    for state in states:
        df_state = df_states[df_states['state'] == state]
        p = incidents_over_time_bar(df_state, alpha = .3, window = 3,
                                    title = '{i} {scale}'.format(i = state, scale = scale),
                                    use_log = use_log,
                                    plot_height = 450, plot_width = 450)
        ps.append(p)
    return ps
# Build the charts for the selected states and display them in a grid with
# four columns. This rebinds the module-level names `g` and `grid`.
g = selected_states()
grid = gridplot(g, ncols = 4)
show(grid)