In [1]:
import os
import datetime
import random
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.io import output_notebook
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, Band
from bokeh.embed import components
from bokeh.models import Range1d

import numpy as np

In [2]:
output_notebook()

In [3]:
import math
import scipy.optimize as optim
import pandas as pd
import statsmodels.api as sm
from scipy.stats import ttest_ind

In [4]:
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.io import output_notebook
from bokeh.layouts import gridplot
from bokeh.models import Arrow, NormalHead, OpenHead, VeeHead
from bokeh.models import Label
from bokeh.models import Span
from bokeh.embed import components


In [5]:
def get_data():
    """Load the per-state table from ``data/states.csv``.

    Rows whose ``state`` is Guam or Northern Mariana Islands are dropped,
    the ``date`` column is parsed to datetimes, and the frame is returned
    ordered by ``date``.
    """
    excluded = ['Guam', 'Northern Mariana Islands']
    raw = pd.read_csv('data/states.csv')
    kept = raw[~raw['state'].isin(excluded)]
    kept = kept.assign(date = pd.to_datetime(kept['date']))
    return kept.sort_values(by = ['date'])


In [6]:

def get_pop_data():
    """Load ``data/seven_day_state.csv`` with ``date`` parsed, ordered by date.

    Unlike ``get_data`` no states are filtered out here.
    """
    return (
        pd.read_csv('data/seven_day_state.csv')
        .assign(date = lambda frame: pd.to_datetime(frame['date']))
        .sort_values(by = ['date'])
    )

def get_pop():
    """Return a ``{state: population}`` dict built from ``get_pop_data()``.

    The distinct (state, population) pairs are collapsed into a dict; if a
    state appears with more than one population value, whichever pair is
    iterated last wins.
    """
    frame = get_pop_data()
    distinct_pairs = set(zip(frame['state'], frame['population']))
    return dict(distinct_pairs)


In [7]:
def get_rate(df, state, leng, key):
    """Compare the latest ``leng`` values of ``key`` for a state with the ``leng`` before them.

    Rows are taken in the order they appear in ``df`` for the given state.

    Returns:
        (ratio, p_value): mean of the latest window divided by the mean of
        the preceding window, and the two-sample t-test p-value between the
        two windows rounded to 2 decimals.

    Raises:
        AssertionError: if either window holds fewer than ``leng`` values.
    """
    values = df.loc[df['state'] == state, key].tolist()
    recent = values[-leng:]
    earlier = values[-2 * leng:-leng]
    assert len(earlier) == leng
    assert len(recent) == leng
    significance = round(ttest_ind(recent, earlier).pvalue, 2)
    ratio = np.mean(recent) / np.mean(earlier)
    return ratio, significance


In [8]:
def get_linear(y):
    """Fit an OLS line through ``y`` against its positions 0..len(y)-1.

    Returns:
        (intercept, slope, p_value) where ``p_value`` is the p-value
        statsmodels reports for the slope coefficient.
    """
    # One single-column row per observation; add_constant prepends the intercept column.
    positions = [(i,) for i in range(len(y))]
    design = sm.add_constant(positions)
    fit = sm.OLS(y, design).fit()
    intercept, slope = fit.params[0], fit.params[1]
    return intercept, slope, fit.pvalues[1]


def get_state_linear(df, state, leng, key):
    """Run ``get_linear`` on the last ``leng`` values of ``key`` for one state.

    Rows are used in the order they appear in ``df``.

    Raises:
        AssertionError: if the state has fewer than ``leng`` rows.
    """
    tail = df.loc[df['state'] == state, key].tolist()[-leng:]
    assert len(tail) == leng
    return get_linear(tail)
# Example: (intercept, slope, p-value) for Washington's last 14 'cases' rows; echoed as the cell output.
get_state_linear(get_data(), 'Washington', 14, 'cases')


(1945.9142857142863, -2.7670329670329323, 0.9728000466736413)

In [9]:
def test_slope(df, state, leng, key, window = None):
    """Plot the last ``leng`` values of ``key`` for a state with their OLS trend line.

    Args:
        df: frame with at least ``state``, ``date`` and ``key`` columns.
        state: value of the ``state`` column to select.
        leng: number of trailing rows (after sorting by date) to fit.
        key: name of the column to plot and fit.
        window: optional rolling-mean window applied to ``key`` before the
            trailing rows are taken; falsy means use the raw values.

    Returns:
        A bokeh figure with the observed series and the fitted line.
        The slope's p-value is printed as a side effect.
    """
    df_s = df[df['state'] == state].sort_values(by = ['date'])
    # Use the requested column; the series was previously hard-coded to
    # 'cases', which silently ignored ``key``.
    series = df_s[key]
    if window:
        series = series.rolling(window).mean()
    y = series.tolist()[-1 * leng:]

    inter, slope, p_value = get_linear(y)
    print('p value is {p}'.format(p = p_value))
    dates = df_s['date'].tolist()[-1 * leng:]
    # Size the fitted line from the data actually available so that both
    # lines have matching lengths even when fewer than ``leng`` rows exist.
    y_hat = [i * slope + inter for i in range(len(y))]
    fig = figure(x_axis_type = 'datetime')
    fig.line(x = dates, y = y)
    fig.line(x = dates, y = y_hat)
    return fig
# Example: 7-row trend for Colorado on the raw (unsmoothed) 'cases' column; the p-value is printed below.
show(test_slope(get_data(), 'Colorado', 7, 'cases', window = None))
    

p value is 0.0018343082573693118


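Metrics to consider:
* whether the state is well below its peak
* whether the state has a low number of cases per million
* whether the state has had a declining slope for a longer period (open question: how should this be measured?)
* whether the slope is increasing but not statistically significant (open question)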

In [10]:
def all_states_info(df, leng):
    """Return ``(state, ratio)`` for every state in ``df``.

    ``ratio`` is the first element returned by ``get_rate`` for the
    ``'cases'`` column over ``leng`` rows; the p-value is discarded.
    The order of the result follows set iteration and is not sorted.
    """
    return [
        (state, get_rate(df, state, leng, 'cases')[0])
        for state in set(df['state'].tolist())
    ]

In [11]:
def make_bar(labels_neg, labels_pos, y_neg, y_pos, labels_neutral = None, y_neutral = None, plot_height = 450, 
             plot_width = 450, title = None):
    """Draw a categorical bar chart with green, (optional) yellow and red groups.

    Categories are laid out left to right as negative, neutral (only when
    ``labels_neutral`` is non-empty), then positive labels.

    Returns:
        The bokeh figure.
    """
    if labels_neutral:
        categories = labels_neg + labels_neutral + labels_pos
    else:
        categories = labels_neg + labels_pos

    # (labels, bar tops, colour) in drawing order.
    groups = [(labels_neg, y_neg, 'green')]
    if labels_neutral:
        groups.append((labels_neutral, y_neutral, 'yellow'))
    groups.append((labels_pos, y_pos, 'red'))

    fig = figure(x_range = categories, plot_height = plot_height,
                 plot_width = plot_width, title = title)
    for names, tops, shade in groups:
        fig.vbar(x = names, top = tops, line_width = 5, width = .9, color = shade)
    fig.xaxis.major_label_orientation = "vertical"
    fig.xgrid.grid_line_color = None
    return fig

In [12]:
def all_states(leng):
    """Bar chart of every state's case ratio over a ``leng``-row window.

    States are sorted by ratio and split into three colour groups:
    ratio <= 1, 1 < ratio <= 1.1, and ratio > 1.1.

    Returns:
        The bokeh figure produced by ``make_bar``.
    """
    neutral_n = 1.1
    ranked = sorted(all_states_info(get_data(), leng), key = lambda pair: pair[1])

    falling = [pair for pair in ranked if pair[1] <= 1]
    steady = [pair for pair in ranked if 1 < pair[1] <= neutral_n]
    rising = [pair for pair in ranked if pair[1] > neutral_n]

    def names(pairs):
        return [pair[0] for pair in pairs]

    def ratios(pairs):
        return [pair[1] for pair in pairs]

    return make_bar(labels_neg = names(falling),
                    y_neg = ratios(falling),
                    labels_pos = names(rising),
                    y_pos = ratios(rising),
                    labels_neutral = names(steady),
                    y_neutral = ratios(steady),
                    plot_width = 800,
                    title = '{l} -day'.format(l = leng))
# Render the ratio chart for 7-, 14- and 28-row comparison windows.
for leng in (7, 14, 28):
    show(all_states(leng))




In [13]:
def make_bar_pop(labels, y, plot_height = 450, 
             plot_width = 450, title = None, mask_dict = {}):
    """Vertical bar chart with one bar per label, coloured by ``mask_dict``.

    A label whose ``mask_dict`` entry is truthy is drawn green with the
    'Masked Mandate' legend entry; every other label is drawn blue with
    'No Mandate'. ``mask_dict`` is only read, never modified.

    Returns:
        The bokeh figure.
    """
    fig = figure(x_range = labels, plot_height = plot_height,
                 plot_width = plot_width, title = title)
    for position, name in enumerate(labels):
        if mask_dict.get(name):
            shade, entry = 'green', 'Masked Mandate'
        else:
            shade, entry = 'blue', 'No Mandate'
        # One glyph per bar so each can carry its own colour and legend entry.
        fig.vbar(x = [name], top = y[position], line_width = 5, width = .5,
                 color = shade, legend_label = entry)
    fig.xgrid.grid_line_color = None
    fig.xaxis.major_label_orientation = "vertical"
    fig.yaxis.axis_label = 'Cases/100,000'
    fig.legend.location = 'top_left'

    return fig
    #p.y_range 

In [14]:
def get_mask():
    """Read ``data/masks_states.csv`` and return ``[(state, date), ...]``.

    ``date`` is parsed to a pandas timestamp; pairs keep the file's row order.
    """
    mandates = pd.read_csv('data/masks_states.csv')
    mandates['date'] = pd.to_datetime(mandates['date'])
    return list(zip(mandates['state'], mandates['date']))



In [15]:
def cases_with_pop(back = 7, per = 100000):
    """Mean recent cases per ``per`` residents for each state, lowest first.

    "Recent" means rows dated on or after the current wall-clock time minus
    ``back`` days. Puerto Rico and Virgin Islands are skipped.

    Returns:
        A list of ``(state, rate)`` tuples sorted ascending by rate.
    """
    df = get_data()
    pop = get_pop()
    cutoff = datetime.datetime.now() - datetime.timedelta(days = back)
    skipped = ("Puerto Rico", "Virgin Islands")

    rates = []
    for state in set(df['state'].tolist()):
        if state in skipped:
            continue
        recent = df[(df['state'] == state) & (df['date'] >= cutoff)]
        rates.append((state, np.mean(recent['cases']) / pop[state] * per))
    rates.sort(key = lambda pair: pair[1])
    return rates


In [16]:
# Chart the last week's population-adjusted cases, coloured by whether the
# state has an entry in the mask-mandate file.
case_with_pop = cases_with_pop(back = 7)
labels = [state for state, _rate in case_with_pop]
y = [rate for _state, rate in case_with_pop]
masks = get_mask()
d = {state: date for state, date in masks}
p = make_bar_pop(labels, y, plot_height = 450, plot_width = 750, title = None, mask_dict = d)
show(p)


In [17]:
def make_bar_pop2_(labels, y, plot_height = 450, 
             plot_width = 450, title = None, vote_dict = {}):
    """Vertical bar chart with one bar per label, coloured by ``vote_dict``.

    Labels mapped to ``'D'`` are blue; everything else (including labels
    missing from ``vote_dict``) is red. ``vote_dict`` is only read.

    Returns:
        The bokeh figure.
    """
    fig = figure(x_range = labels, plot_height = plot_height,
                 plot_width = plot_width, title = title)
    for position, name in enumerate(labels):
        shade = 'blue' if vote_dict.get(name) == 'D' else 'red'
        fig.vbar(x = [name], top = y[position], line_width = 5, width = .5, color = shade)
    fig.xgrid.grid_line_color = None
    fig.xaxis.major_label_orientation = "vertical"

    return fig
    #p.y_range 
def make_bar_pop2(labels, y, plot_height = 450, 
             plot_width = 450, title = None, vote_dict = {}):
    """Horizontal bar chart with one bar per label, coloured by ``vote_dict``.

    Args:
        labels: category names, used as the categorical y-range.
        y: bar lengths, indexed in step with ``labels``.
        plot_height, plot_width, title: passed through to the bokeh figure.
        vote_dict: maps a label to a party code; ``'D'`` is drawn blue and
            anything else (including missing labels) red. Only read here.

    Returns:
        The bokeh figure.
    """
    y_range = labels
    p = figure(y_range = y_range , plot_height = plot_height, 
               plot_width = plot_width, title = title)
    for counter, i in enumerate(labels):
        if vote_dict.get(i) == 'D':
            color = 'blue'
        else:
            color = 'red'
        # A horizontal bar's thickness is its ``height``; ``width`` belongs
        # to vbar and is not a documented hbar parameter.
        p.hbar(y=[i], right=y[counter] , line_width = 5, height = .5, color = color)
    p.xaxis.axis_label = 'Deaths per Million'
    p.xgrid.grid_line_color = None

    return p

In [18]:
def by_deaths():
    """Horizontal bar chart of deaths per million by state, coloured by party.

    Reads ``data/party_cases_deaths_pop.csv``, derives
    ``deaths / population * 1e6`` and orders states ascending by it.

    Returns:
        The bokeh figure produced by ``make_bar_pop2``.
    """
    source = pd.read_csv('data/party_cases_deaths_pop.csv')
    ranked = (
        source
        .assign(deaths_per_million = source['deaths']/source['population'] * 1e6)
        .sort_values(by = 'deaths_per_million')
    )
    states = ranked['state'].tolist()
    vote_dict = dict(zip(states, ranked['party'].tolist()))
    return make_bar_pop2(states, y = ranked['deaths_per_million'].tolist(),
                         plot_height = 450, plot_width = 800, vote_dict = vote_dict,
                         title = "Misleading Graph: Deaths/million per state")

# Render the party-coloured deaths-per-million chart.
show(by_deaths())
    

In [19]:
def make_bar_pop3(labels, y, plot_height = 450, 
             plot_width = 450, title = None):
    """Single-colour vertical bar chart labelled 'Deaths/1,000,000' on the y-axis.

    Returns:
        The bokeh figure.
    """
    fig = figure(x_range = labels, plot_height = plot_height,
                 plot_width = plot_width, title = title)
    fig.vbar(x = labels, top = y, line_width = 3, width = .5)
    fig.yaxis.axis_label = 'Deaths/1,000,000'
    fig.xgrid.grid_line_color = None
    fig.xaxis.major_label_orientation = "vertical"

    return fig
    #p.y_range 

def deaths_recent(back = 7):
    """Bar chart of each state's mean recent ``new_deaths`` per million residents.

    Reads ``data/seven_day_state.csv`` and keeps rows dated strictly after
    the current wall-clock time minus ``back`` days. States are ordered
    ascending by the resulting rate.

    Args:
        back: size of the look-back window in days. The chart title reports
            ``back - 1`` days.
            NOTE(review): presumably because the strict wall-clock cutoff
            drops one calendar day — confirm.

    Returns:
        The bokeh figure produced by ``make_bar_pop3``. The set of dates that
        fell inside the window is printed as a diagnostic.
    """
    df = pd.read_csv('data/seven_day_state.csv')
    df['date'] = pd.to_datetime(df['date'])
    cutoff = datetime.datetime.now() - datetime.timedelta(days = back)
    df = df[df['date'] > cutoff]
    # Diagnostic: shows which calendar days the window actually covers.
    dates = set(df['date'].tolist())
    print(dates)
    data = []
    for state in sorted(set(df['state'])):
        df_ = df[df['state'] == state]
        # Take the population from the state's first row. The previous
        # ``list(set(...))[0]`` picked an arbitrary element whenever a state
        # carried more than one population value; this gives the same result
        # when the value is constant and a deterministic one otherwise.
        population = df_['population'].iloc[0]
        death_million = float(np.mean(df_['new_deaths']) / population * 1e6)
        data.append((state, death_million))
    data = sorted(data, key = lambda x: x[1])
    return make_bar_pop3(labels = [x[0] for x in data], y = [x[1] for x in data],
                        plot_width = 800, title = 'Mean Deaths last {x} days'.format(x = back -1))
    

# back = 8 yields the chart titled "last 7 days"; the dates covered are printed below.
show(deaths_recent(back = 8))


{Timestamp('2021-01-01 00:00:00'), Timestamp('2020-12-30 00:00:00'), Timestamp('2020-12-29 00:00:00'), Timestamp('2020-12-27 00:00:00'), Timestamp('2020-12-26 00:00:00'), Timestamp('2020-12-31 00:00:00'), Timestamp('2020-12-28 00:00:00')}


In [20]:
def wash(window = 7):
    """Bar chart of Washington's ``cases`` smoothed with a ``window``-row rolling mean.

    Reads ``data/states.csv`` directly and uses rows in file order.

    Returns:
        The bokeh figure.
    """
    states = pd.read_csv('data/states.csv')
    states['date'] = pd.to_datetime(states['date'])
    washington = states[states['state'] == 'Washington']
    smoothed = washington['cases'].rolling(window).mean()

    fig = figure(x_axis_type = 'datetime', title = 'wash',
                 plot_width = 650, plot_height = 650, y_range = None)
    # NOTE(review): on a datetime axis bar width is presumably in milliseconds,
    # so width = .5 is sub-pixel and the visible thickness comes from
    # line_width — confirm.
    fig.vbar(x = washington['date'], top = smoothed, line_width = 5, width = .5)
    return fig
# Render the Washington chart with the default 7-row rolling mean.
show(wash())

In [21]:
def seattle(window = 7):
    """Chart King County, Washington new cases per 100,000 residents.

    Reads ``data/seven_day_county.csv`` and uses rows in file order. Daily
    values are drawn as faint bars and their ``window``-row rolling mean as
    a red line. The title shows the last raw ``new_cases`` value.

    Returns:
        The bokeh figure.
    """
    counties = pd.read_csv('data/seven_day_county.csv')
    counties['date'] = pd.to_datetime(counties['date'])
    in_king = (counties['state'] == 'Washington') & (counties['county']== 'King')
    king = counties[in_king]
    king = king.assign(by_pop = king['new_cases']/king['population'] * 1e5)

    daily_rate = king['by_pop']
    smoothed_rate = daily_rate.rolling(window).mean()
    latest_count = king['new_cases'].tolist()[-1]
    when = king['date']

    fig = figure(x_axis_type = 'datetime', title = 'Seattle {c}'.format(c = latest_count),
                 plot_width = 650, plot_height = 650, y_range = None)
    # NOTE(review): on a datetime axis bar width is presumably in milliseconds,
    # so the bars' visible thickness comes from line_width — confirm.
    fig.vbar(x = when, top = daily_rate, line_width = 5, width = .5, alpha = .1)
    fig.line(x = when, y = smoothed_rate, legend_label = '{n}'.format(n = window), color = 'red')
    fig.legend.location = 'top_left'

    return fig
# Render the King County chart with a 7-row rolling mean.
show(seattle(window =7))