In [1]:
import os
import datetime
import random
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.io import output_notebook, reset_output
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, Band
from bokeh.embed import components
from bokeh.models import Range1d
from bokeh.models import NumeralTickFormatter
from bokeh.models import Span
from bokeh.models import FuncTickFormatter
from bokeh.models import Arrow, NormalHead, OpenHead, TeeHead, VeeHead
from bokeh.models import LabelSet
from bokeh.models.annotations import Label


import numpy as np
import pandas as pd

import scipy
from scipy import stats
import math
import pendulum

In [2]:
def get_data():
    df = pd.read_csv('data/states.csv')
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values(by=['date'])

    return df

def get_population():
    df = pd.read_csv('data/states_population.csv')
    return df

def get_hospital():
    df = pd.read_csv('data/hospital.csv')
    df['date'] = pd.to_datetime(df['date'])    
    df = df.assign(hospitalized = df['hospitalized_currently'])
    df = df.sort_values(by=['date'])
    return df

def filter_list(state, x, y):
    z = list(zip(x,y))
    if state == 'Ohio':
        z = [x for x in z if x[1] < 10]
    z = [x for x in z if x[1] >= 0]
    x = [x[0] for x in z]
    y = [x[1] for x in z]
    return x, y


In [21]:
output_notebook()
#reset_output()
def make_cases(the_type, plot_width = 300, plot_height = 300, window =7,
              the_list = None, min_date = datetime.datetime(2020, 3, 1)):
    labels_dict = {'cases':'Cases/100,000',
                  'deaths': 'Deaths/1,000,000',
                   'hospitalized': "Hospitalized/100,000",

                  }
    if the_type in (['deaths', 'cases']):
        df_us = get_data()
    else:
        df_us = get_hospital()
    plots = []
    def make_pop_dict():
        population = get_population()
        pop_dict = {}
        states = population['state']
        nums = population['population_2019']
        for counter, i in enumerate(states):
            pop_dict[i] = nums[counter]
        return pop_dict
    pop_dict = make_pop_dict()
    def get_y(the_type, state, df_s, pop_dict, window, per = 100000):
        y = df_state[the_type].rolling(window).mean()
        y_ = []
        for i in y:
            if math.isnan(i):
                y_.append(0)
            else:
                y_.append(i)
        y = [x/pop_dict[state] * per for x in y_]
        return y
    if not the_list:
        the_list = sorted(set(df_us['state'].tolist()))
    

    for i in the_list:
        if i in ['Guam', 'Northern Mariana Islands', 'Puerto Rico', 'Virgin Islands']:
            continue
        df_state = df_us[(df_us['state'] == i) & (df_us['date'] >= min_date) ]
        if the_type == 'cases':
            df_state = df_state[(df_state['cases'] > 0) ]

        per = 100000
        if the_type == 'deaths':
            per = 1e6
        elif the_type == 'hospitalized':
            per = 100000
        y = get_y(the_type = the_type, 
                  state = i, df_s = df_state, pop_dict = pop_dict, 
                  window = window, per = per)
        x = df_state['date'].tolist()
        #x, y = filter_list(i, x, y)
        x, y = x[window:], y[window:]
        p = figure(x_axis_type = 'datetime', title = i, 
                 plot_width = plot_width , plot_height = plot_height)
        p.line(x = x, y = y)
        p.yaxis.axis_label = labels_dict[the_type]
        p.xaxis.axis_label = 'Date'


        plots.append(p)
    return gridplot(plots, ncols = 3)

the_list = None
the_list = ['California',  'Arizona', 'Georgia',
     'Michigan', 'Massachusetts', 'Pennsylvania', 'Indiana', 'North Carolina', 'Texas',
      'Washington', 'Florida','Georgia', 'Oregon', 'Ohio']
#the_list = None
the_type = 'cases'
show(make_cases(the_type = the_type, the_list = the_list,  window = 7,
               min_date = datetime.datetime(2021,1,5)))

        


In [4]:
def exp_func(x, initial, ratio):
    return initial * np.power(ratio, x - 1)

In [5]:
output_notebook()
#reset_output()
def bam_yam(the_type, plot_width = 300, plot_height = 300, window =7,
              the_list = None, min_date = datetime.datetime(2020, 3, 1)):
    labels_dict = {'cases':'Cases/100,000',
                  'deaths': 'Deaths/1,000,000',
                   'hospitalized': "Hospitalized/100,000",

                  }
    if the_type in (['deaths', 'cases']):
        df_us = get_data()
    else:
        df_us = get_hospital()
    plots = []
    def add_yaneer(state, p):
        ratio_dict = {'California': (2500, 1.025),
         'Florida': (5100, 1.015),'Arizona': (700,1.035), 'Georgia':(1300,1.04),
        'Massachusetts':(2200, 1.02), 'Pennsylvania':(4000, 1.02),
        'Indiana':(1000, 1.04), 'Michigan':(5000, 1.03), 'North Carolina':(2000, 1.07),
        'Texas':(4000, 1.05)
        }
       
        y = [ratio_dict.get(state, (1000, None))[0]]
        x = [datetime.datetime(2021,4, 1)]
        r = ratio_dict.get(state, (None, 1))[1]
        while 1:
            next_date = x[-1] + datetime.timedelta(days = 1)
            if next_date > datetime.datetime(2021, 5,1):
                break
            x.append(next_date)
            y.append(exp_func(len(x), initial = y[0], ratio = r))
        p.line(x =x, y = y, color = 'red', legend_label = 'Yaneer')
        return
    if not the_list:
        the_list = sorted(set(df_us['state'].tolist()))

    for i in the_list:
        df_state = df_us[(df_us['state'] == i) & (df_us['date'] >= min_date) 
            & (df_us['date'] <= datetime.datetime(2021,5,1)) ]
        y = df_state[the_type].rolling(window).mean()
        x = df_state['date'].tolist()
        x, y = filter_list(i, x, y)
        x, y = x[window -1:], y[window -1:]
        p = figure(x_axis_type = 'datetime', title = i, 
                 plot_width = plot_width , plot_height = plot_height)
        p.line(x = x, y = y)
        add_yaneer(state= i, p = p)
        p.xaxis.axis_label = 'Date'
        p.yaxis[0].formatter = NumeralTickFormatter(format="0,")
        if i in ['Massachusetts', 'Pennsylvania', 'Indiana', 'Michigan',
                'North Carolina', 'Texas']:
            p.legend.location = 'top_center'

        plots.append(p)
    return gridplot(plots, ncols = 3)

the_list = ['California', 'Florida', 'Arizona', 'Georgia',
     'Michigan', 'Massachusetts', 'Pennsylvania', 'Indiana', 'North Carolina', 'Texas']
#the_list = ['Massachusetts']
show(bam_yam(the_type = 'cases', the_list = the_list,  window = 7,
               min_date = datetime.datetime(2021,1,15)))

        


In [6]:
def hospitals(plot_width = 300, plot_height = 300, ncols = 3, 
              use_new_hos = False, by_pop = False, window = 7,
             max_date = datetime.datetime(2020,10,1)):
    output_notebook()
    df = pd.read_csv('data/us_cum.csv')
    df['date'] = pd.to_datetime(df['date'])
    df = df.assign(deaths_million = df['deaths']/382e6 * 1e6)
    df = df.assign(cases_million = df['cases']/382e6 * 1e5)
    df = df.assign(new_hos_100k = df['new_hos']/382e6 * 1e6)
    df = df.assign(hos_100k = df['hos']/382e6 * 1e6)


    if use_new_hos and by_pop:
        df = df[(df['new_hos'] > 0) & (df['new_hos'] < 5000)]
        hos_key = 'new_hos_100k'
        hos_label = 'Newly Hospitalized/1M'
    elif use_new_hos:
        df = df[(df['new_hos'] > 0) & (df['new_hos'] < 5000)]
        hos_key = 'new_hos'
        hos_label = 'Newly Hospitalized' 
    elif not use_new_hos and by_pop:
        hos_key = 'hos_100k'
        hos_label = 'Currently Hospitalized/1M' 
    else:
        hos_key = 'hos'
        hos_label= 'Currenty Hospitalized'
    if by_pop:
        deaths_key = 'deaths_million'
        cases_key = 'cases_million'
        cases_label = 'Cases/100K'
        deaths_label = 'Deaths/1M'
    else:
        deaths_key = 'deaths'
        cases_key = 'cases'
        deaths_label = 'Deaths'
        cases_label = 'Cases'
    df = df[df['date'] > max_date]
    p1 = figure (x_axis_type = 'datetime',  plot_width = plot_width, plot_height = plot_height,
                 title = "USA Hospitalizations")
    p1.line(x = df['date'].tolist(), y = df[hos_key].rolling(window).mean().tolist())
    p1.xaxis.axis_label = 'Date'
    p1.yaxis.axis_label = hos_label

    p2 = figure (x_axis_type = 'datetime',  plot_width = plot_width, plot_height = plot_height, 
                 title = "USA deaths")
    p2.line(x = df['date'].tolist(), y = df[deaths_key]. rolling(window).mean())
    p2.xaxis.axis_label = 'Date'
    p2.yaxis.axis_label = deaths_label

    p3 = figure (x_axis_type = 'datetime',  plot_width = plot_width, plot_height = plot_height, 
                 title = "USA Cases")
    p3.line(x = df['date'].tolist(), y = df[cases_key]. rolling(window).mean())
    p3.xaxis.axis_label = 'Date'
    p3.yaxis.axis_label = cases_label
    if not by_pop:
        p1.yaxis[0].formatter = NumeralTickFormatter(format="0,")
        p2.yaxis[0].formatter = NumeralTickFormatter(format="0,")
        p3.yaxis[0].formatter = NumeralTickFormatter(format="0,")


    g = gridplot([p1, p2, p3], ncols = ncols)
    return g

show(hospitals(use_new_hos = False, by_pop = True, window = 7,
              max_date = datetime.datetime(2020,4,1)))


In [7]:
def usa(plot_width = 500, plot_height = 500, ncols = 3, 
        min_date = datetime.datetime(2020, 10, 1), 
        target_date = datetime.datetime(2020,8,1), window = 7,
       use_mean_as_base = False):
    output_notebook()
    if target_date < min_date:
        target_date = min_date + datetime.timedelta(days = window)
    def make_y(l):
        final = []
        if use_mean_as_base:
            base= float(np.nanmean([x[1] for x in l]))
        else:
            base = None
            for i in l:
                if i[0] == target_date:
                    base = i[1]
                    break
        for i in l:
            final.append( i[1]/base * 100)
        return final
    df = pd.read_csv('data/us_cum.csv')
    df['date'] = pd.to_datetime(df['date'])
    df.sort_values(by=['date'], inplace = True)
    df = df[df['date'] > min_date]
    hos = df['hos'].rolling(window).mean().tolist()
    dates = df['date'].tolist()
    zz = list(zip(dates, hos))
    y_hos = make_y(zz)
    deaths = df['deaths'].rolling(window).mean().tolist()
    zz = list(zip(dates, deaths))
    y_deaths = make_y(zz)
    cases = df['cases'].rolling(window).mean().tolist()
    zz = list(zip(dates, cases))
    y_cases= make_y(zz)
    p1 = figure (x_axis_type = 'datetime',  plot_width = plot_width, 
                 plot_height = plot_height,
                 title = "USA")
    diff = [y_cases[x] - y_deaths[x] for x in range(len(y_hos))]
    p2 = figure(title = "cases minus deaths", x_axis_type = 'datetime')
    p2.line(x = dates, y = diff)
    p1.line(x = dates, y = y_hos, legend_label = "hospitalized")
    p1.line(x = dates, y = y_deaths, legend_label = "deaths", color = 'red')
    p1.line(x = dates, y = y_cases, legend_label = "cases", color = 'green')

    p1.xaxis.axis_label = 'Date'
    p1.yaxis.axis_label = 'Percent Change'
    p1.yaxis[0].formatter = NumeralTickFormatter(format="0,")
    arrow_top = 150
    p1.legend.location = 'top_left'
    arrow1 = Arrow(x_start=datetime.datetime(2021,3,29), y_start=arrow_top -13, 
                   x_end=datetime.datetime(2021,3,29), y_end=100,
               line_color='black', line_alpha=0.7,
                line_width=1, end=OpenHead()
               )
    p1.add_layout(arrow1)
    label = Label(x=datetime.datetime(2021,3,29), y=arrow_top, x_offset=-40,  text="Impending",
                 text_font_size = '10pt')
    label2 = Label(x=datetime.datetime(2021,3,29), y=arrow_top - 11, x_offset=-30,  
                   text="Doom!", text_font_size = '10pt')

    p1.add_layout(label)
    p1.add_layout(label2)

    #p1.circle(x = datetime.datetime(2021,3,29), y = 75, color = 'red',
             #size = 10, legend_label = 'Impenpding Doom')


    #return p1

    g = gridplot([p1, ], ncols = ncols)
    return g

show(usa(min_date = datetime.datetime(2020, 11, 1), use_mean_as_base = False))

In [8]:
output_notebook()
#Yaneer Bar-Yam
def make_per_change(plot_width = 300, plot_height = 300, window =7,
              the_list = None, target_date = datetime.datetime(2020, 10, 1),
                   use_mean = False):
    labels_dict = {'cases':'Cases/100,000',
                  'deaths': 'Deaths/1,000,000',
                   'hospitalized': "Hospitalized/100,000",

                  }
    df_cases = get_data()
    df_hosp = get_hospital()
    plots = []
    
    def get_y(the_type, state, df_s, window, target_date):
        y = df_s[the_type].rolling(window).mean()

        final = []
        dates = df_s['date']
        zz = list(zip(dates, y))
        base = None
        for i in zz:
            if i[0] == target_date:
                base = i[1]
                if base == 0:
                    base = 1
                break
        if use_mean:
            base= float(np.nanmean([x for x in y]))
        if base == 0:
            base = 1

        for i in zz:
            final.append((i[0],i[1]/base * 100))
        return final
    if not the_list:
        the_list = sorted(set(df_cases['state'].tolist()))

    for i in the_list:
        if i in ['Guam', 'Northern Mariana Islands', 'Puerto Rico', 'Virgin Islands']:
            continue
        df_state = df_cases[(df_cases['state'] == i) & 
           (df_cases['date'] >= target_date - datetime.timedelta(window))]
        df_state_hosp = df_hosp[(df_hosp['state'] == i) & 
           (df_hosp['date'] >= target_date - datetime.timedelta(window))]
        x_y_cases = get_y(the_type = 'cases', 
                  state = i, df_s = df_state, 
                  window = window, target_date = target_date)
        x_y_deaths = get_y(the_type = 'deaths', 
                  state = i, df_s = df_state, 
                  window = window, target_date = target_date)
        x_y_hosp = get_y(the_type = 'hospitalized_currently', 
                  state = i, df_s = df_state_hosp, 
                  window = window, target_date = target_date)
        p = figure(x_axis_type = 'datetime', title = i, 
                plot_width = plot_width , plot_height = plot_height)
        p.line(x = [x[0] for x in x_y_cases], y = [x[1] for x in x_y_cases],
              legend_label = 'cases')
        p.line(x = [x[0] for x in x_y_deaths], y = [x[1] for x in x_y_deaths],
              legend_label = 'deaths', color = 'red')
        p.line(x = [x[0] for x in x_y_hosp], y = [x[1] for x in x_y_hosp],
              legend_label = 'hospital', color = 'green')
        p.xaxis.axis_label = 'Date'
        if i == 'Michigan' or i == 'Minnesota':
            p.legend.location = 'top_left'

        else:
            p.legend.location = 'top_right'

        plots.append(p)
    return gridplot(plots, ncols = 3)

show(make_per_change( the_list = None, 
                     window = 7, target_date = datetime.datetime(2021, 1, 1),
                    use_mean = True),
        )

        


In [9]:
def make_rt_change(window = 7):
    df_cases = get_data()
    df_cases = df_cases[(df_cases['state'] != 'Guam')
        & (df_cases['state'] != 'Northern Mariana Islands')
        & (df_cases['state'] != 'Puerto Rico')
         & (df_cases['state'] != 'Virgin Islands')
             ] 
    df_hosp = get_hospital()
    df_hosp = df_hosp[(df_hosp['state'] != 'Guam')
        & (df_hosp['state'] != 'Northern Mariana Islands')
        & (df_hosp['state'] != 'Puerto Rico')
         & (df_hosp['state'] != 'Virgin Islands')
             ]    

    for i in sorted(set(df_cases['state'].to_list())):
        df_state = df_cases[df_cases['state'] == i]
        cases = df_state['cases'].rolling(window).mean().tolist()
        dates = df_state['date'].to_list()
        deaths = df_state['deaths'].rolling(window).mean().to_list()
        df_state_hosp = df_hosp[df_hosp['state'] == i]

        dates_hosp = df_state_hosp['date'].to_list()
        hosp = df_state_hosp['new_hospitalized'].rolling(window).mean().to_list()

        def get_per_change(dates, incidents):
            zz = zip(dates, incidents)
            zz_ = filter(lambda x: x[0] >= datetime.datetime(2021,3,1), zz)
            zz_ = filter(lambda x: x[0] <= datetime.datetime(2021,3,15), zz_)
            min_incidents = min(zz_, key = lambda x: x[1])
            if min_incidents[1] == 0:
                return math.nan
            per_change = (incidents[-1]- min_incidents[1])/min_incidents[1] * 100
            return per_change
        per_cases = get_per_change(dates, cases)
        per_deaths = get_per_change(dates, deaths)
        per_hosp = get_per_change(dates_hosp, hosp)
        print(i, per_cases, per_deaths, per_hosp)

    
make_rt_change()

Alabama -54.55020920502093 -61.5819209039548 -25.304878048780484
Alaska -79.58950969213228 nan nan
Arizona -54.549886252843685 -69.95515695067263 -49.02597402597403
Arkansas -30.44096728307254 -111.11111111111111 13.20754716981132
California -69.67560572932933 -80.57409879839787 nan
Colorado -37.418590882178805 131.03448275862067 39.393939393939384
Connecticut -90.37735849056604 -75.40983606557377 nan
Delaware -80.150068212824 -85.29411764705883 nan
District of Columbia -88.67924528301886 -85.71428571428572 nan
Florida -63.43729051616817 -51.04895104895104 -100.0
Georgia -72.39764916824384 -44.79495268138801 -45.39007092198582
Hawaii 11.83431952662722 0.0 21.739130434782613
Idaho -59.877913429522756 -11.111111111111123 -37.5
Illinois -75.00932835820895 -8.602150537634417 nan
Indiana -51.890034364261176 -55.46218487394958 nan
Iowa -81.76377952755905 -61.038961038961034 -56.54205607476634
Kansas -53.3415082771306 58.33333333333335 -8.75
Kentucky -69.50539233915953 -62.732919254658384 83.

In [10]:
def king_county(plot_width = 450, plot_height = 450, window = 1,
     min_date = datetime.datetime(2021,1,1)):
    df = pd.read_csv('data/seven_day_county.csv')
    plots = []
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values( by="date")
    df = df[(df['new_cases'] > 0) & (df['new_cases'] < 1000)]
    df = df.assign(per_pop = df['new_cases']/df['population'] * 100000)

    p = figure(x_axis_type = 'datetime', title = 'King', 
                 plot_width = plot_width , plot_height = plot_height)
    df = df[(df['state'] == 'Washington') 
            & (df['date'] >= min_date)
           & (df['county'] == 'King')]
    y = df['new_cases'].rolling(window).mean()
    y = df['per_pop'].rolling(window).mean()
    x = df['date']
    p.line(x = x, y = y)
    p.yaxis.axis_label = 'Cases'
    p.xaxis.axis_label = 'Date'
    plots.append(p)
    return gridplot(plots, ncols = 3)

show(king_county(window = 7, min_date = datetime.datetime(2020,4,1)))

In [11]:
def masks_vs_vaccines(window = 7, plot_width = 350, plot_height = 350):
    plots = []
    df = pd.read_csv('masks_vs_vaccines.csv')
    p = figure()
    x = df['n']
    y_mask = df['mask_per_change'].rolling(window).mean()
    y_vaccine = df['vaccine_per_change'].rolling(window).mean()
    p = figure(plot_width = plot_width, plot_height= plot_height,
              title = 'Vaccine')
    #p.line(x = x, y= y_mask, legend_label = 'WA mask mandate')
    p.line(x = x, y = y_vaccine,  )
    p.legend.location = "top_left"
    p.xaxis.axis_label = 'Days since start'
    p.yaxis.axis_label = 'Percentage Change'
    p2 = figure(plot_height = plot_height, plot_width = plot_width,
               title = 'Mask mandate')
    p2.line(x = x, y= y_mask, )
    p2.legend.location = "top_left"
    p2.xaxis.axis_label = 'Days since start'
    p2.yaxis.axis_label = 'Percentage Change'
    return gridplot([p, p2], ncols = 3)


    return p
show(masks_vs_vaccines())

You are attempting to set `plot.legend.location` on a plot that has zero legends added, this will have no effect.

Before legend properties can be set, you must add a Legend explicitly, or call a glyph method with a legend parameter set.



In [12]:
def gb(window = 7):
    df = pd.read_csv('data/gb.csv')
    df = df.assign(cases_per_pop = df['new_cases']/df['population'] * 1e5)
    df = df.assign(deaths_per_pop = df['new_deaths']/df['population'] * 1e6)

    df['date'] = pd.to_datetime(df['date'])
    df = df[df['date'] > datetime.datetime(2020,1,31)]

    cases = df['cases_per_pop'].rolling(window).mean()
    p = figure(x_axis_type = 'datetime')
    p.line(x = df['date'], y = cases)
    return p

show(gb(window = 7))

    

In [13]:
def gb2(window=7, min_date = datetime.datetime(2021,5,1),
       plot_width = 350, plot_height = 350):
    df = pd.read_csv('data/world2.csv')
    df = df.assign(cases_per_pop = df['new_cases']/df['population'] * 1e5)
    df = df.assign(deaths_per_pop = df['new_deaths']/df['population'] * 1e6)
    df = df.assign(hospitalized_per_pop = df['hospitalized']/df['population'] * 1e5)

    df['date'] = pd.to_datetime(df['date'])
    df = df[df['iso_code'] == 'GBR']
    df = df[df['date'] > min_date]
    df = df[df['hospitalized'] > 0]

    p = figure(x_axis_type = 'datetime', plot_height = plot_height,
              plot_width = plot_width, title="Great Britain Hospitalized/100K")
    cases = df['hospitalized_per_pop'].rolling(window).mean()

    p.line(x = df['date'], y = cases)
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = 'Hospitalized/100K'
    return p

show(gb2(min_date = datetime.datetime(2021,1,1)))

In [22]:
def gb3(window=7, min_date = datetime.datetime(2021,5,1),
       plot_width = 350, plot_height = 350):
    df = pd.read_csv('data/world2.csv')
    df['date'] = pd.to_datetime(df['date'])
    df = df[df['iso_code'] == 'GBR']
    df = df[df['date'] >= min_date]
    df = df[df['hospitalized'] > 0]
    df_t = df[df['date'] == min_date]
    base_cases = df_t['new_cases'].tolist()[0]
    base_hospital = df_t['hospitalized'].tolist()[0]
    df = df.assign(cases_per_change = df['new_cases']/base_cases * 100)
    df = df.assign(hospitalized_per_change = df['hospitalized']/base_hospital * 100)
    p = figure(x_axis_type = 'datetime', plot_width = plot_width,
              plot_height = plot_height, title = "Great Britain Cases & Hospitalizations")
    cases = df['cases_per_change'].rolling(window).mean()
    hospitalized = df['hospitalized_per_change'].rolling(window).mean()
    p.line(x = df['date'], y = cases, legend_label = "cases")
    p.line(x = df['date'], y = hospitalized, color = 'red', 
           legend_label = 'hospitalized')
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = 'Percentage Change'
    p2 = gb2(min_date = min_date, plot_height = plot_height,
            plot_width = plot_width)
    p.legend.location = "top_left"
    return gridplot([p, p2], ncols = 3)
    return p

show(gb3(min_date = datetime.datetime(2021,5,1)))