In [26]:
import os
import datetime
import random
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.io import output_notebook, reset_output
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, Band
from bokeh.embed import components
from bokeh.models import Range1d
from bokeh.models import NumeralTickFormatter
from bokeh.models import Span
from bokeh.models import FuncTickFormatter
from bokeh.models import Arrow, NormalHead, OpenHead, TeeHead, VeeHead
from bokeh.models import LabelSet
from bokeh.models.annotations import Label


import numpy as np
import pandas as pd

import scipy
from scipy import stats
import math
import pendulum
import csv
from google.cloud import bigquery



In [27]:
REFRESH = True

In [28]:
def get_data_(path, sql, client = None, refresh = False, has_date = True):
    if not client:
        client = bigquery.Client(project='paul-henry-tremblay')
    result = client.query(sql)
    out_path = os.path.join('/tmp', path)

    if refresh or not os.path.isfile(out_path):
        with open(out_path, 'w') as write_obj:
            csv_writer = csv.writer(write_obj)
            counter = 0 
            for i in result:
                counter += 1
                if counter == 1:
                    csv_writer.writerow([x[0] for x in i.items()])
                csv_writer.writerow([x[1] for x in i.items()])
    df = pd.read_csv(out_path)
    if has_date: 
        df['date'] = pd.to_datetime(df['date'])
    return df



In [29]:
def get_state_data(refresh = False):
    sql = """
     /* STATE BY DAY */
 with t1 as
(
SELECT  date, state, new_cases as cases, new_deaths as deaths
FROM `paul-henry-tremblay.covid19.us_states_day_diff`
), t2 as
(SELECT state, population_2019 as population
FROM `paul-henry-tremblay.covid19.population_2019_est`
where state = county
)
select t1.*, population
from t1
inner join  t2
on t1.state = t2.state
order by date
    """
    df = get_data_(path = 'states.csv', sql = sql, refresh = refresh)
    return df


In [30]:
def get_population_data(refresh = False):
    sql = """
     SELECT state, population_2019
FROM `paul-henry-tremblay.covid19.population_2019_est`
where state = county

    """
    df = get_data_(path = 'population.csv', sql = sql, has_date = False,
        refresh = refresh)
    return df


In [31]:
def get_hospital_data(refresh = False):
    sql = """
    with t1 as
(
SELECT date, state, currently_hospitalized as hospitalized, 
    new_hospitalized 
FROM `paul-henry-tremblay.covid19.hospitalized_by_day`
order by date
), t2 as
(SELECT state, population_2019 as population
FROM `paul-henry-tremblay.covid19.population_2019_est`
where state = county
)
select t1.*, population
from t1
inner join  t2
on t1.state = t2.state
order by date

    """
    df = get_data_(path = 'hospital.csv', sql = sql, refresh = refresh)
    return df


In [32]:
def get_us_cum_data(refresh = False):
    sql = """
    with hosp as
(
SELECT date, coalesce(sum( currently_hospitalized ),0) as hos,
coalesce(sum(new_hospitalized),0) as new_hos
FROM `paul-henry-tremblay.covid19.hospitalized_by_day`
group by date
),
cases_t as
(SELECT date, coalesce(sum( new_cases ),0) as cases,
coalesce(sum( new_deaths ),0) as deaths
FROM `paul-henry-tremblay.covid19.us_states_day_diff`
group by date
)
select h.date, cases, deaths, hos, new_hos
from hosp h
inner join
cases_t c
on c.date = h.date

    """
    df = get_data_(path = 'us_cum.csv', sql = sql, refresh = refresh)
    return df


In [33]:
def get_us_counties(refresh = False):
    sql = """
     with ny_pop as
 (select sum(population_2019) as population
 from covid19.population_2019_est
 where state = 'New York'
 and  county in ('Kings County', 'Queens County', 'Bronx County', 'New York County')
 )
 SELECT u.state, u.county, date, new_deaths, new_cases,
 case when u.county = 'New York City' then (select population from ny_pop) else
population_2019 end as population,
rucc_2013
FROM `paul-henry-tremblay.covid19.us_counties_diff` u
inner join covid19.population_2019_est p
on p.fips = u.fips
inner join covid19.rural_urban_codes_2013 r
on r.fips = u.fips
    """
    df = get_data_(path = 'us_counties.csv', sql = sql, refresh = refresh)
    return df


In [34]:
def get_gb(refresh = False):
    sql = """
     SELECT *
FROM
  `paul-henry-tremblay.covid19.world2`
where iso_code = 'GBR'
order by date
    """
    df = get_data_(path = 'gb.csv', sql = sql, refresh = refresh)
    return df

In [35]:
 def get_world(refresh = False):
    sql = """
         select * from `paul-henry-tremblay.covid19.world_cases_deaths_hosp`
    """
    df = get_data_(path = 'world.csv', sql = sql, refresh = refresh)
    return df


In [36]:
DF_POPULATION = get_population_data(refresh = REFRESH)
DF_STATES = get_state_data(refresh = REFRESH)
DF_HOSPITAL =  get_hospital_data(refresh = REFRESH)
DF_US_CUM = get_us_cum_data(refresh = REFRESH)
DF_COUNTIES = get_us_counties(refresh = REFRESH)
DF_GB = get_gb(refresh = REFRESH)
DF_WORLD = get_world(refresh = REFRESH)

In [37]:
def filter_list(state, x, y):
    z = list(zip(x,y))
    if state == 'Ohio':
        z = [x for x in z if x[1] < 10]
    z = [x for x in z if x[1] >= 0]
    x = [x[0] for x in z]
    y = [x[1] for x in z]
    return x, y


In [38]:
output_notebook()
#reset_output()
def make_cases(the_type, plot_width = 300, plot_height = 300, window =7,
              the_list = None, min_date = datetime.datetime(2020, 3, 1)):
    labels_dict = {'cases':'Cases/100,000',
                  'deaths': 'Deaths/1,000,000',
                   'hospitalized': "Hospitalized/100,000",

                  }
    if the_type in (['deaths', 'cases']):
        df_us = DF_STATES
    else:
        df_us = DF_HOSPITAL
    plots = []
    def make_pop_dict():
        population = DF_POPULATION
        pop_dict = {}
        states = population['state']
        nums = population['population_2019']
        for counter, i in enumerate(states):
            pop_dict[i] = nums[counter]
        return pop_dict
    pop_dict = make_pop_dict()
    def get_y(the_type, state, df_s, pop_dict, window, per = 100000):
        y = df_state[the_type].rolling(window).mean()
        y_ = []
        for i in y:
            if math.isnan(i):
                y_.append(0)
            else:
                y_.append(i)
        y = [x/pop_dict[state] * per for x in y_]
        return y
    if not the_list:
        the_list = sorted(set(df_us['state'].tolist()))
    

    for i in the_list:
        if i in ['Guam', 'Northern Mariana Islands', 'Puerto Rico', 'Virgin Islands']:
            continue
        df_state = df_us[(df_us['state'] == i) & (df_us['date'] >= min_date) ]
        if the_type == 'cases':
            df_state = df_state[(df_state['cases'] > 0) ]

        per = 100000
        if the_type == 'deaths':
            per = 1e6
        elif the_type == 'hospitalized':
            per = 100000
        y = get_y(the_type = the_type, 
                  state = i, df_s = df_state, pop_dict = pop_dict, 
                  window = window, per = per)
        x = df_state['date'].tolist()
        #x, y = filter_list(i, x, y)
        x, y = x[window:], y[window:]
        p = figure(x_axis_type = 'datetime', title = i, 
                 plot_width = plot_width , plot_height = plot_height)
        p.line(x = x, y = y)
        p.yaxis.axis_label = labels_dict[the_type]
        p.xaxis.axis_label = 'Date'


        plots.append(p)
    return gridplot(plots, ncols = 2)

the_list = None
the_list = ['California',  'Arizona', 'Georgia',
     'Michigan', 'Massachusetts', 'Pennsylvania', 'Indiana', 'North Carolina', 'Texas',
      'Washington', 'Florida','Georgia', 'Oregon', 'Ohio', 'Missouri']
#the_list = None
the_type = 'hospitalized'
show(make_cases(the_type = the_type, the_list = the_list,  window = 7,
               min_date = datetime.datetime(2021,5,5)))

        


In [39]:
def exp_func(x, initial, ratio):
    return initial * np.power(ratio, x - 1)

In [40]:
output_notebook()
#reset_output()
def bam_yam(the_type, plot_width = 300, plot_height = 300, window =7,
              the_list = None, min_date = datetime.datetime(2020, 3, 1)):
    labels_dict = {'cases':'Cases/100,000',
                  'deaths': 'Deaths/1,000,000',
                   'hospitalized': "Hospitalized/100,000",

                  }
    if the_type in (['deaths', 'cases']):
        df_us = get_data()
    else:
        df_us = get_hospital()
    plots = []
    def add_yaneer(state, p):
        ratio_dict = {'California': (2500, 1.025),
         'Florida': (5100, 1.015),'Arizona': (700,1.035), 'Georgia':(1300,1.04),
        'Massachusetts':(2200, 1.02), 'Pennsylvania':(4000, 1.02),
        'Indiana':(1000, 1.04), 'Michigan':(5000, 1.03), 'North Carolina':(2000, 1.07),
        'Texas':(4000, 1.05)
        }
       
        y = [ratio_dict.get(state, (1000, None))[0]]
        x = [datetime.datetime(2021,4, 1)]
        r = ratio_dict.get(state, (None, 1))[1]
        while 1:
            next_date = x[-1] + datetime.timedelta(days = 1)
            if next_date > datetime.datetime(2021, 5,1):
                break
            x.append(next_date)
            y.append(exp_func(len(x), initial = y[0], ratio = r))
        p.line(x =x, y = y, color = 'red', legend_label = 'Yaneer')
        return
    if not the_list:
        the_list = sorted(set(df_us['state'].tolist()))

    for i in the_list:
        df_state = df_us[(df_us['state'] == i) & (df_us['date'] >= min_date) 
            & (df_us['date'] <= datetime.datetime(2021,5,1)) ]
        y = df_state[the_type].rolling(window).mean()
        x = df_state['date'].tolist()
        x, y = filter_list(i, x, y)
        x, y = x[window -1:], y[window -1:]
        p = figure(x_axis_type = 'datetime', title = i, 
                 plot_width = plot_width , plot_height = plot_height)
        p.line(x = x, y = y)
        add_yaneer(state= i, p = p)
        p.xaxis.axis_label = 'Date'
        p.yaxis[0].formatter = NumeralTickFormatter(format="0,")
        if i in ['Massachusetts', 'Pennsylvania', 'Indiana', 'Michigan',
                'North Carolina', 'Texas']:
            p.legend.location = 'top_center'

        plots.append(p)
    return gridplot(plots, ncols = 3)

the_list = ['California', 'Florida', 'Arizona', 'Georgia',
     'Michigan', 'Massachusetts', 'Pennsylvania', 'Indiana', 'North Carolina', 'Texas']
#the_list = ['Massachusetts']
#show(bam_yam(the_type = 'cases', the_list = the_list,  window = 7,
               #min_date = datetime.datetime(2021,1,15)))

        


In [41]:
def hospitals(plot_width = 300, plot_height = 300, ncols = 3, 
              use_new_hos = False, by_pop = False, window = 7,
             max_date = datetime.datetime(2020,10,1)):
    output_notebook()
    df = DF_US_CUM
    df = df.assign(deaths_million = df['deaths']/382e6 * 1e6)
    df = df.assign(cases_million = df['cases']/382e6 * 1e5)
    df = df.assign(new_hos_100k = df['new_hos']/382e6 * 1e6)
    df = df.assign(hos_100k = df['hos']/382e6 * 1e6)


    if use_new_hos and by_pop:
        df = df[(df['new_hos'] > 0) & (df['new_hos'] < 5000)]
        hos_key = 'new_hos_100k'
        hos_label = 'Newly Hospitalized/1M'
    elif use_new_hos:
        df = df[(df['new_hos'] > 0) & (df['new_hos'] < 5000)]
        hos_key = 'new_hos'
        hos_label = 'Newly Hospitalized' 
    elif not use_new_hos and by_pop:
        hos_key = 'hos_100k'
        hos_label = 'Currently Hospitalized/1M' 
    else:
        hos_key = 'hos'
        hos_label= 'Currenty Hospitalized'
    if by_pop:
        deaths_key = 'deaths_million'
        cases_key = 'cases_million'
        cases_label = 'Cases/100K'
        deaths_label = 'Deaths/1M'
    else:
        deaths_key = 'deaths'
        cases_key = 'cases'
        deaths_label = 'Deaths'
        cases_label = 'Cases'
    df = df[df['date'] > max_date]
    p1 = figure (x_axis_type = 'datetime',  plot_width = plot_width, plot_height = plot_height,
                 title = "USA Hospitalizations")
    p1.line(x = df['date'].tolist(), y = df[hos_key].rolling(window).mean().tolist())
    p1.xaxis.axis_label = 'Date'
    p1.yaxis.axis_label = hos_label

    p2 = figure (x_axis_type = 'datetime',  plot_width = plot_width, plot_height = plot_height, 
                 title = "USA deaths")
    p2.line(x = df['date'].tolist(), y = df[deaths_key]. rolling(window).mean())
    p2.xaxis.axis_label = 'Date'
    p2.yaxis.axis_label = deaths_label

    p3 = figure (x_axis_type = 'datetime',  plot_width = plot_width, plot_height = plot_height, 
                 title = "USA Cases")
    p3.line(x = df['date'].tolist(), y = df[cases_key]. rolling(window).mean())
    p3.xaxis.axis_label = 'Date'
    p3.yaxis.axis_label = cases_label
    if not by_pop:
        p1.yaxis[0].formatter = NumeralTickFormatter(format="0,")
        p2.yaxis[0].formatter = NumeralTickFormatter(format="0,")
        p3.yaxis[0].formatter = NumeralTickFormatter(format="0,")


    g = gridplot([p1, p2, p3], ncols = ncols)
    return g

show(hospitals(use_new_hos = False, by_pop = True, window = 7,
              max_date = datetime.datetime(2020,4,1), ncols=2))


In [42]:
def usa(plot_width = 500, plot_height = 500, ncols = 3, 
        min_date = datetime.datetime(2020, 10, 1), 
        target_date = datetime.datetime(2020,8,1), window = 7,
       use_mean_as_base = False):
    output_notebook()
    if target_date < min_date:
        target_date = min_date + datetime.timedelta(days = window)
    def make_y(l):
        final = []
        if use_mean_as_base:
            base= float(np.nanmean([x[1] for x in l]))
        else:
            base = None
            for i in l:
                if i[0] == target_date:
                    base = i[1]
                    break
        for i in l:
            final.append( i[1]/base * 100)
        return final
    df = DF_US_CUM
    df.sort_values(by=['date'], inplace = True)
    df = df[df['date'] > min_date]
    hos = df['hos'].rolling(window).mean().tolist()
    dates = df['date'].tolist()
    zz = list(zip(dates, hos))
    y_hos = make_y(zz)
    deaths = df['deaths'].rolling(window).mean().tolist()
    zz = list(zip(dates, deaths))
    y_deaths = make_y(zz)
    cases = df['cases'].rolling(window).mean().tolist()
    zz = list(zip(dates, cases))
    y_cases= make_y(zz)
    p1 = figure (x_axis_type = 'datetime',  plot_width = plot_width, 
                 plot_height = plot_height,
                 title = "USA")
    diff = [y_cases[x] - y_deaths[x] for x in range(len(y_hos))]
    p2 = figure(title = "cases minus deaths", x_axis_type = 'datetime')
    p2.line(x = dates, y = diff)
    p1.line(x = dates, y = y_hos, legend_label = "hospitalized")
    p1.line(x = dates, y = y_deaths, legend_label = "deaths", color = 'red')
    p1.line(x = dates, y = y_cases, legend_label = "cases", color = 'green')

    p1.xaxis.axis_label = 'Date'
    p1.yaxis.axis_label = 'Percent Change'
    p1.yaxis[0].formatter = NumeralTickFormatter(format="0,")
    arrow_top = 150
    p1.legend.location = 'top_left'
    arrow1 = Arrow(x_start=datetime.datetime(2021,3,29), y_start=arrow_top -13, 
                   x_end=datetime.datetime(2021,3,29), y_end=100,
               line_color='black', line_alpha=0.7,
                line_width=1, end=OpenHead()
               )
    p1.add_layout(arrow1)
    label = Label(x=datetime.datetime(2021,3,29), y=arrow_top, x_offset=-40,  text="Impending",
                 text_font_size = '10pt')
    label2 = Label(x=datetime.datetime(2021,3,29), y=arrow_top - 11, x_offset=-30,  
                   text="Doom!", text_font_size = '10pt')

    p1.add_layout(label)
    p1.add_layout(label2)

    #p1.circle(x = datetime.datetime(2021,3,29), y = 75, color = 'red',
             #size = 10, legend_label = 'Impenpding Doom')


    #return p1

    g = gridplot([p1, ], ncols = ncols)
    return g

show(usa(min_date = datetime.datetime(2020, 11, 1), use_mean_as_base = False))

In [43]:
output_notebook()
def make_per_change(plot_width = 300, plot_height = 300, window =7,
              the_list = None, target_date = datetime.datetime(2020, 10, 1),
                   use_mean = False):
    labels_dict = {'cases':'Cases/100,000',
                  'deaths': 'Deaths/1,000,000',
                   'hospitalized': "Hospitalized/100,000",

                  }
    df_cases = DF_STATES
    df_hosp = DF_HOSPITAL
    plots = []
    
    def get_y(the_type, state, df_s, window, target_date):
        y = df_s[the_type].rolling(window).mean()

        final = []
        dates = df_s['date']
        zz = list(zip(dates, y))
        base = None
        for i in zz:
            if i[0] == target_date:
                base = i[1]
                if base == 0:
                    base = 1
                break
        if use_mean:
            base= float(np.nanmean([x for x in y]))
        if base == 0:
            base = 1

        for i in zz:
            final.append((i[0],i[1]/base * 100))
        return final
    if not the_list:
        the_list = sorted(set(df_cases['state'].tolist()))

    for i in the_list:
        if i in ['Guam', 'Northern Mariana Islands', 'Puerto Rico', 'Virgin Islands']:
            continue
        df_state = df_cases[(df_cases['state'] == i) & 
           (df_cases['date'] >= target_date - datetime.timedelta(window))]
        df_state_hosp = df_hosp[(df_hosp['state'] == i) & 
           (df_hosp['date'] >= target_date - datetime.timedelta(window))]
        x_y_cases = get_y(the_type = 'cases', 
                  state = i, df_s = df_state, 
                  window = window, target_date = target_date)
        x_y_deaths = get_y(the_type = 'deaths', 
                  state = i, df_s = df_state, 
                  window = window, target_date = target_date)
        x_y_hosp = get_y(the_type = 'hospitalized', 
                  state = i, df_s = df_state_hosp, 
                  window = window, target_date = target_date)
        p = figure(x_axis_type = 'datetime', title = i, 
                plot_width = plot_width , plot_height = plot_height)
        p.line(x = [x[0] for x in x_y_cases], y = [x[1] for x in x_y_cases],
              legend_label = 'cases')
        p.line(x = [x[0] for x in x_y_deaths], y = [x[1] for x in x_y_deaths],
              legend_label = 'deaths', color = 'red')
        p.line(x = [x[0] for x in x_y_hosp], y = [x[1] for x in x_y_hosp],
              legend_label = 'hospital', color = 'green')
        p.xaxis.axis_label = 'Date'
        if i == 'Michigan' or i == 'Minnesota':
            p.legend.location = 'top_left'

        else:
            p.legend.location = 'top_right'

        plots.append(p)
    return gridplot(plots, ncols = 3)

show(make_per_change( the_list = None, 
                     window = 7, target_date = datetime.datetime(2021, 1, 1),
                    use_mean = True),
        )

        


In [44]:
def make_rt_change(window = 7):
    df_cases = get_data()
    df_cases = df_cases[(df_cases['state'] != 'Guam')
        & (df_cases['state'] != 'Northern Mariana Islands')
        & (df_cases['state'] != 'Puerto Rico')
         & (df_cases['state'] != 'Virgin Islands')
             ] 
    df_hosp = get_hospital()
    df_hosp = df_hosp[(df_hosp['state'] != 'Guam')
        & (df_hosp['state'] != 'Northern Mariana Islands')
        & (df_hosp['state'] != 'Puerto Rico')
         & (df_hosp['state'] != 'Virgin Islands')
             ]    

    for i in sorted(set(df_cases['state'].to_list())):
        df_state = df_cases[df_cases['state'] == i]
        cases = df_state['cases'].rolling(window).mean().tolist()
        dates = df_state['date'].to_list()
        deaths = df_state['deaths'].rolling(window).mean().to_list()
        df_state_hosp = df_hosp[df_hosp['state'] == i]

        dates_hosp = df_state_hosp['date'].to_list()
        hosp = df_state_hosp['new_hospitalized'].rolling(window).mean().to_list()

        def get_per_change(dates, incidents):
            zz = zip(dates, incidents)
            zz_ = filter(lambda x: x[0] >= datetime.datetime(2021,3,1), zz)
            zz_ = filter(lambda x: x[0] <= datetime.datetime(2021,3,15), zz_)
            min_incidents = min(zz_, key = lambda x: x[1])
            if min_incidents[1] == 0:
                return math.nan
            per_change = (incidents[-1]- min_incidents[1])/min_incidents[1] * 100
            return per_change
        per_cases = get_per_change(dates, cases)
        per_deaths = get_per_change(dates, deaths)
        per_hosp = get_per_change(dates_hosp, hosp)
        print(i, per_cases, per_deaths, per_hosp)

    
#make_rt_change()

In [45]:
def king_county(plot_width = 450, plot_height = 450, window = 1,
     min_date = datetime.datetime(2021,1,1), window_death = 14):
    df = DF_COUNTIES
    plots = []
    df = df.sort_values( by="date")
    df = df[(df['new_cases'] > 0) & (df['new_cases'] < 1000)]
    df = df.assign(per_pop = df['new_cases']/df['population'] * 100000)
    df = df.assign(per_pop_death = df['new_deaths']/df['population'] * 1e6)


    p = figure(x_axis_type = 'datetime', title = 'King', 
                 plot_width = plot_width , plot_height = plot_height)
    df = df[(df['state'] == 'Washington') 
            & (df['date'] >= min_date)
           & (df['county'] == 'King')]
    y = df['new_cases'].rolling(window).mean()
    y2 = df['per_pop_death'].rolling(window_death).mean()
    y = df['per_pop'].rolling(window).mean()
    x = df['date']
    p.line(x = x, y = y)
    y_ = [x for x in y]
    x_ = [x for x in x]
    zz = zip(x_, y_)
    the_min = (None, 1e6)
    the_max = (None, -100)
    for i in zz:
        if i[0] < datetime.datetime(2021, 2, 1) or i[0] > datetime.datetime(2021,5,1):
            
            continue
        if i[1] < the_min[1]:
            the_min = i
        if i[1] > the_max[1]:
            the_max = i
    print(the_min)
    print(the_max)
    the_len = (the_max[0] - the_min[0]).days
    r = (the_max[1] - the_min[1])/the_len
    print(r)
    p.yaxis.axis_label = 'Cases'
    p.xaxis.axis_label = 'Date'
    p2 = figure(x_axis_type = 'datetime', title = 'King', 
                 plot_width = plot_width , plot_height = plot_height)
    p2.line(x = x, y = y2)
    plots.append(p)
    plots.append(p2)
    return gridplot(plots, ncols = 3)

show(king_county(window = 7, min_date = datetime.datetime(2020,4,1),
                window_death = 30),
    )

(Timestamp('2021-03-09 00:00:00'), 7.438421852244406)
(Timestamp('2021-04-26 00:00:00'), 21.991855910983457)
0.3031965428903969


In [46]:
def masks_vs_vaccines(window = 7, plot_width = 350, plot_height = 350):
    plots = []
    df = pd.read_csv('masks_vs_vaccines.csv')
    p = figure()
    x = df['n']
    y_mask = df['mask_per_change'].rolling(window).mean()
    y_vaccine = df['vaccine_per_change'].rolling(window).mean()
    p = figure(plot_width = plot_width, plot_height= plot_height,
              title = 'Vaccine')
    #p.line(x = x, y= y_mask, legend_label = 'WA mask mandate')
    p.line(x = x, y = y_vaccine,  )
    p.legend.location = "top_left"
    p.xaxis.axis_label = 'Days since start'
    p.yaxis.axis_label = 'Percentage Change'
    p2 = figure(plot_height = plot_height, plot_width = plot_width,
               title = 'Mask mandate')
    p2.line(x = x, y= y_mask, )
    p2.legend.location = "top_left"
    p2.xaxis.axis_label = 'Days since start'
    p2.yaxis.axis_label = 'Percentage Change'
    return gridplot([p, p2], ncols = 3)


    return p
show(masks_vs_vaccines())

You are attempting to set `plot.legend.location` on a plot that has zero legends added, this will have no effect.

Before legend properties can be set, you must add a Legend explicitly, or call a glyph method with a legend parameter set.



In [47]:
def gb(window = 7, min_date = datetime.datetime(2020, 1, 31)):
    df = DF_GB
    df = df.assign(cases_per_pop = df['new_cases']/df['population'] * 1e5)
    df = df.assign(deaths_per_pop = df['new_deaths']/df['population'] * 1e6)

    df = df[df['date'] >= min_date]
    dates = df['date'].tolist()
    dates = [x for x in dates if x >= datetime.datetime(2021,5,25)]

    cases = df['cases_per_pop'].rolling(window).mean()
    cases = [x for x in cases]
    y = cases[-1 * len(dates):]
    the_len = len(y)
    y = [y[0], y[-1]]
    r = (y[-1] - y[0])/the_len
    print(datetime.datetime(2021,5,25), y[0])
    print(dates[-1], y[1])
    print(r)
    dates = [dates[0], dates[-1]]

    p = figure(x_axis_type = 'datetime')
    p.line(x = df['date'], y = cases)
    p.line(x = dates, y = y, color = 'red')
    return p

show(gb(window = 8, min_date = datetime.datetime(2021,1,1)))

    

2021-05-25 00:00:00 2.6874096168629986
2021-06-23 00:00:00 16.018029283326207
0.444353988882107


In [48]:
def gb2(window=7, min_date = datetime.datetime(2021,5,1),
       plot_width = 350, plot_height = 350, state = 'California'):
    def get_slope(df):
        df_ = df[df['date'] >= datetime.datetime(2021,6,1)]
        hos = df_['hospitalized'].tolist()
        pop_ = df['population'].tolist()
        hos = [x/pop_[0] * 1e5 for x in hos]

        slope = (hos[-1] - hos[0])/len(hos)
        for_1 = 1/slope
        print(slope, for_1)
    df = DF_WORLD
    df2 = DF_HOSPITAL
    df_states = DF_STATES
    df_ca = df_states[(df_states['state'] == state) &
            (df_states['date'] > min_date)]
    df_ca=df_ca.assign(deaths_per_pop = df_ca['deaths']/df_ca['population'] * 1e6)

    df = df.assign(cases_per_pop = df['new_cases']/df['population'] * 1e5)
    df = df.assign(deaths_per_pop = df['new_deaths']/df['population'] * 1e6)
    df = df.assign(hospitalized_per_pop = df['hospitalized']/df['population'] * 1e5)
    df2 = df2.assign(hospitalized_per_pop = df2['hospitalized']/df2['population'] * 1e5)
    df2 = df2[df2['state'] == state]
    
    df = df[df['iso_code'] == 'GBR']
    df = df[df['date'] > min_date]
    df = df[df['hospitalized'] > 0]
    df2 = df2[df2['date'] > min_date]
    df2 = df2[df2['hospitalized'] > 0]

    p = figure(x_axis_type = 'datetime', plot_height = plot_height,
              plot_width = plot_width, title="Great Britain & CA Hospitalized/100K")
    p2 = figure(x_axis_type = 'datetime', plot_height = plot_height,
              plot_width = plot_width, title="Great Britain & CA deaths/1M")
    hosp_gb = df['hospitalized_per_pop'].rolling(window).mean()
    hosp_ca = df2['hospitalized_per_pop'].rolling(window).mean()
    deaths_gb = df['deaths_per_pop'].rolling(window).mean()
    deaths_ca = df_ca['deaths_per_pop'].rolling(window).mean()

    p.line(x = df['date'], y = hosp_gb, legend_label= 'GB')
    p2.line(x = df['date'], y = deaths_gb, legend_label= 'GB')
    p2.line(x = df_ca['date'], y = deaths_ca, legend_label= 'CA', color = 'red')


    if state:
        p.line(x = df2['date'], y = hosp_ca, color = 'red', legend_label = 'CA')
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = 'Hospitalized/100K'
    p2.xaxis.axis_label = 'Date'
    p2.yaxis.axis_label = 'Deaths/1M'

    get_slope(df)
    #return p
    return p, p2
    return gridplot([p, p2], ncols = 1)

    return p
r = 40
p1, p2 = gb2(min_date = datetime.datetime(2021,1,1), window = 7,
       plot_width= 16 * r, plot_height = 9 * r, state = 'California')
show(p1)
show(p2)

0.0406844503898736 24.57941524137932


In [66]:
def gb3(window=7, min_date = datetime.datetime(2021,5,1),
       plot_width = 350, plot_height = 350,
       days_adjust = 0):
    df = DF_WORLD
    df = df[df['iso_code'] == 'GBR']
    df = df[df['date'] >= min_date]
    df_hosp = df[df['hospitalized'] > 0]
    df_t = df[df['date'] == min_date]
    df_t_hosp = df_hosp[df_hosp['date'] == min_date]
    base_cases = df_t['new_cases'].tolist()[0]
    base_hospital = df_t_hosp['hospitalized'].tolist()[0]
    df = df.assign(cases_per_change = df['new_cases']/base_cases * 100)
    df_hosp = df_hosp.assign(hospitalized_per_change = df_hosp['hospitalized']/base_hospital * 100)
    p = figure(x_axis_type = 'datetime', plot_width = plot_width,
              plot_height = plot_height, title = "Great Britain Cases & Hospitalizations")
    cases = df['cases_per_change'].rolling(window).mean()

    hospitalized = df_hosp['hospitalized_per_change'].rolling(window).mean()
    dates = df['date'].tolist()
    dates_hosp = df_hosp['date'].tolist()
    dates_10 = [x - datetime.timedelta(days=days_adjust) for x in dates_hosp]
    p.line(x = dates, y = cases, legend_label = "cases")
    p.line(x = dates_10, y = hospitalized, color = 'red', 
           legend_label = 'hospitalized')
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = 'Percentage Change'
    p2 = gb2(min_date = min_date, plot_height = plot_height,
            plot_width = plot_width, state = 'California')
    p.legend.location = "top_left"
    return p
    return gridplot([p, p2], ncols = 3)
r = 40
show(gb3(min_date = datetime.datetime(2021,5,1),
    plot_width = int(16/1) * r, plot_height = 9 * r,
        days_adjust = 0))

0.0406844503898736 24.57941524137932


In [57]:
def gb4(window=7, min_date = datetime.datetime(2021,5,1),
       plot_width = 350, plot_height = 350,
       ):
    df = DF_WORLD
    df = df[df['iso_code'] == 'GBR']
    df = df[df['date'] >= min_date]
    df_hosp = df[df['hospitalized'] > 0]
    df = df.assign(cases_per_100k = df['new_cases']/df['population'] * 1e5)
    df_hosp = df_hosp.assign(hospitalized_per_100k = df['hospitalized']/df['population'] * 1e5)

    p1 = figure(x_axis_type = 'datetime', plot_width = plot_width,
              plot_height = plot_height, title = "Great Britain Cases",
               )
    cases = df['cases_per_100k'].rolling(window).mean()

    hospitalized = df_hosp['hospitalized_per_100k'].rolling(window).mean()
    dates = df['date'].tolist()
    dates_hosp = df_hosp['date'].tolist()
    p1.line(x = dates, y = cases)
    p1.xaxis.axis_label = 'Date'
    p1.yaxis.axis_label = 'cases/100K'
    p2 = figure(x_axis_type = 'datetime', plot_width = plot_width,
              plot_height = plot_height, title = "Great Britain Hospitalized",
               )
    p2.line(x = dates_hosp, y = hospitalized)
    p1.line(x = dates_hosp, y = hospitalized)

    #p1.legend.location = "top_left"
    p1.xaxis.visible = False
    p2.yaxis.axis_label = 'hospitalized/100K'

    return gridplot([p1, p2], ncols = 1)
r = 40
show(gb4(min_date = datetime.datetime(2021,1,1),
    plot_width = int(16) * r, plot_height = int(9/1) * r,
        ))