# In [5]:
import datetime
import time
from google.cloud import bigquery  # This is for running sql to get the data
import pandas as pd
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import (HoverTool, ColumnDataSource,
                          DatetimeTickFormatter, FuncTickFormatter, Range1d)
from bokeh.layouts import gridplot


# Kept for possible future use.
def make_dataframe_helper(col_specs, data_lst):
    """Construct a DataFrame from a list of tuples/lists.

    Args:
        col_specs: Iterable of ``(column_name, index)`` pairs; ``index`` is
            the position within each record that supplies the column value.
        data_lst: Sequence of records (tuples or lists) that can be indexed
            by the positions named in ``col_specs``. It is traversed once
            per column, so it must be re-iterable.

    Returns:
        A ``pandas.DataFrame`` with one column per entry in ``col_specs``
        and one row per record in ``data_lst``.
    """
    # Named ``columns`` rather than ``dict`` so the builtin is not shadowed.
    columns = {
        col_name: [record[idx] for record in data_lst]
        for col_name, idx in col_specs
    }
    return pd.DataFrame.from_dict(columns)

# Run a SQL query and build a DataFrame from the requested result columns.
def dataframe_from_sql(col_names, sql_str, project='paul-henry-tremblay'):
    """Run a BigQuery query and collect the named columns into a DataFrame.

    Args:
        col_names: Names of the result columns to keep; each becomes a
            DataFrame column, in this order.
        sql_str: SQL text to execute.
        project: Google Cloud project the BigQuery client is created for.
            Defaults to the project this code was originally written
            against, so existing two-argument calls behave as before.

    Returns:
        A ``pandas.DataFrame`` with one row per result row and one column
        per name in ``col_names``. Values are read with ``row.get(name)``.
    """
    # Named ``columns`` rather than ``dict`` so the builtin is not shadowed.
    columns = {name: [] for name in col_names}

    client = bigquery.Client(project=project)
    query_job = client.query(sql_str)

    # Iterating the job yields the result rows of the query.
    for row in query_job:
        for name in col_names:
            columns[name].append(row.get(name))
    return pd.DataFrame.from_dict(columns)
    
# Plot the daily cases and deaths of one county within a specific state.
def make_county_graph(state, county, df, max_y=None):
    """Build a Bokeh bar chart of daily new cases and deaths for one county.

    Args:
        state: State name matched against ``df['state']``.
        county: County name matched against ``df['county']``.
        df: DataFrame with ``state``, ``county``, ``date``, ``daily_cases``
            and ``daily_deaths`` columns. It is not modified.
        max_y: Optional upper bound for the y axis. When given, the y range
            is fixed to ``(0, max_y)`` so plots shown together share one
            scale; when ``None`` the figure keeps its default range.

    Returns:
        The configured Bokeh figure; it is not shown here.
    """
    # Work on an explicit copy of the filtered rows. Assigning columns
    # through ``.loc`` on a selection of ``df`` raises pandas'
    # SettingWithCopyWarning and can leave 'date' with its original dtype.
    county_mask = (df['state'] == state) & (df['county'] == county)
    df_county = df.loc[county_mask].copy()

    # Convert dates to datetimes for the datetime axis, and precompute a
    # short label (e.g. "Apr 05") for the hover tool.
    df_county['date'] = pd.to_datetime(df_county['date'])
    df_county['ToolTipDates'] = df_county['date'].map(
        lambda ts: ts.strftime("%b %d"))

    source = ColumnDataSource(df_county)

    # NOTE(review): newer Bokeh releases spell these ``height``/``width``;
    # ``plot_height``/``plot_width`` are kept to match the version this file
    # targets -- confirm before upgrading Bokeh.
    p = figure(plot_height=400,
               plot_width=450,
               title='{county} County {state} State'.format(state=state,
                                                            county=county),
               x_axis_label="Date",
               y_axis_label="Daily new cases and deaths",
               toolbar_location="below")

    # A shared upper bound gives every plot in a grid the same y scale,
    # which makes counties directly comparable.
    if max_y is not None:
        p.y_range = Range1d(0, max_y)

    # Half a day wide (instead of a full day) leaves a gap between bars.
    bar_width = datetime.timedelta(days=0.5)

    # One bar series per metric; cases are added first, then deaths.
    # NOTE(review): bars start at -10 rather than 0 -- reason not evident
    # from this file; confirm before changing.
    for column, color in (('daily_cases', 'blue'), ('daily_deaths', 'red')):
        p.vbar(x='date',
               top=column,
               bottom=-10,
               source=source,
               line_width=3,
               width=bar_width,
               color=color,
               legend_label=column)

    p.xaxis.formatter = DatetimeTickFormatter(days=['%m/%d', '%a%d%y'])
    p.xgrid.grid_line_color = None

    p.legend.location = 'top_left'
    p.legend.label_text_font_size = '8pt'

    # Hover tool so the values for a date can be read off the plot;
    # 'vline' mode triggers on the cursor's x position.
    p.add_tools(HoverTool(tooltips=[('date', '@ToolTipDates'),
                                    ('daily_cases', '@daily_cases'),
                                    ('daily_deaths', '@daily_deaths')],
                          mode='vline'))
    return p


# Plot the graphs of all counties in a state as a grid.
def all_counties_graph(state, df):
    """Show a two-column grid with one daily cases/deaths plot per county.

    Every plot uses the same y-axis upper bound (the state's largest
    ``daily_cases`` value plus 2% headroom) so counties can be compared.

    Args:
        state: State name matched against ``df['state']``.
        df: DataFrame with at least ``state``, ``county`` and
            ``daily_cases`` columns, plus whatever ``make_county_graph``
            reads.

    Returns:
        None. The grid is displayed with ``show``. If ``df`` has no rows for
        ``state``, a warning is issued and nothing is displayed.
    """
    import warnings

    # Filter data down to the requested state.
    df_state = df.loc[df['state'] == state]
    if df_state.empty:
        # Previously this case crashed in ``max()`` with a ValueError.
        warnings.warn(
            'No rows for state {!r}; nothing to plot.'.format(state),
            stacklevel=2)
        return None

    # Distinct county names, in sorted order.
    counties = sorted(set(df_state['county']))

    # Largest daily case count, ignoring missing values, inflated by 2% so
    # the tallest bar does not touch the top of the plot. With no valid
    # value at all, leave the range to the figure's default.
    valid_cases = [x for x in df_state['daily_cases'] if pd.notna(x)]
    max_cases = max(valid_cases) * 1.02 if valid_cases else None

    # One plot per county, all sharing the same y-axis bound.
    p_list = [make_county_graph(state, county, df_state, max_y=max_cases)
              for county in counties]

    # Lay the plots out two per row and display them.
    grid = gridplot(p_list, ncols=2)
    show(grid)
    return None

# Given a list of states, show the county-level plots for each state.
def list_of_state_graph(states_lst, df):
    """Display the county grid for each state in ``states_lst``, in order.

    Args:
        states_lst: Iterable of state names.
        df: DataFrame passed through unchanged to ``all_counties_graph``.

    Returns:
        None.
    """
    for state_name in states_lst:
        all_counties_graph(state=state_name, df=df)
        # One-second pause after each state's grid.
        # NOTE(review): presumably to let the notebook render -- confirm.
        time.sleep(1)

# Send Bokeh output to the notebook.
output_notebook()

# SQL query that fetches the data. The daily figures are each row's count
# minus the count on the previous date. The window is partitioned by state
# AND county: county names can repeat across states, and partitioning by
# county alone would difference one county's counts against another's.
sql_str = """
    select date, state, county,
    cases-lag(cases, 1) over(partition by state, county order by date) as daily_cases,
    deaths-lag(deaths,1) over(partition by state, county order by date) as daily_deaths
    from covid19.us_counties
    where state in ('Washington', 'Alabama')
    and date > date_sub(current_date(), interval 1 MONTH)
    and lower(substr(county,1,1)) in ('a','k')
    order by state, county, date"""

# The columns that will appear in the DataFrame and ColumnDataSource.
col_names = ['date', 'state', 'county', 'daily_cases', 'daily_deaths']

# Run the query and load the result into a DataFrame.
df = dataframe_from_sql(col_names, sql_str)

# Plot the county grids for each listed state.
list_of_state_graph(["Alabama", "Washington"], df)

