In [1]:
import datetime
import time
from google.cloud import bigquery  # This is for running sql to get the data
import pandas as pd
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import (CategoricalColorMapper, HoverTool, ColumnDataSource,
                          Panel, DatetimeTickFormatter, FuncTickFormatter,
                          SingleIntervalTicker, LinearAxis, Range1d)
from bokeh.models.widgets import (CheckboxGroup, Slider, RangeSlider, Tabs,
                                  CheckboxButtonGroup, TableColumn, DataTable,
                                  Select)
from bokeh.layouts import gridplot, column, row, WidgetBox
from bokeh.palettes import Category20_16




def dataframe_from_sql(col_names, sql_str):
    """Run a BigQuery query and return the chosen columns as a DataFrame.

    Args:
        col_names: Names of the result columns to copy into the frame. Each
            name becomes one DataFrame column, in the order given.
        sql_str: SQL text submitted to BigQuery.

    Returns:
        A pandas DataFrame with one column per entry of ``col_names`` and
        one row per row returned by the query. Values are read with
        ``row.get(name)``.
    """
    # Named ``columns`` (not ``dict``) so the builtin is not shadowed.
    columns = {name: [] for name in col_names}

    client = bigquery.Client(project='paul-henry-tremblay')
    # Wait for the query job explicitly and iterate over its rows.
    rows = client.query(sql_str).result()

    for row in rows:
        for name in col_names:
            columns[name].append(row.get(name))
    return pd.DataFrame.from_dict(columns)


def make_county_graph(state, county, df, max_y=None):
    """Build a line chart of daily cases and daily deaths for one county.

    Args:
        state: Value matched against the ``state`` column of ``df``.
        county: Value matched against the ``county`` column of ``df``.
        df: DataFrame with ``date``, ``state``, ``county``, ``daily_cases``
            and ``daily_deaths`` columns. Each ``date`` value must expose
            ``year``, ``month`` and ``day`` attributes.
        max_y: Optional upper bound of the y axis. When given, the y range
            is fixed to ``[0, max_y]``; otherwise it is left to Bokeh.

    Returns:
        The Bokeh figure; it is not shown here.
    """
    # Keep only the rows for the requested county of the requested state.
    df_county = df[(df['state'] == state) & (df['county'] == county)]
    county_dates = [
        datetime.datetime(d.year, d.month, d.day) for d in df_county['date']
    ]

    # The hover tooltips look fields up by column name, so the glyphs must
    # share a data source whose columns carry those names. Passing bare
    # sequences to ``line`` creates anonymous ``x``/``y`` columns instead
    # and the ``@...`` tooltip fields would not resolve.
    source = ColumnDataSource(
        data={
            'date': county_dates,
            # Pre-formatted text so the tooltip shows a readable date
            # rather than a raw epoch-milliseconds number.
            'date_label': [d.strftime('%Y-%m-%d') for d in county_dates],
            'daily_cases': df_county['daily_cases'],
            'daily_deaths': df_county['daily_deaths'],
        })

    p = figure(plot_height=400,
               plot_width=400,
               title='{county} County {state} State'.format(state=state,
                                                            county=county),
               x_axis_label="Date",
               y_axis_label="Daily new cases and deaths",
               toolbar_location="right")

    if max_y is not None:
        p.y_range = Range1d(0, max_y)

    p.line(x='date',
           y='daily_cases',
           source=source,
           line_width=2,
           color='blue',
           legend_label='daily_cases')

    p.line(x='date',
           y='daily_deaths',
           source=source,
           line_width=3,
           color='red',
           legend_label='daily_deaths')
    p.xaxis.formatter = DatetimeTickFormatter(days=['%m/%d', '%a%d%y'])
    p.legend.location = 'top_left'
    p.xgrid.grid_line_color = None
    p.add_tools(
        HoverTool(tooltips=[('date', '@date_label'),
                            ('daily_cases', '@daily_cases'),
                            ('daily_deaths', '@daily_deaths')],
                  mode='vline'))

    p.legend.label_text_font_size = '8pt'
    return p


# Plot counties graphs in grid
def all_counties_graph(state, df):
    """Show a grid of per-county charts for every county of one state.

    All charts share the same y-axis upper bound (the state's largest
    ``daily_cases`` value plus 2%) so counties can be compared visually.

    Args:
        state: Value matched against the ``state`` column of ``df``.
        df: DataFrame with ``date``, ``state``, ``county``, ``daily_cases``
            and ``daily_deaths`` columns.

    Returns:
        None. The filtered frame is printed and the grid is displayed
        with ``show``.
    """
    # Filter data down to the specific state.
    df_state = df[df['state'] == state]
    print(df_state)

    # Sorted list of the distinct counties in this state.
    counties = sorted(set(df_state['county']))
    if not counties:
        # Nothing to plot; previously this crashed in ``max()`` below.
        print('No data for state {state}'.format(state=state))
        return

    # Largest number of daily cases, ignoring missing values. ``default``
    # covers the case where every value is missing, in which ``max`` on an
    # empty sequence would raise ValueError.
    peak_cases = max((x for x in df_state['daily_cases'] if pd.notna(x)),
                     default=None)
    # Inflate the maximum by 2% so the highest line does not touch the top
    # edge; with no usable maximum let Bokeh pick the range itself.
    max_cases = peak_cases * 1.02 if peak_cases is not None else None

    # Build one graph per county and lay them out three per row.
    p_list = [
        make_county_graph(state, county, df_state, max_y=max_cases)
        for county in counties
    ]

    grid = gridplot(p_list, ncols=3)
    show(grid)


def list_of_state_graph(states_lst, df):
    """Display the county grid for each state in ``states_lst``, in order.

    Args:
        states_lst: Iterable of state names passed one at a time to
            ``all_counties_graph``.
        df: DataFrame handed unchanged to ``all_counties_graph``.
    """
    pause_seconds = 1
    for state_name in states_lst:
        all_counties_graph(state_name, df)
        # Pause for one second after each state's grid.
        time.sleep(pause_seconds)
        
# Render Bokeh output inline in the notebook.
output_notebook()

# Daily figures are the difference between consecutive cumulative counts.
# The window is partitioned by state AND county: county names repeat across
# states, and partitioning by county alone would difference rows from
# different states against each other.
# The first row of each county is NULL because the date filter removes the
# previous day's row before the window function runs.
sql_str = """
    select date, state, county,
    cases-lag(cases, 1) over(partition by state, county order by date) as daily_cases,
    deaths-lag(deaths,1) over(partition by state, county order by date) as daily_deaths
    from covid19.us_counties
    where state in ('Washington', 'Alabama')
    and date > date_sub(current_date(), interval 1 MONTH)
    and lower(substr(county,1,1)) in ('a','k')
    order by state, county, date"""

# Result columns to copy into the DataFrame, in display order.
col_names = ['date', 'state', 'county', 'daily_cases', 'daily_deaths']

df = dataframe_from_sql(col_names, sql_str)

list_of_state_graph(["Alabama", "Washington"], df)


          date    state   county  daily_cases  daily_deaths
0   2020-06-08  Alabama  Autauga          NaN           NaN
1   2020-06-09  Alabama  Autauga         10.0           0.0
2   2020-06-10  Alabama  Autauga         13.0           1.0
3   2020-06-11  Alabama  Autauga         20.0           0.0
4   2020-06-12  Alabama  Autauga         11.0           0.0
5   2020-06-13  Alabama  Autauga          8.0           0.0
6   2020-06-14  Alabama  Autauga         26.0           0.0
7   2020-06-15  Alabama  Autauga          7.0           0.0
8   2020-06-16  Alabama  Autauga          5.0           1.0
9   2020-06-17  Alabama  Autauga          2.0           0.0
10  2020-06-18  Alabama  Autauga         25.0           1.0
11  2020-06-19  Alabama  Autauga         11.0           0.0
12  2020-06-20  Alabama  Autauga         20.0           1.0
13  2020-06-21  Alabama  Autauga          3.0           0.0
14  2020-06-22  Alabama  Autauga          8.0           0.0
15  2020-06-23  Alabama  Autauga        

           date       state     county  daily_cases  daily_deaths
28   2020-06-08  Washington      Adams          NaN           NaN
29   2020-06-09  Washington      Adams          1.0           0.0
30   2020-06-10  Washington      Adams          7.0           0.0
31   2020-06-11  Washington      Adams          2.0           0.0
32   2020-06-12  Washington      Adams          0.0           0.0
..          ...         ...        ...          ...           ...
191  2020-07-01  Washington  Klickitat          0.0           0.0
192  2020-07-02  Washington  Klickitat          7.0           0.0
193  2020-07-03  Washington  Klickitat          0.0           0.0
194  2020-07-04  Washington  Klickitat          1.0           0.0
195  2020-07-05  Washington  Klickitat          0.0           0.0

[168 rows x 5 columns]
