In [1]:
import os
import datetime
import time
from google.cloud import bigquery  # This is for running sql to get the data
import pandas as pd
from bokeh.io import output_notebook
from bokeh.plotting import figure, show, save
from bokeh.models import (HoverTool, ColumnDataSource,
                         DatetimeTickFormatter, FuncTickFormatter, Range1d)
from bokeh.layouts import gridplot


# Not used yet; kept for possible future use.
def make_dataframe_helper(col_specs, data_lst):
    """Construct a DataFrame from a list of tuples/lists.

    Args:
        col_specs: Iterable of ``(column_name, index)`` pairs, where
            ``index`` is the position inside each record that supplies the
            value for that column.
        data_lst: Sequence of indexable records (tuples or lists). It is
            traversed once per column, so it must be re-iterable.

    Returns:
        A ``pandas.DataFrame`` with one column per entry in ``col_specs``
        and one row per record in ``data_lst``.
    """
    # Named ``columns`` rather than ``dict`` so the builtin is not shadowed.
    columns = {
        col_name: [record[idx] for record in data_lst]
        for col_name, idx in col_specs
    }
    return pd.DataFrame.from_dict(columns)


def dataframe_from_sql(col_names, sql_str, project='paul-henry-tremblay'):
    """Run a BigQuery query and collect the named fields into a DataFrame.

    Args:
        col_names: Names of the result fields to extract; each one becomes
            a column of the returned frame.
        sql_str: SQL text to execute.
        project: Project ID passed to ``bigquery.Client``. The default is
            the project that was previously hard-coded here.

    Returns:
        A ``pandas.DataFrame`` with one column per name in ``col_names``
        and one row per row of the query result. Values are read with
        ``row.get(name)``.
    """
    # Named ``columns`` rather than ``dict`` so the builtin is not shadowed.
    columns = {name: [] for name in col_names}

    client = bigquery.Client(project=project)
    # ``result()`` makes the wait for query completion explicit before the
    # rows are iterated.
    for row in client.query(sql_str).result():
        for name in col_names:
            columns[name].append(row.get(name))

    return pd.DataFrame.from_dict(columns)


def make_county_graph(state, county, df, max_y=None):
    """Build a line chart of daily new cases and deaths for one county.

    Args:
        state: Value matched against the ``state`` column of ``df``.
        county: Value matched against the ``county`` column of ``df``.
        df: DataFrame with ``state``, ``county``, ``date``, ``daily_cases``
            and ``daily_deaths`` columns. Each ``date`` entry must expose
            ``year``, ``month`` and ``day`` attributes.
        max_y: Optional upper bound of the y axis; when given, the y range
            is fixed to ``0..max_y`` so several county plots share a scale.

    Returns:
        The Bokeh figure, with one blue line for daily cases, one red line
        for daily deaths, and a hover tool.
    """
    # Keep only the rows for the requested county.
    df_county = df[(df['state'] == state) & (df['county'] == county)]
    # Promote every entry to a datetime.datetime (midnight) for plotting.
    df_county_dates = [
        datetime.datetime(x.year, x.month, x.day) for x in df_county['date']
    ]

    # Both glyphs and the hover tool read from one shared data source. The
    # tooltip fields (@date, @daily_cases, @daily_deaths) only resolve when
    # columns with those names exist in the renderer's data source; with
    # bare x/y sequences they do not.
    source = ColumnDataSource(data={
        'date': df_county_dates,
        'daily_cases': df_county['daily_cases'],
        'daily_deaths': df_county['daily_deaths'],
    })

    p = figure(plot_height=400,
               plot_width=450,
               title='{county} County {state} State'.format(state=state,
                                                            county=county),
               x_axis_label="Date",
               y_axis_label="Daily new cases and deaths",
               # Use a datetime axis so ticks fall on calendar boundaries.
               x_axis_type='datetime',
               toolbar_location="right")

    if max_y is not None:
        p.y_range = Range1d(0, max_y)

    p.line(x='date',
           y='daily_cases',
           source=source,
           line_width=2,
           color='blue',
           legend_label='daily_cases')

    p.line(x='date',
           y='daily_deaths',
           source=source,
           line_width=3,
           color='red',
           legend_label='daily_deaths')
    p.xaxis.formatter = DatetimeTickFormatter(days=['%m/%d', '%a%d%y'])
    p.legend.location = 'top_left'
    p.add_tools(
        HoverTool(tooltips=[('date', '@date{%F}'),
                            ('daily_cases', '@daily_cases'),
                            ('daily_deaths', '@daily_deaths')],
                  # Render the date column as a date instead of a raw
                  # epoch-milliseconds number.
                  formatters={'@date': 'datetime'},
                  mode='vline'))

    p.legend.label_text_font_size = '8pt'
    return p


# Plot counties graphs in grid
def all_counties_graph(state, df,
                       output_dir='/home/lizhi/projects/covid19/l_ready_file'):
    """Plot every county of one state in a two-column grid, show and save it.

    Args:
        state: Value matched against the ``state`` column of ``df``.
        df: DataFrame with ``state``, ``county``, ``date``, ``daily_cases``
            and ``daily_deaths`` columns.
        output_dir: Directory the HTML file is written to; it is created if
            missing. The default is the path previously hard-coded here.

    Returns:
        None. The grid is displayed with ``show`` and written to
        ``l_line_<state>_State.html`` inside ``output_dir``. If ``df`` has
        no rows for ``state``, a message is printed and nothing is plotted.
    """
    # Filter data down to the requested state.
    df_state = df[df['state'] == state]

    # Sorted list of the distinct counties in this state.
    counties = sorted(set(df_state['county']))
    if not counties:
        # Without this guard the max() below would raise on empty input.
        print('No data for {state}; nothing to plot.'.format(state=state))
        return

    # Largest non-null daily case count, inflated by 2% so the tallest line
    # does not touch the top of the plot. ``default=None`` covers the case
    # where every value is null.
    peak = max((x for x in df_state['daily_cases'] if pd.notna(x)),
               default=None)
    # A missing or non-positive peak would give a degenerate 0..max range,
    # so fall back to automatic y ranging in that case.
    max_cases = peak * 1.02 if peak is not None and peak > 0 else None

    # One graph per county, all sharing the same y scale.
    p_list = [
        make_county_graph(state, county, df_state, max_y=max_cases)
        for county in counties
    ]

    grid = gridplot(p_list, ncols=2)
    show(grid)

    # Make sure the target directory exists before saving into it.
    os.makedirs(output_dir, exist_ok=True)
    name = 'l_line_{state}_State.html'.format(state=state)
    filename = os.path.join(output_dir, name)

    # Save the plot by passing the plot object and the output path.
    save(obj=grid, filename=filename)
    
def list_of_state_graph(states_lst, df):
    """Produce the county grid for each state in ``states_lst``, in order.

    Args:
        states_lst: Iterable of state names, handled one after another.
        df: DataFrame handed unchanged to ``all_counties_graph``.
    """
    for state_name in states_lst:
        all_counties_graph(state_name, df)
        # Wait one second before starting the next state's plots.
        time.sleep(1)
        
# Send Bokeh output to the notebook.
output_notebook()

# Daily figures are each day's value minus the previous day's (lag 1).
# The window is partitioned by state AND county: partitioning by county
# alone would difference rows of same-named counties in different states
# against each other.
sql_str = """
    select date, state, county,
    cases-lag(cases, 1) over(partition by state, county order by date) as daily_cases,
    deaths-lag(deaths,1) over(partition by state, county order by date) as daily_deaths
    from covid19.us_counties
    where state in ('Washington', 'Alabama')
    and date > date_sub(current_date(), interval 1 MONTH)
    and lower(substr(county,1,1)) in ('a','k')
    order by state, county, date"""

# Fields to pull out of each result row; these become the frame's columns.
col_names = ['date', 'state', 'county', 'daily_cases', 'daily_deaths']

df = dataframe_from_sql(col_names, sql_str)


list_of_state_graph(["Alabama", "Washington"], df)



NameError: name 'os' is not defined