In [3]:
import os
import datetime
import time
from google.cloud import bigquery  # This is for running sql to get the data
import pandas as pd
import math
import numpy as np
from bokeh.io import output_notebook
from bokeh.plotting import figure, show, save
from bokeh.models import (CategoricalColorMapper, HoverTool, ColumnDataSource,
                          Panel, DatetimeTickFormatter, FuncTickFormatter,
                          SingleIntervalTicker, LinearAxis, Range1d)
from bokeh.models.widgets import (CheckboxGroup, Slider, RangeSlider, Tabs,
                                  CheckboxButtonGroup, TableColumn, DataTable,
                                  Select)
from bokeh.layouts import gridplot, column, row, WidgetBox
from bokeh.palettes import Category20_16


def get_data_path(dir_path, local_path, data_path='data'):
    return os.path.join(dir_path, data_path, local_path)


def tidy_name(s):
    """Replace spaces with underscores and make it all lower cases. """
    return s.replace(' ', '_').lower()


def get_days_less_than_0(lst):
    """Finds the number of trailing list entries less than 1, then the number plus 1."""
    return 1 + sum(1 for _ in it.takewhile(lambda x: x < 1, reversed(lst)))


def get_double_rate(b):
    if b == 1:
        return 0
    x = math.log(2) / math.log(b)
    if math.isnan(x):
        return x
    return round(x, 2)


def geometric_series(start, ratio, num_elem):
    """returns num_elem values from geometric_series
    that begins at start and has ratio."""

    lst = []
    for i in range(num_elem):
        lst.append(start)
        start = start * ratio
    return lst


def double_rate_line(start, rate, the_len, base=2):
    ratio = math.pow(base, 1.0 / rate)
    return geometric_series(start * ratio, ratio, the_len)


# This is for future use if can.
def make_dataframe_helper(col_specs, data_lst):
    """Constructs dataframe from list of tuples/lists."""
    dict = {}
    for col_name, idx in col_specs:
        dict[col_name] = [x[idx] for x in data_lst]
    return pd.DataFrame.from_dict(dict)


def dataframe_from_sql(col_names, sql_str):
    dict = {}
    for x in col_names:
        dict[x] = []

    client = bigquery.Client(project='paul-henry-tremblay')
    result = client.query(sql_str)

    for rows in result:
        for x in col_names:
            dict[x].append(rows.get(x))
    # source = ColumnDataSource(pd.DataFrame.from_dict(dict))
    return pd.DataFrame.from_dict(dict)
    #return source
    


def make_county_graph(state, county, df, max_y=None):
    # Filter state
    df_county = df[(df['state'] == state) & (df['county'] == county)]
      
    df_county['date'] = pd.to_datetime(df_county['date'])
 
    df_county['ToolTipDates'] = df_county.date.map(lambda x: x.strftime("%b %d")) 
      
    
    source = ColumnDataSource(df_county,)
    
    
    #thresholds = [10 for x in range(0, len(df_county_date))]
    
    p = figure(plot_height=400,
               plot_width=450,
               title='{county} County {state} State'.format(state=state,
                                                            county=county),
               x_axis_label="Date",
               y_axis_label="Daily new cases and deaths",
               toolbar_location="right")

    if max_y is not None:
        p.y_range = Range1d(0, max_y)

    p.line(x='date',y='daily_cases', source = source, line_width=2, color='blue', legend_label='daily_cases')
    p.line(x='date',y='daily_deaths', source = source,line_width=3, color='red', legend_label='daily_deaths')
   
    
    p.xaxis.formatter = DatetimeTickFormatter(days=['%m/%d', '%a%d%y'])
    p.legend.location = 'top_left'
    p.legend.label_text_font_size = '8pt'
    p.xgrid.grid_line_color = None
    p.add_tools(HoverTool(tooltips=[('date', '@ToolTipDates'), ('daily_cases', '@daily_cases'),                        ('daily_deaths', '@daily_deaths')],
                  mode='vline'))

    
    return p


# Plot counties graphs in grid
def all_counties_graph(state, df):

    # Filter data down to specific state.
    df_state = df[df['state'] == state]

    # Find list of all counties in this state then sort this list.
    counties = sorted(list(set(df_state['county'])))
    
    # Create the checkbox selection of state in state_lst
    county_selection = CheckboxGroup(labels=counties, active=[0, 1])
    county_selection.on_change('active', update)
    print(county_selection)

    # Find the largest number of cases, exclude the none number if there are.
    # Inflating 2% of the max_cases number to make the graph more elegant.
    max_cases = max(x for x in df_state['daily_cases'] if pd.notna(x)) * 1.02

    # Populating a list of individual couty graph into a grid and plot them.
    p_list = []
    for i in counties:
        p_list.append(make_county_graph(state, i, df_state, max_y=max_cases))

    grid = gridplot(p_list, ncols=2)
    show(grid)
    
    name= '{state}_State.html'.format(state=state)
    path='/home/lizhi/projects/covid19/lizzie_covid19'
    filename = os.path.join(path,name)
  
    # Save the plot by passing the plot -object and output path
    save(obj=grid, filename=filename)


def list_of_state_graph(states_lst, df):
    for state in states_lst:
        all_counties_graph(state, df)
        time.sleep(1)

def  update(attr, old, new):
    # Get a list of state to plot
    states_to_plot = [state_selection.labels[i] for i in state_selection.active]

    new_source = list_of_state_graph(state_lst,df)
    source.data.update = (new_source.data)
    
                        
output_notebook()
sql_str = """
    select date, state, county,
    cases-lag(cases, 1) over(partition by county order by date) as daily_cases,
    deaths-lag(deaths,1) over(partition by county order by date) as daily_deaths
    from covid19.us_counties
    where state in ('Washington', 'Alabama')
    and date > date_sub(current_date(), interval 1 MONTH)
    and lower(substr(county,1,1)) in ('a','k')
    order by state, county, date"""

col_names = ['date', 'state', 'county', 'daily_cases', 'daily_deaths']

df = dataframe_from_sql(col_names, sql_str)

list_of_state_graph(["Alabama", "Washington"], df)



  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
