In [5]:
import os
import datetime
import time
from google.cloud import bigquery  # This is for running sql to get the data
import pandas as pd
import geopandas as gpd
import numpy as np
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import (CategoricalColorMapper, HoverTool, ColumnDataSource,
                          Panel, DatetimeTickFormatter, FuncTickFormatter,
                          SingleIntervalTicker, LinearAxis, Range1d)
from bokeh.models.widgets import (CheckboxGroup, Slider, RangeSlider, Tabs,
                                  CheckboxButtonGroup, TableColumn, DataTable,
                                  Select)
from bokeh.layouts import gridplot, column, row, WidgetBox
from bokeh.palettes import Category20_16


def get_data_path(dir_path, local_path, data_path='data'):
    return os.path.join(dir_path, data_path, local_path)



def dataframe_from_sql(col_names, sql_str):
    dict = {}
    for x in col_names:
        dict[x] = []

    client = bigquery.Client(project='paul-henry-tremblay')
    result = client.query(sql_str)

    for rows in result:
        for x in col_names:
            dict[x].append(rows.get(x))
    # source = ColumnDataSource(pd.DataFrame.from_dict(dict))
    return pd.DataFrame.from_dict(dict)
    #return source
    


def make_county_graph(state, county, df, max_y=None):
    # Filter state
    df_county = df.loc[(df['state'] == state) & (df['county'] == county)]
     
    df_county.loc[:,'date'] = pd.to_datetime(df_county.loc[:,'date'])
   
    df_county.loc[:,'ToolTipDates'] = df_county.loc[:,'date'].map(lambda x: x.strftime("%b %d")) 
    
    source = ColumnDataSource(df_county)
      
    #thresholds = [10 for x in range(0, len(df_county_date))]
    
    p = figure(plot_height=400,
               plot_width=600,
               title='{county} County {state} State'.format(state=state,
                                                            county=county),
               x_axis_label="Date",
               y_axis_label="Daily new cases and deaths",
               toolbar_location="right")

    if max_y is not None:
        p.y_range = Range1d(0, max_y)

    p.vbar(x='date',
           top='daily_cases',
           source = source,
           line_width=3,
           width=datetime.timedelta(days=1), 
           color='blue',
           legend_label='daily_cases')
    
    p.vbar(x='date',
           top='daily_deaths', 
           source = source,
           line_width=3,
           width=datetime.timedelta(days=1),
           color='red',
           legend_label='daily_deaths')

    
    p.xaxis.formatter = DatetimeTickFormatter(days=['%m/%d', '%a%d%y'])
    p.legend.location = 'top_left'

    p.add_tools(HoverTool(tooltips=[('date', '@ToolTipDates'),
                                    ('daily_cases', '@daily_cases'),
                                    ('daily_deaths', '@daily_deaths')], mode='vline'))
    
    #p.add_tools(HoverTool(tooltips=[('date', '@ToolTipDates'), ('x,y','$x,$y')],
    #              mode='vline'))
    
    p.legend.label_text_font_size = '8pt'
    return p


# Plot counties graphs in grid
def all_counties_graph(state, df):

    # Filter data down to specific state.
    #df_state = df[df['state'] == state]
    df_state = df.loc[df['state'] == state]
    
    # Find list of all counties in this state then sort this list.
    counties = sorted(list(set(df_state['county'])))

    # Find the largest number of cases, exclude the none number if there are.
    # Inflating 2% of the max_cases number to make the graph more elegant.
    max_cases = max(x for x in df_state['daily_cases'] if pd.notna(x)) * 1.05

    # Populating a list of individual county graph into a grid and plot them.
    p_list = []
    for i in counties:
        p_list.append(make_county_graph(state, i, df_state, max_y=max_cases))

    grid = gridplot(p_list, ncols=1)
    show(grid)


def list_of_state_graph(states_lst, df):
    for state in states_lst:
        all_counties_graph(state, df)
        time.sleep(1)

                        
output_notebook()
sql_str = """
    select date, state, county,
    cases-lag(cases, 1) over(partition by county order by date) as daily_cases,
    deaths-lag(deaths,1) over(partition by county order by date) as daily_deaths
    from covid19.us_counties
    where state in ('Washington', 'Alabama')
    and date > date_sub(current_date(), interval 1 MONTH)
    and lower(substr(county,1,1)) in ('a','k')
    order by state, county, date"""
    
col_names = ['date', 'state', 'county', 'daily_cases', 'daily_deaths']

df = dataframe_from_sql(col_names, sql_str)
# source = ColumnDataSource(df)

list_of_state_graph(["Alabama", "Washington"], df)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)


In [27]:
df.columns.tolist()

['date', 'state', 'county', 'daily_cases', 'daily_deaths']

In [4]:
import geopandas as gpd

# File path
points_fp = r"/home/lizhi/projects/covid19/lizzie_covid19/addresses.shp"

# Read the data
points = gpd.read_file(points_fp)

DriverError: /home/lizhi/projects/covid19/lizzie_covid19/addresses.shp: No such file or directory