In [None]:
# Helpful links, kept as bare string expressions (they have no effect on later
# cells; the notebook simply echoes the last one as this cell's output).
'https://towardsdatascience.com/data-visualization-with-bokeh-in-python-part-ii-interactions-a4cf994e2512'
'https://realpython.com/lessons/using-groupfilter-and-cdsview/'

In [None]:
import pandas
import math

In [None]:
# County population data from us census
#     https://www.census.gov/data/datasets/time-series/demo/popest/2010s-counties-total.html#par_textimage_70769902
#     https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv

# NOTE(review): IBM850 is a single-byte codec, so it decodes every byte without
# raising; if the Census file is actually Latin-1, accented county names in
# CTYNAME would be silently mis-rendered. Confirm the file's real encoding.
pop_data_all = pandas.read_csv('./co-est2019-alldata.csv', encoding='IBM850')

# Keep only the SUMLEV == 50 rows (presumably the county-level records - confirm
# against the Census file layout) and the columns needed below. A single .loc
# selection followed by .copy() yields an independent frame, so adding the
# 'fips' column does not trigger pandas' SettingWithCopyWarning.
pop_data = pop_data_all.loc[
    pop_data_all.SUMLEV == 50,
    ['STATE', 'COUNTY', 'STNAME', 'CTYNAME', 'POPESTIMATE2019'],
].copy()

# Combined county code: state code * 1000 + county code (e.g. 6 and 37 -> 6037).
pop_data['fips'] = pop_data.STATE * 1000 + pop_data.COUNTY

# Spot check: show the row(s) with fips 6037.
pop_data[pop_data.fips == 6037]

In [None]:
# Reduce the census frame to a single 'population' column indexed by fips.
# NOTE: this rebinds pop_data, so re-running this cell without re-running the
# previous one fails (the 'fips' column has become the index).
pop_data = (
    pop_data[['fips', 'POPESTIMATE2019']]
    .rename(columns={'POPESTIMATE2019': 'population'})
    .set_index('fips')
)

# Spot check: show the row(s) with fips 6037.
pop_data[pop_data.index == 6037]

In [None]:
nytimes_counties_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
counties_data = pandas.read_csv(nytimes_counties_url, parse_dates=['date'])

# fips codes are left out for, ie, New York City, and "Unknown" groupings for states;
# drop those rows so the remaining codes can be stored as integers.
counties_data = counties_data[counties_data.fips.notna()].astype({'fips': int})

# Attach the population for each county by joining the 'fips' column against
# pop_data's index.
#   * how='inner' (the default, now spelled out): rows whose fips has no entry in
#     pop_data are dropped.
#   * validate='many_to_one': raise instead of silently duplicating rows if
#     pop_data ever contains the same fips more than once.
#   * reset_index: joining on the right index carries over the left frame's row
#     labels; resetting restores a plain 0..n-1 index.
counties_data = pandas.merge(
    counties_data,
    pop_data,
    how='inner',
    left_on='fips',
    right_index=True,
    validate='many_to_one',
).reset_index(drop=True)

# Normalise the cumulative counts to "per million residents".
counties_data['cases_per_million'] = counties_data.cases / (counties_data.population / 1e6)
counties_data['deaths_per_million'] = counties_data.deaths / (counties_data.population / 1e6)

In [None]:
# Confirm every fips present in the nytimes data also has population data.
# NOTE(review): counties_data has already been merged with pop_data (inner join)
# in the cell above, so by this point it can only contain fips that exist in
# pop_data - this check looks like it cannot fail. Confirm whether it was meant
# to run against the pre-merge frame.
counties_fips = set(counties_data.fips.unique())
pop_fips = set(pop_data.index.unique())
assert counties_fips.issubset(pop_fips)

In [None]:
# Per-county slices of the merged frame: Los Angeles and Orange, both in California.
# The generator variable stays local, so no extra names are left in the notebook.
la_data, oc_data = (
    counties_data[(counties_data.state == 'California') & (counties_data.county == county_name)]
    for county_name in ('Los Angeles', 'Orange')
)

la_data

In [None]:
def get_data_since(data, condition_func):
    """Return the rows of ``data`` that satisfy a condition, with a day counter.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a datetime ``date`` column.
    condition_func : callable
        Called as ``condition_func(data)``; must return a boolean mask usable
        to index ``data``.

    Returns
    -------
    pandas.DataFrame
        A new frame (``data`` itself is not modified) holding only the rows
        selected by the mask, re-indexed from 0, with an extra ``days`` column:
        the number of whole days between each row's ``date`` and the earliest
        ``date`` among the selected rows.
    """
    condition = condition_func(data)
    # reset_index returns a new object, so adding a column below is safe.
    since_data = data[condition].reset_index(drop=True)
    day0 = since_data.date.min()
    # Vectorized timedelta accessor instead of a per-row Python lambda.
    since_data['days'] = (since_data.date - day0).dt.days
    return since_data

def deaths_per_mill_greater_1(data):
    """Boolean mask: rows whose ``deaths_per_million`` is at least 1.0.

    Despite the name, the comparison is inclusive (``>= 1.0``).
    """
    return data.deaths_per_million.ge(1.0)

# For each county, keep the rows from the point where deaths reach 1 per million
# and add the 'days' counter.
la_since_1_death_per_mill_data, oc_since_1_death_per_mill_data = (
    get_data_since(county_frame, deaths_per_mill_greater_1)
    for county_frame in (la_data, oc_data)
)

oc_since_1_death_per_mill_data

In [None]:
from collections import OrderedDict

# Thanks to Kenneth Kelly + Ohad Schneider:
# https://stackoverflow.com/a/13781114/920545
# Colour name -> (red, green, blue) with 0-255 components, in the order listed.
kelly_colors_dict = OrderedDict([
    ('black', (0, 0, 0)),
    ('vivid_yellow', (255, 179, 0)),
    ('strong_purple', (128, 62, 117)),
    ('vivid_orange', (255, 104, 0)),
    ('very_light_blue', (166, 189, 215)),
    ('vivid_red', (193, 0, 32)),
    ('grayish_yellow', (206, 162, 98)),
    ('medium_gray', (129, 112, 102)),

    # these aren't good for people with defective color vision:
    ('vivid_green', (0, 125, 52)),
    ('strong_purplish_pink', (246, 118, 142)),
    ('strong_blue', (0, 83, 138)),
    ('strong_yellowish_pink', (255, 122, 92)),
    ('strong_violet', (83, 55, 122)),
    ('vivid_orange_yellow', (255, 142, 0)),
    ('strong_purplish_red', (179, 40, 81)),
    ('vivid_greenish_yellow', (244, 200, 0)),
    ('strong_reddish_brown', (127, 24, 13)),
    ('vivid_yellowish_green', (147, 170, 0)),
    ('deep_yellowish_brown', (89, 51, 21)),
    ('vivid_reddish_orange', (241, 58, 19)),
    ('dark_olive_green', (35, 44, 22)),
])
# Plain list of the RGB tuples, in the same order, for positional lookup.
kelly_colors = [rgb for rgb in kelly_colors_dict.values()]

In [None]:
from bokeh.plotting import figure, output_file, output_notebook, show

# Render bokeh output inline in the notebook.
# (output_file is imported as well but unused here; earlier drafts called
# output_file("deaths.html") / output_file("deaths_million_since_1.html") first.)
output_notebook()

# Log-scale chart of deaths per million against days since the 1/million mark.
p = figure(
    title="Covid 19 - deaths since 1/million",
    x_axis_label='Days since 1 death/million',
    y_axis_label='Deaths/million',
    y_axis_type='log',
)

# One line per county; colours are taken from the Kelly palette in order.
for county_label, county_frame, line_color in zip(
    ('LA', 'OC'),
    (la_since_1_death_per_mill_data, oc_since_1_death_per_mill_data),
    kelly_colors,
):
    p.line(x='days', y='deaths_per_million', source=county_frame,
           line_width=3, color=line_color, legend_label=county_label)

# show the results
show(p)