# Exploration of NY Times COVID-19 data - pick your county

Data from The New York Times, based on reports from state and local health agencies.
The Times publishes its own tracking page at https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html.

**Remember to `git pull upstream master` every day.** The data set includes totals up to and including yesterday.

In [21]:
import pandas as pd
pd.set_option('mode.chained_assignment', None)
import numpy as np

import bokeh.plotting
import bokeh.models
import bokeh.io

import colorcet

import tqdm

bokeh.io.output_notebook()

In [2]:
%load_ext blackcellmagic

In [2]:
# Midnight at the start of the previous calendar day (local time).
yesterday = pd.Timestamp("today").normalize() - pd.DateOffset(days=1)

In [3]:
# Load the county-level table and convert the 'date' column from strings
# to pandas datetimes so it can be used on a datetime plot axis.
df_counties = pd.read_csv("us-counties.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)

In [4]:
def get_county_data(county, state):
    """
    Makes a Pandas Data Frame with data for a given county.

    Arguments:
    `county`: county name
    `state`: state name

    Returns the rows of the module-level `df_counties` whose 'county' and
    'state' columns equal the given names. The result is an independent
    copy, so callers may add or modify columns without touching
    `df_counties` and without triggering pandas' chained-assignment
    warning. If nothing matches, the returned Data Frame is empty.
    """
    is_match = (df_counties["county"] == county) & (df_counties["state"] == state)

    # Explicit copy: callers (e.g. the plotting code) add derived columns
    # to the result, which is only safe on a frame that owns its data.
    return df_counties.loc[is_match].copy()


In [9]:
def covid_plots(county, state, measurement, timespan="all", lab_re_opening_label=False):
    """
    Plots data for a given county and state.

    Arguments:
    `county`: county name
    `state`: state name
    `measurement`: measurement to plot, such as 'cases',
                'deaths', 'new cases (7 day average)', etc. This is the
                name of a column of the county data, or one of the two
                columns derived here: 'new cases' and
                'new cases (7 day average)'.
    `timespan`: timespan to plot. Defaults to 'all'.
                Other option is 'two weeks', which limits the x-axis to
                the 14 days ending yesterday. Any other value is treated
                like 'all'.
    `lab_re_opening_label`: whether to include an annotation for
                the date of Caltech lab re-openings. Defaults to `False`.

    Shows the figure with `bokeh.io.show()` and returns nothing.
    Raises `ValueError` when there is no data for the county and state.

    Depends on get_county_data() function.
    """

    # Work on a private copy: the derived columns added below must not
    # depend on whether the helper handed back a view or a copy.
    df = get_county_data(county, state).copy()
    if df.empty:
        raise ValueError(f"No data found for {county} County, {state}.")

    yesterday = pd.to_datetime(pd.to_datetime("today").date()) - pd.DateOffset(days=1)
    two_weeks_ago = yesterday - pd.DateOffset(days=13)

    # Calculate new cases per day as the row-to-row difference of the
    # cumulative 'cases' column. Prepending 0 makes the first entry equal
    # the first cumulative count.
    # NOTE(review): assumes the rows are already in chronological order.
    cumulative_cases = np.asarray(df["cases"], dtype=float)
    df["new cases"] = np.diff(cumulative_cases, prepend=0.0)

    # Calculate 7-day average for new cases per day (the first six rows
    # are NaN because the window is not yet full).
    df["new cases (7 day average)"] = df["new cases"].rolling(window=7).mean()

    p = bokeh.plotting.figure(
        frame_height=300,
        frame_width=600,
        title=f"{county} County, {state}",
        x_axis_type="datetime",
        x_axis_label="date",
        y_axis_label=str(measurement),
    )

    p.line(
        source=df, x="date", y=measurement, line_width=2,
    )

    # Show plain numbers on the y-axis instead of scientific notation.
    p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(
        use_scientific=False
    )

    if lab_re_opening_label:
        re_opening_date = pd.to_datetime("2020-06-08")

        # Dashed vertical line at the re-opening date.
        lab_re_opening = bokeh.models.Span(
            location=re_opening_date,
            dimension="height",
            line_color="black",
            line_dash="dashed",
            line_width=2,
        )

        # Text label, anchored 35 days before the line on the x-axis and
        # 25 screen units up on the y-axis.
        re_opening_label = bokeh.models.Label(
            x=re_opening_date - pd.DateOffset(days=35),
            y=25,
            y_units="screen",
            text="lab re-opening",
        )

        p.add_layout(lab_re_opening)
        p.add_layout(re_opening_label)

    if timespan == "two weeks":
        p.x_range.start = two_weeks_ago
        p.x_range.end = yesterday

    bokeh.io.show(p)

In [22]:
# Los Angeles County, CA: 7-day average of new cases, with the lab
# re-opening annotation.
covid_plots(
    county="Los Angeles",
    state="California",
    measurement="new cases (7 day average)",
    lab_re_opening_label=True,
)

In [23]:
# Lee County, FL: 7-day average of new cases.
covid_plots(
    county="Lee",
    state="Florida",
    measurement="new cases (7 day average)",
)

In [24]:
# Suffolk County, MA: 7-day average of new cases.
covid_plots(
    county="Suffolk",
    state="Massachusetts",
    measurement="new cases (7 day average)",
)

# To Do
- overlay two(+?) counties
- per capita (county populations from https://www.census.gov/data/datasets/time-series/demo/popest/2010s-counties-total.html)