# Exploration of NY Times COVID-19 data for LA County

Data from The New York Times, based on reports from state and local health agencies.
The Times is reporting at https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html.

**Remember to `git pull upstream master` every day.** Data includes up to yesterday's total.

In [29]:
import pandas as pd
import numpy as np

import bokeh.plotting
import bokeh.models
import bokeh.io

import colorcet

import tqdm

bokeh.io.output_notebook()

In [4]:
%load_ext blackcellmagic

In [172]:
yesterday = pd.to_datetime(pd.to_datetime("today").date()) - pd.DateOffset(days=1)

## Load data for US counties

In [173]:
df_counties = pd.read_csv("us-counties.csv")

df_counties.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


## LA County

In [174]:
df_LA_county = df_counties.loc[df_counties['county']=='Los Angeles']

df_LA_county.tail()

Unnamed: 0,date,county,state,fips,cases,deaths
291954,2020-07-03,Los Angeles,California,6037.0,110310,3454
295036,2020-07-04,Los Angeles,California,6037.0,113497,3454
298121,2020-07-05,Los Angeles,California,6037.0,114993,3487
301207,2020-07-06,Los Angeles,California,6037.0,116570,3534
304297,2020-07-07,Los Angeles,California,6037.0,120539,3579


In [175]:
df_LA_county['date'] = pd.to_datetime(df_LA_county['date'])

df_LA_county.tail()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,date,county,state,fips,cases,deaths
291954,2020-07-03,Los Angeles,California,6037.0,110310,3454
295036,2020-07-04,Los Angeles,California,6037.0,113497,3454
298121,2020-07-05,Los Angeles,California,6037.0,114993,3487
301207,2020-07-06,Los Angeles,California,6037.0,116570,3534
304297,2020-07-07,Los Angeles,California,6037.0,120539,3579


### Total cases and deaths over time

In [186]:
p = bokeh.plotting.figure(
    frame_height=300,
    frame_width=600,
    title="LA County",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Total cases",
)

p.line(
    source=df_LA_county,
    x='date',
    y='cases',
    line_width=2,
)

p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(use_scientific=False)

# Lab re-opening

lab_re_opening = bokeh.models.Span(
    location = pd.to_datetime("2020-06-08"),
    dimension='height',
    line_color='black',
    line_dash='dashed',
    line_width=2,
)

re_opening_label = bokeh.models.Label(
    x=pd.to_datetime("2020-06-08") - pd.DateOffset(days=35),
    y=25,
    y_units='screen',
    text='lab re-opening')

p.add_layout(lab_re_opening)
p.add_layout(re_opening_label)

bokeh.io.show(p)

In [187]:
p = bokeh.plotting.figure(
    frame_height=300,
    frame_width=600,
    title="LA County",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Total deaths",
)

p.line(
    source=df_LA_county,
    x='date',
    y='deaths',
    color='orange',
    line_width=2,
)

# Lab re-opening

lab_re_opening = bokeh.models.Span(
    location = pd.to_datetime("2020-06-08"),
    dimension='height',
    line_color='black',
    line_dash='dashed',
    line_width=2,
)

re_opening_label = bokeh.models.Label(
    x=pd.to_datetime("2020-06-08") - pd.DateOffset(days=35),
    y=25,
    y_units='screen',
    text='lab re-opening')

p.add_layout(lab_re_opening)
p.add_layout(re_opening_label)

bokeh.io.show(p)

### New cases per day over time

In [178]:
# Two weeks ago from today
two_weeks_ago = yesterday - pd.DateOffset(days=13)

In [179]:
df_LA_county.head()

Unnamed: 0,date,county,state,fips,cases,deaths
9,2020-01-26,Los Angeles,California,6037.0,1,0
14,2020-01-27,Los Angeles,California,6037.0,1,0
19,2020-01-28,Los Angeles,California,6037.0,1,0
24,2020-01-29,Los Angeles,California,6037.0,1,0
29,2020-01-30,Los Angeles,California,6037.0,1,0


In [180]:
cases_array = np.array(df_LA_county["cases"])
new_cases_array = np.empty(np.shape(cases_array))

for i, n in enumerate(cases_array):
    if i > 0:
        new_cases_array[i] = cases_array[i] - cases_array[i-1]
        
df_LA_county["new cases"] = new_cases_array

df_LA_county.tail()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,date,county,state,fips,cases,deaths,new cases
291954,2020-07-03,Los Angeles,California,6037.0,110310,3454,2643.0
295036,2020-07-04,Los Angeles,California,6037.0,113497,3454,3187.0
298121,2020-07-05,Los Angeles,California,6037.0,114993,3487,1496.0
301207,2020-07-06,Los Angeles,California,6037.0,116570,3534,1577.0
304297,2020-07-07,Los Angeles,California,6037.0,120539,3579,3969.0


In [188]:
p = bokeh.plotting.figure(
    frame_height=300,
    frame_width=600,
    title="LA County",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="New cases per day",
)

p.line(
    source=df_LA_county,
    x='date',
    y='new cases',
    line_width=2,
)

p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(use_scientific=False)

bokeh.io.show(p)

In [189]:
p = bokeh.plotting.figure(
    frame_height=300,
    frame_width=600,
    title="LA County, 14 day trend",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="New cases per day",
)

p.line(
    source=df_LA_county,
    x='date',
    y='new cases',
    line_width=2,
)

p.x_range.start = two_weeks_ago
p.x_range.end = yesterday

p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(use_scientific=False)

bokeh.io.show(p)

#### Seven-day moving average

In [183]:
df_LA_county['new cases (7 day average)'] = df_LA_county['new cases'].rolling(window=7).mean()

df_LA_county.tail()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,date,county,state,fips,cases,deaths,new cases,new cases (7 day average)
291954,2020-07-03,Los Angeles,California,6037.0,110310,3454,2643.0,2439.714286
295036,2020-07-04,Los Angeles,California,6037.0,113497,3454,3187.0,2589.428571
298121,2020-07-05,Los Angeles,California,6037.0,114993,3487,1496.0,2442.714286
301207,2020-07-06,Los Angeles,California,6037.0,116570,3534,1577.0,2256.857143
304297,2020-07-07,Los Angeles,California,6037.0,120539,3579,3969.0,2430.0


In [190]:
p = bokeh.plotting.figure(
    frame_height=300,
    frame_width=600,
    title="LA County",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="New cases per day (7 day average)",
)

p.line(
    source=df_LA_county,
    x='date',
    y='new cases (7 day average)',
    line_width=2,
)

p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(use_scientific=False)

# Lab re-opening

lab_re_opening = bokeh.models.Span(
    location = pd.to_datetime("2020-06-08"),
    dimension='height',
    line_color='black',
    line_dash='dashed',
    line_width=2,
)

re_opening_label = bokeh.models.Label(
    x=pd.to_datetime("2020-06-08") - pd.DateOffset(days=35),
    y=25,
    y_units='screen',
    text='lab re-opening')

p.add_layout(lab_re_opening)
p.add_layout(re_opening_label)

bokeh.io.show(p)

In [191]:
p = bokeh.plotting.figure(
    frame_height=300,
    frame_width=600,
    title="LA County, 14 day trend",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="New cases per day (7 day average)",
)

p.line(
    source=df_LA_county, x="date", y="new cases (7 day average)", line_width=2,
)

p.x_range.start = two_weeks_ago
p.x_range.end = yesterday

p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(use_scientific=False)

bokeh.io.show(p)

print("New cases (7-day average):")
new_cases = float(
    df_LA_county.loc[df_LA_county["date"] == yesterday]["new cases (7 day average)"]
)
print(f"Yesterday ({yesterday:%Y-%m-%d}): {new_cases:.1f}")
new_cases = float(
    df_LA_county.loc[df_LA_county["date"] == two_weeks_ago]["new cases (7 day average)"]
)
print(f"Two weeks ago ({two_weeks_ago:%Y-%m-%d}): {new_cases:.1f}")

New cases (7-day average):
Yesterday (2020-07-07): 2430.0
Two weeks ago (2020-06-24): 1757.3
