# Exploration of NY Times COVID-19 data for LA County

Data from The New York Times, based on reports from state and local health agencies.
The Times publishes its ongoing coverage at https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html.

**Remember to `git pull upstream master` every day.** The data include totals up to and including yesterday.

Start date: 2020-07-08

End date: 2020-07-12

In [192]:
import pandas as pd
# Silence pandas' SettingWithCopyWarning: later cells add columns to
# df_LA_county, which is produced by filtering df_counties.
pd.set_option('mode.chained_assignment', None)
import numpy as np

import bokeh.plotting
import bokeh.models
import bokeh.io

import colorcet

import tqdm

# Render Bokeh figures inline in the notebook's output cells.
bokeh.io.output_notebook()

In [2]:
# NOTE(review): presumably a code-formatting helper only, not needed by the
# analysis itself — confirm before relying on or removing it.
%load_ext blackcellmagic

In [193]:
# Midnight timestamp of the day before the notebook is run; used below as the
# last date expected in the data and as the right edge of the 14-day plots.
today_midnight = pd.to_datetime("today").normalize()
yesterday = today_midnight - pd.DateOffset(days=1)

## Load COVID-19 data for US counties

In [194]:
# Read the per-county cumulative counts and convert the `date` column from
# text to datetimes in the same chained expression.
df_counties = pd.read_csv("us-counties.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)

df_counties.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


## Load population data for US counties

In [195]:
# Read two spreadsheet columns (A and M) and give them readable names.
# Rows 0-2 and 4 are skipped, which leaves row 3 as the header row; the last
# six rows are dropped as well.
# NOTE(review): presumably the skipped rows are title/national-total rows and
# the footer is footnotes — confirm against the spreadsheet.
population_column_names = {"Unnamed: 0": "geographic area", 2019: "population"}

df_pop = pd.read_excel(
    "co-est2019-annres.xlsx",
    skiprows=[0, 1, 2, 4],
    skipfooter=6,
    usecols="A,M",
)
df_pop = df_pop.rename(columns=population_column_names)

df_pop.tail()

Unnamed: 0,geographic area,population
3137,".Sweetwater County, Wyoming",42343
3138,".Teton County, Wyoming",23464
3139,".Uinta County, Wyoming",20226
3140,".Washakie County, Wyoming",7805
3141,".Weston County, Wyoming",6927


In [196]:
# Split labels such as ".Autauga County, Alabama" into county and state.
#
# The split is on the LAST comma rather than on the literal "County,": with
# the literal split, any label that does not contain "County," ended up with a
# null state and the entire label as its county name, so it could never match
# a row of the case data in the merge below.
# NOTE(review): presumably this matters for parishes, boroughs and independent
# cities in the Census sheet — confirm against the spreadsheet.
area_parts = (
    df_pop["geographic area"]
    .str.strip(" .")                    # drop the leading "." and stray spaces
    .str.rsplit(",", n=1, expand=True)  # column 0: area name, column 1: state
)

# Remove only a trailing "County" so county rows keep exactly the names they
# had before (e.g. "Autauga"); other area types keep their full name.
df_pop["county"] = (
    area_parts[0].str.replace(r"\s*County$", "", regex=True).str.strip(" .")
)
df_pop["state"] = area_parts[1].str.strip(" ")
df_pop = df_pop.drop(columns="geographic area")

df_pop.head()

Unnamed: 0,population,county,state
0,55869,Autauga,Alabama
1,223234,Baldwin,Alabama
2,24686,Barbour,Alabama
3,22394,Bibb,Alabama
4,57826,Blount,Alabama


## Merge COVID-19 and population dataframes

In [197]:
# Attach each county's population to its case rows. No keys are given, so
# pandas joins on the columns the two frames share (`county` and `state`), and
# the default inner join keeps only rows present in both frames.
df_counties = df_counties.merge(df_pop)
df_counties.head()

Unnamed: 0,date,county,state,fips,cases,deaths,population
0,2020-01-21,Snohomish,Washington,53061.0,1,0,822083
1,2020-01-22,Snohomish,Washington,53061.0,1,0,822083
2,2020-01-23,Snohomish,Washington,53061.0,1,0,822083
3,2020-01-24,Snohomish,Washington,53061.0,1,0,822083
4,2020-01-25,Snohomish,Washington,53061.0,1,0,822083


### Add cases and deaths per 100,000

In [198]:
# Normalise both cumulative counts by county population, per 100,000 residents.
for measure in ("cases", "deaths"):
    df_counties[f"{measure} (per 100,000)"] = (
        df_counties[measure] / df_counties["population"] * 100000
    )

df_counties.head()

Unnamed: 0,date,county,state,fips,cases,deaths,population,"cases (per 100,000)","deaths (per 100,000)"
0,2020-01-21,Snohomish,Washington,53061.0,1,0,822083,0.121642,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0,822083,0.121642,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0,822083,0.121642,0.0
3,2020-01-24,Snohomish,Washington,53061.0,1,0,822083,0.121642,0.0
4,2020-01-25,Snohomish,Washington,53061.0,1,0,822083,0.121642,0.0


## LA County

In [199]:
# Rows for Los Angeles County, California.
# - The state is checked as well as the county name, so the selection does not
#   depend on the county name being unique across states.
# - `.copy()` makes this an independent frame: later cells add columns to it,
#   and without the copy those writes would target a filtered view of
#   df_counties (only tolerated because the chained-assignment warning is
#   silenced at the top of the notebook).
is_la_county = (df_counties["county"] == "Los Angeles") & (
    df_counties["state"] == "California"
)
df_LA_county = df_counties.loc[is_la_county].copy()

df_LA_county.tail()

Unnamed: 0,date,county,state,fips,cases,deaths,population,"cases (per 100,000)","deaths (per 100,000)"
843,2020-07-07,Los Angeles,California,6037.0,120539,3579,10039107,1200.694444,35.650581
844,2020-07-08,Los Angeles,California,6037.0,123004,3642,10039107,1225.248421,36.278127
845,2020-07-09,Los Angeles,California,6037.0,124738,3689,10039107,1242.520874,36.746296
846,2020-07-10,Los Angeles,California,6037.0,127358,3738,10039107,1268.618812,37.234387
847,2020-07-11,Los Angeles,California,6037.0,130242,3793,10039107,1297.346467,37.782245


### Total cases and deaths over time

In [200]:
# Cumulative confirmed cases in LA County over time.
p = bokeh.plotting.figure(
    title="LA County",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Total cases",
    frame_width=600,
    frame_height=300,
)
p.line(x="date", y="cases", source=df_LA_county, line_width=2)

# Show plain numbers on the y axis rather than scientific notation.
p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(use_scientific=False)

# Lab re-opening: dashed black line at the date, with a caption anchored
# 35 days earlier at y=25 (screen units).
reopening_date = pd.to_datetime("2020-06-08")

lab_re_opening = bokeh.models.Span(
    location=reopening_date,
    dimension="height",
    line_color="black",
    line_dash="dashed",
    line_width=2,
)
re_opening_label = bokeh.models.Label(
    x=reopening_date - pd.DateOffset(days=35),
    y=25,
    y_units="screen",
    text="lab re-opening",
)

p.add_layout(lab_re_opening)
p.add_layout(re_opening_label)

bokeh.io.show(p)

In [201]:
# Cumulative deaths in LA County over time (orange line).
p = bokeh.plotting.figure(
    title="LA County",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Total deaths",
    frame_width=600,
    frame_height=300,
)
p.line(x="date", y="deaths", source=df_LA_county, color="orange", line_width=2)

# Lab re-opening: dashed black line at the date, with a caption anchored
# 35 days earlier at y=25 (screen units).
reopening_date = pd.to_datetime("2020-06-08")

lab_re_opening = bokeh.models.Span(
    location=reopening_date,
    dimension="height",
    line_color="black",
    line_dash="dashed",
    line_width=2,
)
re_opening_label = bokeh.models.Label(
    x=reopening_date - pd.DateOffset(days=35),
    y=25,
    y_units="screen",
    text="lab re-opening",
)

p.add_layout(lab_re_opening)
p.add_layout(re_opening_label)

bokeh.io.show(p)

In [202]:
# Cumulative cases per 100,000 residents in LA County over time.
p = bokeh.plotting.figure(
    title="LA County",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Total cases (per 100,000)",
    frame_width=600,
    frame_height=300,
)
p.line(x="date", y="cases (per 100,000)", source=df_LA_county, line_width=2)

# Show plain numbers on the y axis rather than scientific notation.
p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(use_scientific=False)

# Lab re-opening: dashed black line at the date, with a caption anchored
# 35 days earlier at y=25 (screen units).
reopening_date = pd.to_datetime("2020-06-08")

lab_re_opening = bokeh.models.Span(
    location=reopening_date,
    dimension="height",
    line_color="black",
    line_dash="dashed",
    line_width=2,
)
re_opening_label = bokeh.models.Label(
    x=reopening_date - pd.DateOffset(days=35),
    y=25,
    y_units="screen",
    text="lab re-opening",
)

p.add_layout(lab_re_opening)
p.add_layout(re_opening_label)

bokeh.io.show(p)

In [203]:
# Cumulative deaths per 100,000 residents in LA County over time (orange line).
p = bokeh.plotting.figure(
    title="LA County",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Total deaths (per 100,000)",
    frame_width=600,
    frame_height=300,
)
p.line(
    x="date",
    y="deaths (per 100,000)",
    source=df_LA_county,
    color="orange",
    line_width=2,
)

# Lab re-opening: dashed black line at the date, with a caption anchored
# 35 days earlier at y=25 (screen units).
reopening_date = pd.to_datetime("2020-06-08")

lab_re_opening = bokeh.models.Span(
    location=reopening_date,
    dimension="height",
    line_color="black",
    line_dash="dashed",
    line_width=2,
)
re_opening_label = bokeh.models.Label(
    x=reopening_date - pd.DateOffset(days=35),
    y=25,
    y_units="screen",
    text="lab re-opening",
)

p.add_layout(lab_re_opening)
p.add_layout(re_opening_label)

bokeh.io.show(p)

### New cases per day over time

In [204]:
# First day of the 14-day window that ends on (and includes) `yesterday`,
# i.e. 13 days before `yesterday`.
trend_window_days = 14
two_weeks_ago = yesterday - pd.DateOffset(days=trend_window_days - 1)

In [205]:
# Show the earliest LA County rows before deriving daily counts from them.
df_LA_county.head()

Unnamed: 0,date,county,state,fips,cases,deaths,population,"cases (per 100,000)","deaths (per 100,000)"
680,2020-01-26,Los Angeles,California,6037.0,1,0,10039107,0.009961,0.0
681,2020-01-27,Los Angeles,California,6037.0,1,0,10039107,0.009961,0.0
682,2020-01-28,Los Angeles,California,6037.0,1,0,10039107,0.009961,0.0
683,2020-01-29,Los Angeles,California,6037.0,1,0,10039107,0.009961,0.0
684,2020-01-30,Los Angeles,California,6037.0,1,0,10039107,0.009961,0.0


In [206]:
# Daily new cases = change in the cumulative count from one row to the next.
# NOTE(review): assumes df_LA_county has one row per day in chronological
# order — confirm against the source CSV.
cases_array = np.array(df_LA_county["cases"])

# Start from NaN rather than np.empty: the first row has no previous day to
# difference against, and np.empty would leave arbitrary uninitialised memory
# there, which then leaks into the table and into the first 7-day rolling mean.
new_cases_array = np.full(np.shape(cases_array), np.nan)
new_cases_array[1:] = np.diff(cases_array)

df_LA_county["new cases"] = new_cases_array

df_LA_county.tail()

Unnamed: 0,date,county,state,fips,cases,deaths,population,"cases (per 100,000)","deaths (per 100,000)",new cases
843,2020-07-07,Los Angeles,California,6037.0,120539,3579,10039107,1200.694444,35.650581,3969.0
844,2020-07-08,Los Angeles,California,6037.0,123004,3642,10039107,1225.248421,36.278127,2465.0
845,2020-07-09,Los Angeles,California,6037.0,124738,3689,10039107,1242.520874,36.746296,1734.0
846,2020-07-10,Los Angeles,California,6037.0,127358,3738,10039107,1268.618812,37.234387,2620.0
847,2020-07-11,Los Angeles,California,6037.0,130242,3793,10039107,1297.346467,37.782245,2884.0


In [207]:
# New cases per day in LA County over the whole reporting period.
p = bokeh.plotting.figure(
    title="LA County",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="New cases per day",
    frame_width=600,
    frame_height=300,
)
p.line(x="date", y="new cases", source=df_LA_county, line_width=2)

# Show plain numbers on the y axis rather than scientific notation.
p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(use_scientific=False)

bokeh.io.show(p)

In [208]:
# New cases per day, with the x axis limited to the last 14 days.
p = bokeh.plotting.figure(
    title="LA County, 14 day trend",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="New cases per day",
    frame_width=600,
    frame_height=300,
)
p.line(x="date", y="new cases", source=df_LA_county, line_width=2)

# Restrict the visible x range to [two_weeks_ago, yesterday]; the full series
# is still plotted, only the view is narrowed.
p.x_range.start = two_weeks_ago
p.x_range.end = yesterday

# Show plain numbers on the y axis rather than scientific notation.
p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(use_scientific=False)

bokeh.io.show(p)

#### Seven-day moving average

In [209]:
# Smooth the daily counts with a 7-row rolling mean; rows that do not yet have
# a full 7-row window come out as NaN.
daily_new_cases = df_LA_county["new cases"]
df_LA_county["new cases (7 day average)"] = daily_new_cases.rolling(window=7).mean()

df_LA_county.tail()

Unnamed: 0,date,county,state,fips,cases,deaths,population,"cases (per 100,000)","deaths (per 100,000)",new cases,new cases (7 day average)
843,2020-07-07,Los Angeles,California,6037.0,120539,3579,10039107,1200.694444,35.650581,3969.0,2430.0
844,2020-07-08,Los Angeles,California,6037.0,123004,3642,10039107,1225.248421,36.278127,2465.0,2499.571429
845,2020-07-09,Los Angeles,California,6037.0,124738,3689,10039107,1242.520874,36.746296,1734.0,2438.714286
846,2020-07-10,Los Angeles,California,6037.0,127358,3738,10039107,1268.618812,37.234387,2620.0,2435.428571
847,2020-07-11,Los Angeles,California,6037.0,130242,3793,10039107,1297.346467,37.782245,2884.0,2392.142857


In [210]:
# 7-day average of new cases per day in LA County over time.
p = bokeh.plotting.figure(
    title="LA County",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="New cases per day (7 day average)",
    frame_width=600,
    frame_height=300,
)
p.line(x="date", y="new cases (7 day average)", source=df_LA_county, line_width=2)

# Show plain numbers on the y axis rather than scientific notation.
p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(use_scientific=False)

# Lab re-opening: dashed black line at the date, with a caption anchored
# 35 days earlier at y=25 (screen units).
reopening_date = pd.to_datetime("2020-06-08")

lab_re_opening = bokeh.models.Span(
    location=reopening_date,
    dimension="height",
    line_color="black",
    line_dash="dashed",
    line_width=2,
)
re_opening_label = bokeh.models.Label(
    x=reopening_date - pd.DateOffset(days=35),
    y=25,
    y_units="screen",
    text="lab re-opening",
)

p.add_layout(lab_re_opening)
p.add_layout(re_opening_label)

bokeh.io.show(p)

In [211]:
# 7-day average of new cases, with the x axis limited to the last 14 days,
# followed by a printed comparison of the two ends of that window.
p = bokeh.plotting.figure(
    frame_height=300,
    frame_width=600,
    title="LA County, 14 day trend",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="New cases per day (7 day average)",
)

p.line(
    source=df_LA_county, x="date", y="new cases (7 day average)", line_width=2,
)

# Restrict the visible x range to [two_weeks_ago, yesterday].
p.x_range.start = two_weeks_ago
p.x_range.end = yesterday

# Show plain numbers on the y axis rather than scientific notation.
p.yaxis[0].formatter = bokeh.models.formatters.BasicTickFormatter(use_scientific=False)

bokeh.io.show(p)

print("New cases (7-day average):")
for label, day in (("Yesterday", yesterday), ("Two weeks ago", two_weeks_ago)):
    # Select the value with a single .loc[rows, column] lookup and take the
    # scalar explicitly. Calling float() on the filtered Series raised an
    # opaque TypeError whenever the data had no row for `day` (for example
    # when the CSV has not been refreshed), and float() on a one-element
    # Series is deprecated in recent pandas releases.
    on_day = df_LA_county.loc[df_LA_county["date"] == day, "new cases (7 day average)"]
    if on_day.empty:
        print(f"{label} ({day:%Y-%m-%d}): no data for this date")
        continue
    new_cases = float(on_day.iloc[0])
    print(f"{label} ({day:%Y-%m-%d}): {new_cases:.1f}")

New cases (7-day average):
Yesterday (2020-07-11): 2392.1
Two weeks ago (2020-06-28): 2071.0


In [212]:
# Record the Python/IPython versions and the versions of the listed packages
# used for this run.
%load_ext watermark
%watermark -v -p numpy,pandas,bokeh,colorcet,jupyterlab

CPython 3.7.7
IPython 7.13.0

numpy 1.18.1
pandas 0.24.2
bokeh 2.0.2
colorcet 2.0.2
jupyterlab 1.2.6
