# Texas Flu and Pneumonia deaths

In [49]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib notebook
idx=pd.IndexSlice

In [65]:
# Texas flu and pneumonia (P&I) *underlying* cause-of-death counts per month,
# exported from CDC WONDER. Query used:
# https://wonder.cdc.gov/controller/saved/D76/D91F167

# CSV header -> column name used throughout this notebook. Kept at module
# level because the multiple-cause-of-death cell reuses this mapping.
old2new_cols = {
    "Month Code": "date",
    "Deaths": "deaths"}

udf = (
    pd.read_csv(
        "data/tx-pi-deaths-underlying-deaths-2010-thru-2018.csv",
        usecols=[2,3])
    # the last rows are metadata that should not be included in the
    # dataset; they have no value in the selected columns:
    .dropna()
    .rename(columns=old2new_cols)
    # Parse the "%Y/%m" month codes in one vectorised call and replace the
    # whole column, so it takes a datetime64 dtype. (Writing the parsed
    # values back through `.loc[:, "date"]` sets them in place on recent
    # pandas and can leave the column as object dtype.)
    .assign(date=lambda frame: pd.to_datetime(frame["date"], format="%Y/%m"))
    # index by month so the frame can be sliced and resampled by date:
    .set_index("date")
)

In [72]:
# Texas flu and pneumonia (P&I) *multiple* cause-of-death counts per month,
# exported from CDC WONDER. Query used:
# https://wonder.cdc.gov/controller/saved/D77/D91F168

# Same clean-up as the underlying-death data: drop the trailing metadata
# rows, rename the columns with `old2new_cols` (defined in the cell above),
# parse the month codes and index by date.
mdf = (
    pd.read_csv(
        "data/tx-pi-deaths-multiple-deaths-2010-thru-2018.csv",
        usecols=[2,3])
    .dropna()
    .rename(columns=old2new_cols)
    # Vectorised parse of the "%Y/%m" month codes; assigning the whole
    # column (rather than writing through `.loc[:, "date"]`) guarantees a
    # datetime64 column, which the later `resample` calls rely on.
    .assign(date=lambda frame: pd.to_datetime(frame["date"], format="%Y/%m"))
    .set_index("date")
)

In [95]:
# calculate P&I multiple cause of death per year.
# "YS" (year start) groups the monthly rows by calendar year and labels each
# group with Jan 1 of that year. This yields the same index as resampling on
# year end with label="left" and then shifting by one day, without relying
# on the "Y" alias, which newer pandas releases deprecate.
mpi_df = mdf.resample("YS").sum()
mpi_df

Unnamed: 0_level_0,deaths
date,Unnamed: 1_level_1
2010-01-01,10313.0
2011-01-01,10456.0
2012-01-01,10740.0
2013-01-01,11636.0
2014-01-01,11808.0
2015-01-01,11747.0
2016-01-01,11045.0
2017-01-01,11558.0
2018-01-01,12394.0


In [119]:
# Yearly P&I multiple-cause death totals: one marker per year, plus a
# translucent line through the markers so the trend is easier to follow.
year_starts = mpi_df.index
year_totals = mpi_df.deaths

plt.figure()
plt.title("""Texas flu and pneumonia related deaths
year totals from 2010 through 2018""")
plt.scatter(year_starts, year_totals, s=100)
plt.plot(year_starts, year_totals, alpha=0.5)
plt.grid()
plt.xlabel("year")
plt.ylabel("deaths in year")


<IPython.core.display.Javascript object>

Text(0, 0.5, 'deaths in year')

# 2017-2018 Flu Season

In [115]:
# The yearly totals peak in 2018, so the 2017-2018 flu season looks like the
# most severe one. Flu seasons seem to be defined as Oct to Oct.
# Take the season's rows once and reuse them for the markers, the line and
# the tick positions.
season_rows = mdf.loc["2017-10-01":"2018-10-01",:]
season_months = season_rows.index
season_deaths = season_rows["deaths"]

plt.figure()
plt.title("""Texas 2017-2018 flu season
flu and pneumonia related deaths per month""")
plt.scatter(season_months, season_deaths, s=100)
plt.plot(season_months, season_deaths, alpha=0.5)
# one tick per data point, slanted so the dates do not overlap:
plt.xticks(season_months, rotation=45, ha="right")
plt.xlabel("date")
plt.ylabel("deaths per month")
plt.grid()
plt.tight_layout()

<IPython.core.display.Javascript object>

In [117]:
# Same 2017-2018 season slice as the per-month figure, shown as a running
# total of deaths instead of the monthly counts.
season_2017_18 = mdf.loc["2017-10-01":"2018-10-01",:]
season_dates = season_2017_18.index
season_running_total = season_2017_18["deaths"].cumsum()

plt.figure()
plt.title("""Texas 2017-2018 flu season
flu and pneumonia related cumulative deaths""")
plt.scatter(season_dates, season_running_total, s=100)
plt.plot(season_dates, season_running_total, alpha=0.5)
# one tick per data point, slanted so the dates do not overlap:
plt.xticks(season_dates, rotation=45, ha="right")
plt.xlabel("date")
plt.ylabel("cumulative deaths")
plt.grid()
plt.tight_layout()

<IPython.core.display.Javascript object>

In [181]:
# slice out 2017-2018 flu season data:
# NOTE(review): label slicing with .loc includes both endpoints, so a row
# dated 2018-10-01 (if present) is counted as part of this season - confirm
# that the second October is meant to be included.
flu_season_deaths = mdf.loc["2017-10-01":"2018-10-01",:]

# Re-label each month by its last day. Passing the MonthEnd offset object
# rather than the "M" string avoids the alias that newer pandas deprecates.
monthly_flu_deaths = flu_season_deaths.resample(pd.offsets.MonthEnd()).sum()

# running total of the monthly counts; take it from the "deaths" column
# explicitly so this keeps working if more columns are added to the frame:
monthly_flu_deaths["cumulative"] = monthly_flu_deaths["deaths"].cumsum()

# calculate days since season start (the first month-end in the slice).
# `.days` on the timedelta index gives plain integers directly, so no
# timedelta column has to be overwritten with integers afterwards.
monthly_flu_deaths["days_since"] = (
    monthly_flu_deaths.index - monthly_flu_deaths.index[0]).days
monthly_flu_deaths

Unnamed: 0_level_0,deaths,cumulative,days_since
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-10-31,881.0,881.0,0
2017-11-30,887.0,1768.0,30
2017-12-31,1341.0,3109.0,61
2018-01-31,2202.0,5311.0,92
2018-02-28,1500.0,6811.0,120
2018-03-31,1138.0,7949.0,151
2018-04-30,827.0,8776.0,181
2018-05-31,863.0,9639.0,212
2018-06-30,827.0,10466.0,242
2018-07-31,745.0,11211.0,273


# COVID-19 deaths data per month

In [164]:
# read deaths data:
import dshstexas
covid_deaths = dshstexas.read(
    "bin/data-2020-09-11/TexasCOVID19DailyCountyFatalityCountData.xlsx",
    datetime_header=True)

# we don't care about county-level data here, so let's index by date
# instead of county and keep only the sum over all columns per date:
# NOTE(review): assumes every row returned by dshstexas.read is a single
# county (no statewide "total" row that would be double counted) - confirm.
covid_deaths = covid_deaths.transpose()
covid_deaths["cumulative"] = covid_deaths.sum(axis=1)
covid_deaths = covid_deaths[["cumulative"]].copy()

# let's get deaths per day from the day-to-day change of the running total.
# diff() has no predecessor for the first date and yields NaN there; use the
# first cumulative value instead so that any deaths already counted on the
# first reported day are not dropped from the monthly totals:
daily_deaths = covid_deaths["cumulative"].diff()
daily_deaths.iloc[0] = covid_deaths["cumulative"].iloc[0]
covid_deaths["deaths"] = daily_deaths

# deaths per month. "MS" (month start) labels each month with its first day,
# which is what resampling on month end with label="left" plus a one-day
# shift produced, without the deprecated "M" alias:
monthly_covid_deaths = covid_deaths[["deaths"]].resample("MS").sum()
monthly_covid_deaths["cumulative"] = monthly_covid_deaths["deaths"].cumsum()

monthly_covid_deaths

Unnamed: 0,deaths,cumulative
2020-03-01,99.0,99.0
2020-04-01,829.0,928.0
2020-05-01,815.0,1743.0
2020-06-01,1208.0,2951.0
2020-07-01,5919.0,8870.0
2020-08-01,3671.0,12541.0
2020-09-01,114.0,12655.0


In [165]:
# Cumulative COVID-19 deaths per month against calendar date: markers for
# each month, joined by a translucent line.
covid_months = monthly_covid_deaths.index
covid_running_total = monthly_covid_deaths.cumulative

plt.figure()
plt.scatter(covid_months, covid_running_total, s=100)
plt.plot(covid_months, covid_running_total, alpha=0.5)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f574e42ebd0>]

In [173]:
# Days elapsed since the first month in the COVID-19 table, as integers.
# `.days` on the timedelta index yields the integer day counts directly;
# this avoids first storing timedeltas and then writing integers into that
# column through `.loc[:, ...]`, whose resulting dtype depends on the pandas
# version.
monthly_covid_deaths["days_since"] = (
    monthly_covid_deaths.index - monthly_covid_deaths.index[0]).days
monthly_covid_deaths

Unnamed: 0,deaths,cumulative,days_since
2020-03-01,99.0,99.0,0
2020-04-01,829.0,928.0,31
2020-05-01,815.0,1743.0,61
2020-06-01,1208.0,2951.0,92
2020-07-01,5919.0,8870.0,122
2020-08-01,3671.0,12541.0,153
2020-09-01,114.0,12655.0,184


In [174]:
# Cumulative COVID-19 deaths against days since the first month in the
# table, so the curve can later be overlaid on the flu-season curve.
covid_elapsed_days = monthly_covid_deaths.days_since
covid_total_so_far = monthly_covid_deaths.cumulative

plt.figure()
plt.scatter(covid_elapsed_days, covid_total_so_far, s=100)
plt.plot(covid_elapsed_days, covid_total_so_far, alpha=0.5)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x7f574c326490>]

# Compare 2017-2018 flu and available COVID-19 death data

In [203]:
# Overlay the cumulative 2017-2018 flu-season deaths and the cumulative
# COVID-19 deaths on a shared "days since start" axis, then save the figure.
plt.figure()
plt.title("""Texas COVID-19 and the 2017-2018 flu season
cumulative deaths comparison""")

# Legend entries report the last point of each curve. `.iloc[-1]` is used
# because `series[-1]` on a date-indexed Series is positional indexing,
# which newer pandas deprecates. The flu day count is read from the data
# (as the COVID-19 one already was) instead of being hard-coded, so the
# label cannot drift from the plotted curve.

# flu data:
flu_label="""2017-2018 flu season deaths
%d days; %d deaths""" %(
    monthly_flu_deaths.days_since.iloc[-1],
    monthly_flu_deaths.cumulative.iloc[-1])

# COVID-19 data:
covid_label="""COVID-19 deaths
%d days; %d deaths""" %(
    monthly_covid_deaths.days_since.iloc[-1],
    monthly_covid_deaths.cumulative.iloc[-1])

# Draw both curves the same way: labelled markers plus a translucent line.
# The line gets an empty label so only the markers appear in the legend.
curves = [
    (monthly_flu_deaths, "green", flu_label),
    (monthly_covid_deaths, "blue", covid_label),
]
for curve_df, curve_color, curve_label in curves:
    plt.scatter(
        curve_df.days_since,
        curve_df.cumulative,
        s=100,
        color=curve_color,
        label=curve_label)
    plt.plot(
        curve_df.days_since,
        curve_df.cumulative,
        alpha=0.5,
        color=curve_color,
        label="")

# finish plot:
plt.grid()
plt.legend()
plt.xlabel("days since flu season start\nor COVID-19 reporting start")
plt.ylabel("cumulative deaths")
plt.tight_layout()

# save figure:
plt.savefig("bin/PRELIM-2017-and-2018-flu-compared-to-COVID-19.png",dpi=200)

<IPython.core.display.Javascript object>