### setup

In [1]:
%load_ext lab_black

In [2]:
import requests
import urllib.request
import numpy as np
import datetime
import random

In [3]:
import pandas as pd

# import gspread
# from oauth2client.service_account import ServiceAccountCredentials

In [4]:
import altair as alt
import altair_stiles as altstiles

# Register the theme object exposed by altair_stiles under the name "stiles",
# then make it the active Altair theme for the charts in this notebook.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('grid')

In [5]:
import os

from datawrapper import Datawrapper

# The Datawrapper API token is read from the environment so it is never stored
# in the notebook. Export DATAWRAPPER_ACCESS_TOKEN before starting the kernel;
# a missing variable fails here with a KeyError naming it.
# NOTE(review): the token that used to be hardcoded in this cell has been
# exposed wherever the notebook was shared and should be revoked and reissued.
dw = Datawrapper(access_token=os.environ["DATAWRAPPER_ACCESS_TOKEN"])

In [6]:
# scope = ["https://spreadsheets.google.com/feeds"]
# credentials = ServiceAccountCredentials.from_json_keyfile_name(
#    "jupyter-integration-credentials.json", scope
# )
# gc = gspread.authorize(credentials)

In [7]:
# spreadsheet_key = "1_RXzXkHPEyDAiDKmz98wTSC9UNo-8OyjGTT540X_vJk"
# book = gc.open_by_key(spreadsheet_key)

In [8]:
# Raise pandas' display limits so frames with up to 1000 columns / 1000 rows
# are rendered without truncation.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

In [9]:
# Opening the worksheet by using Worksheet ID
# workbook = gc.open_by_key(spreadsheet_key)
# Selecting which sheet to pulling the data
# sheet = workbook.worksheet("Sheet1")
# Pulling the data and transform it to the data frame
# values = sheet.get_all_values()

#### read in data

In [10]:
# Load the NCHS quarterly provisional CSV from the local data/raw directory.
# NOTE(review): the download source and date of this file are not recorded in
# the notebook — worth adding for provenance.
raw = pd.read_csv("data/raw/NCHS_quarterly_provisional.csv")

#### clean

In [11]:
# Normalise column headers to snake_case: lowercase, spaces become underscores.
raw.columns = [column.lower().replace(" ", "_") for column in raw.columns]

In [12]:
# Turn each "year_and_quarter" label into the date string of the quarter's
# last day by swapping the " Q<n>" suffix for "-MM-DD".
# regex=False makes every replacement a literal substring swap, so the result
# does not depend on the pandas version's default for `regex`.
raw["date"] = (
    raw["year_and_quarter"]
    .str.replace(" Q1", "-03-31", regex=False)
    .str.replace(" Q2", "-06-30", regex=False)
    .str.replace(" Q3", "-09-30", regex=False)
    .str.replace(" Q4", "-12-31", regex=False)
)

In [13]:
# Parse the quarter-end date strings into datetimes, overwriting the "date"
# column in place.
raw["date"] = pd.to_datetime(raw["date"])

### covid by itself
Age-adjusted rates, using the 3-month period rather than the 12-month period, to capture shifts from vaccines and variants.

In [14]:
# Rows for the 3-month, age-adjusted COVID-19 series that report an overall
# rate. The columns at positions 8 through 68 of `raw` are then dropped.
# NOTE(review): the positional slice assumes a fixed column layout in the CSV —
# confirm it still matches if the source file changes.
covid = raw.loc[
    raw["time_period"].eq("3-month period")
    & raw["cause_of_death"].eq("COVID-19")
    & raw["rate_type"].eq("Age-adjusted")
    & raw["overall_rate"].notna()
].drop(columns=raw.columns[8:69])

In [15]:
# Add a float-typed "rate" column derived from "overall_rate".
covid = covid.assign(rate=covid["overall_rate"].astype(float))

In [16]:
# Bar chart of the COVID-19 rate by quarter-end date.
(
    alt.Chart(covid)
    .mark_bar()
    .encode(x=alt.X("date"), y=alt.Y("rate"))
    .properties(width=500)
)

In [17]:
# Same series as a line chart with point markers.
(
    alt.Chart(covid)
    .mark_line(point=True)
    .encode(x=alt.X("date"), y=alt.Y("rate"))
    .properties(width=500)
)

In [18]:
# Keep just the two columns the chart needs.
covid_slim = covid.loc[:, ["date", "rate"]]

In [19]:
# Display the slim date/rate frame.
covid_slim

Unnamed: 0,date,rate
200,2020-03-31,6.7
244,2020-06-30,109.3
288,2020-09-30,69.8
332,2020-12-31,152.7
376,2021-03-31,157.9
420,2021-06-30,34.9
464,2021-09-30,115.7
508,2021-12-31,107.5


In [20]:
# Send covid_slim to Datawrapper as the data for the chart with id "kbU31".
# This is a network call that modifies the remote chart's data.
dw.add_data(chart_id="kbU31", data=covid_slim)

<Response [204]>

### causes of interest?

In [21]:
# Causes of death to compare. These strings are matched exactly against the
# "cause_of_death" column when filtering `raw`.
causes = [
    "Suicide",
    "Heart disease",
    "COVID-19",
    "Chronic liver disease and cirrhosis",
    "Unintentional injuries",
]

In [22]:
# Count rows per "time_period" value to see which period types the file holds.
raw.value_counts("time_period")

time_period
12 months ending with quarter    528
3-month period                   528
dtype: int64

In [23]:
# Rows for the trailing-12-month, age-adjusted series of the selected causes
# that report an overall rate. The columns at positions 8 through 68 of `raw`
# are then dropped.
# NOTE(review): the positional slice assumes a fixed column layout in the CSV —
# confirm it still matches if the source file changes.
causes_of_interest = raw.loc[
    raw["time_period"].eq("12 months ending with quarter")
    & raw["cause_of_death"].isin(causes)
    & raw["rate_type"].eq("Age-adjusted")
    & raw["overall_rate"].notna()
].drop(columns=raw.columns[8:69])

In [24]:
# Add a float-typed "rate" column derived from "overall_rate".
causes_of_interest = causes_of_interest.assign(
    rate=causes_of_interest["overall_rate"].astype(float)
)

In [25]:
# Keep just the columns used for charting and pivoting.
causes_of_interest_slim = causes_of_interest.loc[
    :, ["date", "cause_of_death", "rate"]
]

In [26]:
# Keep only rows whose "year_and_quarter" label contains "Q4" and add the
# calendar year of each row's date.
# The frame is built with a single .loc selection followed by .assign, so the
# new "year" column lands on an independent frame. Assigning onto the result of
# chained indexing (df[mask][cols]) raises SettingWithCopyWarning instead.
causes_of_interest_annual = causes_of_interest.loc[
    causes_of_interest["year_and_quarter"].str.contains("Q4"),
    ["date", "cause_of_death", "rate"],
].assign(year=lambda frame: frame["date"].dt.year)

In [27]:
# One line per cause of death on a shared rate axis.
(
    alt.Chart(causes_of_interest_slim)
    .mark_line()
    .encode(
        x=alt.X("date"),
        y=alt.Y("rate"),
        color=alt.Color("cause_of_death:N"),
    )
)

In [28]:
# Small-multiple line charts: one 300x120 panel per cause, two panels per row.
(
    alt.Chart(causes_of_interest_slim)
    .mark_line()
    .encode(x="date", y="rate")
    .properties(width=300, height=120)
    .facet(facet=alt.Facet("cause_of_death:O"), columns=2)
)

In [29]:
# Small-multiple bar charts: one 120x120 panel per cause, two panels per row.
(
    alt.Chart(causes_of_interest_slim)
    .mark_bar()
    .encode(x="date", y="rate")
    .properties(width=120, height=120)
    .facet(facet=alt.Facet("cause_of_death:O"), columns=2)
)

Explore the quarter-over-quarter increases and decreases in the rate of each cause of death. (Presumably most of these would be increases, if these causes are pushing life expectancy down?)

In [30]:
# Reshape to wide form: one row per date, one rate column per cause of death,
# ordered by date, with "date" restored as an ordinary column.
chg = (
    causes_of_interest_slim.pivot(
        index="date",
        columns="cause_of_death",
        values="rate",
    )
    .sort_values("date")
    .reset_index()
)

In [31]:
# Display the wide frame of rates (one column per cause) before differencing.
chg

cause_of_death,date,COVID-19,Chronic liver disease and cirrhosis,Heart disease,Suicide,Unintentional injuries
0,2019-03-31,,11.0,161.4,14.2,47.7
1,2019-06-30,,11.2,161.7,14.1,48.0
2,2019-09-30,,11.2,161.8,14.0,48.4
3,2019-12-31,,11.3,161.5,13.9,49.3
4,2020-03-31,1.7,11.5,160.8,13.9,50.3
5,2020-06-30,29.1,11.8,162.7,13.7,52.9
6,2020-09-30,46.5,12.5,165.5,13.6,55.7
7,2020-12-31,85.0,13.3,168.2,13.5,57.6
8,2021-03-31,120.8,13.9,167.5,13.5,59.9
9,2021-06-30,106.2,14.2,173.3,13.6,62.0


In [32]:
# Replace the heart disease rate with its change from the previous row.
# The column is overwritten in place, so re-running this cell differences the
# already-differenced values.
chg["Heart disease"] = chg["Heart disease"].diff()

In [33]:
# Replace each remaining non-COVID rate with its change from the previous row.
# The columns are overwritten in place, so re-running this cell differences
# the already-differenced values.
chg["Chronic liver disease and cirrhosis"] = chg[
    "Chronic liver disease and cirrhosis"
].diff()
chg["Suicide"] = chg["Suicide"].diff()
chg["Unintentional injuries"] = chg["Unintentional injuries"].diff()

In [34]:
# Drop the COVID-19 column, which was not converted to a change like the
# other causes. The label is passed as a list (not a set), and
# errors="ignore" lets the cell be re-run after the column is already gone
# instead of raising KeyError.
chg = chg.drop(columns=["COVID-19"], errors="ignore")

In [35]:
# Check which columns remain after dropping COVID-19.
chg.columns

Index(['date', 'Chronic liver disease and cirrhosis', 'Heart disease',
       'Suicide', 'Unintentional injuries'],
      dtype='object', name='cause_of_death')

In [36]:
# Reshape the per-cause change columns back to long form for faceted charting:
# one row per (date, cause_of_death) pair with the change in "value".
# var_name and value_name are set explicitly because the chart below reads the
# "cause_of_death" and "value" fields. Without var_name, the column is only
# called "cause_of_death" when chg.columns still carries that name from the
# pivot; otherwise melt falls back to "variable".
chg_long = pd.melt(
    chg,
    id_vars="date",
    value_vars=[
        "Chronic liver disease and cirrhosis",
        "Heart disease",
        "Suicide",
        "Unintentional injuries",
    ],
    var_name="cause_of_death",
    value_name="value",
)

In [37]:
# Small-multiple bars of the change in rate for each cause, two panels per
# row. Bars are "#00eeef" where the change is above zero and orange otherwise.
(
    alt.Chart(chg_long)
    .mark_bar()
    .encode(
        x="date",
        y="value",
        color=alt.condition(
            alt.datum.value > 0,
            alt.value("#00eeef"),
            alt.value("orange"),
        ),
    )
    .properties(width=120, height=120)
    .facet(facet=alt.Facet("cause_of_death"), columns=2)
)