# Analyzing CDC deaths in California re: Covid-19

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
import altair as alt
import altair_latimes as lat

In [4]:
# Register the theme object from altair_latimes under the name "latimes" and
# enable that same name. The earlier call enabled "stiles", a name this
# notebook never registers.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [5]:
# Notebook display settings: raise the pandas column/row display limits,
# switch off Altair's max-rows check, and show cell text at full width.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
alt.data_transformers.disable_max_rows()
pd.set_option("display.max_colwidth", None)

### Load deaths data from past years and 2020

In [6]:
# metadata: https://data.cdc.gov/NCHS/Weekly-Counts-of-Deaths-by-State-and-Select-Causes/muzy-jte6

In [7]:
# CSV download link for CDC dataset muzy-jte6; loaded later as the "current" data.
url_current = "https://data.cdc.gov/api/views/muzy-jte6/rows.csv?accessType=DOWNLOAD"

In [8]:
# metadata: https://data.cdc.gov/NCHS/Weekly-Counts-of-Deaths-by-State-and-Select-Causes/3yf8-kanr

In [9]:
# CSV download link for CDC dataset 3yf8-kanr; loaded later as the "historical" data.
url_history = "https://data.cdc.gov/api/views/3yf8-kanr/rows.csv?accessType=DOWNLOAD"

### Read the current data (2020-21) and the historical data (2014-2019)

In [10]:
# Load the current weekly counts. The MMWR year and week are read as text
# and the week-ending date is parsed into a datetime column.
current_text_columns = {"MMWR Year": str, "MMWR Week": str}
df_current = pd.read_csv(
    url_current,
    dtype=current_text_columns,
    parse_dates=["Week Ending Date"],
)

In [11]:
# Load the historical weekly counts with the same column typing as the
# current-data loader: MMWR year AND week are read as text, and the
# week-ending date is parsed into a datetime column. Reading the week as
# text here keeps the "MMWR Week" column a single type once the two frames
# are concatenated (previously it was numeric here and text in df_current).
df_history = pd.read_csv(
    url_history,
    parse_dates=["Week Ending Date"],
    dtype={"MMWR Year": str, "MMWR Week": str},
)

### Clean up slight differences in two dataframes' column headers

In [12]:
# The historical file spells this header with two spaces; normalize it to
# the single-space "All Cause" that is selected from both frames below.
df_history = df_history.rename(columns={"All  Cause": "All Cause"})

### Concatenate them into one frame

In [13]:
# Columns taken from both source frames.
shared_columns = [
    "Jurisdiction of Occurrence",
    "MMWR Year",
    "MMWR Week",
    "Week Ending Date",
    "All Cause",
    "Natural Cause",
]

# Stack the historical rows on top of the current rows, keeping only the
# shared columns from each.
df = pd.concat([frame[shared_columns] for frame in (df_history, df_current)])

### Strip out junk from column headers

In [14]:
# Normalize the headers: trim whitespace, lowercase, then apply each literal
# (non-regex) substitution in order.
header_substitutions = (
    (" ", "_"),
    (":", ""),
    ("/", "_"),
    (",", "_"),
    ("*", ""),
    ("(s)", "s"),
)

cleaned_headers = df.columns.str.strip().str.lower()
for target, substitute in header_substitutions:
    cleaned_headers = cleaned_headers.str.replace(target, substitute, regex=False)

df.columns = cleaned_headers

### Make a copy of the dataframe, excluding weeks that end in 2021 or later

In [15]:
# Keep only rows whose week-ending date is before 2021-01-01, as an
# independent copy so later edits do not touch `df`.
# NOTE(review): the cutoff is on the week-ending date, not on mmwr_year, so a
# 2020 MMWR week that ends in early January 2021 would also be dropped —
# confirm that is intended.
deaths = df[df["week_ending_date"] < "2021/01/01"].copy()

### Make the data tall

In [16]:
# Reshape wide -> tall: the two cause columns become (cause, count) pairs,
# repeated for each jurisdiction/year/week/date combination.
identifier_columns = [
    "jurisdiction_of_occurrence",
    "mmwr_year",
    "mmwr_week",
    "week_ending_date",
]
cause_columns = ["all_cause", "natural_cause"]

deaths_melt = deaths.melt(
    id_vars=identifier_columns,
    value_vars=cause_columns,
    var_name="cause",
    value_name="count",
)

### Set column types

In [17]:
# Year is kept as text (it is compared against "2020" later); week becomes
# an integer.
deaths_melt = deaths_melt.astype({"mmwr_year": str, "mmwr_week": int})

### Just deaths in California

In [18]:
# Keep only the rows whose jurisdiction is California.
is_california = deaths_melt["jurisdiction_of_occurrence"].eq("California")
ca_deaths = deaths_melt[is_california]

### What's the mean number of deaths per week, 2014-2019?

In [19]:
# Preview the first ten California rows.
ca_deaths.head(10)

Unnamed: 0,jurisdiction_of_occurrence,mmwr_year,mmwr_week,week_ending_date,cause,count
1565,California,2014,1,2014-01-04,all_cause,5263.0
1566,California,2014,2,2014-01-11,all_cause,5362.0
1567,California,2014,3,2014-01-18,all_cause,5502.0
1568,California,2014,4,2014-01-25,all_cause,5326.0
1569,California,2014,5,2014-02-01,all_cause,5184.0
1570,California,2014,6,2014-02-08,all_cause,5176.0
1571,California,2014,7,2014-02-15,all_cause,5075.0
1572,California,2014,8,2014-02-22,all_cause,4933.0
1573,California,2014,9,2014-03-01,all_cause,4741.0
1574,California,2014,10,2014-03-08,all_cause,4743.0


In [20]:
# Baseline rows: all-cause counts for every MMWR year other than 2020.
year_is_not_2020 = ca_deaths["mmwr_year"].ne("2020")
cause_is_all = ca_deaths["cause"].eq("all_cause")
all_ca_deaths_then = ca_deaths[year_is_not_2020 & cause_is_all]

In [21]:
# Comparison rows: all-cause counts for MMWR year 2020 only.
year_is_2020 = ca_deaths["mmwr_year"].eq("2020")
cause_is_all = ca_deaths["cause"].eq("all_cause")
all_ca_deaths_now = ca_deaths[year_is_2020 & cause_is_all]

In [22]:
# Mean count for each MMWR week number across the baseline rows, returned
# as a flat frame with columns mmwr_week and count.
baseline_by_week = all_ca_deaths_then.groupby("mmwr_week")
ca_deaths_then_grouped = baseline_by_week[["count"]].mean().reset_index()

In [23]:
# Mean count per (week number, week-ending date) pair in the 2020 rows,
# returned as a flat frame; grouping on the date as well keeps it available
# as a column for the chart's x axis.
now_by_week = all_ca_deaths_now.groupby(["mmwr_week", "week_ending_date"])
ca_deaths_now_grouped = now_by_week[["count"]].mean().reset_index()

In [24]:
# Store the baseline weekly means as whole numbers.
# NOTE(review): a float -> int cast drops the fractional part rather than
# rounding to nearest — confirm that is acceptable for the published figures.
ca_deaths_then_grouped["count"] = ca_deaths_then_grouped["count"].astype(int)

In [25]:
# Pair each baseline week with the matching 2020 week. An inner join keeps
# only week numbers present in both frames; the two "count" columns come out
# as count_x (baseline) and count_y (2020).
then_now = pd.merge(
    left=ca_deaths_then_grouped,
    right=ca_deaths_now_grouped,
    how="inner",
    on="mmwr_week",
)

In [26]:
# Give the merged columns reader-facing names: the baseline mean becomes
# "Normal" and the 2020 value becomes "2020".
display_names = {"mmwr_week": "week", "count_x": "Normal", "count_y": "2020"}
then_now = then_now.rename(columns=display_names)

In [27]:
# Reshape to tall form so "Normal" and "2020" become values of a single
# "year" column, each row carrying its count.
then_now_melt = then_now.melt(
    id_vars=["week", "week_ending_date"],
    value_vars=["Normal", "2020"],
    var_name="year",
    value_name="count",
)

In [28]:
# Make sure the date column is datetime64[ns] before charting it on a
# temporal axis.
then_now_melt = then_now_melt.astype({"week_ending_date": "datetime64[ns]"})

In [29]:
# Check the column types of the tall frame.
then_now_melt.dtypes

week                         int64
week_ending_date    datetime64[ns]
year                        object
count                      float64
dtype: object

In [30]:
# Write the tall then/now table to CSV without the index column. The parent
# folder is created first so the export does not fail when "output/" does
# not exist yet.
from pathlib import Path

csv_path = Path("output/then_now_melt_ca.csv")
csv_path.parent.mkdir(parents=True, exist_ok=True)
then_now_melt.to_csv(csv_path, index=False)

In [31]:
# Order the rows by series label (ascending), so the "2020" rows come before
# the "Normal" rows.
then_now_melt = then_now_melt.sort_values(by="year")

### Chart it

In [32]:
# Colour assignment: "2020" gets the lighter blue, "Normal" the darker one.
domain = ["2020", "Normal"]
range_ = ["#82c6df", "#3580b1"]

# Horizontal axis: week-ending dates, labelled with the "%B" format, no
# gridlines and no axis title.
x_encoding = alt.X(
    "week_ending_date:T",
    title="",
    axis=alt.Axis(format="%B", tickCount=10, grid=False),
)

# Vertical axis: counts on a fixed 3,000-10,000 scale with light gridlines.
# stack=None is passed so the two series are not stacked.
count_axis = alt.Axis(
    tickSize=0,
    domainOpacity=0,
    tickCount=4,
    offset=4,
    gridWidth=0.6,
    gridColor="#dddddd",
    format="",
)
y_encoding = alt.Y(
    "count",
    title=" ",
    stack=None,
    axis=count_axis,
    scale=alt.Scale(domain=(3000, 10000)),
)

# One colour per series label, using the domain/range pairs defined above.
color_encoding = alt.Color(
    "year", title=" ", scale=alt.Scale(domain=domain, range=range_)
)

# Semi-transparent area marks, one per series.
area_layer = (
    alt.Chart(then_now_melt)
    .mark_area(opacity=0.5)
    .encode(x=x_encoding, y=y_encoding, color=color_encoding)
)

# Size and title the chart, hide the view border, and put the legend on top.
chart = (
    area_layer.properties(
        width=620,
        height=350,
        title="Average weekly deaths in California vs. deaths in 2020",
    )
    .configure_view(strokeOpacity=0)
    .configure_legend(orient="top", symbolType="square")
)
chart

In [33]:
# Export the chart to a PNG file (relative path, so it lands in the working directory).
chart.save("visualization_ca.png")