# Analyzing CDC deaths in the United States re: Covid-19

In [1]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat
# Register the theme object from altair_latimes under the name 'latimes',
# then make it the active Altair theme for every chart rendered below.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

### Load deaths data from past years and 2020

In [2]:
# metadata: https://data.cdc.gov/NCHS/Weekly-Counts-of-Deaths-by-State-and-Select-Causes/muzy-jte6

In [3]:
# CSV download endpoint on data.cdc.gov for dataset muzy-jte6
# (used below as the "current" 2020-21 weekly deaths data).
url_current = "https://data.cdc.gov/api/views/muzy-jte6/rows.csv?accessType=DOWNLOAD"

In [4]:
# metadata: https://data.cdc.gov/NCHS/Weekly-Counts-of-Deaths-by-State-and-Select-Causes/3yf8-kanr

In [5]:
# CSV download endpoint on data.cdc.gov for dataset 3yf8-kanr
# (used below as the "historical" 2014-2019 weekly deaths data).
url_history = "https://data.cdc.gov/api/views/3yf8-kanr/rows.csv?accessType=DOWNLOAD"

### Read the current data (2020-21) and the historical data (2014-2019)

In [6]:
# Download the current-period file. The MMWR year/week identifiers are read
# as text, and the week-ending column is parsed into datetimes.
current_dtypes = {"MMWR Year": str, "MMWR Week": str}
current_date_columns = ["Week Ending Date"]
df_current = pd.read_csv(url_current, parse_dates=current_date_columns, dtype=current_dtypes)

In [7]:
# Download the historical file with the same column typing as df_current:
# both MMWR identifiers are read as text so that the concatenated frame does
# not end up with a mixed int/str "MMWR Week" column. The week number is cast
# to int later, once the two frames have been combined.
df_history = pd.read_csv(
    url_history,
    parse_dates=["Week Ending Date"],
    dtype={"MMWR Year": str, "MMWR Week": str},
)

### Clean up slight differences in two dataframes' column headers

In [8]:
# The historical file spells this header with two spaces ("All  Cause");
# normalize it to the single-space spelling used when selecting columns below.
header_fixes = {"All  Cause": "All Cause"}
df_history = df_history.rename(columns=header_fixes)

### Concatenate them into one frame

In [9]:
# Columns taken from both extracts; everything else is dropped.
shared_columns = [
    "Jurisdiction of Occurrence",
    "MMWR Year",
    "MMWR Week",
    "Week Ending Date",
    "All Cause",
    "Natural Cause",
]

# Stack the historical rows on top of the current rows. Each frame keeps its
# own row index (no re-indexing is requested).
frames = [df_history[shared_columns], df_current[shared_columns]]
df = pd.concat(frames)

### Strip out junk from column headers

In [10]:
# Normalize headers to snake_case: trim, lowercase, then apply each literal
# (non-regex) substitution in the listed order.
header_substitutions = (
    (" ", "_"),
    (":", ""),
    ("/", "_"),
    (",", "_"),
    ("*", ""),
    ("(s)", "s"),
)

cleaned_headers = df.columns.str.strip().str.lower()
for old_text, new_text in header_substitutions:
    cleaned_headers = cleaned_headers.str.replace(old_text, new_text, regex=False)

df.columns = cleaned_headers

### Make a copy of the dataframe, excluding this year (and keeping only the U.S. as a jurisdiction)

In [11]:
# Keep weeks ending before 2021 and only the national ('United States') rows.
# .copy() gives an independent frame rather than a view of df.
ends_before_2021 = df['week_ending_date'] < '2021/01/01'
is_national = df['jurisdiction_of_occurrence'] == 'United States'
deaths = df[ends_before_2021 & is_national].copy()

### Make the data tall

In [12]:
# Reshape wide -> tall: the two cause columns become rows, labelled in
# 'cause', with their values in 'count'.
deaths_melt = deaths.melt(
    id_vars=['jurisdiction_of_occurrence', 'mmwr_year', 'mmwr_week', 'week_ending_date'],
    value_vars=['all_cause', 'natural_cause'],
    var_name='cause',
    value_name='count',
)

In [13]:
# Year stays text (it is compared against '2020' below); week becomes an
# integer so it can serve as a numeric grouping/merge key.
deaths_melt = deaths_melt.astype({'mmwr_year': str, 'mmwr_week': int})

### Just the United States

In [14]:
# Independent working copy of the tall frame. Rows were already limited to
# the 'United States' jurisdiction when `deaths` was built.
usa_deaths = deaths_melt.copy(deep=True)

### What's the mean number of deaths per week, 2014-2019?

In [15]:
# Baseline rows: all-cause deaths from every MMWR year other than 2020.
not_2020 = usa_deaths['mmwr_year'] != '2020'
baseline_is_all_cause = usa_deaths['cause'] == 'all_cause'
all_usa_deaths_then = usa_deaths[not_2020 & baseline_is_all_cause]

In [16]:
# Comparison rows: all-cause deaths from MMWR year 2020 only.
in_2020 = usa_deaths['mmwr_year'] == '2020'
current_is_all_cause = usa_deaths['cause'] == 'all_cause'
all_usa_deaths_now = usa_deaths[in_2020 & current_is_all_cause]

In [17]:
# Mean all-cause count for each MMWR week number across the baseline years.
baseline_weekly_mean = all_usa_deaths_then.groupby(['mmwr_week'])['count'].mean()
usa_deaths_then_grouped = baseline_weekly_mean.reset_index()

In [18]:
# 2020 counts keyed by week number; week_ending_date is part of the grouping
# key so it is carried through as a column for the chart's time axis.
current_weekly_mean = all_usa_deaths_now.groupby(['mmwr_week', 'week_ending_date'])['count'].mean()
usa_deaths_now_grouped = current_weekly_mean.reset_index()

In [19]:
# Express the baseline mean as whole deaths. Round to the nearest integer
# before casting: a bare astype(int) truncates toward zero, which would bias
# every weekly baseline downward by up to one.
usa_deaths_then_grouped['count'] = usa_deaths_then_grouped['count'].round().astype(int)

In [20]:
# Pair each 2020 week with the baseline mean for the same week number.
# Inner join: only week numbers present on both sides survive. The two
# 'count' columns get pandas' default _x (baseline) / _y (2020) suffixes.
then_now = pd.merge(
    usa_deaths_then_grouped,
    usa_deaths_now_grouped,
    how='inner',
    on='mmwr_week',
)

In [21]:
# Give the merged columns reader-facing names; 'Normal' and '2020' become the
# series labels in the chart legend.
display_names = {"mmwr_week": "week", "count_x": "Normal", "count_y": "2020"}
then_now = then_now.rename(columns=display_names)

In [22]:
# Back to tall form for charting: one row per (week, series), with the series
# label in 'year' and its value in 'count'.
then_now_melt = then_now.melt(
    id_vars=['week', 'week_ending_date'],
    value_vars=['Normal', '2020'],
    var_name='year',
    value_name='count',
)

In [23]:
# Make sure the date column is a nanosecond-resolution datetime.
then_now_melt = then_now_melt.astype({'week_ending_date': 'datetime64[ns]'})

In [24]:
# Display the dtype of each column in the tall comparison frame.
then_now_melt.dtypes

week                         int64
week_ending_date    datetime64[ns]
year                        object
count                      float64
dtype: object

In [25]:
from pathlib import Path

# Write the tall comparison table to disk without the row index.
# to_csv does not create missing parent directories, so make sure 'output/'
# exists first; otherwise a fresh checkout fails with an OSError here.
export_path = Path('output/then_now_melt.csv')
export_path.parent.mkdir(parents=True, exist_ok=True)
then_now_melt.to_csv(export_path, index=False)

In [26]:
# Order rows by series label (ascending is the sort_values default), so the
# '2020' rows come before the 'Normal' rows.
then_now_melt = then_now_melt.sort_values(by='year')

### Chart it

In [27]:
# Series labels and the fill color assigned to each, in matching order.
domain = ['2020', 'Normal']
range_ = ['#82c6df', '#3580b1']

# Axis styling: month names along x; a sparse, light-gridded y axis with no
# tick marks and an invisible domain line.
month_axis = alt.Axis(format='%B', tickCount=10, grid=False)
count_axis = alt.Axis(
    tickSize=0,
    domainOpacity=0,
    tickCount=4,
    offset=4,
    gridWidth=.6,
    gridColor='#dddddd',
    format='',
)

# Two semi-transparent areas drawn over each other (stack=None disables
# stacking) so the 2020 series can be compared directly with the baseline.
area_layer = alt.Chart(then_now_melt).mark_area(opacity=.5)
encoded_layer = area_layer.encode(
    x=alt.X('week_ending_date:T', title='', axis=month_axis),
    y=alt.Y(
        'count',
        title=' ',
        stack=None,
        axis=count_axis,
        scale=alt.Scale(domain=(3000, 100000)),
    ),
    color=alt.Color('year', title=' ', scale=alt.Scale(domain=domain, range=range_)),
)
sized_layer = encoded_layer.properties(
    width=620,
    height=350,
    title='Average weekly deaths in United States vs. deaths in 2020',
)

# Drop the view border and put a square-swatch legend above the plot.
chart = sized_layer.configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='square',
)
chart

In [28]:
# Export the chart as a PNG file named 'visualization_usa.png' (relative path).
chart.save('visualization_usa.png')