# Deaths in 2020 - Percentage above normal, by state

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
import altair as alt
import altair_latimes as lat

In [4]:
# Register the LA Times theme and activate it. The name passed to enable()
# must match the name the theme was registered under.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [5]:
# Lift Altair's row limit on chart data.
alt.data_transformers.disable_max_rows()
# Pandas display settings: up to 100 columns, up to 1000 rows, untruncated cell text.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [6]:
# Load the deaths-by-jurisdiction file: mmwr_year is read as text, mmwr_week
# as an integer, and week_ending_date is parsed into datetimes.
df = pd.read_csv(
    "input/deaths_by_jurisdiction.csv",
    dtype=dict(mmwr_year=str, mmwr_week=int),
    parse_dates=["week_ending_date"],
)

### Make a copy of the dataframe, excluding years after 2020

In [7]:
# No datetime conversion needed here: read_csv already parses week_ending_date via parse_dates.

In [8]:
# Keep the year as text so it can be compared against string literals.
df["mmwr_year"] = df.mmwr_year.astype(str)

In [9]:
# Independent copy of the rows whose year string sorts at or before "2020".
deaths = df.loc[df["mmwr_year"].le("2020")].copy()

In [10]:
# List the column names available on the filtered frame.
deaths.columns

Index(['jurisdiction_of_occurrence', 'mmwr_year', 'mmwr_week',
       'week_ending_date', 'all_cause', 'natural_cause'],
      dtype='object')

In [11]:
# Total all-cause deaths for each jurisdiction and year.
# Uses the string alias "sum": passing the builtin `sum` to .agg() is
# deprecated in recent pandas releases and emits a FutureWarning.
states_grp = (
    deaths.groupby(["jurisdiction_of_occurrence", "mmwr_year"])
    .agg({"all_cause": "sum"})
    .reset_index()
)

### Widen the dataframe 

In [12]:
# Reshape to one row per jurisdiction with one column per year.
# Uses the string alias "sum": passing the builtin `sum` as aggfunc is
# deprecated in recent pandas releases and emits a FutureWarning.
states_pivot = pd.pivot_table(
    states_grp,
    values="all_cause",
    index="jurisdiction_of_occurrence",
    columns="mmwr_year",
    aggfunc="sum",
).reset_index()

In [13]:
# Preview the first rows of the widened table.
states_pivot.head()

mmwr_year,jurisdiction_of_occurrence,2014,2015,2016,2017,2018,2019,2020
0,Alabama,50229.0,50661.0,51130.0,52132.0,53146.0,53057.0,64271.0
1,Alaska,4081.0,4170.0,4305.0,4255.0,4289.0,4503.0,5048.0
2,Arizona,52761.0,54382.0,56583.0,57885.0,59495.0,60450.0,78217.0
3,Arkansas,30508.0,30830.0,30847.0,31707.0,31744.0,32183.0,38349.0
4,California,250552.0,258512.0,260595.0,267106.0,268145.0,268775.0,326077.0


### Mean of the pre-pandemic years

In [14]:
# Columns to average: only the year columns other than 2020.
# The jurisdiction label is text and makes a row-wise mean raise TypeError
# in pandas >= 2.0, so it is excluded; selecting digit-only names also keeps
# derived columns out if this cell is re-run after they have been added.
col_list = [
    col for col in states_pivot.columns if col.isdigit() and col != "2020"
]

In [15]:
# Row-wise mean across the columns in col_list, rounded to a whole number.
states_pivot["2014_2019_avg"] = (
    states_pivot.loc[:, col_list].mean(axis="columns").round()
)

### Percentage change: 2014-2019 average to 2020

In [16]:
# Change of 2020 relative to the 2014-2019 average, as a fraction
# rounded to two decimals (0.30 means 30% above the average).
states_pivot["pct_change"] = (
    states_pivot["2020"]
    .sub(states_pivot["2014_2019_avg"])
    .div(states_pivot["2014_2019_avg"])
    .round(2)
)

In [17]:
# Keep only the label, the baseline average, the 2020 total and the change.
states_pivot_slim = states_pivot.loc[
    :, ["jurisdiction_of_occurrence", "2014_2019_avg", "2020", "pct_change"]
]

In [18]:
# Five rows with the largest pct_change.
states_pivot_slim.sort_values(by="pct_change", ascending=False).head(5)

mmwr_year,jurisdiction_of_occurrence,2014_2019_avg,2020,pct_change
33,New York City,53926.0,83069.0,0.54
2,Arizona,56926.0,78217.0,0.37
30,New Jersey,72599.0,96739.0,0.33
45,Texas,197052.0,257039.0,0.3
31,New Mexico,17829.0,23093.0,0.3


In [19]:
# Five rows with the smallest pct_change (end of the descending sort).
states_pivot_slim.sort_values(by="pct_change", ascending=False).tail(5)

mmwr_year,jurisdiction_of_occurrence,2014_2019_avg,2020,pct_change
19,Maine,14269.0,15851.0,0.11
11,Hawaii,11242.0,12204.0,0.09
48,Vermont,5710.0,6252.0,0.09
40,Puerto Rico,29441.0,31337.0,0.06
34,North Carolina,92091.0,97954.0,0.06


---

### Chart it

In [20]:
# Shared encodings for both layers: pct_change on x, one row per
# jurisdiction on y, ordered by descending x value.
base = alt.Chart(states_pivot_slim).encode(
    x="pct_change:Q",
    y=alt.Y(
        "jurisdiction_of_occurrence:O",
        sort="-x",
        title=" ",
        axis=alt.Axis(
            tickSize=0,
            domainOpacity=0,
            tickCount=4,
            offset=4,
            gridWidth=0.6,
            gridColor="#dddddd",
        ),
    ),
)

# Bar layer: the y encoding is inherited from `base`; x is overridden only to
# add a percent-formatted axis. California gets the darker highlight color.
bars = base.mark_bar().encode(
    x=alt.X("pct_change:Q", axis=alt.Axis(format="%", tickCount=6, grid=False)),
    color=alt.condition(
        alt.datum.jurisdiction_of_occurrence == "California",
        alt.value("#3580b1"),
        alt.value("#82c6df"),
    ),
)

# Label layer: the percentage value, nudged 4px to the right of each bar's end.
text = base.mark_text(align="left", baseline="middle", dx=4).encode(
    text=alt.Text("pct_change:Q", format=".0%")
)

# Layer the labels over the bars, size the chart and hide the view border.
layered = bars + text
viz = layered.properties(
    height=1200,
    width=320,
    title="Percentage increase in deaths in 2020 vs. 2014-2019",
).configure_view(strokeOpacity=0)

In [21]:
# Render the layered chart in the notebook.
viz

In [22]:
# Save the chart to a PNG file in the working directory.
viz.save("visualization_state_bars.png")

---

### Export 

In [23]:
# Write the slim table to CSV without the positional index column.
states_pivot_slim.to_csv(
    path_or_buf="output/states_pct_change_all_deaths.csv",
    index=False,
)