# Summary
- **Description:** The [dataset](https://data.cdc.gov/NCHS/Provisional-COVID-19-Deaths-by-Sex-and-Age/9bhg-hcku/about_data), sourced from the CDC, contains provisional COVID-19 death counts broken down by age group, sex, state, and reporting period (year or month).
- **Main question:** How do COVID-19 mortality trends vary across different age groups and sexes?
- **Link to recording:** https://indiana-my.sharepoint.com/:v:/g/personal/nabdelaz_iu_edu/EWeyapuOGIdDsgjR62zAopIBXzGOjnv-Wn-AGWXHfhBk6w?e=8oX35g&nav=eyJyZWZlcnJhbEluZm8iOnsicmVmZXJyYWxBcHAiOiJTdHJlYW1XZWJBcHAiLCJyZWZlcnJhbFZpZXciOiJTaGFyZURpYWxvZy1MaW5rIiwicmVmZXJyYWxBcHBQbGF0Zm9ybSI6IldlYiIsInJlZmVycmFsTW9kZSI6InZpZXcifX0%3D

In [None]:
!pip install dash
!pip install us

import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import us
from dash import Dash, html, dcc, Input, Output, callback



In [None]:
# @title main code!
# PREPROCESSING:
df = pd.read_csv('https://docs.google.com/spreadsheets/d/e/2PACX-1vQyz_547X0UIXtKSWfesU1LFvu2YzZUMCGggeXWVEWky5Mpu6gVWT07K6vdK8JDyECKROZPelK7gBDd/pub?gid=419818930&single=true&output=csv')

# Keep only rows that have a "Year" but no "Month" (presumably the per-year
# totals -- TODO confirm against the dataset). The explicit copy makes df an
# independent frame, so the column assignments below do not write into a
# slice of the original frame (which triggers SettingWithCopyWarning).
df = df[df["Month"].isna() & df["Year"].notna()].copy()


def _state_abbr(name):
    """Return the abbreviation `us` knows for *name*, or None if it has no match."""
    state = us.states.lookup(name)
    return state.abbr if state else None


# New column for state codes (None for names `us` cannot resolve)
df["State Code"] = df["State"].apply(_state_abbr)

# Estimated population counts (2020 - from CDC)
# NOTE: this single set of figures is applied to every year in the data.
state_populations = {
    "Alabama": 5024279,
    "Alaska": 733391,
    "Arizona": 7151502,
    "Arkansas": 3011524,
    "California": 39538223,
    "Colorado": 5773714,
    "Connecticut": 3605944,
    "Delaware": 989948,
    "Florida": 21538187,
    "Georgia": 10711908,
    "Hawaii": 1455271,
    "Idaho": 1839106,
    "Illinois": 12812508,
    "Indiana": 6785528,
    "Iowa": 3190369,
    "Kansas": 2937880,
    "Kentucky": 4505836,
    "Louisiana": 4657757,
    "Maine": 1362359,
    "Maryland": 6177224,
    "Massachusetts": 7029917,
    "Michigan": 10077331,
    "Minnesota": 5706494,
    "Mississippi": 2961279,
    "Missouri": 6154913,
    "Montana": 1084225,
    "Nebraska": 1961504,
    "Nevada": 3104614,
    "New Hampshire": 1377529,
    "New Jersey": 9288994,
    "New Mexico": 2117522,
    "New York": 20201249,
    "North Carolina": 10439388,
    "North Dakota": 779094,
    "Ohio": 11799448,
    "Oklahoma": 3959353,
    "Oregon": 4237256,
    "Pennsylvania": 13002700,
    "Rhode Island": 1097379,
    "South Carolina": 5118425,
    "South Dakota": 886667,
    "Tennessee": 6910840,
    "Texas": 29145505,
    "Utah": 3271616,
    "Vermont": 643077,
    "Virginia": 8631393,
    "Washington": 7693612,
    "West Virginia": 1793716,
    "Wisconsin": 5893718,
    "Wyoming": 576851,
}

# Add population column. Any "State" value that is not a key above
# (e.g. the "United States" rows used elsewhere in this file) gets NaN.
df["Population"] = df["State"].map(state_populations)

# Calculate deaths per 100,000 people (NaN wherever the population is NaN)
df["Deaths per 100k"] = (df["COVID-19 Deaths"] / df["Population"]) * 100000


app = Dash("Final Web App")

# Slider bounds: first and last year present in the "Year" column.
first_year = int(df["Year"].min())
last_year = int(df["Year"].max())

# Page layout, top to bottom: heading, year slider, state map,
# grouping/chart-type controls, and the detail graph driven by those controls.
app.layout = html.Div(children=[
    # Every figure on this page plots death counts, so the heading says
    # "Deaths" rather than "Cases".
    html.H1(children='COVID-19 Deaths in the US'),

    # Slider for selecting a year (starts on the first year)
    dcc.Slider(
        id="year-slider",
        min=first_year,
        max=last_year,
        value=first_year,
        step=1,
        marks={int(year): str(int(year)) for year in sorted(df["Year"].unique())}
    ),

    # Map: COVID-19 deaths per state
    dcc.Graph(id="map-graph"),
    # Radio to output graph based on either age or sex; the values are the
    # names of the dataframe columns to group by.
    dcc.RadioItems(
        id="graph-mode",
        options=[
            {"label": "Group by Age", "value": "Age Group"},
            {"label": "Group by Sex", "value": "Sex"}
        ],
        value="Age Group",
        labelStyle={'display': 'inline-block', 'margin-right': '15px'}
    ),
    # Chart type for the detail graph; not clearable, so a type is always set.
    dcc.Dropdown(
        id='graph-type',
        options=[
            {"label": "Bar Chart", "value": "bar"},
            {"label": "Line Chart", "value": "line"}
        ],
        value="bar",
        clearable=False,
        style={'width': '300px', 'margin-bottom': '20px'}
    ),
    # Graph: COVID-19 deaths
    dcc.Graph(id="graph"),

])

# Callback for map
@app.callback(
    Output("map-graph", "figure"),
    Input("year-slider", "value")
)
def update_map(year):
  """Build the per-state choropleth of COVID-19 deaths per 100k for one year.

  Args:
    year: Year currently selected on the "year-slider" component.

  Returns:
    A plotly Figure containing the choropleth, a text layer with the state
    codes, and a fixed annotation.
  """
  # One row per state: all ages, all sexes, without the "United States" row.
  is_selected = (
      (df["Year"] == year)
      & (df["Age Group"] == "All Ages")
      & (df["Sex"] == "All Sexes")
      & (df["State"] != "United States")
  )
  map_df = df[is_selected]

  z = map_df["Deaths per 100k"]
  fig = go.Figure(data=go.Choropleth(
      locations=map_df["State Code"],
      z=z,
      locationmode="USA-states",
      colorscale='Reds',
      colorbar_title="Deaths per 100k",
      text=map_df["State Code"],
      customdata=map_df[["State", "COVID-19 Deaths", "Deaths per 100k"]],
      hovertemplate="<b>%{customdata[0]}</b><br>Total Deaths: %{customdata[1]:,.0f}<br>Deaths per 100k: %{customdata[2]:.1f}<extra></extra>"
  ))

  # Change brightness of text on map: rescale the rates to 0..1 once, then use
  # white labels on the darker half of the colour scale and black otherwise.
  # If every rate is equal (or missing) the result is NaN, which compares
  # False and therefore falls back to black.
  z_min, z_max = z.min(), z.max()
  z_norm = (z - z_min) / (z_max - z_min)

  # Output state codes on map as a single text trace with one colour per
  # label. Rows without a state code have nothing to draw and are left out.
  has_code = map_df["State Code"].notna()
  if has_code.any():
    label_codes = map_df.loc[has_code, "State Code"]
    label_colors = ["white" if share > 0.5 else "black" for share in z_norm[has_code]]
    fig.add_trace(go.Scattergeo(
        locationmode="USA-states",
        locations=label_codes,
        text=label_codes,
        mode="text",
        showlegend=False,
        textfont=dict(color=label_colors, size=10),
        hoverinfo="skip"  # labels must not intercept hover meant for the choropleth
    ))

  fig.update_layout(
    title=f"COVID-19 Deaths per 100k People by State in {year}",
    geo=dict(
        scope="usa",
        showlakes=False,
        lakecolor="LightBlue"
    ),
    dragmode=False
  )

  # Static call-out. It is placed in paper coordinates, so its position is
  # fixed on the figure, and it is added regardless of the selected year.
  fig.add_annotation(
    x=0.45,
    y=0.85,
    xref="paper",
    yref="paper",
    text="Unusually high for North Dakota",
    showarrow=True,
    arrowhead=2,
    ax=0,
    ay=-40,
    bgcolor="white",
    font=dict(size=12, color="black")
  )

  return fig

# Dataset age groups -> coarser buckets shown in the charts. Age groups that
# are not listed here are dropped before summing.
_AGE_BUCKETS = {
    "0-17 years": "0-17",
    "18-29 years": "18-39", "30-39 years": "18-39",
    "40-49 years": "40-64", "50-64 years": "40-64",
    "65-74 years": "65-84", "75-84 years": "65-84",
    "85 years and over": "85+"
}
# Left-to-right order of the buckets on the bar chart.
_AGE_BUCKET_ORDER = ["0-17", "18-39", "40-64", "65-84", "85+"]


def _sum_deaths_by_age_bucket(frame, keys):
  """Sum "COVID-19 Deaths" of the all-sexes rows of *frame* per age bucket.

  Args:
    frame: Rows to aggregate (already restricted to one state).
    keys: Column name(s) to group by; includes "Simplified Age Group".

  Returns:
    A new DataFrame with the grouping columns and the summed deaths.
  """
  keep = (frame["Sex"] == "All Sexes") & frame["Age Group"].isin(list(_AGE_BUCKETS))
  rows = frame[keep]
  # assign() returns a new frame, so nothing is written into a slice of df.
  rows = rows.assign(**{"Simplified Age Group": rows["Age Group"].map(_AGE_BUCKETS)})
  return rows.groupby(keys, as_index=False)["COVID-19 Deaths"].sum()


# Callback for graph
@app.callback(
    Output("graph", "figure"),
    Input("year-slider", "value"),
    Input("map-graph", "clickData"),
    Input("graph-mode", "value"),
    Input("graph-type", "value")
)
def update_graph(year, clickData, mode, graphType):
  """Build the detail chart of COVID-19 deaths for one state or the whole US.

  Args:
    year: Year selected on the slider; only used by the bar chart.
    clickData: Click payload of the map, or None if nothing was clicked yet.
    mode: "Age Group" to break deaths down by age bucket, otherwise by sex.
    graphType: "line" for deaths over all years; anything else gives a bar
      chart for the selected year.

  Returns:
    A plotly express Figure.
  """
  # Default to the united states if no state was clicked on, or if the
  # clicked point carries no location that `us` can resolve.
  full_state = "United States"
  if clickData:
    state_code = clickData["points"][0].get("location")
    state = us.states.lookup(state_code) if state_code else None
    if state:
      full_state = state.name

  graph_df = df[df["State"] == full_state]

  # LINE GRAPH: every year on the x axis
  if graphType == "line":
    if mode == "Age Group":
      grouped_df = _sum_deaths_by_age_bucket(graph_df, ["Year", "Simplified Age Group"])
      fig = px.line(grouped_df, x="Year", y="COVID-19 Deaths", color="Simplified Age Group",
                          title=f"COVID-19 Deaths Over Time by Age Group in {full_state}", markers=True)
    else:
      graph_df = graph_df[(graph_df["Age Group"] == "All Ages") & (graph_df["Sex"] != "All Sexes")]
      fig = px.line(graph_df, x="Year", y="COVID-19 Deaths", color="Sex",
                          title=f"COVID-19 Deaths Over Time by Sex in {full_state}", markers=True)

  # BAR GRAPH: selected year only
  else:
    graph_df = graph_df[graph_df["Year"] == year]
    if mode == "Age Group":
      grouped_df = _sum_deaths_by_age_bucket(graph_df, "Simplified Age Group")
      # An ordered categorical makes the sort follow age order rather than
      # alphabetical order.
      grouped_df["Simplified Age Group"] = pd.Categorical(grouped_df["Simplified Age Group"],
                                                                categories=_AGE_BUCKET_ORDER,
                                                                ordered=True)
      grouped_df = grouped_df.sort_values("Simplified Age Group")
      fig = px.bar(grouped_df, x="Simplified Age Group", y="COVID-19 Deaths",
                         title=f"COVID-19 Deaths by Age Group in {full_state} ({year})",
                         color="Simplified Age Group", text="COVID-19 Deaths")
    else:
      graph_df = graph_df[(graph_df["Age Group"] == "All Ages") & (graph_df["Sex"] != "All Sexes")]
      grouped_df = graph_df.groupby("Sex", as_index=False)["COVID-19 Deaths"].sum()
      fig = px.bar(grouped_df, x="Sex", y="COVID-19 Deaths",
                         title=f"COVID-19 Deaths by Sex in {full_state} ({year})",
                         color="Sex", text="COVID-19 Deaths")

  # Put the value labels above the bars.
  if graphType == "bar":
    fig.update_traces(textposition="outside")
  return fig

# Start the Dash app only when this file is executed directly.
if __name__ == "__main__":
    # Debug mode on; Jupyter display mode "inline" with jupyter_height=1000.
    run_options = dict(debug=True, jupyter_mode="inline", jupyter_height=1000)
    app.run(**run_options)

<IPython.core.display.Javascript object>