In [65]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re

Load and inspect the data

In [43]:
# Read the gzip-compressed provisional deaths extract and discard exact
# duplicate rows before inspecting it.
tab = (
    pd.read_csv(
        "Provisional_Percent_of_Deaths_for_COVID-19,_Influenza,_and_RSV_by_Select_Characteristics_20251225.csv.gz",
        compression="gzip",
    )
    .drop_duplicates()
)
tab

Unnamed: 0,data_as_of,start_date,end_date,group,year,month,mmwr_week,weekending_date,state,demographic_type,demographic_values,pathogen,deaths,total_deaths,percent_deaths,provisional,suppressed
0,2025-12-17,2019-12-29,2020-01-04,By Week,2020,1,1,2020-01-04,United States,Race/Ethnicity,"AI/AN, NH",Combined,,384,,,One or more data cells have counts between 1-9...
1,2025-12-17,2019-12-29,2020-01-04,By Week,2020,1,1,2020-01-04,United States,Race/Ethnicity,"Black, NH",Combined,47,7498,0.63,,
2,2025-12-17,2019-12-29,2020-01-04,By Week,2020,1,1,2020-01-04,United States,Race/Ethnicity,"White, NH",Combined,314,45526,0.69,,
3,2025-12-17,2019-12-29,2020-01-04,By Week,2020,1,1,2020-01-04,United States,Race/Ethnicity,"Multiple/Other, NH",Combined,,292,,,One or more data cells have counts between 1-9...
4,2025-12-17,2019-12-29,2020-01-04,By Week,2020,1,1,2020-01-04,United States,Race/Ethnicity,Not Available,Combined,0,0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19867,2025-12-17,2019-12-08,2019-12-14,By Week,2019,12,50,2019-12-14,United States,Sex,Male,COVID-19,0,29238,0.00,,
19868,2025-12-17,2019-12-15,2019-12-21,By Week,2019,12,51,2019-12-21,United States,Sex,Female,COVID-19,0,28228,0.00,,
19869,2025-12-17,2019-12-15,2019-12-21,By Week,2019,12,51,2019-12-21,United States,Sex,Male,COVID-19,0,29197,0.00,,
19870,2025-12-17,2019-12-22,2019-12-28,By Week,2019,12,52,2019-12-28,United States,Sex,Female,COVID-19,0,28365,0.00,,


It is not immediately clear what some of the categorical fields mean, so let us examine them more closely...

In [44]:
# Distinct values of the `group` column.
tab.loc[:, "group"].unique()

array(['By Week'], dtype=object)

In [45]:
# Distinct values of the `demographic_type` column.
tab.loc[:, "demographic_type"].unique()

array(['Race/Ethnicity', 'Age Group', 'Sex'], dtype=object)

In [46]:
# Distinct values of the `pathogen` column.
tab.loc[:, "pathogen"].unique()

array(['Combined', 'COVID-19', 'Influenza', 'RSV'], dtype=object)

In [47]:
# NOTE(review): this repeats the `group` check already run in In [44];
# possibly a different column was meant to be inspected here — confirm.
tab["group"].unique()

array(['By Week'], dtype=object)

Quick spot-check:

In [48]:
# Spot-check a single week/age group: MMWR week 1 of 2020, ages 65+,
# one row per pathogen.
tab.loc[
    tab["mmwr_week"].eq(1)
    & tab["year"].eq(2020)
    & tab["demographic_values"].eq("65+ years")
]

Unnamed: 0,data_as_of,start_date,end_date,group,year,month,mmwr_week,weekending_date,state,demographic_type,demographic_values,pathogen,deaths,total_deaths,percent_deaths,provisional,suppressed
9525,2025-12-17,2019-12-29,2020-01-04,By Week,2020,1,1,2020-01-04,United States,Age Group,65+ years,COVID-19,0,44657,0.0,,
10765,2025-12-17,2019-12-29,2020-01-04,By Week,2020,1,1,2020-01-04,United States,Age Group,65+ years,Combined,239,44657,0.54,,
11800,2025-12-17,2019-12-29,2020-01-04,By Week,2020,1,1,2020-01-04,United States,Age Group,65+ years,Influenza,227,44657,0.51,,
13145,2025-12-17,2019-12-29,2020-01-04,By Week,2020,1,1,2020-01-04,United States,Age Group,65+ years,RSV,12,44657,0.03,,


This confirms what the data description website suggests: `deaths` is the column we want.

Subset

In [49]:
# Keep only the RSV rows broken down by age group, and only the columns
# needed for the weekly death counts.
relevant = tab.loc[
    tab["demographic_type"].eq("Age Group") & tab["pathogen"].eq("RSV"),
    ["year", "mmwr_week", "deaths", "suppressed", "demographic_values"],
]

In [50]:
# Display the RSV-by-age subset (year, week, deaths, suppression note, age group).
relevant

Unnamed: 0,year,mmwr_week,deaths,suppressed,demographic_values
13145,2020,1,12,,65+ years
13146,2020,2,13,,65+ years
13147,2020,3,21,,65+ years
13148,2020,4,16,,65+ years
13149,2020,5,12,,65+ years
...,...,...,...,...,...
15312,2023,45,,One or more data cells have counts between 1-9...,0-17 years
15313,2023,46,,One or more data cells have counts between 1-9...,0-17 years
15314,2023,47,,One or more data cells have counts between 1-9...,0-17 years
15315,2023,48,,One or more data cells have counts between 1-9...,0-17 years


In [51]:
# Age groups present in the RSV subset.
relevant.loc[:, "demographic_values"].unique()

array(['65+ years', '0-17 years', '18-64 years'], dtype=object)

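It looks like some counts are suppressed because they are small (the `suppressed` note reads "counts between 1-9"). Let us remove these missing values. Note that because suppressed weeks are dropped rather than imputed, the annual totals computed below are lower bounds: weeks with 1-9 deaths contribute nothing to the sum.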

In [52]:
# Keep only rows with a reported death count, then discard the `suppressed`
# note column, which is no longer needed once those rows are gone.
# errors="ignore" keeps this cell re-runnable: on a second execution the
# column has already been removed and a plain drop would raise KeyError.
relevant = (
    relevant
    .dropna(subset=["deaths"])
    .drop(columns=["suppressed"], errors="ignore")
)
relevant

Unnamed: 0,year,mmwr_week,deaths,demographic_values
13145,2020,1,12,65+ years
13146,2020,2,13,65+ years
13147,2020,3,21,65+ years
13148,2020,4,16,65+ years
13149,2020,5,12,65+ years
...,...,...,...,...
15307,2023,40,0,0-17 years
15308,2023,41,0,0-17 years
15309,2023,42,0,0-17 years
15310,2023,43,0,0-17 years


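Similar to the hospitalization analysis, let us subset to age cohorts who could be affected by changes in RSV vaccine. (The cells below first rename the age column to "Age Category"; all three age groups are kept.)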

In [53]:
# Give the age-group column a presentation-friendly name.
relevant = relevant.rename(columns={"demographic_values": "Age Category"})

In [54]:
# Confirm the age groups are unchanged after the rename.
relevant.loc[:, "Age Category"].unique()

array(['65+ years', '0-17 years', '18-64 years'], dtype=object)

Spot-check: year x week x age should be unique

In [55]:
# Each (year, week, age category) combination must appear at most once.
assert not relevant.duplicated(subset=["year", "mmwr_week", "Age Category"]).any()

Cast deaths to int

In [56]:
# Missing counts were removed above, so `deaths` can be stored as integers.
relevant = relevant.astype({"deaths": "int64"})

Generate summary

In [57]:
# Annual totals per age category: sum of weekly deaths and the number of
# distinct MMWR weeks that contributed a (non-missing) count.
summary = relevant.groupby(["year", "Age Category"], as_index=False).agg(
    Total_Deaths=pd.NamedAgg(column="deaths", aggfunc="sum"),
    Weeks_Observed=pd.NamedAgg(column="mmwr_week", aggfunc="nunique"),
)
summary

Unnamed: 0,year,Age Category,Total_Deaths,Weeks_Observed
0,2018,0-17 years,0,31
1,2018,18-64 years,0,28
2,2018,65+ years,124,30
3,2019,0-17 years,0,26
4,2019,18-64 years,0,36
5,2019,65+ years,83,27
6,2020,0-17 years,0,41
7,2020,18-64 years,0,33
8,2020,65+ years,187,39
9,2021,0-17 years,0,34


Cast year to int

In [58]:
# Store `year` as an integer column.
summary = summary.astype({"year": "int"})
summary

Unnamed: 0,year,Age Category,Total_Deaths,Weeks_Observed
0,2018,0-17 years,0,31
1,2018,18-64 years,0,28
2,2018,65+ years,124,30
3,2019,0-17 years,0,26
4,2019,18-64 years,0,36
5,2019,65+ years,83,27
6,2020,0-17 years,0,41
7,2020,18-64 years,0,33
8,2020,65+ years,187,39
9,2021,0-17 years,0,34


Subset to relevant years (2018+)

In [63]:
# Keep 2018 and later.
summary = summary.loc[summary["year"].ge(2018)]

In [64]:
# Display the annual summary after the year filter.
summary

Unnamed: 0,year,Age Category,Total_Deaths,Weeks_Observed
0,2018,0-17 years,0,31
1,2018,18-64 years,0,28
2,2018,65+ years,124,30
3,2019,0-17 years,0,26
4,2019,18-64 years,0,36
5,2019,65+ years,83,27
6,2020,0-17 years,0,41
7,2020,18-64 years,0,33
8,2020,65+ years,187,39
9,2021,0-17 years,0,34


Generate tables

In [61]:
from pathlib import Path

In [67]:

# Render one SVG table per age category from the annual `summary` frame and
# write each to `tables_death_svg/`.
out_dir = Path("tables_death_svg")
out_dir.mkdir(parents=True, exist_ok=True)

# Columns shown in every table (also used as the header labels).
col_labels = ["year", "Total_Deaths", "Weeks_Observed"]

# Rows for this year are rendered in bold.
# NOTE(review): presumably the year of interest for the RSV vaccine change — confirm.
highlight_year = 2023

for age_cat, sub in summary.groupby("Age Category", sort=True):
    sub = sub.sort_values("year")[col_labels]

    # Convert to strings for stable table rendering. Deaths are whole-number
    # counts, so format them as integers (a ".1f" format would render "124.0").
    cell_text = [
        [str(int(year)), str(int(deaths)), str(int(weeks))]
        for year, deaths, weeks in sub.itertuples(index=False, name=None)
    ]

    # Figure sizing: scale height with number of rows (+1 for the header).
    nrows = len(cell_text) + 1
    fig_h = max(1.5, 0.35 * nrows)
    fig, ax = plt.subplots(figsize=(7.0, fig_h))
    ax.axis("off")

    tbl = ax.table(
        cellText=cell_text,
        colLabels=col_labels,
        loc="center",
        cellLoc="center",
        colLoc="center",
    )

    tbl.auto_set_font_size(False)
    tbl.set_fontsize(10)
    tbl.scale(1.0, 1.25)

    # Table row 0 is the header, so data row i lives at table row i + 1.
    row_years = sub["year"].tolist()
    for (row, col), cell in tbl.get_celld().items():
        if row == 0:
            continue  # header
        if row_years[row - 1] == highlight_year:
            cell.set_text_props(weight="bold")

    ax.set_title(f"Annual RSV mortality — {age_cat}", pad=12)

    # Minimal filename cleanup inline: collapse anything that is not a word
    # character or hyphen into a single underscore.
    fname_age = re.sub(r"[^\w\-]+", "_", age_cat).strip("_")
    fname = out_dir / f"annual_rsv_table_{fname_age}.svg"

    fig.savefig(fname, format="svg", bbox_inches="tight")
    plt.close(fig)  # avoid accumulating open figures across the loop

print(f"Wrote SVG tables to: {out_dir.resolve()}")

Wrote SVG tables to: /Users/mcnoon/rsv/tables_death_svg
