In [1]:
"""Total PM2.5 time series plots."""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [2]:
# Read the combined total / wildfire PM2.5 and demographic table from disk.
data_path = "data/total + wildfire pm and demographic data 11-22-2023.parquet"
df = pd.read_parquet(data_path)

# Plot styling: seaborn "paper" context, fonts scaled 1.5x, 300 dpi figures.
sns.set_context(context="paper", font_scale=1.5, rc={"figure.dpi": 300})

In [4]:
# Population-weighted mean PM2.5 concentration for every race/ethnicity group,
# year and dataset, aggregated over all rows of `df` and labelled "CONUS".
# Only the CONUS aggregate is built in this cell; no per-region rows are added.

race_ethnicities = [
    "Hispanic",
    "NH White",
    "NH Black",
    "NH American Indian and Alaska Native",
    "NH Asian",
    "NH Native Hawaiian and Other Pacific Islander",
    "NH Other",
    "Total Population",
]

years = [str(x) for x in range(2007, 2019)]
datasets = ["PM_nofire", "PM_wf", "wfpm25_childs", "PM_total"]

# Collect plain dict records and build the frame once, instead of creating and
# concatenating a one-row DataFrame per combination.
records = []

for race_ethnicity in race_ethnicities:
    # A group without a population column is skipped explicitly rather than
    # being hidden behind a bare `except`.
    if race_ethnicity not in df.columns:
        continue

    population = df[race_ethnicity]
    population_total = population.sum()  # invariant across years and datasets

    for year in years:
        for dataset in datasets:
            concentration_column = f"{dataset}_{year}"

            # Only a missing dataset/year column is skipped; any other
            # problem (e.g. a non-numeric column) now surfaces as an error.
            if concentration_column not in df.columns:
                continue

            # NOTE(review): Series.sum() skips NaN, so rows with a missing
            # concentration drop out of the numerator while their population
            # still counts in the denominator -- confirm the inputs have no NaN.
            pop_weighted_mean = (
                df[concentration_column] * population
            ).sum() / population_total

            records.append(
                {
                    "EPA Region": "CONUS",
                    "Race/ethnicity": race_ethnicity,
                    "Year": year,
                    "Dataset": dataset,
                    "Pop-weighted mean": pop_weighted_mean,
                }
            )

# Explicit columns keep the schema stable even if no combination was found.
df_pop_weighted = pd.DataFrame(
    records,
    columns=["EPA Region", "Race/ethnicity", "Year", "Dataset", "Pop-weighted mean"],
)

In [5]:
# Numeric sort key per EPA region label: CONUS first, then Region 1 ... Region 10.
region_order = {"CONUS": 0}
region_order.update({f"Region {number}": number for number in range(1, 11)})
df_pop_weighted["Region sort"] = df_pop_weighted["EPA Region"].map(region_order)

# Years were generated as strings; convert them to integers.
df_pop_weighted["Year"] = df_pop_weighted["Year"].astype(int)

# Swap the raw dataset column prefixes for human-readable labels.
dataset_labels = {
    "wfpm25_childs": "Wildfire PM2.5 (Childs et al.)",
    "PM_nofire": "Non-fire",
    "PM_wf": "Fire",
    "PM_total": "Total",
}
df_pop_weighted["Dataset"] = df_pop_weighted["Dataset"].replace(dataset_labels)

In [None]:
# Preview the Year x Dataset table of CONUS population-weighted means, keeping
# only the "Non-fire" and "Fire" components (the other two datasets are excluded).
for i, race_ethnicity in enumerate(["Total Population"]):
    is_group = df_pop_weighted["Race/ethnicity"] == race_ethnicity
    is_conus = df_pop_weighted["EPA Region"] == "CONUS"
    is_component = ~df_pop_weighted["Dataset"].isin(
        ["Total", "Wildfire PM2.5 (Childs et al.)"]
    )

    selected_rows = df_pop_weighted.loc[is_group & is_conus & is_component]
    by_year_and_dataset = selected_rows.set_index(["Year", "Dataset"]).unstack(
        "Dataset"
    )
    df_test = by_year_and_dataset["Pop-weighted mean"]
df_test.head()

In [None]:
# Stacked bar chart of the CONUS population-weighted mean PM2.5 for the total
# population, split into its non-fire and fire components, one bar per year.

sns.set_theme("notebook", "ticks")

fig, ax = plt.subplots(figsize=(8, 4.5))

# Rows for the total population over the CONUS, excluding the "Total" and
# Childs et al. datasets so that only the two stacked components remain.
component_rows = df_pop_weighted.loc[
    (df_pop_weighted["Race/ethnicity"] == "Total Population")
    & (df_pop_weighted["EPA Region"] == "CONUS")
    & ~df_pop_weighted["Dataset"].isin(["Total", "Wildfire PM2.5 (Childs et al.)"])
]

# Reshape to one row per year and one column per dataset; the column order
# fixes the stacking order (non-fire at the bottom, fire on top).
means_by_year = component_rows.set_index(["Year", "Dataset"]).unstack("Dataset")[
    "Pop-weighted mean"
][["Non-fire", "Fire"]]

means_by_year.plot(
    ax=ax,
    kind="bar",
    stacked=True,
    title="",
    color=["gray", "darkgreen"],
    legend=False,
)

# set ylim
ax.set_ylim(6.8, 12.2)

# set y axis label with subscript
ax.set_ylabel("Pop. weighted mean PM$_{2.5}$ (μg/m³)", fontsize=14)

# legend centred below the axes
ax.legend(
    bbox_to_anchor=(0.5, -0.2),
    loc="upper center",
    ncol=2,
    title="",
    frameon=False,
    fontsize=12,
)

# NOTE(review): without `axis=`, tick_params applies the rotation to the tick
# labels of both axes; pass axis="x" if only the year labels should be tilted.
ax.tick_params(labelrotation=45)
ax.set_xlabel("")

# remove top and right axis frames
sns.despine()


fig.tight_layout()

# save figure
# NOTE(review): the file name says 2006-2020, but the `years` list used to build
# df_pop_weighted covers 2007-2018 -- confirm the intended name before renaming.
fig.savefig(
    "figures/2006-2020 population-weighted means (total).png",
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# Total population of the rows whose 2017 PM2.5 exceeds 8 ug/m3: printed first
# for total (wildfire + non-fire) PM2.5, then for non-fire PM2.5 alone.
for pm_column in ("PM_total_2017", "PM_nofire_2017"):
    exceeds_threshold = df[pm_column] > 8
    print(df.loc[exceeds_threshold, "Total Population"].sum())

In [None]:
# Year x Dataset table of CONUS population-weighted means for the total
# population, leaving out the Childs et al. wildfire dataset.
is_total_population = df_pop_weighted["Race/ethnicity"].eq("Total Population")
is_conus = df_pop_weighted["EPA Region"].eq("CONUS")
not_childs = df_pop_weighted["Dataset"].ne("Wildfire PM2.5 (Childs et al.)")

total_population_rows = df_pop_weighted.loc[
    is_total_population & is_conus & not_childs
]
df_total_pop = total_population_rows.set_index(["Year", "Dataset"]).unstack(
    "Dataset"
)["Pop-weighted mean"]
df_total_pop

In [None]:
# Fire share of the total population-weighted PM2.5, expressed in percent.
fire_fraction = df_total_pop["Fire"].div(df_total_pop["Total"])
df_total_pop["% Fire"] = fire_fraction.mul(100)
df_total_pop

In [None]:
# Time series of the fire share (%) of population-weighted PM2.5.
fig, ax = plt.subplots(1, 1, figsize=(8, 4.5))

# Draw onto the existing axes. The return value of sns.lineplot is an Axes, so
# it is deliberately not assigned to `fig`, which must keep pointing at the
# Figure. `markers=True` only takes effect together with a `style` variable,
# so the point markers are requested through matplotlib's `marker` keyword.
sns.lineplot(
    data=df_total_pop,
    ax=ax,
    x="Year",
    y="% Fire",
    marker="o",
)
# remove top and right axis frames
sns.despine()
# NOTE(review): without `axis=`, the rotation applies to both axes' tick labels.
ax.tick_params(labelrotation=45)
ax.set_xlabel("")

# allow up to 14 x-axis intervals so that every year can receive a tick
ax.xaxis.set_major_locator(plt.MaxNLocator(14))

In [None]:
# Relative change (%) in the total population-weighted PM2.5 between 2007 and 2018.
pm2007, pm2018 = df_total_pop.loc[[2007, 2018], "Total"]

percent_difference = (pm2018 - pm2007) / pm2007 * 100
percent_difference

In [None]:
# Relative change (%) in the non-fire population-weighted PM2.5 between 2007 and 2018.
pm2007, pm2018 = df_total_pop.loc[[2007, 2018], "Non-fire"]

percent_difference = (pm2018 - pm2007) / pm2007 * 100
percent_difference