# Visualizing harmful PM<sub>2.5</sub> levels in the US by county

In [None]:
# %pip install pandas geopandas folium matplotlib mapclassify

In [None]:
import pandas as pd
import geopandas as gpd

In [None]:
# Source data: county-level PM2.5 table exported from the US NIH HDPulse portal
# (https://hdpulse.nimhd.nih.gov/data-portal/physical/table?age=001&age_options=ageall_1&demo=234&demo_options=air_pollution_1&physicaltopic=002&physicaltopic_options=physical_2&race=00&race_options=raceall_1&sex=0&sex_options=sexboth_1&statefips=99&statefips_options=area_states)
# The CSV is expected in the working directory. The first 5 lines of the file
# are skipped (presumably export preamble above the header row; confirm
# against the downloaded file).
county_pm25: pd.DataFrame = pd.read_csv(filepath_or_buffer="HDPulse_data_export.csv", skiprows=5)

In [None]:
# optional sense check: show the raw table as loaded from the CSV
county_pm25

In [None]:
# Clean the raw table in three steps: parse both columns as numbers, drop the
# rows where either one is missing, then format FIPS as a string join key.
county_pm25_processed: pd.DataFrame = (
    county_pm25.assign(
        # the PM2.5 reading is taken from the last column of the raw table;
        # anything that is not a number becomes NaN
        pm25_ug_per_m3=lambda x: pd.to_numeric(arg=x.iloc[:, -1], errors="coerce"),
        # parse FIPS the same way, so a non-numeric entry becomes NaN and is
        # dropped below instead of raising
        FIPS=lambda x: pd.to_numeric(arg=x["FIPS"], errors="coerce"),
    )
    .dropna(
        # keep only rows that have both a FIPS code and a PM2.5 reading
        subset=[
            "FIPS",
            "pm25_ug_per_m3",
        ],
    )
    .assign(
        # convert FIPS to a zero-padded 5-character string; the int cast is
        # only possible now that the NaN rows are gone
        FIPS=lambda x: x["FIPS"].astype(int).astype(str).str.zfill(5),
    )
)

In [None]:
# optional sense check: show the cleaned table (numeric PM2.5 column and
# 5-character string FIPS)
county_pm25_processed

In [None]:
# County boundaries: download the US county cartographic boundary files from
# https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html
# and place the "cb_2017_us_county_500k" dataset in the working directory.
counties: gpd.GeoDataFrame = gpd.read_file("cb_2017_us_county_500k")

In [None]:
# Build the FIPS join key by concatenating the state and county codes
# (assumes STATEFP and COUNTYFP are read as strings; confirm against the
# shapefile). `assign` returns a new frame, so `counties` is left untouched.
counties_processed: gpd.GeoDataFrame = counties.assign(
    FIPS=counties["STATEFP"] + counties["COUNTYFP"],
)

In [None]:
# optional sense check: show the county geometries with the added FIPS column
counties_processed

In [None]:
# Attach the PM2.5 readings to the county geometries by FIPS. A left join
# keeps every county, including those without a matching reading.
counties_w_pm25 = counties_processed.merge(county_pm25_processed, how="left", on="FIPS")

In [None]:
# optional sense check: show the merged table (county geometries plus PM2.5)
counties_w_pm25

In [None]:
# Render the counties on a map, coloured by their PM2.5 reading.
counties_w_pm25.explore(column="pm25_ug_per_m3")