In [1]:
import math

import geopandas as gpd
import pandas as pd
import requests

Monitoring site documentation: https://docs.airnowapi.org/docs/MonitoringSiteFactSheet.pdf

In [2]:
monitoring_site_col_names = [
    "AQSID", "parameter_name", "site_code", "site_name", "status",
    "agency_id", "agency_name", "epa_region", "latitude", "longitude",
    "elevation", "gmt_offset", "country_code", "msa_code", "msa_name",
    "state_code", "state_name", "county_code", "county_name"
]

In [3]:
monitoring_sites = pd.read_csv(
    "../data/source/monitoring_site_locations.dat", 
    sep="|", 
    encoding="latin",
)
monitoring_sites.head()

Unnamed: 0,060670006,PM10,0006,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,Unnamed: 13,Unnamed: 14,40900,"Sacramento--Arden-Arcade--Roseville, CA",06,CA,067,SACRAMENTO,Unnamed: 21,Unnamed: 22
0,60670006,NO2,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,,,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO,,
1,60670006,WD,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,,,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO,,
2,60670006,SO2,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,,,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO,,
3,60670006,BC,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,,,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO,,
4,60670006,RHUM,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,,,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO,,


It's unclear why pandas added four extra empty columns, but we should get rid of them and add the correct column names. This code drops the first row, but it's not relevant for our purposes.

In [4]:
monitoring_sites = monitoring_sites.drop(
    columns=[
        "Unnamed: 13", "Unnamed: 14",
        "Unnamed: 21", "Unnamed: 22"
    ]
)
monitoring_sites.columns = monitoring_site_col_names
monitoring_sites.head()

Unnamed: 0,AQSID,parameter_name,site_code,site_name,status,agency_id,agency_name,epa_region,latitude,longitude,elevation,gmt_offset,country_code,msa_code,msa_name,state_code,state_name,county_code,county_name
0,60670006,NO2,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,25.3,-8.0,US,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO
1,60670006,WD,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,25.3,-8.0,US,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO
2,60670006,SO2,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,25.3,-8.0,US,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO
3,60670006,BC,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,25.3,-8.0,US,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO
4,60670006,RHUM,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,25.3,-8.0,US,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO


Filter sites to states of interest and their neighbors:

In [5]:
midatlantic_monitoring_sites = monitoring_sites.loc[
    lambda df: (
        (
            df["state_name"]
            .isin(
                [
                    "MA", "CT", "VT", "NY", "NJ", 
                    "PA", "OH", "MD", "DE", "WV",
                ]
            )
        )
        & (df["parameter_name"] == "PM2.5")
    )
]

Read in hourly observations:

In [6]:
hourly_obs = pd.read_csv("../data/source/hourly_obs.csv", na_values="-999")
hourly_obs.head()

Unnamed: 0,site,site_id,0,1,2,3,4,5,6,7,...,14,15,16,17,18,19,20,21,22,23
0,CHARLOTTETOWN,20104,0.7,0.6,0.4,0.6,0.8,1.1,1.2,1.1,...,1.2,1.7,2.5,3.3,3.2,3.6,4.3,3.9,3.7,3.6
1,CHARLOTTETOWN,20104,G,G,G,G,G,G,G,G,...,G,G,G,G,G,G,G,G,G,G
2,WELLINGTON,20301,3.3,1.5,0.9,1.2,0.7,0.9,1.7,1.4,...,2.4,2.9,2.9,3.0,3.3,3.7,3.7,3.6,3.5,3.8
3,WELLINGTON,20301,G,G,G,G,G,G,G,G,...,G,G,G,G,G,G,G,G,G,G
4,SOUTHAMPTON,20401,1.6,1.3,1.0,0.9,0.9,1.5,1.5,1.5,...,2.4,4.0,4.7,5.8,7.3,7.4,7.0,6.4,6.7,5.6


Drop the non-data rows:

In [7]:
hourly_obs = hourly_obs.iloc[::2].copy()

Drop empty rows:

In [8]:
hourly_obs = hourly_obs.dropna()

Convert the observations to AQI values using the EPA's formula:
$$ I_{p} = \frac{I_{Hi} - I_{Lo}}{BP_{Hi} - BP_{Lo}}(C_{p} - BP_{Lo}) + I_{Lo}$$

Where:
- $ I_{p} $: The index for pollutant $ p $
- $ C_{p} $: The truncated concentration for pollutant $ p $
- $ BP_{Hi} $: The concentration breakpoint higher than $ C_{p} $
- $ BP_{Lo} $: The concentration breakpoint lower than $ C_{p} $
- $ I_{Hi} $: The index value corresponding to $ BP_{Hi} $
- $ I_{Lo} $: The index value corresponding to $ BP_{Lo} $

In the case of PM2.5, this gives us the following function:

In [9]:
def calc_aqi(concentration):
    if concentration == -999.0:  # missing data
        return -999
    elif concentration <= 12.0:
        aqi = (50 - 0)/(12.0 - 0) * (concentration - 0)
    elif concentration <= 35.4:
        aqi = (100 - 50)/(35.4 - 12.1) * (concentration - 12.1) + 50
    elif concentration <= 55.4:
        aqi = (150 - 100)/(55.4 - 35.5) * (concentration - 35.5) + 100
    elif concentration <= 150.4:
        aqi = (200 - 150)/(150.4 - 55.5) * (concentration - 55.5) + 150
    elif concentration <= 250.4:
        aqi = (300 - 200)/(250.4-150.5) * (concentration - 150.5) + 200
    elif concentration <= 500.4:
        aqi = (500 - 300)/(500.4 - 250.5) * (concentration - 250.5) + 300 
    else:
        aqi = 501  # It's off the charts!
    return int(aqi // 1)

In [10]:
hourly_obs.loc[:, '0':'23'] = hourly_obs.loc[:, '0':'23'].applymap(float).applymap(calc_aqi)

Calculate the peak observation at each site:

In [11]:
hourly_obs["peak"] = hourly_obs.loc[:, '0':'23'].T.max()

Calculate the average observation at each site:

In [12]:
hourly_obs["average"] = hourly_obs.loc[:, '0':'23'].T.mean()

In [13]:
hourly_obs.head()

Unnamed: 0,site,site_id,0,1,2,3,4,5,6,7,...,16,17,18,19,20,21,22,23,peak,average
0,CHARLOTTETOWN,20104,2,2,1,2,3,4,5,4,...,10,13,13,15,17,16,15,15,17,7.833333
2,WELLINGTON,20301,13,6,3,5,2,3,7,5,...,12,12,13,15,15,15,14,15,15,8.708333
4,SOUTHAMPTON,20401,6,5,4,3,3,6,6,6,...,19,24,30,30,29,26,27,23,30,12.833333
22,Longueuil,50119,66,60,57,45,29,31,54,56,...,15,14,16,18,16,13,11,13,66,29.5
24,Brossard - Parc Sor,50122,75,59,56,51,29,30,50,57,...,16,14,15,18,18,19,13,13,75,31.333333


Combine the lat/long data with the peak AQI data:

In [14]:
peak_site_data = (
    midatlantic_monitoring_sites
    .merge(
        hourly_obs,
        left_on="AQSID",
        right_on="site_id",
    )
    .set_index("site_id")
)[["site_name", "state_name", "county_name", "latitude", "longitude", "peak", "average"]]
peak_site_data.head()

Unnamed: 0_level_0,site_name,state_name,county_name,latitude,longitude,peak,average
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
390030009,Lima,OH,ALLEN,40.771,-84.0539,154,103.416667
250250042,Boston - Roxbury,MA,SUFFOLK,42.329399,-71.082497,88,50.416667
250095005,Haverhill,MA,ESSEX,42.7708,-71.1028,92,41.583333
420070014,Beaver Falls,PA,BEAVER,40.747799,-80.316704,153,101.083333
840250030008,PittsfieldES,MA,BERKSHIRE,42.452299,-73.239648,190,161.25


Load facility data:

In [15]:
facilities = gpd.read_file("../data/source/prison_boundaries.geojson")
facilities.head()

Unnamed: 0,FID,FACILITYID,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,TELEPHONE,TYPE,...,CAPACITY,SHAPE_Leng,GlobalID,CreationDate,Creator,EditDate,Editor,SHAPE_Length,SHAPE_Area,geometry
0,1,10002798,MIDLAND COUNTY CENTRAL DETENTION CENTER,400 S MAIN ST,MIDLAND,TX,79701,-999,(432) 688-4745,COUNTY,...,498.0,0.007275,{86DE7B8A-37D4-4D99-984B-D70D2B8C474F},2022-01-07 15:27:25+00:00,HostedByHIFLD,2022-01-07 15:27:25+00:00,HostedByHIFLD,0.007275,2.579058e-06,"MULTIPOLYGON (((-102.07687 31.99381, -102.0744..."
1,2,10002796,BARBARA CULVER JUVENILE DETENTION CENTER,3800 N LAMESA RD,MIDLAND,TX,79701,-999,(432) 688-4541,COUNTY,...,24.0,0.004645,{1C1E4C11-3209-4E90-85C7-957AE58CEB3C},2022-01-07 15:27:25+00:00,HostedByHIFLD,2022-01-07 15:27:25+00:00,HostedByHIFLD,0.004645,9.897645e-07,"MULTIPOLYGON (((-102.07655 32.03859, -102.0762..."
2,3,10002836,SAN JACINTO COUNTY JAIL,75 W CEDAR AVE,COLDSPRING,TX,77331,-999,(936) 653-4367,COUNTY,...,144.0,0.002327,{0764FA0C-E415-4F31-95AB-751C895E6CCF},2022-01-07 15:27:25+00:00,HostedByHIFLD,2022-01-07 15:27:25+00:00,HostedByHIFLD,0.002327,3.075715e-07,"MULTIPOLYGON (((-95.12992 30.59128, -95.12955 ..."
3,4,10003033,YELLOW MEDICINE COUNTY JAIL,415 9TH AVENUE,GRANITE FALLS,MN,56241,-999,(320) 313-3049,COUNTY,...,36.0,0.002272,{9A789E0C-3A99-44D7-A17C-3F0C940DC400},2022-01-07 15:27:25+00:00,HostedByHIFLD,2022-01-07 15:27:25+00:00,HostedByHIFLD,0.002272,1.900107e-07,"MULTIPOLYGON (((-95.54458 44.80976, -95.54434 ..."
4,5,10006288,NMJC THIEF RIVER FALLS SATELLITE HOME,118 N. SPRUCE AVENUE,THIEF RIVER FALLS,MN,56701,-999,(218) 681-3484,LOCAL,...,5.0,0.000644,{A4CD4104-0496-45C8-B09B-FC49557098D0},2022-01-07 15:27:25+00:00,HostedByHIFLD,2022-01-07 15:27:25+00:00,HostedByHIFLD,0.000644,2.457488e-08,"MULTIPOLYGON (((-96.16240 48.11734, -96.16221 ..."


Filter to open facilities in our states of interest:

In [16]:
facilities_midatlantic = (
    facilities.loc[
        lambda df: (
            df["STATE"].isin(["NY", "NJ", "PA", "DE"])
            & df["STATUS"].str.match("OPEN")
        )
    ]
    .copy()
    .drop_duplicates()
)

Find the centroid of each facility (we can ignore this warning, because we're just trying to find the nearest monitoring site, and this won't really matter for that):

In [17]:
facilities_midatlantic["centroid"] = (
    facilities_midatlantic["geometry"]
    .centroid
)


  .centroid


In [18]:
def distance(point, lat, long):
    """Calculate distance using the Haversine formula."""
    lat_1 = math.radians(point.y)
    lat_2 = math.radians(lat)
    lon_1 = math.radians(point.x)
    lon_2 = math.radians(long)
    d_lat = lat_1 - lat_2
    d_lon = lon_1 - lon_2
    alpha = (
        math.sin(d_lat / 2)**2 
        + math.cos(lat_1) 
        * math.cos(lat_2) 
        * math.sin(d_lon / 2)**2
    )
    radius = 3958.8  # radius of earth in miles
    return (
        2
        * radius
        * math.atan2(math.sqrt(alpha), math.sqrt(1 - alpha))
    )

Find the nearest monitoring site for each facility (using a horrifically inefficient algorithm, but the data is small enough that we don't really mind):

In [19]:
def dist_to_nearest_site(point, site_data):
    return site_data.apply(
        lambda x: distance(point, x["latitude"], x["longitude"]), axis=1
    ).min()

In [20]:
def nearest_site(point, site_data):
    nearest_site_dist = dist_to_nearest_site(point, site_data)
    if nearest_site_dist > 25:
        return pd.NA
    return site_data.assign(
        dist=site_data.apply(
            lambda x: distance(
                point,
                x["latitude"],
                x["longitude"],
            ),
            axis=1
        )
    ).loc[lambda df: df["dist"] == df["dist"].min()].index[0]

In [21]:
facilities_midatlantic["nearest_site"] = (
    facilities_midatlantic["centroid"]
    .map(lambda x: nearest_site(x, peak_site_data.dropna()))
)

In [22]:
joined_data = (
    facilities_midatlantic
    .dropna()
    .merge(
        peak_site_data.reset_index(),
        left_on="nearest_site",
        right_on="site_id",
    )
    .rename(
        columns={
            "FACILITYID": "facility_id",
            "NAME": "facility_name",
            "ADDRESS": "facility_address",
            "CITY": "facility_city",
            "STATE": "facility_state",
            "ZIP": "facility_zip",
            "TYPE": "facility_type",
            "SECURELVL": "facility_security_level",
            "CAPACITY": "facility_capacity",
            "STATUS": "facility_status",
            "SOURCE": "facility_data_source",
            "CreationDate": "facility_data_source_date",
            "EditDate": "facility_data_edit_date",
            "site_name": "monitoring_site_name",
            "state_name": "monitoring_site_state",
            "county_name": "monitoring_site_county",
            "latitude": "monitoring_site_latitude",
            "longitude": "monitoring_site_longitude",
            "peak": "peak_pm25_aqi",
            "average": "mean_pm25_aqi",
        }
    )
    .drop_duplicates()
    .set_index("facility_id")
).assign(
    facility_latitude=lambda df: df["centroid"].map(lambda x: x.y),
    facility_longitude=lambda df: df["centroid"].map(lambda x: x.x),
)[
    [
        "facility_name", "facility_address", "facility_city",
        "facility_state", "facility_zip", "facility_latitude",
        "facility_longitude", "facility_type",
        "facility_security_level", "facility_capacity",
        "facility_status", "facility_data_source",
        "facility_data_source_date", "facility_data_edit_date", 
        "monitoring_site_name", "monitoring_site_state", 
        "monitoring_site_county", "monitoring_site_latitude",
        "monitoring_site_longitude", "peak_pm25_aqi", 
        "mean_pm25_aqi", "geometry",
    ]
]

Number of facilities above 200:

In [23]:
joined_data.loc[
    lambda df: df["peak_pm25_aqi"] > 200
]

Unnamed: 0_level_0,facility_name,facility_address,facility_city,facility_state,facility_zip,facility_latitude,facility_longitude,facility_type,facility_security_level,facility_capacity,...,facility_data_source_date,facility_data_edit_date,monitoring_site_name,monitoring_site_state,monitoring_site_county,monitoring_site_latitude,monitoring_site_longitude,peak_pm25_aqi,mean_pm25_aqi,geometry
facility_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10005221,WAYNE COUNTY CORRECTIONAL FACILITY,44 MID-WAYNE DR,HONESDALE,PA,18431,41.548698,-75.208834,COUNTY,NOT AVAILABLE,201.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.6231,385,263.666667,"MULTIPOLYGON (((-75.20832 41.54933, -75.20821 ..."
10005223,MINSEC OF SCRANTON,539 LINDEN ST,SCRANTON,PA,18503,41.408550,-75.661082,LOCAL,NOT AVAILABLE,30.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.6231,385,263.666667,"MULTIPOLYGON (((-75.66103 41.40874, -75.66086 ..."
10003049,LACKAWANNA COUNTY PRISON,1371 N WASHINGTON AVE,SCRANTON,PA,18509,41.422658,-75.648372,COUNTY,MAXIMUM,1183.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.6231,385,263.666667,"MULTIPOLYGON (((-75.64832 41.42327, -75.64821 ..."
10003059,PA CHILD CARE,701 SATHERS DR,PITTSTON TOWNSHIP,PA,18640,41.312296,-75.725906,NOT AVAILABLE,JUVENILE,60.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.6231,385,263.666667,"MULTIPOLYGON (((-75.72628 41.31262, -75.72618 ..."
10003047,SCI WAYMART,11 FAIRVIEW DR,WAYMART,PA,18472,41.574452,-75.429367,STATE,MAXIMUM,1522.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.6231,385,263.666667,"MULTIPOLYGON (((-75.43174 41.57536, -75.43040 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10003117,SCI CHESTER,500 EAST FOURTH ST,CHESTER,PA,19013,39.851245,-75.351338,STATE,MEDIUM,1178.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Chester,PA,DELAWARE,39.835600,-75.3725,438,238.208333,"MULTIPOLYGON (((-75.35024 39.85255, -75.35016 ..."
10000708,SUSSEX VIOLATION OF PROBATION CENTER,23207 DUPONT BLVD,GEORGETOWN,DE,19947,38.657549,-75.368626,STATE,NOT AVAILABLE,250.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,SEAFORD,DE,SUSSEX,38.653900,-75.6106,223,186.416667,"MULTIPOLYGON (((-75.36911 38.65798, -75.36793 ..."
10000706,SUSSEX COMMUNITY CORRECTIONS CENTER,23207 DUPONT BLVD,GEORGETOWN,DE,19947,38.658484,-75.371968,STATE,NOT AVAILABLE,248.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,SEAFORD,DE,SUSSEX,38.653900,-75.6106,223,186.416667,"MULTIPOLYGON (((-75.37244 38.65862, -75.37211 ..."
10000707,SUSSEX CORRECTIONAL INSTITUTION,23203 DUPONT BLVD,GEORGETOWN,DE,19947,38.660047,-75.370246,STATE,MAXIMUM,1206.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,SEAFORD,DE,SUSSEX,38.653900,-75.6106,223,186.416667,"MULTIPOLYGON (((-75.37251 38.66097, -75.36791 ..."


Number of facilities above 300:

In [24]:
joined_data.loc[
    lambda df: df["peak_pm25_aqi"] > 300
]

Unnamed: 0_level_0,facility_name,facility_address,facility_city,facility_state,facility_zip,facility_latitude,facility_longitude,facility_type,facility_security_level,facility_capacity,...,facility_data_source_date,facility_data_edit_date,monitoring_site_name,monitoring_site_state,monitoring_site_county,monitoring_site_latitude,monitoring_site_longitude,peak_pm25_aqi,mean_pm25_aqi,geometry
facility_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10005221,WAYNE COUNTY CORRECTIONAL FACILITY,44 MID-WAYNE DR,HONESDALE,PA,18431,41.548698,-75.208834,COUNTY,NOT AVAILABLE,201.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,385,263.666667,"MULTIPOLYGON (((-75.20832 41.54933, -75.20821 ..."
10005223,MINSEC OF SCRANTON,539 LINDEN ST,SCRANTON,PA,18503,41.408550,-75.661082,LOCAL,NOT AVAILABLE,30.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,385,263.666667,"MULTIPOLYGON (((-75.66103 41.40874, -75.66086 ..."
10003049,LACKAWANNA COUNTY PRISON,1371 N WASHINGTON AVE,SCRANTON,PA,18509,41.422658,-75.648372,COUNTY,MAXIMUM,1183.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,385,263.666667,"MULTIPOLYGON (((-75.64832 41.42327, -75.64821 ..."
10003059,PA CHILD CARE,701 SATHERS DR,PITTSTON TOWNSHIP,PA,18640,41.312296,-75.725906,NOT AVAILABLE,JUVENILE,60.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,385,263.666667,"MULTIPOLYGON (((-75.72628 41.31262, -75.72618 ..."
10003047,SCI WAYMART,11 FAIRVIEW DR,WAYMART,PA,18472,41.574452,-75.429367,STATE,MAXIMUM,1522.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,385,263.666667,"MULTIPOLYGON (((-75.43174 41.57536, -75.43040 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10000733,SCI CAMP HILL,2500 LISBURN RD.,CAMP HILL,PA,17001,40.217967,-76.927382,STATE,NOT AVAILABLE,3272.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Harrisburg,PA,DAUPHIN,40.246992,-76.846988,387,207.750000,"MULTIPOLYGON (((-76.93098 40.21783, -76.92761 ..."
10003080,LEHIGH COUNTY PRISON,38 N 4TH ST,ALLENTOWN,PA,18102,40.604898,-75.466403,COUNTY,MAXIMUM,1270.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Allentown,PA,LEHIGH,40.611900,-75.432500,449,283.041667,"MULTIPOLYGON (((-75.46685 40.60510, -75.46621 ..."
10003081,LEHIGH COUNTY COMMUNITY CORRECTIONS CENTER,1600 RIVERSIDE DR,BETHLEHEM,PA,18016,40.612469,-75.412656,COUNTY,MINIMUM,400.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Allentown,PA,LEHIGH,40.611900,-75.432500,449,283.041667,"MULTIPOLYGON (((-75.41347 40.61299, -75.41285 ..."
10003119,DELAWARE COUNTY JUVENILE DETENTION CENTER,370 NORTH MIDDLETOWN RD,LIMA,PA,19037,39.921556,-75.444577,COUNTY,JUVENILE,66.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Chester,PA,DELAWARE,39.835600,-75.372500,438,238.208333,"MULTIPOLYGON (((-75.44538 39.92210, -75.44535 ..."


Number of sites above 400:

In [25]:
joined_data.loc[
    lambda df: df["peak_pm25_aqi"] > 400
]

Unnamed: 0_level_0,facility_name,facility_address,facility_city,facility_state,facility_zip,facility_latitude,facility_longitude,facility_type,facility_security_level,facility_capacity,...,facility_data_source_date,facility_data_edit_date,monitoring_site_name,monitoring_site_state,monitoring_site_county,monitoring_site_latitude,monitoring_site_longitude,peak_pm25_aqi,mean_pm25_aqi,geometry
facility_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10006642,ABRAXAS ACADEMY,1000 ACADEMY DRIVE,MORGANTOWN,PA,19543,40.191808,-75.916953,STATE,JUVENILE,156.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Reading Airport,PA,BERKS,40.383300,-75.968600,434,251.583333,"MULTIPOLYGON (((-75.91828 40.19098, -75.91822 ..."
10002270,BERKS COUNTY JAIL SYSTEM,1287 COUNTY WELFARE RD,LEESPORT,PA,19533,40.385264,-76.020695,COUNTY,NOT AVAILABLE,1546.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Reading Airport,PA,BERKS,40.383300,-75.968600,434,251.583333,"MULTIPOLYGON (((-76.02199 40.38624, -76.02124 ..."
10002271,BERKS COUNTY COMMUNITY REENTRY CENTER,1261 COUNTY WELFARE RD,LEESPORT,PA,19533,40.382898,-76.017713,COUNTY,NOT AVAILABLE,-999.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Reading Airport,PA,BERKS,40.383300,-75.968600,434,251.583333,"MULTIPOLYGON (((-76.01828 40.38317, -76.01798 ..."
10006847,BERKS COUNTY RESIDENTIAL CENTER,1040 BERKS ROAD,LEESPORT,PA,19533,40.379468,-76.019540,COUNTY,NOT AVAILABLE,-999.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Reading Airport,PA,BERKS,40.383300,-75.968600,434,251.583333,"MULTIPOLYGON (((-76.02031 40.37891, -76.02056 ..."
10000725,SCHUYLKILL COUNTY PRISON,230 SANDERSON ST,POTTSVILLE,PA,17901,40.688517,-76.199253,COUNTY,NOT AVAILABLE,277.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Reading Airport,PA,BERKS,40.383300,-75.968600,434,251.583333,"MULTIPOLYGON (((-76.19988 40.68868, -76.19886 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10006387,HAZEL D. PLANT WOMENS TREATMENT FACILITY,620 BAYLOR BOULEVARD,NEW CASTLE,DE,19720,39.702451,-75.582935,STATE,NOT AVAILABLE,96.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,MLK,DE,NEW CASTLE,39.739444,-75.558056,403,225.000000,"MULTIPOLYGON (((-75.58326 39.70271, -75.58313 ..."
10003080,LEHIGH COUNTY PRISON,38 N 4TH ST,ALLENTOWN,PA,18102,40.604898,-75.466403,COUNTY,MAXIMUM,1270.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Allentown,PA,LEHIGH,40.611900,-75.432500,449,283.041667,"MULTIPOLYGON (((-75.46685 40.60510, -75.46621 ..."
10003081,LEHIGH COUNTY COMMUNITY CORRECTIONS CENTER,1600 RIVERSIDE DR,BETHLEHEM,PA,18016,40.612469,-75.412656,COUNTY,MINIMUM,400.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Allentown,PA,LEHIGH,40.611900,-75.432500,449,283.041667,"MULTIPOLYGON (((-75.41347 40.61299, -75.41285 ..."
10003119,DELAWARE COUNTY JUVENILE DETENTION CENTER,370 NORTH MIDDLETOWN RD,LIMA,PA,19037,39.921556,-75.444577,COUNTY,JUVENILE,66.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Chester,PA,DELAWARE,39.835600,-75.372500,438,238.208333,"MULTIPOLYGON (((-75.44538 39.92210, -75.44535 ..."


Number of sites above 500:

In [26]:
joined_data.loc[
    lambda df: df["peak_pm25_aqi"] > 500
]

Unnamed: 0_level_0,facility_name,facility_address,facility_city,facility_state,facility_zip,facility_latitude,facility_longitude,facility_type,facility_security_level,facility_capacity,...,facility_data_source_date,facility_data_edit_date,monitoring_site_name,monitoring_site_state,monitoring_site_county,monitoring_site_latitude,monitoring_site_longitude,peak_pm25_aqi,mean_pm25_aqi,geometry
facility_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10003113,ALTERNATIVE & SPECIAL DETENTION CENTER(ASD),8101 STATE RD,PHILADELPHIA,PA,19136,40.029802,-75.015976,LOCAL,MINIMUM,860.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,501,278.125,"MULTIPOLYGON (((-75.01688 40.03051, -75.01692 ..."
10003114,PHILADELPHIA DETENTION CENTER,8201 STATE RD,PHILADELPHIA,PA,19136,40.033064,-75.015325,LOCAL,MINIMUM,1677.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,501,278.125,"MULTIPOLYGON (((-75.01505 40.03410, -75.01506 ..."
10004005,ALBERT C WAGNER YOUTH CORRECTIONAL FACILITY,500 WARD AVE,CHESTERFIELD,NJ,8515,40.159583,-74.669523,STATE,JUVENILE,1032.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,501,278.125,"MULTIPOLYGON (((-74.67112 40.16118, -74.67039 ..."
10004002,CRAF JONES FARM,721 BEAR TAVERN RD,TRENTON,NJ,8628,40.274645,-74.829314,STATE,MINIMUM,-999.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,501,278.125,"MULTIPOLYGON (((-74.83027 40.27456, -74.82893 ..."
10004004,GARDEN STATE YOUTH CORRECTIONAL FACILITY,55 HOGBACK RD,CROSSWICKS,NJ,8515,40.159168,-74.676344,STATE,JUVENILE,1168.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,501,278.125,"MULTIPOLYGON (((-74.67780 40.16136, -74.67763 ..."
10001516,MID-STATE CORRECTIONAL FACILITY,866 RANGE RD,WRIGHTSTOWN,NJ,8562,39.998564,-74.583393,STATE,MEDIUM,696.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,501,278.125,"MULTIPOLYGON (((-74.58353 39.99979, -74.58210 ..."
10004003,NEW JERSEY STATE PRISON,600 CASS ST,TRENTON,NJ,8608,40.207273,-74.756369,STATE,MAXIMUM,1819.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,501,278.125,"MULTIPOLYGON (((-74.75761 40.20849, -74.75753 ..."
10004006,JOHNSTONE JUVENILE MEDIUM SECURITY FACILITY,"BURLINGTON ST, P.O. BOX 307",BORDENTOWN,NJ,8505,40.136752,-74.722248,STATE,JUVENILE,262.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,501,278.125,"MULTIPOLYGON (((-74.72350 40.13794, -74.72309 ..."
10004007,JOHNSTONE JUVENILE FEMALE SECURE CARE AND INTA...,"BURLINGTON ST, P.O. BOX 367",BORDENTOWN,NJ,8505,40.1429,-74.721582,STATE,JUVENILE,48.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,501,278.125,"MULTIPOLYGON (((-74.72421 40.14374, -74.72349 ..."
10006598,ANN KLEIN FORENSIC CENTER,STUYVESANT AVENUE,WEST TRENTON,NJ,8628,40.247051,-74.801885,STATE,NOT AVAILABLE,200.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,501,278.125,"MULTIPOLYGON (((-74.80296 40.24742, -74.80299 ..."


Write to files:

In [27]:
joined_data.loc[:, :"mean_pm25_aqi"].to_csv("../data/processed/pm25_aqi_by_facility.csv")

In [28]:
joined_data.to_file("../data/processed/pm25_aqi_by_facility.geojson")