In [1]:
import math

import geopandas as gpd
import pandas as pd
import requests

Monitoring site documentation: https://docs.airnowapi.org/docs/MonitoringSiteFactSheet.pdf

In [2]:
monitoring_site_col_names = [
    "AQSID", "parameter_name", "site_code", "site_name", "status",
    "agency_id", "agency_name", "epa_region", "latitude", "longitude",
    "elevation", "gmt_offset", "country_code", "msa_code", "msa_name",
    "state_code", "state_name", "county_code", "county_name"
]

In [3]:
monitoring_sites = pd.read_csv(
    "../data/source/monitoring_site_locations.dat", 
    sep="|", 
    encoding="latin",
)
monitoring_sites.head()

Unnamed: 0,060670006,PM10,0006,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,Unnamed: 13,Unnamed: 14,40900,"Sacramento--Arden-Arcade--Roseville, CA",06,CA,067,SACRAMENTO,Unnamed: 21,Unnamed: 22
0,60670006,NO2,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,,,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO,,
1,60670006,WD,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,,,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO,,
2,60670006,SO2,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,,,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO,,
3,60670006,BC,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,,,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO,,
4,60670006,RHUM,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,...,,,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO,,


It's unclear why pandas added four extra empty columns, but we should get rid of them and add the correct column names. This code drops the first row, but it's not relevant for our purposes.

In [4]:
monitoring_sites = monitoring_sites.drop(
    columns=[
        "Unnamed: 13", "Unnamed: 14",
        "Unnamed: 21", "Unnamed: 22"
    ]
)
monitoring_sites.columns = monitoring_site_col_names
monitoring_sites.head()

Unnamed: 0,AQSID,parameter_name,site_code,site_name,status,agency_id,agency_name,epa_region,latitude,longitude,elevation,gmt_offset,country_code,msa_code,msa_name,state_code,state_name,county_code,county_name
0,60670006,NO2,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,25.3,-8.0,US,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO
1,60670006,WD,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,25.3,-8.0,US,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO
2,60670006,SO2,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,25.3,-8.0,US,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO
3,60670006,BC,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,25.3,-8.0,US,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO
4,60670006,RHUM,6,Arden Arcade - Del Paso Manor,Active,CA1,Sacramento Metro. AQMD,R9,38.613804,-121.368007,25.3,-8.0,US,40900.0,"Sacramento--Arden-Arcade--Roseville, CA",6,CA,67,SACRAMENTO


Filter sites to states of interest and their neighbors:

In [5]:
monitoring_sites.loc[
    lambda df: (
        (
            df["state_name"]
            .isin(
                [
                    "MA", "CT", "VT", "NY", "NJ", 
                    "PA", "OH", "MD", "DE", "WV",
                ]
            )
        )
        & (df["parameter_name"] == "PM2.5")
    )
]

Unnamed: 0,AQSID,parameter_name,site_code,site_name,status,agency_id,agency_name,epa_region,latitude,longitude,elevation,gmt_offset,country_code,msa_code,msa_name,state_code,state_name,county_code,county_name
14,360850111,PM2.5,0111,Fresh Kills,Active,NY1,New York Dept. of Environmental Conservation,R2,40.580200,-74.199400,0.0,-5.0,US,35620.0,"New York-Northern New Jersey-Long Island, NY-...",36,NY,085,RICHMOND
21,390030009,PM2.5,0009,Lima,Active,OH1,Ohio EPA-DAPC,R5,40.771000,-84.053900,275.1,-5.0,US,30620.0,"Lima, OH",39,OH,003,ALLEN
385,250250042,PM2.5,0042,Boston - Roxbury,Active,MA1,Massachusetts Dept. of Environmental Protection,R1,42.329399,-71.082497,0.0,-5.0,US,14460.0,"Boston-Cambridge-Quincy, MA-NH",25,MA,025,SUFFOLK
445,250095005,PM2.5,5005,Haverhill,Active,MA1,Massachusetts Dept. of Environmental Protection,R1,42.770800,-71.102800,0.0,-5.0,US,14460.0,"Boston-Cambridge-Quincy, MA-NH",25,MA,009,ESSEX
494,420070014,PM2.5,0014,Beaver Falls,Active,PA1,Pennsylvania Dept. of Environmental Protection,R3,40.747799,-80.316704,0.0,-5.0,US,38300.0,"Pittsburgh, PA",42,PA,007,BEAVER
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18521,840340292002,PM2.5,2002,Toms River,Active,NJ1,New Jersey Dept. of Environmental Protection,R2,39.994908,-74.170447,8.8,-5.0,US,35620.0,"New York-Northern New Jersey-Long Island, NY-...",34,NJ,029,OCEAN
19105,420150011,PM2.5,0011,Towanda,Active,PA1,Pennsylvania Dept. of Environmental Protection,R3,41.705226,-76.512726,340.1,-5.0,US,,,42,PA,015,BRADFORD
19128,360551007,PM2.5,1007,Rochester,Active,NY1,New York Dept. of Environmental Conservation,R2,43.146100,-77.548100,0.0,-5.0,US,40380.0,"Rochester, NY",36,NY,055,MONROE
19140,360671015,PM2.5,1015,E Syracuse,Active,NY1,New York Dept. of Environmental Conservation,R2,43.052800,-76.059700,0.0,-5.0,US,45060.0,"Syracuse, NY",36,NY,067,ONONDAGA


Read in hourly observations:

In [6]:
hourly_obs = pd.read_csv("../data/source/hourly_obs.csv", na_values="-999")
hourly_obs.head()

Unnamed: 0,site,site_id,0,1,2,3,4,5,6,7,...,14,15,16,17,18,19,20,21,22,23
0,CHARLOTTETOWN,20104,0.7,0.6,0.4,0.6,0.8,1.1,1.2,1.1,...,1.2,1.7,2.5,3.3,3.2,3.6,4.3,3.9,3.7,3.6
1,CHARLOTTETOWN,20104,G,G,G,G,G,G,G,G,...,G,G,G,G,G,G,G,G,G,G
2,WELLINGTON,20301,3.3,1.5,0.9,1.2,0.7,0.9,1.7,1.4,...,2.4,2.9,2.9,3.0,3.3,3.7,3.7,3.6,3.5,3.8
3,WELLINGTON,20301,G,G,G,G,G,G,G,G,...,G,G,G,G,G,G,G,G,G,G
4,SOUTHAMPTON,20401,1.6,1.3,1.0,0.9,0.9,1.5,1.5,1.5,...,2.4,4.0,4.7,5.8,7.3,7.4,7.0,6.4,6.7,5.6


Drop the non-data rows:

In [7]:
hourly_obs = hourly_obs.iloc[::2]

Calculate the peak observation at each site:

In [8]:
hourly_obs["peak"] = hourly_obs.loc[:, '0':'23'].T.applymap(float).max()

Calculate the average observation at each site:

In [9]:
hourly_obs["average"] = hourly_obs.loc[:, '0':'23'].T.applymap(float).mean()

In [10]:
hourly_obs.head()

Unnamed: 0,site,site_id,0,1,2,3,4,5,6,7,...,16,17,18,19,20,21,22,23,peak,average
0,CHARLOTTETOWN,20104,0.7,0.6,0.4,0.6,0.8,1.1,1.2,1.1,...,2.5,3.3,3.2,3.6,4.3,3.9,3.7,3.6,4.3,1.970833
2,WELLINGTON,20301,3.3,1.5,0.9,1.2,0.7,0.9,1.7,1.4,...,2.9,3.0,3.3,3.7,3.7,3.6,3.5,3.8,3.8,2.2
4,SOUTHAMPTON,20401,1.6,1.3,1.0,0.9,0.9,1.5,1.5,1.5,...,4.7,5.8,7.3,7.4,7.0,6.4,6.7,5.6,7.4,3.2
6,JOHNSTON BUILDING -,30113,,,,,,,,,...,,,,,,,,,,
8,Lake Major,30120,3.2,3.0,2.9,3.6,3.2,2.5,1.6,2.5,...,,,,9.3,9.9,9.4,10.0,10.3,10.3,5.23


Combine the lat/long data with the peak AQI data:

In [11]:
peak_site_data = (
    monitoring_sites
    .merge(
        hourly_obs,
        left_on="AQSID",
        right_on="site_id",
    )
    .set_index("site_id")
)[["site_name", "state_name", "county_name", "latitude", "longitude", "peak", "average"]]
peak_site_data.head()

Unnamed: 0_level_0,site_name,state_name,county_name,latitude,longitude,peak,average
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
60670006,Arden Arcade - Del Paso Manor,CA,SACRAMENTO,38.613804,-121.368007,7.0,4.666667
60670006,Arden Arcade - Del Paso Manor,CA,SACRAMENTO,38.613804,-121.368007,7.0,4.666667
60670006,Arden Arcade - Del Paso Manor,CA,SACRAMENTO,38.613804,-121.368007,7.0,4.666667
60670006,Arden Arcade - Del Paso Manor,CA,SACRAMENTO,38.613804,-121.368007,7.0,4.666667
60670006,Arden Arcade - Del Paso Manor,CA,SACRAMENTO,38.613804,-121.368007,7.0,4.666667


Load facility data:

In [12]:
facilities = gpd.read_file("../data/source/prison_boundaries.geojson")
facilities.head()

Unnamed: 0,FID,FACILITYID,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,TELEPHONE,TYPE,...,CAPACITY,SHAPE_Leng,GlobalID,CreationDate,Creator,EditDate,Editor,SHAPE_Length,SHAPE_Area,geometry
0,1,10002798,MIDLAND COUNTY CENTRAL DETENTION CENTER,400 S MAIN ST,MIDLAND,TX,79701,-999,(432) 688-4745,COUNTY,...,498.0,0.007275,{86DE7B8A-37D4-4D99-984B-D70D2B8C474F},2022-01-07 15:27:25+00:00,HostedByHIFLD,2022-01-07 15:27:25+00:00,HostedByHIFLD,0.007275,2.579058e-06,"MULTIPOLYGON (((-102.07687 31.99381, -102.0744..."
1,2,10002796,BARBARA CULVER JUVENILE DETENTION CENTER,3800 N LAMESA RD,MIDLAND,TX,79701,-999,(432) 688-4541,COUNTY,...,24.0,0.004645,{1C1E4C11-3209-4E90-85C7-957AE58CEB3C},2022-01-07 15:27:25+00:00,HostedByHIFLD,2022-01-07 15:27:25+00:00,HostedByHIFLD,0.004645,9.897645e-07,"MULTIPOLYGON (((-102.07655 32.03859, -102.0762..."
2,3,10002836,SAN JACINTO COUNTY JAIL,75 W CEDAR AVE,COLDSPRING,TX,77331,-999,(936) 653-4367,COUNTY,...,144.0,0.002327,{0764FA0C-E415-4F31-95AB-751C895E6CCF},2022-01-07 15:27:25+00:00,HostedByHIFLD,2022-01-07 15:27:25+00:00,HostedByHIFLD,0.002327,3.075715e-07,"MULTIPOLYGON (((-95.12992 30.59128, -95.12955 ..."
3,4,10003033,YELLOW MEDICINE COUNTY JAIL,415 9TH AVENUE,GRANITE FALLS,MN,56241,-999,(320) 313-3049,COUNTY,...,36.0,0.002272,{9A789E0C-3A99-44D7-A17C-3F0C940DC400},2022-01-07 15:27:25+00:00,HostedByHIFLD,2022-01-07 15:27:25+00:00,HostedByHIFLD,0.002272,1.900107e-07,"MULTIPOLYGON (((-95.54458 44.80976, -95.54434 ..."
4,5,10006288,NMJC THIEF RIVER FALLS SATELLITE HOME,118 N. SPRUCE AVENUE,THIEF RIVER FALLS,MN,56701,-999,(218) 681-3484,LOCAL,...,5.0,0.000644,{A4CD4104-0496-45C8-B09B-FC49557098D0},2022-01-07 15:27:25+00:00,HostedByHIFLD,2022-01-07 15:27:25+00:00,HostedByHIFLD,0.000644,2.457488e-08,"MULTIPOLYGON (((-96.16240 48.11734, -96.16221 ..."


Filter to open facilities in our states of interest:

In [13]:
facilities_midatlantic = (
    facilities.loc[
        lambda df: (
            df["STATE"].isin(["NY", "NJ", "PA", "DE"])
            & df["STATUS"].str.match("OPEN")
        )
    ]
    .copy()
    .drop_duplicates()
)

Find the centroid of each facility (we can ignore this warning, because we're just trying to find the nearest monitoring site, and this won't really matter for that):

In [14]:
facilities_midatlantic["centroid"] = (
    facilities_midatlantic["geometry"]
    .centroid
)


  .centroid


In [15]:
def distance(point, lat, long):
    """Calculate distance using the Haversine formula."""
    lat_1 = math.radians(point.y)
    lat_2 = math.radians(lat)
    lon_1 = math.radians(point.x)
    lon_2 = math.radians(long)
    d_lat = lat_1 - lat_2
    d_lon = lon_1 - lon_2
    alpha = (
        math.sin(d_lat / 2)**2 
        + math.cos(lat_1) 
        * math.cos(lat_2) 
        * math.sin(d_lon / 2)**2
    )
    radius = 3958.8  # radius of earth in miles
    return (
        2
        * radius
        * math.atan2(math.sqrt(alpha), math.sqrt(1 - alpha))
    )

Find the nearest monitoring site for each facility (using a horrifically inefficient algorithm, but the data is small enough that we don't really mind):

In [16]:
def nearest_site(point, site_data):
    return site_data.assign(
        dist=site_data.apply(
            lambda x: distance(
                point,
                x["latitude"],
                x["longitude"],
            ),
            axis=1
        )
    ).loc[lambda df: df["dist"] == df["dist"].min()].index[0]

In [17]:
facilities_midatlantic["nearest_site"] = (
    facilities_midatlantic["centroid"]
    .map(lambda x: nearest_site(x, peak_site_data.dropna()))
)

In [18]:
joined_data = (
    facilities_midatlantic
    .merge(
        peak_site_data.reset_index(),
        left_on="nearest_site",
        right_on="site_id",
    )
    .rename(
        columns={
            "FACILITYID": "facility_id",
            "NAME": "facility_name",
            "ADDRESS": "facility_address",
            "CITY": "facility_city",
            "STATE": "facility_state",
            "ZIP": "facility_zip",
            "TYPE": "facility_type",
            "SECURELVL": "facility_security_level",
            "CAPACITY": "facility_capacity",
            "STATUS": "facility_status",
            "SOURCE": "facility_data_source",
            "CreationDate": "facility_data_source_date",
            "EditDate": "facility_data_edit_date",
            "site_name": "monitoring_site_name",
            "state_name": "monitoring_site_state",
            "county_name": "monitoring_site_county",
            "latitude": "monitoring_site_latitude",
            "longitude": "monitoring_site_longitude",
            "peak": "peak_pm25_aqi",
            "average": "mean_pm25_aqi",
        }
    )
    .drop_duplicates()
    .set_index("facility_id")
).assign(
    facility_latitude=lambda df: df["centroid"].map(lambda x: x.y),
    facility_longitude=lambda df: df["centroid"].map(lambda x: x.x),
)[
    [
        "facility_name", "facility_address", "facility_city",
        "facility_state", "facility_zip", "facility_latitude",
        "facility_longitude", "facility_type",
        "facility_security_level", "facility_capacity",
        "facility_status", "facility_data_source_date",
        "facility_data_edit_date", "monitoring_site_name",
        "monitoring_site_state", "monitoring_site_county",
        "monitoring_site_latitude", "monitoring_site_longitude",
        "peak_pm25_aqi", "mean_pm25_aqi", "geometry",
    ]
]

Number of facilities above 200:

In [19]:
joined_data.loc[
    lambda df: df["peak_pm25_aqi"] > 200
]

Unnamed: 0_level_0,facility_name,facility_address,facility_city,facility_state,facility_zip,facility_latitude,facility_longitude,facility_type,facility_security_level,facility_capacity,...,facility_data_source_date,facility_data_edit_date,monitoring_site_name,monitoring_site_state,monitoring_site_county,monitoring_site_latitude,monitoring_site_longitude,peak_pm25_aqi,mean_pm25_aqi,geometry
facility_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10005221,WAYNE COUNTY CORRECTIONAL FACILITY,44 MID-WAYNE DR,HONESDALE,PA,18431,41.548698,-75.208834,COUNTY,NOT AVAILABLE,201.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,357.5,215.245833,"MULTIPOLYGON (((-75.20832 41.54933, -75.20821 ..."
10005223,MINSEC OF SCRANTON,539 LINDEN ST,SCRANTON,PA,18503,41.408550,-75.661082,LOCAL,NOT AVAILABLE,30.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,357.5,215.245833,"MULTIPOLYGON (((-75.66103 41.40874, -75.66086 ..."
10003049,LACKAWANNA COUNTY PRISON,1371 N WASHINGTON AVE,SCRANTON,PA,18509,41.422658,-75.648372,COUNTY,MAXIMUM,1183.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,357.5,215.245833,"MULTIPOLYGON (((-75.64832 41.42327, -75.64821 ..."
10003059,PA CHILD CARE,701 SATHERS DR,PITTSTON TOWNSHIP,PA,18640,41.312296,-75.725906,NOT AVAILABLE,JUVENILE,60.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,357.5,215.245833,"MULTIPOLYGON (((-75.72628 41.31262, -75.72618 ..."
10003047,SCI WAYMART,11 FAIRVIEW DR,WAYMART,PA,18472,41.574452,-75.429367,STATE,MAXIMUM,1522.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,357.5,215.245833,"MULTIPOLYGON (((-75.43174 41.57536, -75.43040 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10000702,DIVISION OF YOUTH REHABILITATIVE SERVICES RESI...,1825 FAULKLAND RD,WILMINGTON,DE,19805,39.750162,-75.612572,STATE,JUVENILE,45.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,MLK,DE,NEW CASTLE,39.739444,-75.558056,380.3,169.687500,"MULTIPOLYGON (((-75.61186 39.75069, -75.61194 ..."
10000701,PLUMMER COMMUNITY CORRECTIONS CENTER,38 TODDS LN,WILMINGTON,DE,19802,39.756645,-75.529044,STATE,NOT AVAILABLE,246.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,MLK,DE,NEW CASTLE,39.739444,-75.558056,380.3,169.687500,"MULTIPOLYGON (((-75.52850 39.75648, -75.52904 ..."
10002260,BAYLOR WOMEN'S CORRECTIONAL INSTITUTION,660 BAYLOR BLVD,NEW CASTLE,DE,19720,39.703932,-75.581258,STATE,MAXIMUM,320.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,MLK,DE,NEW CASTLE,39.739444,-75.558056,380.3,169.687500,"MULTIPOLYGON (((-75.58220 39.70471, -75.58213 ..."
10005219,NEW CASTLE COUNTY DETENTION CENTER,963 CENTRE RD,WILMINGTON,DE,19805,39.754230,-75.610557,STATE,JUVENILE,64.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,MLK,DE,NEW CASTLE,39.739444,-75.558056,380.3,169.687500,"MULTIPOLYGON (((-75.61104 39.75479, -75.61070 ..."


Number of facilities above 300:

In [20]:
joined_data.loc[
    lambda df: df["peak_pm25_aqi"] > 300
]

Unnamed: 0_level_0,facility_name,facility_address,facility_city,facility_state,facility_zip,facility_latitude,facility_longitude,facility_type,facility_security_level,facility_capacity,...,facility_data_source_date,facility_data_edit_date,monitoring_site_name,monitoring_site_state,monitoring_site_county,monitoring_site_latitude,monitoring_site_longitude,peak_pm25_aqi,mean_pm25_aqi,geometry
facility_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10005221,WAYNE COUNTY CORRECTIONAL FACILITY,44 MID-WAYNE DR,HONESDALE,PA,18431,41.548698,-75.208834,COUNTY,NOT AVAILABLE,201.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,357.5,215.245833,"MULTIPOLYGON (((-75.20832 41.54933, -75.20821 ..."
10005223,MINSEC OF SCRANTON,539 LINDEN ST,SCRANTON,PA,18503,41.408550,-75.661082,LOCAL,NOT AVAILABLE,30.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,357.5,215.245833,"MULTIPOLYGON (((-75.66103 41.40874, -75.66086 ..."
10003049,LACKAWANNA COUNTY PRISON,1371 N WASHINGTON AVE,SCRANTON,PA,18509,41.422658,-75.648372,COUNTY,MAXIMUM,1183.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,357.5,215.245833,"MULTIPOLYGON (((-75.64832 41.42327, -75.64821 ..."
10003059,PA CHILD CARE,701 SATHERS DR,PITTSTON TOWNSHIP,PA,18640,41.312296,-75.725906,NOT AVAILABLE,JUVENILE,60.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,357.5,215.245833,"MULTIPOLYGON (((-75.72628 41.31262, -75.72618 ..."
10003047,SCI WAYMART,11 FAIRVIEW DR,WAYMART,PA,18472,41.574452,-75.429367,STATE,MAXIMUM,1522.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Scranton,PA,LACKAWANNA,41.442800,-75.623100,357.5,215.245833,"MULTIPOLYGON (((-75.43174 41.57536, -75.43040 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10000702,DIVISION OF YOUTH REHABILITATIVE SERVICES RESI...,1825 FAULKLAND RD,WILMINGTON,DE,19805,39.750162,-75.612572,STATE,JUVENILE,45.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,MLK,DE,NEW CASTLE,39.739444,-75.558056,380.3,169.687500,"MULTIPOLYGON (((-75.61186 39.75069, -75.61194 ..."
10000701,PLUMMER COMMUNITY CORRECTIONS CENTER,38 TODDS LN,WILMINGTON,DE,19802,39.756645,-75.529044,STATE,NOT AVAILABLE,246.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,MLK,DE,NEW CASTLE,39.739444,-75.558056,380.3,169.687500,"MULTIPOLYGON (((-75.52850 39.75648, -75.52904 ..."
10002260,BAYLOR WOMEN'S CORRECTIONAL INSTITUTION,660 BAYLOR BLVD,NEW CASTLE,DE,19720,39.703932,-75.581258,STATE,MAXIMUM,320.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,MLK,DE,NEW CASTLE,39.739444,-75.558056,380.3,169.687500,"MULTIPOLYGON (((-75.58220 39.70471, -75.58213 ..."
10005219,NEW CASTLE COUNTY DETENTION CENTER,963 CENTRE RD,WILMINGTON,DE,19805,39.754230,-75.610557,STATE,JUVENILE,64.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,MLK,DE,NEW CASTLE,39.739444,-75.558056,380.3,169.687500,"MULTIPOLYGON (((-75.61104 39.75479, -75.61070 ..."


Number of sites above 400:

In [21]:
joined_data.loc[
    lambda df: df["peak_pm25_aqi"] > 400
]

Unnamed: 0_level_0,facility_name,facility_address,facility_city,facility_state,facility_zip,facility_latitude,facility_longitude,facility_type,facility_security_level,facility_capacity,...,facility_data_source_date,facility_data_edit_date,monitoring_site_name,monitoring_site_state,monitoring_site_county,monitoring_site_latitude,monitoring_site_longitude,peak_pm25_aqi,mean_pm25_aqi,geometry
facility_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10006642,ABRAXAS ACADEMY,1000 ACADEMY DRIVE,MORGANTOWN,PA,19543,40.191808,-75.916953,STATE,JUVENILE,156.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Reading Airport,PA,BERKS,40.3833,-75.9686,418.3,204.383333,"MULTIPOLYGON (((-75.91828 40.19098, -75.91822 ..."
10002270,BERKS COUNTY JAIL SYSTEM,1287 COUNTY WELFARE RD,LEESPORT,PA,19533,40.385264,-76.020695,COUNTY,NOT AVAILABLE,1546.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Reading Airport,PA,BERKS,40.3833,-75.9686,418.3,204.383333,"MULTIPOLYGON (((-76.02199 40.38624, -76.02124 ..."
10002271,BERKS COUNTY COMMUNITY REENTRY CENTER,1261 COUNTY WELFARE RD,LEESPORT,PA,19533,40.382898,-76.017713,COUNTY,NOT AVAILABLE,-999.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Reading Airport,PA,BERKS,40.3833,-75.9686,418.3,204.383333,"MULTIPOLYGON (((-76.01828 40.38317, -76.01798 ..."
10006847,BERKS COUNTY RESIDENTIAL CENTER,1040 BERKS ROAD,LEESPORT,PA,19533,40.379468,-76.019540,COUNTY,NOT AVAILABLE,-999.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Reading Airport,PA,BERKS,40.3833,-75.9686,418.3,204.383333,"MULTIPOLYGON (((-76.02031 40.37891, -76.02056 ..."
10000725,SCHUYLKILL COUNTY PRISON,230 SANDERSON ST,POTTSVILLE,PA,17901,40.688517,-76.199253,COUNTY,NOT AVAILABLE,277.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Reading Airport,PA,BERKS,40.3833,-75.9686,418.3,204.383333,"MULTIPOLYGON (((-76.19988 40.68868, -76.19886 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10005816,SAINT JOHNS - RICHMOND HILL DETENTION CENTER,130-20 107TH AVENUE,RICHMOND HILL,NY,11419,40.686968,-73.813415,LOCAL,JUVENILE,12.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Queens,NY,QUEENS,40.7375,-73.8244,412.0,200.358333,"MULTIPOLYGON (((-73.81356 40.68709, -73.81340 ..."
10005814,QUEENS DETENTION COMPLEX,126-01 82ND AVENUE,KEW GARDENS,NY,11415,40.712991,-73.825486,LOCAL,NOT AVAILABLE,-999.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Queens,NY,QUEENS,40.7375,-73.8244,412.0,200.358333,"MULTIPOLYGON (((-73.82582 40.71362, -73.82561 ..."
10002308,SALEM COUNTY CORRECTIONAL FACILITY,125 CEMETERY RD,WOODSTOWN,NJ,08098,39.629881,-75.357215,COUNTY,MAXIMUM,464.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,RIVER ROAD PARK; BELLEFONTE,DE,NEW CASTLE,39.7611,-75.4919,428.1,193.000000,"MULTIPOLYGON (((-75.35853 39.63020, -75.35808 ..."
10003119,DELAWARE COUNTY JUVENILE DETENTION CENTER,370 NORTH MIDDLETOWN RD,LIMA,PA,19037,39.921556,-75.444577,COUNTY,JUVENILE,66.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,Chester,PA,DELAWARE,39.8356,-75.3725,423.0,186.008333,"MULTIPOLYGON (((-75.44538 39.92210, -75.44535 ..."


Number of sites above 500:

In [22]:
joined_data.loc[
    lambda df: df["peak_pm25_aqi"] > 500
]

Unnamed: 0_level_0,facility_name,facility_address,facility_city,facility_state,facility_zip,facility_latitude,facility_longitude,facility_type,facility_security_level,facility_capacity,...,facility_data_source_date,facility_data_edit_date,monitoring_site_name,monitoring_site_state,monitoring_site_county,monitoring_site_latitude,monitoring_site_longitude,peak_pm25_aqi,mean_pm25_aqi,geometry
facility_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10003113,ALTERNATIVE & SPECIAL DETENTION CENTER(ASD),8101 STATE RD,PHILADELPHIA,PA,19136,40.029802,-75.015976,LOCAL,MINIMUM,860.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,510.1,235.716667,"MULTIPOLYGON (((-75.01688 40.03051, -75.01692 ..."
10003114,PHILADELPHIA DETENTION CENTER,8201 STATE RD,PHILADELPHIA,PA,19136,40.033064,-75.015325,LOCAL,MINIMUM,1677.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,510.1,235.716667,"MULTIPOLYGON (((-75.01505 40.03410, -75.01506 ..."
10002304,BURLINGTON COUNTY DETENTION CENTER,54 GRANT ST,MOUNT HOLLY,NJ,8060,39.996071,-74.790918,COUNTY,NOT AVAILABLE,625.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,510.1,235.716667,"MULTIPOLYGON (((-74.79161 39.99611, -74.79108 ..."
10003110,CURRAN- FROMHOLD CORRECTIONAL FACILITY (CFCF),7901 STATE RD,PHILADELPHIA,PA,19136,40.029723,-75.019756,LOCAL,CLOSE,2560.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,510.1,235.716667,"MULTIPOLYGON (((-75.02106 40.03112, -75.02032 ..."
10003112,RIVERSIDE CORRECTIONAL FACILITY,8151 STATE ROAD,PHILADELPHIA,PA,19136,40.029348,-75.017085,LOCAL,CLOSE,824.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,510.1,235.716667,"MULTIPOLYGON (((-75.01796 40.02929, -75.01679 ..."
10003115,PHILADELPHIA INDUSTRIAL CORRECTIONAL CENTER,8301 STATE RD,PHILADELPHIA,PA,19136,40.034679,-75.013566,LOCAL,CLOSE,1230.0,...,2022-01-07 15:27:25+00:00,2022-01-07 15:27:25+00:00,TOR,PA,PHILADELPHIA,40.054171,-74.985166,510.1,235.716667,"MULTIPOLYGON (((-75.01309 40.03574, -75.01280 ..."


Write to files:

In [23]:
joined_data.loc[:, :"mean_pm25_aqi"].to_csv("../data/processed/pm25_aqi_by_facility.csv")

In [24]:
joined_data.to_file("../data/processed/pm25_aqi_by_facility.geojson")