In [3]:
import json, time, requests, config

General constant parameters for the API request

In [12]:
# CONSTANTS
#    Base URL that every AQS API action below is appended to
API_REQUEST_URL = 'https://aqs.epa.gov/data/api'
#    Credentials are read from the local `config` module so they are not written into the notebook itself
USERNAME, APIKEY = config.username, config.api_key

#    Each API_ACTION_* value is a str.format() template for one 'action' (request) the API supports.
#    The templates are assembled from the query-string fragments they have in common.
_QS_CREDENTIALS = 'email={email}&key={key}'
_QS_PARAM_WINDOW = '&param={param}&bdate={begin_date}&edate={end_date}'
_QS_COUNTY = '&state={state}&county={county}'
_QS_BOX = '&minlat={minlat}&maxlat={maxlat}&minlon={minlon}&maxlon={maxlon}'
#
#    List actions provide information on API parameter values that are required by some other actions/requests
API_ACTION_LIST_CLASSES = '/list/classes?' + _QS_CREDENTIALS
API_ACTION_LIST_PARAMS = '/list/parametersByClass?' + _QS_CREDENTIALS + '&pc={pclass}'
API_ACTION_LIST_SITES = '/list/sitesByCounty?' + _QS_CREDENTIALS + _QS_COUNTY
#
#    Monitor actions are requests for monitoring stations that meet specific criteria
API_ACTION_MONITORS_COUNTY = '/monitors/byCounty?' + _QS_CREDENTIALS + _QS_PARAM_WINDOW + _QS_COUNTY
API_ACTION_MONITORS_BOX = '/monitors/byBox?' + _QS_CREDENTIALS + _QS_PARAM_WINDOW + _QS_BOX
#
#    Summary actions are requests for summary data. These are for daily summaries
API_ACTION_DAILY_SUMMARY_COUNTY = '/dailyData/byCounty?' + _QS_CREDENTIALS + _QS_PARAM_WINDOW + _QS_COUNTY
API_ACTION_DAILY_SUMMARY_BOX = '/dailyData/byBox?' + _QS_CREDENTIALS + _QS_PARAM_WINDOW + _QS_BOX
#
AQI_PARAM_CLASS = "AQI POLLUTANTS"
#   Gaseous AQI pollutants CO, SO2, NO2, and O3 (ozone)
AQI_PARAMS_GASEOUS = ",".join(("42101", "42401", "42602", "44201"))
#   Particulate AQI pollutants PM10, PM2.5, and Acceptable PM2.5
AQI_PARAMS_PARTICULATES = ",".join(("81102", "88101", "88502"))

#    It is always nice to be respectful of a free data resource.
#    We're going to observe a 100 requests per minute limit - which is fairly nice
#    100 requests per minute is one request every 60/100 = 0.6 seconds. The pause taken before each
#    request is that interval minus the time we assume the request itself spends on the network.
API_REQUESTS_PER_MINUTE = 100.0
API_LATENCY_ASSUMED = 0.002       # Assuming roughly 2ms latency on the API and network
API_THROTTLE_WAIT = (60.0/API_REQUESTS_PER_MINUTE)-API_LATENCY_ASSUMED
#
#    This is a template that covers most of the parameters for the actions we might take, from the set of actions
#    above. In the examples below, most of the time parameters can either be supplied as individual values to a
#    function - or they can be set in a copy of the template and passed in with the template.
# 
AQS_REQUEST_TEMPLATE = dict(
    email="",
    key="",
    state="",          # the two digit state FIPS # as a string
    county="",         # the three digit county FIPS # as a string
    begin_date="",     # the start of a time window in YYYYMMDD format
    end_date="",       # the end of a time window in YYYYMMDD format, begin_date and end_date must be in the same year
    minlat=0.0,        # the four min/max values are the bounding box fields filled into the byBox actions
    maxlat=0.0,
    minlon=0.0,
    maxlon=0.0,
    param="",          # a list of comma separated 5 digit codes, max 5 codes requested
    pclass="",         # parameter class is only used by the List calls
)

#    Places that can be queried, keyed by a short lowercase name.
#    'fips' is the five character code whose first two characters are used as the state
#    and last three as the county; 'latlon' is [latitude, longitude] in decimal degrees.
CITY_LOCATIONS = {
    "omaha": dict(
        city="Omaha",
        county="Douglas",
        state="Nebraska",
        fips="31055",
        latlon=[41.26, -96.05],
    ),
}

Given our `CITY_LOCATIONS` entry for Omaha, NE, we can now find which monitoring stations are nearby. We will also check how the set of nearby monitoring stations changes over the years.

In [13]:
#    This implements the monitors request. This requests monitoring stations. This can be done by state, county, or bounding box. 

def request_monitors(email_address = None, key = None, param=None,
                          begin_date = None, end_date = None, fips = None,
                          endpoint_url = API_REQUEST_URL, 
                          endpoint_action = API_ACTION_MONITORS_COUNTY, 
                          request_template = AQS_REQUEST_TEMPLATE,
                          headers = None):
    """Request monitoring stations from the AQS API.

    Values passed as call parameters take priority over values already present
    in ``request_template``. The template passed in (including the module-level
    default) is never modified: the function works on its own copy, so values
    from one call cannot leak into a later call.

    Parameters:
        email_address: value for the ``email`` field of the request.
        key: value for the ``key`` field of the request.
        param: comma separated parameter codes for the ``param`` field.
        begin_date: start of the time window (``begin_date`` field).
        end_date: end of the time window (``end_date`` field).
        fips: five character FIPS string; the first two characters become
            ``state`` and the remaining three ``county``. A value that is not
            exactly five characters long is ignored.
        endpoint_url: root URL the action is appended to.
        endpoint_action: ``str.format`` template for the action; it is filled
            in from the request fields.
        request_template: dict of request fields used as the starting point.
        headers: passed through to ``requests.get``.

    Returns:
        The decoded JSON response, or ``None`` if the request or the JSON
        decoding raised an exception (the exception is printed).

    Raises:
        Exception: if email, key, param, begin_date or end_date is still empty
            after the call parameters have been applied.
    """
    # Work on a private copy so neither the caller's dict nor the shared default template is mutated
    request_template = dict(request_template)

    #  This prioritizes the info from the call parameters - not what's already in the template
    overrides = {
        'email': email_address,
        'key': key,
        'param': param,
        'begin_date': begin_date,
        'end_date': end_date,
    }
    for field, value in overrides.items():
        if value:
            request_template[field] = value
    if fips and len(fips)==5:
        request_template['state'] = fips[:2]
        request_template['county'] = fips[2:]

    # Make sure there are values that allow us to make a call - these are always required
    # Note we're not validating FIPS fields because not all of the monitors actions require the FIPS numbers
    required = (
        ('email', "an email address"),
        ('key', "a key"),
        ('param', "param values"),
        ('begin_date', "a begin_date"),
        ('end_date', "an end_date"),
    )
    for field, description in required:
        if not request_template[field]:
            raise Exception(f"Must supply {description} to call 'request_monitors()'")

    # compose the request
    request_url = endpoint_url+endpoint_action.format(**request_template)

    # make the request
    try:
        # Wait first, to make sure we don't exceed a rate limit in the situation where an exception occurs
        # during the request processing - throttling is always a good practice with a free data source
        if API_THROTTLE_WAIT > 0.0:
            time.sleep(API_THROTTLE_WAIT)
        response = requests.get(request_url, headers=headers)
        json_response = response.json()
    except Exception as e:
        # Best effort: report the problem and hand back None so the caller can decide what to do
        print(e)
        json_response = None
    return json_response

In [14]:
#
#   These are rough estimates for creating bounding boxes based on a city location
#   You can find these rough estimates on the USGS website:
#   https://www.usgs.gov/faqs/how-much-distance-does-a-degree-minute-and-second-cover-your-maps
#
LAT_25MILES = 25.0 * (1.0/69.0)    # roughly 25 miles expressed in decimal degrees of latitude
LON_25MILES = 25.0 * (1.0/54.6)    # roughly 25 miles expressed in decimal degrees of longitude
#
#   Rough bounding box around a place.
#   The box is centered on the place and extends scale * 25 miles (approximately) in each
#   direction, so its sides are about scale * 50 miles long.
#
def bounding_latlon(place=None,scale=1.0):
    """Return ``[minlat, maxlat, minlon, maxlon]`` for a box centered on ``place``.

    ``place`` must be a mapping whose ``'latlon'`` entry holds the latitude at
    index 0 and the longitude at index 1. ``scale`` is converted with
    ``float()`` and multiplies the 25 mile half-width in both directions.
    """
    center_lat = place['latlon'][0]
    center_lon = place['latlon'][1]
    factor = float(scale)
    lat_reach = factor * LAT_25MILES
    lon_reach = factor * LON_25MILES
    return [
        center_lat - lat_reach,
        center_lat + lat_reach,
        center_lon - lon_reach,
        center_lon + lon_reach,
    ]

In [15]:
request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['email'] = USERNAME
request_data['key'] = APIKEY
request_data['param'] = AQI_PARAMS_PARTICULATES     # remember we have both gaseous and particulates
# 
#   Query the county that Omaha is in (Douglas County) - state and county come from its FIPS code
request_data['state'] = CITY_LOCATIONS['omaha']['fips'][:2]
request_data['county'] = CITY_LOCATIONS['omaha']['fips'][2:]
#
# these requests use the default action - request monitors by county - for July of three different years
response_2020 = request_monitors(request_template=request_data, begin_date="20200701", end_date="20200731")
response_1973 = request_monitors(request_template=request_data, begin_date="19730701", end_date="19730731")
response_1990 = request_monitors(request_template=request_data, begin_date="19900701", end_date="19900731")
#
# in this case we should only get monitors that monitor the AQI_PARAMS_PARTICULATES set of params.
#
for response in [response_2020,response_1973,response_1990]:
    if response is None:
        # request_monitors() returns None when the request or the JSON decoding failed
        print("Request failed - no response to display")
    elif response["Header"][0]['status'] == "Success":
        print(json.dumps(response['Data'],indent=4))
    else:
        # Not a success - show the whole response so the header explains why
        print(json.dumps(response,indent=4))


[
    {
        "state_code": "31",
        "county_code": "055",
        "site_number": "0019",
        "parameter_code": "88101",
        "poc": 1,
        "parameter_name": "PM2.5 - Local Conditions",
        "open_date": "1999-01-01",
        "close_date": null,
        "concurred_exclusions": null,
        "dominant_source": null,
        "measurement_scale": "NEIGHBORHOOD",
        "measurement_scale_def": "500 M TO 4KM",
        "monitoring_objective": "OTHER; POPULATION EXPOSURE",
        "last_method_code": "170",
        "last_method_description": "Met One BAM-1020 Mass Monitor w/VSCC - Beta Attenuation",
        "last_method_begin_date": "2011-01-01",
        "naaqs_primary_monitor": "Y",
        "qa_primary_monitor": "Y",
        "monitor_type": "SLAMS",
        "networks": "NCORE",
        "monitoring_agency_code": "0816",
        "monitoring_agency": "Omaha-Douglas County Health Department",
        "si_id": 9620,
        "latitude": 41.247486,
        "longitude": -95.97

In [17]:
# Report how many monitor records each of the county requests returned
for year_label, monitors_response in (("2020", response_2020),
                                      ("1990", response_1990),
                                      ("1973", response_1973)):
    print(f"Number of monitoring stations returned for {year_label} is: {len(monitors_response['Data'])}")

Number of monitoring stations returned for 2020 is: 8
Number of monitoring stations returned for 1990 is: 5
Number of monitoring stations returned for 1973 is: 0


In [18]:
request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['email'] = USERNAME
request_data['key'] = APIKEY
request_data['param'] = AQI_PARAMS_PARTICULATES     # same particulate request as the one above
# 
#   The state and county fields are left empty - the bounding box action template does not use them
#
#   Now, we need bounding box parameters
#   scale=1.0 gives a box of roughly 50 miles per side centered on the city;
#   scale=2.0, 3.0 or 4.0 would give roughly 100, 150 or 200 mile boxes
bbox = bounding_latlon(CITY_LOCATIONS['omaha'],scale=1.0)

# the bbox response comes back as a list - [minlat,maxlat,minlon,maxlon]

#   put our bounding box into the request_data
request_data['minlat'] = bbox[0]
request_data['maxlat'] = bbox[1]
request_data['minlon'] = bbox[2]
request_data['maxlon'] = bbox[3]

#   we need to change the action for the API from the default to the bounding box - same dates as before
response_2020 = request_monitors(request_template=request_data, begin_date="20200701", end_date="20200731",
                            endpoint_action = API_ACTION_MONITORS_BOX)
response_1990 = request_monitors(request_template=request_data, begin_date="19900701", end_date="19900731",
                            endpoint_action = API_ACTION_MONITORS_BOX)
response_1973 = request_monitors(request_template=request_data, begin_date="19730701", end_date="19730731",
                            endpoint_action = API_ACTION_MONITORS_BOX)

for response in [response_2020,response_1973,response_1990]:
    if response is None:
        # request_monitors() returns None when the request or the JSON decoding failed
        print("Request failed - no response to display")
    elif response["Header"][0]['status'] == "Success":
        print(json.dumps(response['Data'],indent=4))
    else:
        # Not a success - show the whole response so the header explains why
        print(json.dumps(response,indent=4))

[
    {
        "state_code": "31",
        "county_code": "055",
        "site_number": "0019",
        "parameter_code": "88101",
        "poc": 1,
        "parameter_name": "PM2.5 - Local Conditions",
        "open_date": "1999-01-01",
        "close_date": null,
        "concurred_exclusions": null,
        "dominant_source": null,
        "measurement_scale": "NEIGHBORHOOD",
        "measurement_scale_def": "500 M TO 4KM",
        "monitoring_objective": "OTHER; POPULATION EXPOSURE",
        "last_method_code": "170",
        "last_method_description": "Met One BAM-1020 Mass Monitor w/VSCC - Beta Attenuation",
        "last_method_begin_date": "2011-01-01",
        "naaqs_primary_monitor": "Y",
        "qa_primary_monitor": "Y",
        "monitor_type": "SLAMS",
        "networks": "NCORE",
        "monitoring_agency_code": "0816",
        "monitoring_agency": "Omaha-Douglas County Health Department",
        "si_id": 9620,
        "latitude": 41.247486,
        "longitude": -95.97

In [19]:
# Report how many monitor records each of the bounding box requests returned
for year_label, monitors_response in (("2020", response_2020),
                                      ("1990", response_1990),
                                      ("1973", response_1973)):
    print(f"Number of monitoring stations within 50 miles of Omaha, NE returned for {year_label} is: {len(monitors_response['Data'])}")

Number of monitoring stations within 50 miles of Omaha, NE returned for 2020 is: 13
Number of monitoring stations within 50 miles of Omaha, NE returned for 1990 is: 9
Number of monitoring stations within 50 miles of Omaha, NE returned for 1973 is: 0


In [20]:
#   This implements the daily summary request. Daily summary provides a daily summary value for each sensor being requested
#    from the start date to the end date. 
#
def request_daily_summary(email_address = None, key = None, param=None,
                          begin_date = None, end_date = None, fips = None,
                          endpoint_url = API_REQUEST_URL, 
                          endpoint_action = API_ACTION_DAILY_SUMMARY_COUNTY, 
                          request_template = AQS_REQUEST_TEMPLATE,
                          headers = None):
    """Request daily summary data from the AQS API.

    Values passed as call parameters take priority over values already present
    in ``request_template``. The template passed in (including the module-level
    default) is never modified: the function works on its own copy, so values
    from one call cannot leak into a later call.

    Parameters:
        email_address: value for the ``email`` field of the request.
        key: value for the ``key`` field of the request.
        param: comma separated parameter codes for the ``param`` field.
        begin_date: start of the time window (``begin_date`` field).
        end_date: end of the time window (``end_date`` field).
        fips: five character FIPS string; the first two characters become
            ``state`` and the remaining three ``county``. A value that is not
            exactly five characters long is ignored.
        endpoint_url: root URL the action is appended to.
        endpoint_action: ``str.format`` template for the action; it is filled
            in from the request fields.
        request_template: dict of request fields used as the starting point.
        headers: passed through to ``requests.get``.

    Returns:
        The decoded JSON response, or ``None`` if the request or the JSON
        decoding raised an exception (the exception is printed).

    Raises:
        Exception: if email, key, param, begin_date or end_date is still empty
            after the call parameters have been applied.
    """
    # Work on a private copy so neither the caller's dict nor the shared default template is mutated
    request_template = dict(request_template)

    #  This prioritizes the info from the call parameters - not what's already in the template
    overrides = {
        'email': email_address,
        'key': key,
        'param': param,
        'begin_date': begin_date,
        'end_date': end_date,
    }
    for field, value in overrides.items():
        if value:
            request_template[field] = value
    if fips and len(fips)==5:
        request_template['state'] = fips[:2]
        request_template['county'] = fips[2:]

    # Make sure there are values that allow us to make a call - these are always required
    # Note we're not validating FIPS fields because not all of the daily summary actions require the FIPS numbers
    required = (
        ('email', "an email address"),
        ('key', "a key"),
        ('param', "param values"),
        ('begin_date', "a begin_date"),
        ('end_date', "an end_date"),
    )
    for field, description in required:
        if not request_template[field]:
            raise Exception(f"Must supply {description} to call 'request_daily_summary()'")

    # compose the request
    request_url = endpoint_url+endpoint_action.format(**request_template)

    # make the request
    try:
        # Wait first, to make sure we don't exceed a rate limit in the situation where an exception occurs
        # during the request processing - throttling is always a good practice with a free data source
        if API_THROTTLE_WAIT > 0.0:
            time.sleep(API_THROTTLE_WAIT)
        response = requests.get(request_url, headers=headers)
        json_response = response.json()
    except Exception as e:
        # Best effort: report the problem and hand back None so the caller can decide what to do
        print(e)
        json_response = None
    return json_response

In [25]:
request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['email'] = USERNAME
request_data['key'] = APIKEY

#   Daily summaries are requested for the county Omaha is in - state and county come from its FIPS code
request_data['state'] = CITY_LOCATIONS['omaha']['fips'][:2]
request_data['county'] = CITY_LOCATIONS['omaha']['fips'][2:]

#   Both dictionaries map year (int) -> the API response for that year's fire season
gaseous_aqi = {}
particulate_aqi = {}

# Loop over every year from 1970 (the year the US EPA was established) through 2021;
# the upper bound of range() is exclusive
for year in range(1970, 2022):

    # Fire season dates
    start = f"{year}0501"  # May 01
    end = f"{year}1031"     # October 31
    
    # Call the function with the specific date range for this year
    request_data['param'] = AQI_PARAMS_GASEOUS
    print("Daily Gas Summary")
    yearly_gaseous_aqi = request_daily_summary(request_template=request_data, begin_date=start, end_date=end)
    if yearly_gaseous_aqi is None:
        # request_daily_summary() returns None on failure - make the gap visible instead of silent
        print(f"WARNING: gaseous request for {year} failed - storing None for this year")

    request_data['param'] = AQI_PARAMS_PARTICULATES
    print("Daily Particulate Summary")
    yearly_particulate_aqi = request_daily_summary(request_template=request_data, begin_date=start, end_date=end)
    if yearly_particulate_aqi is None:
        print(f"WARNING: particulate request for {year} failed - storing None for this year")
    
    # Store the data for this year; a failed year is kept as None so every year still has a key
    print("Building Dictionaries...")
    gaseous_aqi[year] = yearly_gaseous_aqi
    particulate_aqi[year] = yearly_particulate_aqi

Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulate Summary
Building Dictionaries...
Daily Gas Summary
Daily Particulat

In [36]:
# Save the per-year particulate responses; JSON object keys are strings, so the integer years
# are written out as strings
with open("particulate_aqi.json", mode='w') as particulate_file:
    json.dump(particulate_aqi, particulate_file)

In [37]:
# Save the per-year gaseous responses; JSON object keys are strings, so the integer years
# are written out as strings
with open("gaseous_aqi.json", mode='w') as gaseous_file:
    json.dump(gaseous_aqi, gaseous_file)