# US EPA Air Quality System API
This section of code shows how I retrieved AQI data from the EPA. Many of the functions used here were taken from [this](https://colab.research.google.com/drive/1bxl9qrb_52RocKNGfbZ5znHVqFDMkUzf) notebook; I point out which parts were borrowed throughout the notebook. The code that makes the 'signup request' to the API was omitted for privacy reasons; see the linked notebook for information on how to sign up.

In [None]:
# THIS SECTION OF CODE WAS TAKEN FROM THE SECOND CELL FROM THE STATED NOTEBOOK

import json
import time

import requests

# Root of every AQS API URL; one of the action suffixes below is appended to it.
API_REQUEST_URL = 'https://aqs.epa.gov/data/api'

# Each action is a URL suffix whose {placeholders} are filled in with str.format().
# Sign-up request
API_ACTION_SIGNUP = '/signup?email={email}'

# List actions/requests
API_ACTION_LIST_CLASSES = '/list/classes?email={email}&key={key}'
API_ACTION_LIST_PARAMS = '/list/parametersByClass?email={email}&key={key}&pc={pclass}'
API_ACTION_LIST_SITES = '/list/sitesByCounty?email={email}&key={key}&state={state}&county={county}'

# Monitor actions, selected either by county or by a latitude/longitude bounding box
API_ACTION_MONITORS_COUNTY = '/monitors/byCounty?email={email}&key={key}&param={param}&bdate={begin_date}&edate={end_date}&state={state}&county={county}'
API_ACTION_MONITORS_BOX = '/monitors/byBox?email={email}&key={key}&param={param}&bdate={begin_date}&edate={end_date}&minlat={minlat}&maxlat={maxlat}&minlon={minlon}&maxlon={maxlon}'

# Daily summary actions, selected either by county or by a latitude/longitude bounding box
API_ACTION_DAILY_SUMMARY_COUNTY = '/dailyData/byCounty?email={email}&key={key}&param={param}&bdate={begin_date}&edate={end_date}&state={state}&county={county}'
API_ACTION_DAILY_SUMMARY_BOX = '/dailyData/byBox?email={email}&key={key}&param={param}&bdate={begin_date}&edate={end_date}&minlat={minlat}&maxlat={maxlat}&minlon={minlon}&maxlon={maxlon}'

API_LATENCY_ASSUMED = 0.002       # Assuming roughly 2ms latency on the API and network
# Pause (in seconds) taken before each request: 1/100 s minus the assumed latency
API_THROTTLE_WAIT = (1.0/100.0)-API_LATENCY_ASSUMED
#
#    Request template: one entry for every placeholder used by the API actions
#    defined above. Values can be passed to a request function individually, or
#    filled into a copy of this template that is then handed to the function.
#
AQS_REQUEST_TEMPLATE = {
    "email": "",
    "key": "",
    # two digit state FIPS number, as a string
    "state": "",
    # three digit county FIPS number, as a string
    "county": "",
    # start of the time window, YYYYMMDD
    "begin_date": "",
    # end of the time window, YYYYMMDD (must be in the same year as begin_date)
    "end_date": "",
    "minlat": 0.0,
    "maxlat": 0.0,
    "minlon": 0.0,
    "maxlon": 0.0,
    # comma separated 5 digit parameter codes, at most 5 codes per request
    "param": "",
    # parameter class, only used by the List calls
    "pclass": "",
}


In [None]:
# NOTE(review): placeholder credentials - replace with the email/key registered with the AQS API before running
USERNAME = "fake_name@gmail.com"
APIKEY = 'fake_password'
#   Gaseous AQI pollutants CO, SO2, NO2, and O3 (ozone)
AQI_PARAMS_GASEOUS = "42101,42401,42602,44201"
#
#   Particulate AQI pollutants PM10, PM2.5, and Acceptable PM2.5
AQI_PARAMS_PARTICULATES = "81102,88101,88502"

In [None]:
# THIS SECTION OF CODE WAS TAKEN FROM THE 13th CELL FROM THE STATED NOTEBOOK
# Locations to query, keyed by a short label. 'fips' is a five character code:
# the first two characters are used as the state and the last three as the county.
CITY_LOCATIONS = {
    'mine': {
        'city': 'Loveland',
        'county': 'Larimer',
        'state': 'Colorado',
        'fips': '08069',
        'latlon': [40.3955, -105.0746],
    },
}
def request_daily_summary(email_address = None, key = None, param=None,
                          begin_date = None, end_date = None, fips = None,
                          endpoint_url = API_REQUEST_URL, 
                          endpoint_action = API_ACTION_DAILY_SUMMARY_COUNTY, 
                          request_template = AQS_REQUEST_TEMPLATE,
                          headers = None):
    """Request a daily summary from the AQS API and return the decoded JSON.

    Values given as call parameters take priority over whatever is already in
    ``request_template``. The template itself is never modified.

    Args:
        email_address: value for the ``email`` field of the request.
        key: value for the ``key`` field of the request.
        param: comma separated parameter codes.
        begin_date: start of the time window, YYYYMMDD.
        end_date: end of the time window, YYYYMMDD.
        fips: five character FIPS string; the first two characters become the
            ``state`` field and the rest the ``county`` field. A value of any
            other length is ignored.
        endpoint_url: root URL of the API.
        endpoint_action: action suffix whose placeholders are filled from the
            request fields.
        request_template: dict supplying every field not given explicitly.
        headers: optional HTTP headers passed through to ``requests.get``.

    Returns:
        The decoded JSON response, or ``None`` when the HTTP request fails or
        the response body is not valid JSON (the error is printed).

    Raises:
        ValueError: if email, key, param, begin_date or end_date is missing.
    """
    # Work on a private copy so overrides never leak into the caller's dict or
    # into the shared module-level default template.
    request = dict(request_template)

    #  This prioritizes the info from the call parameters - not what's already in the template
    overrides = {
        'email': email_address,
        'key': key,
        'param': param,
        'begin_date': begin_date,
        'end_date': end_date,
    }
    for field, value in overrides.items():
        if value:
            request[field] = value
    if fips and len(fips)==5:
        request['state'] = fips[:2]
        request['county'] = fips[2:]

    # Make sure there are values that allow us to make a call - these are always required
    # Note we're not validating FIPS fields because not all of the daily summary actions require the FIPS numbers
    required = (
        ('email', "an email address"),
        ('key', "a key"),
        ('param', "param values"),
        ('begin_date', "a begin_date"),
        ('end_date', "an end_date"),
    )
    for field, description in required:
        if not request.get(field):
            raise ValueError(f"Must supply {description} to call 'request_daily_summary()'")

    # compose the request
    request_url = endpoint_url+endpoint_action.format(**request)

    # Wait first, so the pause is taken even when the request itself fails -
    # throttling is always a good practice with a free data source
    if API_THROTTLE_WAIT > 0.0:
        time.sleep(API_THROTTLE_WAIT)

    # make the request; only network and JSON decoding problems are treated as
    # a "no data" result, so programming errors are not silently swallowed
    try:
        response = requests.get(request_url, headers=headers)
        json_response = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(e)
        json_response = None
    return json_response


In [None]:
# THIS SECTION OF CODE WAS TAKEN FROM THE 15th CELL FROM THE STATED NOTEBOOK

# Fields copied from every response record when no explicit list is given
EXTRACTION_FIELDS = ['sample_duration','observation_count','arithmetic_mean','aqi']

#
#    The function creates a summary record
def extract_summary_from_response(r=None, fields=EXTRACTION_FIELDS):
    """Summarize an API response by monitoring site, parameter and date.

    Args:
        r: decoded JSON response whose ``"Data"`` entry is a list of records.
            ``None``, an empty response, or a response without ``"Data"``
            yields an empty summary.
        fields: names of the record fields to copy into each extraction.

    Returns:
        A dict keyed by site number. Each site holds its descriptive fields
        and a ``'pollutant_type'`` dict keyed by parameter code. Each parameter
        holds its descriptive fields and a ``'data'`` dict keyed by date in
        YYYYMMDD format. Each date maps to a list with one dict per record,
        containing every requested field (``None`` when the record lacks it).
    """
    ## the result will be structured around monitoring site, parameter, and then date
    result = dict()

    # A failed request is reported as None; treat that like "no records"
    if not r:
        return result

    for record in r.get("Data") or []:
        site = record['site_number']
        param = record['parameter_code']
        # the response uses dashes between date parts; drop them to get YYYYMMDD
        date = record['date_local'].replace('-','')

        # make sure the record is set up
        if site not in result:
            result[site] = {
                'local_site_name': record['local_site_name'],
                'site_address': record['site_address'],
                'state': record['state'],
                'county': record['county'],
                'city': record['city'],
                'pollutant_type': dict(),
            }
        pollutants = result[site]['pollutant_type']
        if param not in pollutants:
            pollutants[param] = {
                'parameter_name': record['parameter'],
                'units_of_measure': record['units_of_measure'],
                'method': record['method'],
                'data': dict(),
            }
        daily = pollutants[param]['data'].setdefault(date, list())

        # extract the specified fields; a missing field becomes None so every
        # extraction always carries all requested fields
        daily.append({str(k): record.get(str(k)) for k in fields})

    return result
# Disabled example usage: everything below sits inside a bare string literal, so
# none of it executes. It shows how one gaseous and one particulate response
# would be summarized and printed as indented JSON.
'''
extract_gaseous = extract_summary_from_response(gaseous_aqi)
print("Summary of gaseous extraction ...")
#with open('extract_gaseous.json', 'w') as f:
#    json.dump(extract_gaseous, f)
print(json.dumps(extract_gaseous,indent=4))

extract_particulate = extract_summary_from_response(particulate_aqi)
print("Summary of particulate extraction ...")
#with open('extract_particulate.json', 'w') as f:
#    json.dump(extract_particulate, f)
print(json.dumps(extract_particulate,indent=4))
'''

## Getting data from 1963 to 2023
This code was written by me with the help of ChatGPT. We request the data one year at a time and store the results in JSON files to be used in the `project` file. I gave ChatGPT information on what I used to load the AQI data (i.e., `request_daily_summary` and `extract_summary_from_response`), and asked it: "Make me code to store this data from 1963 - 2023 in separate JSON files". Note that the function below defaults to `start_year=1975`, so pass `start_year=1963` explicitly to cover the full range.

In [None]:
# Base request shared by the yearly downloads: a copy of the template filled in
# with the credentials, the gaseous parameter codes, and the state/county parts
# of the FIPS code of the 'mine' location.
request_data = {
    **AQS_REQUEST_TEMPLATE,
    'email': USERNAME,
    'key': APIKEY,
    'param': AQI_PARAMS_GASEOUS,
    'state': CITY_LOCATIONS['mine']['fips'][:2],
    'county': CITY_LOCATIONS['mine']['fips'][2:],
}

# Pull fire-season air quality data year by year and store it into JSON files
def fetch_and_store_aqi_data(start_year=1975, end_year=2023):
    """Download daily AQI summaries for each year and save them as JSON.

    For every year from ``start_year`` to ``end_year`` (inclusive) the fire
    season window, May 1st to October 31st, is requested twice: once for the
    gaseous parameter codes and once for the particulate ones. The extracted
    summaries are collected per year and written to
    ``gaseous_aqi_data.json`` and ``particulate_aqi_data.json``.

    A request that returns no usable response is reported and stored as an
    empty summary for that year, so one failure does not discard the years
    that were already downloaded.

    Args:
        start_year: first year to request.
        end_year: last year to request (inclusive).
    """
    # Dictionaries that hold the extracted AQI data, keyed by year
    gaseous_aqi_data = {}
    particulate_aqi_data = {}
    pollutant_groups = (
        (AQI_PARAMS_GASEOUS, gaseous_aqi_data),
        (AQI_PARAMS_PARTICULATES, particulate_aqi_data),
    )

    # Loop through each year and fire season (May 1st to October 31st)
    for year in range(start_year, end_year + 1):
        begin_date = f"{year}0501"
        end_date = f"{year}1031"

        for params, yearly_data in pollutant_groups:
            # Use a per-request copy so the module-level request_data is left untouched
            template = dict(request_data, param=params)
            response = request_daily_summary(request_template=template,
                                             begin_date=begin_date, end_date=end_date)
            if not response or "Data" not in response:
                print(f"No usable response for {year} (param={params}); storing an empty summary")
                yearly_data[year] = {}
                continue

            # Extract and organize the AQI data
            yearly_data[year] = extract_summary_from_response(response)

    # Save the dictionaries to JSON files
    with open('gaseous_aqi_data.json', 'w') as f:
        json.dump(gaseous_aqi_data, f)

    with open('particulate_aqi_data.json', 'w') as f:
        json.dump(particulate_aqi_data, f)

# Run the download with the function's default year range (start_year=1975, end_year=2023)
fetch_and_store_aqi_data()