In [28]:
import requests
import json
import pandas as pd


class AQSFetcher:
    """
    This class defines a template for an object that can fetch EPA open
    air quality data. Has the following attributes:
    - email: the email for the associated account
    - key: the identification key for the associated account
    - api_url (optional): the base URL for the API
    Has the following methods:
    - get_cbsas: gets a dataframe of the Core Based Statistcal Area (a
      metropolitan area with a central urban center and connecting transport)
    - get_state_codes: gets a dataframe with the states and their associated
      codes.
    - get_counties_by_state: gets a dataframe with counties and their associated
      codes for a given state.
    - get_sites_by_county: gets a dataframe with measurement sites and their
      associated ids from a given state and county code
    - get_parameter_classes: gets a dataframe with classes of parameters
      (things that can be measured) and some descriptions
    - get_parameter_list_by_class: gets a dataframe with a list of parameters
      and their associated codes given a particular class of parameters
    - annual_data_by_cbsa: given a cbsa, list of parameters, and timeframe,
      gets the annual summary dataframe from the EPA website and returns it.
    - annual_data_by_site: given site identification and parameters, gets
      the annual summary dataframe and returns it
    - annual_data_by_county: given county identification and parameters, gets
      the annual summary dataframe and returns it.
    - annual_data_by_state: given a state id and parameters, gets the annual
      summary dataframe and returns it.
    """

    def __init__(self, email, key, api_url='https://aqs.epa.gov/data/api'):
        """
        The class constructor. Can take in an alternative URL
        """
        self.email = email
        self.key = key
        self.api_url = api_url
        self.stub = f'?email={self.email}&key={self.key}'

    def get_cbsas(self):
        """
        Gets a list of Core Based Statistical Areas as a dataframe
        """
        url = self.api_url + '/list/cbsas' + self.stub
        response = requests.get(url)
        try:
            assert response.status_code == requests.codes.ok
            json_data = json.loads(response.content)['Data']
            df = pd.DataFrame.from_records(json_data)
            df.rename(columns={'value_represented': 'cbsa_name'}, inplace=True)
            return df
        except AssertionError:
            print('Bad URL!')

    def get_state_codes(self):
        """
        Gets a list of states and their associated codes that can be used to
        construct additional queries.
        """
        url = self.api_url + '/list/states' + self.stub
        response = requests.get(url)
        try:
            assert response.status_code == requests.codes.ok

            json_data = json.loads(response.content)['Data']
            df = pd.DataFrame.from_records(json_data)
            df.rename(
                columns={'value_represented': 'state_name'}, inplace=True)
            return df

        except AssertionError:
            print('Bad URL!')

    def get_counties_by_state(self, state):
        """
        Gets a list of counties for the given state, and their associated
        county ids. Takes in a state id as an integer wrapped as a string.
        """
        url = self.api_url + '/list/countiesByState' + self.stub
        url += f'&state={state}'
        response = requests.get(url)

        try:
            assert response.status_code == requests.codes.ok

            jsn = json.loads(response.content)
            json_header = jsn['Header']
            json_data = jsn['Data']

            if json_header[0]['rows'] == 0:
                raise ValueError

            df = pd.DataFrame.from_records(json_data)
            df.rename(
                columns={'value_represented': 'county_name'}, inplace=True)
            return df

        except AssertionError:
            print('Bad URL!')
        except ValueError:
            print('No matching data could be found!')

    def get_sites_by_county(self, state, county):
        """
        Gets the ids of measurement sites by county. Takes in a state id and
        county id as wrapped string integers.
        """
        url = self.api_url + '/list/sitesByCounty' + self.stub
        url += f'&state={state}&county={county}'
        response = requests.get(url)

        try:
            assert response.status_code == requests.codes.ok
            jsn = json.loads(response.content)
            json_header = jsn['Header']
            json_data = jsn['Data']

            if json_header[0]['rows'] == 0:
                raise ValueError

            df = pd.DataFrame.from_records(json_data)
            df.rename(columns={'value_represented': 'site_name'}, inplace=True)
            return df

        except AssertionError:
            print('Bad URL!')
        except ValueError:
            print('No matching data could be found!')

    def get_parameter_classes(self):
        """
        Gets the possible classes of parameters
        """
        url = self.api_url + '/list/classes' + self.stub
        response = requests.get(url)

        try:
            assert response.status_code == requests.codes.ok
        except AssertionError:
            print('Bad URL!')

        json_data = json.loads(response.content)['Data']
        df = pd.DataFrame.from_records(json_data)
        df.rename(columns={
            'code': 'class_name',
            'value_represented': 'class_description'},
            inplace=True)
        return df

    def get_parameter_list_by_class(self, _class):
        """
        Given a class name, gets the possible parameters and their associated
        codes as a dataframe.
        """
        url = self.api_url + '/list/parametersByClass' + self.stub
        url += f'&pc={_class}'

        response = requests.get(url)

        try:
            assert response.status_code == requests.codes.ok

            jsn = json.loads(response.content)
            json_header = jsn['Header']
            json_data = jsn['Data']

            if json_header[0]['rows'] == 0:
                raise ValueError

            df = pd.DataFrame.from_records(json_data)
            df.rename(
                columns={'value_represented': 'parameter_description'},
                inplace=True)
            return df

        except AssertionError:
            print('Bad URL!')
        except ValueError:
            print('No matching data could be found!')

    def annual_data_by_cbsa(self, cbsa_code, params, bdate, edate):
        """
        Searches for annual data by the CBSA. These are generally large regions
        Takes the following arguments as integers or wrapped string integers:
        - cbsa_code: code for the cbsa area
        - params: id for the specified readings
        - bdate, edate: beginning and end dates in YYYYMMDD format
        """
        search_params = '&param='
        for p in params:
            search_params += str(p)
            search_params += ','
        search_params = search_params[:-1]
        search_params += f'&bdate={bdate}&edate={edate}&cbsa={cbsa_code}'
        url = self.api_url + '/annualData/byCBSA' + self.stub + search_params

        response = requests.get(url)

        try:
            assert response.status_code == requests.codes.ok
            jsn = json.loads(response.content)
            json_header = jsn['Header']
            json_data = jsn['Data']

            if json_header[0]['rows'] == 0:
                raise ValueError

            df = pd.DataFrame.from_records(json_data)
            return df

        except AssertionError:
            print('Bad URL!')
        except ValueError:
            print('No matching data could be found!')

    def annual_data_by_site(self, state, county, site, params, bdate, edate):
        """
        Searches for annual data by measurement site.
        Takes in arguments as integers or wrapped string integers:
        - state: id of the state
        - county: id of the county
        - site: id of the measurement site
        - params: id of the desired type of measurement
        - bdate, edate: beginning and end dates of the measurement in YYYYMMDD
          format
        """
        search_params = '&param='
        for p in params:
            search_params += str(p)
            search_params += ','
        search_params = search_params[:-1]
        search_params += (
            f'&state={state}' +
            f'&county={county}' +
            f'&bdate={bdate}' +
            f'&edate={edate}' +
            f'&site={site}')
        url = self.api_url + '/annualData/bySite' + self.stub + search_params

        response = requests.get(url)

        try:
            assert response.status_code == requests.codes.ok
            jsn = json.loads(response.content)
            json_header = jsn['Header']
            json_data = jsn['Data']

            if json_header[0]['rows'] == 0:
                raise ValueError

            df = pd.DataFrame.from_records(json_data)
            return df
        except AssertionError:
            print('Bad URL!')
        except ValueError:
            print('No matching data could be found!')

    def annual_data_by_county(self, state, county, params, bdate, edate):
        """
        Gets the annual data by county. 
        Takes the following parameters as integers or wrapped string integers:
        - state: state id code
        - county: county code
        - param: ids of desired parameters to measure
        - bdate, edate: start and end dates in YYYYMMDD format
        """
        url = self.api_url + '/annualData/byCounty' + self.stub
        search_params = '&param='
        for p in params:
            search_params += str(p)
            search_params += ','
        search_params = search_params[:-1]
        search_params += (
            f'&state={state}' +
            f'&county={county}' +
            f'&bdate={bdate}' +
            f'&edate={edate}')
        url += search_params

        response = requests.get(url)

        try:
            assert response.status_code == requests.codes.ok

            jsn = json.loads(response.content)
            json_header = jsn['Header']
            json_data = jsn['Data']

            if json_header[0]['rows'] == 0:
                raise ValueError

            df = pd.DataFrame.from_records(json_data)
            return df

        except AssertionError:
            print('Bad URL!')
        except ValueError:
            print('No matching data could be found!')

    def annual_data_by_state(self, state, params, bdate, edate):
        """
        Gets the annual data by state.
        Takes the following parameters as integers or wrapped string integers:
        - state: state id code
        - param: ids of desired parameters to measure
        - bdate, edate: start and end dates in YYYYMMDD format
        """
        url = self.api_url + '/annualData/byState' + self.stub
        search_params = '&param='
        for p in params:
            search_params += str(p)
            search_params += ','
        search_params = search_params[:-1]
        search_params += (
            f'&state={state}' +
            f'&bdate={bdate}' +
            f'&edate={edate}')
        url += search_params

        response = requests.get(url)

        try:
            assert response.status_code == requests.codes.ok

            jsn = json.loads(response.content)
            json_header = jsn['Header']
            json_data = jsn['Data']

            if json_header[0]['rows'] == 0:
                raise ValueError

            df = pd.DataFrame.from_records(json_data)
            return df

        except AssertionError:
            print('Bad URL!')
        except ValueError:
            print('No matching data could be found!')
            
    def daily_data_by_state(self, state, params, bdate, edate):
        """
        Gets the annual data by state.
        Takes the following parameters as integers or wrapped string integers:
        - state: state id code
        - param: ids of desired parameters to measure
        - bdate, edate: start and end dates in YYYYMMDD format
        """
        url = self.api_url + '/dailyData/byState' + self.stub
        search_params = '&param='
        for p in params:
            search_params += str(p)
            search_params += ','
        search_params = search_params[:-1]
        search_params += (
            f'&state={state}' +
            f'&bdate={bdate}' +
            f'&edate={edate}')
        url += search_params

        response = requests.get(url)

        try:
            assert response.status_code == requests.codes.ok

            jsn = json.loads(response.content)
            json_header = jsn['Header']
            json_data = jsn['Data']

            if json_header[0]['rows'] == 0:
                raise ValueError

            df = pd.DataFrame.from_records(json_data)
            return df

        except AssertionError:
            print('Bad URL!')
        except ValueError:
            print('No matching data could be found!')

    def get_monitors_at_site(self, state, county, site, params, bdate, edate):
        """
        Gets information about the monitoring aparatus at a particular site.
        Takes the following arguments as integers or wrapped string integers:
        - state: id of the state
        - county: id of the county
        - site: id of the site
        - params: a list of parameter ids to search for
        - bdate, edate: start and end dates in YYYYMMDD format
        """
        search_params = '&param='
        for p in params:
            search_params += str(p)
            search_params += ','
        search_params = search_params[:-1]

        search_params += (
            f'&state={state}' +
            f'&county={county}' +
            f'&bdate={bdate}' +
            f'&edate={edate}' +
            f'&site={site}')

        url = self.api_url + '/monitors/bySite' + self.stub + search_params

        response = requests.get(url)

        try:
            assert response.status_code == requests.codes.ok
            jsn = json.loads(response.content)
            json_header = jsn['Header']
            json_data = jsn['Data']

            if json_header[0]['rows'] == 0:
                raise ValueError

            df = pd.DataFrame.from_records(json_data)
            return df

        except AssertionError:
            print('Bad URL!')
        except ValueError:
            print('No matching data could be found!')

In [6]:
aqs_fetcher = AQSFetcher('reedabook15@gmail.com', 'tawnykit38')


In [7]:
state_codes = aqs_fetcher.get_state_codes()
state_codes.head()



Unnamed: 0,code,state_name
0,1,Alabama
1,2,Alaska
2,4,Arizona
3,5,Arkansas
4,6,California


In [9]:

parameter_classes = aqs_fetcher.get_parameter_classes()

parameter_classes

Unnamed: 0,class_name,class_description
0,AIRNOW MAPS,The parameters represented on AirNow maps (881...
1,ALL,Select all Parameters Available
2,AQI POLLUTANTS,Pollutants that have an AQI Defined
3,CORE_HAPS,Urban Air Toxic Pollutants
4,CRITERIA,Criteria Pollutants
5,CSN DART,List of CSN speciation parameters to populate ...
6,FORECAST,Parameters routinely extracted by AirNow (STI)
7,HAPS,Hazardous Air Pollutants
8,IMPROVE CARBON,IMPROVE Carbon Parameters
9,IMPROVE_SPECIATION,PM2.5 Speciated Parameters Measured at IMPROVE...


In [13]:
aqi_pollutant_parameters = aqs_fetcher.get_parameter_list_by_class('AQI POLLUTANTS')

aqi_pollutant_parameters

Unnamed: 0,code,parameter_description
0,42101,Carbon monoxide
1,42401,Sulfur dioxide
2,42602,Nitrogen dioxide (NO2)
3,44201,Ozone
4,81102,PM10 Total 0-10um STP
5,88101,PM2.5 - Local Conditions
6,88502,Acceptable PM2.5 AQI & Speciation Mass


In [12]:
tn_state_code = state_codes.loc[state_codes.state_name == 'Tennessee'].code.values[0]
tn_state_code

'47'

In [27]:
co_param_code = aqi_pollutant_parameters.loc[aqi_pollutant_parameters.parameter_description == 'Carbon monoxide'].code.values[0]


tn_data = aqs_fetcher.daily_data_by_state(tn_state_code, [co_param_code], 20170101, 20190101)

Bad URL!


In [24]:
ca_state_code = state_codes.loc[state_codes.state_name == 'California'].code.values[0]
co_param_code = aqi_pollutant_parameters.loc[aqi_pollutant_parameters.parameter_description == 'Carbon monoxide'].code.values[0]

In [25]:
ca_daily_co_data = aqs_fetcher.daily_data_by_state(ca_state_code, [co_param_code], 20130101, 20180101)

Bad URL!


In [32]:
url1 = 'https://aqs.epa.gov/data/api/monitors/byState?email=reedabook15@gmail.com&key=tawnykit38&param=42401&bdate=20150501&edate=20150502&state=15'
print(url1)

https://aqs.epa.gov/data/api/monitors/byState?email=reedabook15@gmail.com&key=tawnykit38&param=42401&bdate=20150501&edate=20150502&state=15
