In [6]:
import requests
import json
import pandas as pd


You need to get your API for AQS and make a txt file `myEmailKey.txt` to store your email and key in the first two lines.

Also, note that some internet condition may not allow you to access the API key! Try different connection and devices!~

In [2]:
# Read email and API key from the file
with open("myEmailKey.txt", "r") as file:
    email = file.readline().strip()
    api_key = file.readline().strip()

# Define the base API URL
base_url = "https://aqs.epa.gov/data/api"

# Define the endpoint and parameters
endpoint = "/list/countiesByState"
params = {
    "email": email,
    "key": api_key,
    "state": "37"
}

# Make the GET request with SSL/TLS verification disabled (NOT recommended for production)
response = requests.get(base_url + endpoint, params=params)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    data = response.json()
    print(data)
else:
    print(f"Error: {response.status_code}")


{'Header': [{'status': 'Success', 'request_time': '2023-05-08T21:20:13-04:00', 'url': 'https://aqs.epa.gov/data/api/list/countiesByState?email=zhu.tim99%40gmail.com&key=copperswift75&state=37', 'rows': 100}], 'Data': [{'code': '001', 'value_represented': 'Alamance'}, {'code': '003', 'value_represented': 'Alexander'}, {'code': '005', 'value_represented': 'Alleghany'}, {'code': '007', 'value_represented': 'Anson'}, {'code': '009', 'value_represented': 'Ashe'}, {'code': '011', 'value_represented': 'Avery'}, {'code': '013', 'value_represented': 'Beaufort'}, {'code': '015', 'value_represented': 'Bertie'}, {'code': '017', 'value_represented': 'Bladen'}, {'code': '019', 'value_represented': 'Brunswick'}, {'code': '021', 'value_represented': 'Buncombe'}, {'code': '023', 'value_represented': 'Burke'}, {'code': '025', 'value_represented': 'Cabarrus'}, {'code': '027', 'value_represented': 'Caldwell'}, {'code': '029', 'value_represented': 'Camden'}, {'code': '031', 'value_represented': 'Carteret'}

In [3]:
## display data in a table

# Create a DataFrame from the JSON response
df = pd.DataFrame.from_dict(data["Data"])

df

Unnamed: 0,code,value_represented
0,001,Alamance
1,003,Alexander
2,005,Alleghany
3,007,Anson
4,009,Ashe
...,...,...
95,191,Wayne
96,193,Wilkes
97,195,Wilson
98,197,Yadkin


In [3]:
## similarly we can get different data using the same sturcture
# Read email and API key from the file
with open("myEmailKey.txt", "r") as file:
    email = file.readline().strip()
    api_key = file.readline().strip()

# Define the base API URL
base_url = "https://aqs.epa.gov/data/api"


In [4]:
# a function to get data from the API -- passing the endpoint and parameters

def get_data(base_url, endpoint, params):
    # Make the GET request with SSL/TLS verification disabled (NOT recommended for production)
    response = requests.get(base_url + endpoint, params=params)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        data = response.json()
        return data
    else:
        print(f"Error: {response.status_code}")





In [7]:
#Parameter Classes (groups of parameters, like criteria or all)
#	list/classes	email, key	https://aqs.epa.gov/data/api/list/classes?email=test@aqs.api&key=test

# Define the endpoint and parameters
endpoint = "/list/classes"
params = {
    "email": email,
    "key": api_key
}

# Get the data
data = get_data(base_url, endpoint, params)

# Create a DataFrame from the JSON response
df = pd.DataFrame.from_dict(data["Data"])

df



Unnamed: 0,code,value_represented
0,AIRNOW MAPS,The parameters represented on AirNow maps (881...
1,ALL,Select all Parameters Available
2,AQI POLLUTANTS,Pollutants that have an AQI Defined
3,CORE_HAPS,Urban Air Toxic Pollutants
4,CRITERIA,Criteria Pollutants
5,CSN DART,List of CSN speciation parameters to populate ...
6,FORECAST,Parameters routinely extracted by AirNow (STI)
7,HAPS,Hazardous Air Pollutants
8,IMPROVE CARBON,IMPROVE Carbon Parameters
9,IMPROVE_SPECIATION,PM2.5 Speciated Parameters Measured at IMPROVE...


In [8]:
filters = {
    "States" : "list/states",
    "Counties" : "list/countiesByState",
    "Sites" : "list/sitesByCounty",
    "Parameter Classes" : "list/classes",
    "Parameters" : "list/parametersByClass",
}

In [9]:
filters = {
    "States" : "list/states",
    "Counties" : "list/countiesByState",
    "Sites" : "list/sitesByCounty",
    "Parameter Classes" : "list/classes",
    "Parameters" : "list/parametersByClass",
}

## we want to see what parameters are available for a given class PM COARSE

# Define the endpoint and parameters
endpoint = "/list/parametersByClass"
params = {
    "email": email,
    "key": api_key,
    "pc": "PM COARSE"
}

# Get the data
data = get_data(base_url, endpoint, params)

# Create a DataFrame from the JSON response
df = pd.DataFrame.from_dict(data["Data"])

df






Unnamed: 0,code,value_represented
0,86101,PM10-2.5 - Local Conditions
1,86502,Acceptable PM10-2.5 - Local Conditions


In [13]:
## we want to see how many sites are available in LA



## find the stata code for LA
# Define the endpoint and parameters
endpoint = "/list/states"
params = {
    "email": email,
    "key": api_key,
}

# Get the data
data = get_data(base_url, endpoint, params)

# Create a DataFrame from the JSON response
df = pd.DataFrame.from_dict(data["Data"])

df[df["value_represented"]== "California"]

Unnamed: 0,code,value_represented
4,6,California


In [15]:
endpoint = "/list/countiesByState"
params = {
    "email": email,
    "key": api_key,
    "state": "06"
}

# Get the data
data = get_data(base_url, endpoint, params)

# Create a DataFrame from the JSON response
df = pd.DataFrame.from_dict(data["Data"])

df[df["value_represented"]== "Los Angeles"]

Unnamed: 0,code,value_represented
18,37,Los Angeles


In [16]:
## get info about the sites in LA

endpoint = "/list/sitesByCounty"
params = {
    "email": email,
    "key": api_key,
    "state": "06",
    "county": "037"
}

# Get the data
data = get_data(base_url, endpoint, params)

# Create a DataFrame from the JSON response
df = pd.DataFrame.from_dict(data["Data"])

df

Unnamed: 0,code,value_represented
0,0001,
1,0002,Azusa
2,0003,
3,0004,
4,0005,
...,...,...
91,9401,
92,9403,
93,9405,
94,9407,


In [17]:
## quarterly summary data by county e.g Example; returns quarterly summary FRM/FEM and non-FRM PM2.5 data for Wake County for 2016: https://aqs.epa.gov/data/api/quarterlyData/byCounty?email=test@aqs.api&key=test&param=88101,88502&bdate=20160101&edate=20160228&state=37&county=183

## we want LA county data for 2019 qualters 1 

endpoint = "/quarterlyData/byCounty"
params = {
    "email": email,
    "key": api_key,
    "param": "88101,88502",
    "bdate": "20190101",
    "edate": "20191231",
    "state": "06",
    "county": "037"
}

# Get the data
data = get_data(base_url, endpoint, params)

# Create a DataFrame from the JSON response

df = pd.DataFrame.from_dict(data["Data"])

df

Unnamed: 0,state_code,county_code,site_number,parameter_code,poc,latitude,longitude,datum,parameter,sample_duration,...,local_site_name,address,state,county,city,tribal_code,tribal_land,cbsa_code,cbsa,date_of_last_change
0,06,037,0002,88101,21,34.13650,-117.92391,WGS84,PM2.5 - Local Conditions,24 HOUR,...,Azusa,"803 N. LOREN AVE., AZUSA",California,Los Angeles,Azusa,,,31080,"Los Angeles-Long Beach-Anaheim, CA",2022-10-19
1,06,037,0002,88101,21,34.13650,-117.92391,WGS84,PM2.5 - Local Conditions,24 HOUR,...,Azusa,"803 N. LOREN AVE., AZUSA",California,Los Angeles,Azusa,,,31080,"Los Angeles-Long Beach-Anaheim, CA",2022-10-19
2,06,037,0002,88101,21,34.13650,-117.92391,WGS84,PM2.5 - Local Conditions,24 HOUR,...,Azusa,"803 N. LOREN AVE., AZUSA",California,Los Angeles,Azusa,,,31080,"Los Angeles-Long Beach-Anaheim, CA",2022-11-01
3,06,037,0002,88101,21,34.13650,-117.92391,WGS84,PM2.5 - Local Conditions,24 HOUR,...,Azusa,"803 N. LOREN AVE., AZUSA",California,Los Angeles,Azusa,,,31080,"Los Angeles-Long Beach-Anaheim, CA",2022-11-01
4,06,037,0002,88101,21,34.13650,-117.92391,WGS84,PM2.5 - Local Conditions,24 HOUR,...,Azusa,"803 N. LOREN AVE., AZUSA",California,Los Angeles,Azusa,,,31080,"Los Angeles-Long Beach-Anaheim, CA",2022-11-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474,06,037,1602,88101,2,34.01029,-118.06850,NAD83,PM2.5 - Local Conditions,24 HOUR,...,Pico Rivera #2,"4144 SAN GABRIEL RIVER PKWY, PICO RIVERA",California,Los Angeles,Pico Rivera,,,31080,"Los Angeles-Long Beach-Anaheim, CA",2022-07-07
475,06,037,1602,88101,1,34.01029,-118.06850,NAD83,PM2.5 - Local Conditions,24 HOUR,...,Pico Rivera #2,"4144 SAN GABRIEL RIVER PKWY, PICO RIVERA",California,Los Angeles,Pico Rivera,,,31080,"Los Angeles-Long Beach-Anaheim, CA",2023-04-28
476,06,037,1602,88101,1,34.01029,-118.06850,NAD83,PM2.5 - Local Conditions,24 HOUR,...,Pico Rivera #2,"4144 SAN GABRIEL RIVER PKWY, PICO RIVERA",California,Los Angeles,Pico Rivera,,,31080,"Los Angeles-Long Beach-Anaheim, CA",2023-04-28
477,06,037,1602,88101,1,34.01029,-118.06850,NAD83,PM2.5 - Local Conditions,24 HOUR,...,Pico Rivera #2,"4144 SAN GABRIEL RIVER PKWY, PICO RIVERA",California,Los Angeles,Pico Rivera,,,31080,"Los Angeles-Long Beach-Anaheim, CA",2023-04-28


In [18]:
## see what data is in the df
df.columns




Index(['state_code', 'county_code', 'site_number', 'parameter_code', 'poc',
       'latitude', 'longitude', 'datum', 'parameter', 'sample_duration',
       'sample_duration_code', 'sample_duration_type', 'pollutant_standard',
       'year', 'quarter', 'units_of_measure', 'event_type',
       'observation_count', 'observation_percent', 'arithmetic_mean',
       'minimum_value', 'maximum_value', 'quarterly_criteria_met',
       'actual_days_gt_std', 'estimated_days_gt_std', 'valid_samples',
       'valid_day_count', 'scheduled_samples', 'percent_days',
       'percent_one_value', 'monitoring_agency_code', 'monitoring_agency',
       'local_site_name', 'address', 'state', 'county', 'city', 'tribal_code',
       'tribal_land', 'cbsa_code', 'cbsa', 'date_of_last_change'],
      dtype='object')

In [19]:
## Here are some useful data if we want to plot it on the map
# 'latitude', 'longitude', 'parameter', 'year', 'quarter', 'units_of_measure', 'arithmetic_mean', 'minimum_value', 'maximum_value', 'local_site_name','cbsa'

## make a subset of the data 

df = df[['latitude', 'longitude', 'parameter', 'year', 'quarter', 'units_of_measure', 'arithmetic_mean', 'minimum_value', 'maximum_value', 'local_site_name','cbsa']]
df


Unnamed: 0,latitude,longitude,parameter,year,quarter,units_of_measure,arithmetic_mean,minimum_value,maximum_value,local_site_name,cbsa
0,34.13650,-117.92391,PM2.5 - Local Conditions,2019,1,Micrograms/cubic meter (LC),7.2267,2.4,21.5,Azusa,"Los Angeles-Long Beach-Anaheim, CA"
1,34.13650,-117.92391,PM2.5 - Local Conditions,2019,2,Micrograms/cubic meter (LC),9.8067,4.7,17.4,Azusa,"Los Angeles-Long Beach-Anaheim, CA"
2,34.13650,-117.92391,PM2.5 - Local Conditions,2019,3,Micrograms/cubic meter (LC),12.4429,8.0,16.7,Azusa,"Los Angeles-Long Beach-Anaheim, CA"
3,34.13650,-117.92391,PM2.5 - Local Conditions,2019,3,Micrograms/cubic meter (LC),16.2800,8.0,70.0,Azusa,"Los Angeles-Long Beach-Anaheim, CA"
4,34.13650,-117.92391,PM2.5 - Local Conditions,2019,3,Micrograms/cubic meter (LC),16.2800,8.0,70.0,Azusa,"Los Angeles-Long Beach-Anaheim, CA"
...,...,...,...,...,...,...,...,...,...,...,...
474,34.01029,-118.06850,PM2.5 - Local Conditions,2019,4,Micrograms/cubic meter (LC),12.1857,4.5,27.4,Pico Rivera #2,"Los Angeles-Long Beach-Anaheim, CA"
475,34.01029,-118.06850,PM2.5 - Local Conditions,2019,1,Micrograms/cubic meter (LC),8.2067,2.5,19.2,Pico Rivera #2,"Los Angeles-Long Beach-Anaheim, CA"
476,34.01029,-118.06850,PM2.5 - Local Conditions,2019,2,Micrograms/cubic meter (LC),8.1600,3.6,14.3,Pico Rivera #2,"Los Angeles-Long Beach-Anaheim, CA"
477,34.01029,-118.06850,PM2.5 - Local Conditions,2019,3,Micrograms/cubic meter (LC),11.2750,7.6,15.6,Pico Rivera #2,"Los Angeles-Long Beach-Anaheim, CA"


In [23]:
## use site as index
df.set_index("local_site_name", inplace=True)
df

Unnamed: 0_level_0,latitude,longitude,parameter,year,quarter,units_of_measure,arithmetic_mean,minimum_value,maximum_value,cbsa
local_site_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Azusa,34.13650,-117.92391,PM2.5 - Local Conditions,2019,1,Micrograms/cubic meter (LC),7.2267,2.4,21.5,"Los Angeles-Long Beach-Anaheim, CA"
Azusa,34.13650,-117.92391,PM2.5 - Local Conditions,2019,2,Micrograms/cubic meter (LC),9.8067,4.7,17.4,"Los Angeles-Long Beach-Anaheim, CA"
Azusa,34.13650,-117.92391,PM2.5 - Local Conditions,2019,3,Micrograms/cubic meter (LC),12.4429,8.0,16.7,"Los Angeles-Long Beach-Anaheim, CA"
Azusa,34.13650,-117.92391,PM2.5 - Local Conditions,2019,3,Micrograms/cubic meter (LC),16.2800,8.0,70.0,"Los Angeles-Long Beach-Anaheim, CA"
Azusa,34.13650,-117.92391,PM2.5 - Local Conditions,2019,3,Micrograms/cubic meter (LC),16.2800,8.0,70.0,"Los Angeles-Long Beach-Anaheim, CA"
...,...,...,...,...,...,...,...,...,...,...
Pico Rivera #2,34.01029,-118.06850,PM2.5 - Local Conditions,2019,4,Micrograms/cubic meter (LC),12.1857,4.5,27.4,"Los Angeles-Long Beach-Anaheim, CA"
Pico Rivera #2,34.01029,-118.06850,PM2.5 - Local Conditions,2019,1,Micrograms/cubic meter (LC),8.2067,2.5,19.2,"Los Angeles-Long Beach-Anaheim, CA"
Pico Rivera #2,34.01029,-118.06850,PM2.5 - Local Conditions,2019,2,Micrograms/cubic meter (LC),8.1600,3.6,14.3,"Los Angeles-Long Beach-Anaheim, CA"
Pico Rivera #2,34.01029,-118.06850,PM2.5 - Local Conditions,2019,3,Micrograms/cubic meter (LC),11.2750,7.6,15.6,"Los Angeles-Long Beach-Anaheim, CA"


In [27]:
## take quarter 1 data as an example
df_q1 = df[df["quarter"] == "1"]
df_q1.reset_index(inplace=True)


In [29]:
import plotly.express as px

fig = px.scatter_mapbox(df_q1, lat="latitude", lon="longitude", hover_name="local_site_name", hover_data=["arithmetic_mean", "parameter"],
                        color_discrete_sequence=["fuchsia"], zoom=10, height=300)
fig.update_layout(mapbox_style="carto-positron")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


In [31]:
import air_quality_util as aq


In [32]:
email, api_key = aq.get_api_key("myEmailKey.txt")

In [33]:
print(email, api_key, base_url)

zhu.tim99@gmail.com copperswift75 https://aqs.epa.gov/data/api


In [35]:
endpoint=aq.create_endpoint("Sites")
print(endpoint)

list/sitesByCounty


In [41]:
param = aq.param_requirement(email, api_key, "Sites")


{'email': 'zhu.tim99@gmail.com', 'key': 'copperswift75', 'state': '06', 'county': '037'}
