Table of Contents:
1. Import Modules and Packes
2. Set-up EPA AQS API
3. Load Data
4. Examine Data
5. Merge Data into DataFrame, Review Data

In [1]:
#1.Imports and Modules
import requests
#store the data we get as a dataframe
import pandas as pd
#convert the response as a strcuctured json
import json
#mathematical operations on lists
import numpy as np
#parse the datetimes we get from NOAA
from datetime import datetime

In [2]:
#2. Set-up EPA AQS API; county codes, parameters, etc for Data Extraction via API

#EPA AQS API: requires specific 'token ID'
#add the access token you got from NOAA
Token = "####"
email = "####@gmail.com"

#Station ID, County ID, State ID...etc needed to run API for data
#Dataset will be performed on 8-hour ozone emission average (O3)

In [3]:
#EPA FIPS County Codes .csv available "states_and_counties.csv"
county_codes = pd.read_csv('C:/Users/srubin/Desktop/Springboard/CapstoneProject2/_states_and_counties.csv')
county_codes.head()

#Acquire appropriate County Codes for top 10 most populous counties in U.S.
top10counties = ['Los Angeles', 'Cook', 'Harris', 'Maricopa','San Diego','Orange','Miami-Dade', 'Dallas', 'Kings','Riverside']
states = ['California', 'Illinois', 'Texas', 'Arizona', 'Florida', 'New York']
county_codes = county_codes[['State Name','County Code','County Name']]
county_codes.head()

county_codes.columns
county_codes.columns = ['StateName','CountyCode','CountyName']
county_codes.head()

top10counties = ['Los Angeles', 'Cook', 'Harris', 'Maricopa','San Diego','Orange','Miami-Dade', 'Dallas', 'Kings','Riverside']
#states = ['California', 'Illinois', 'Texas', 'Arizona', 'Florida', 'New York']
resultsCounties = county_codes[county_codes.StateName.isin(states) & county_codes.CountyName.isin(top10counties)]
resultsCounties

#drop a few extraneous rows
results_Counties = resultsCounties.drop([362,367,1865,2705])
results_Counties
#Here are the County Codes for the top most populous counties in the U.S.!

Unnamed: 0,StateName,CountyCode,CountyName
103,Arizona,13,Maricopa
201,California,31,Kings
204,California,37,Los Angeles
215,California,59,Orange
218,California,65,Riverside
222,California,73,San Diego
610,Illinois,31,Cook
1853,New York,47,Kings
2581,Texas,113,Dallas
2625,Texas,201,Harris


In [6]:
# returns daily summary ozone data for the Maricopa Co., AZ on 2019-01-01 - 2019-01-02 at all sampling sites w/in County.
Maricopa_url = "https://aqs.epa.gov/data/api/sampleData/byCounty?email=spencer.max.rubin@gmail.com&key=orangeheron75&param=44201&bdate=20190101&edate=20190102&state=04&county=013"
r_Maricopa = requests.get(Maricopa_url)
print(r_Maricopa.headers)
print(r_Maricopa.text)


{'Date': 'Mon, 05 Oct 2020 20:39:39 GMT', 'Server': 'nginx/1.17.3 + Phusion Passenger 6.0.4', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains', 'Content-Type': 'application/json; charset=utf-8', 'Status': '200 OK', 'Cache-Control': 'max-age=0, private, must-revalidate', 'Vary': 'Origin', 'ETag': 'W/"c326d9f6f1cbf9edb18569b24f7023f4"', 'X-Frame-Options': 'SAMEORIGIN', 'X-XSS-Protection': '1; mode=block', 'X-Content-Type-Options': 'nosniff', 'X-Runtime': '0.884731', 'X-Request-Id': '0763ab3f-eec8-4a73-af0c-689f7438eebe', 'X-Powered-By': 'Phusion Passenger 6.0.4', 'Content-Encoding': 'gzip', 'X-MW-LOGS': '/public/server/logs/web', 'Keep-Alive': 'timeout=5, max=100', 'Connection': 'Keep-Alive', 'Transfer-Encoding': 'chunked'}
{
  "Header": [
    {
      "status": "Success",
      "request_time": "2020-10-05T16:39:38-04:00",
      "url": "https://aqs.epa.gov/data/api/sampleData/byCounty?email=spencer.max.rubin@gmail.com&key=orangeheron75&param=44201&bdate=20190101&edate=20

In [48]:
MaricopaCounty = r_Maricopa.json()
print(MaricopaCounty)

{'Header': [{'status': 'Success', 'request_time': '2020-10-05T16:39:38-04:00', 'url': 'https://aqs.epa.gov/data/api/sampleData/byCounty?email=spencer.max.rubin@gmail.com&key=orangeheron75&param=44201&bdate=20190101&edate=20190102&state=04&county=013', 'rows': 1056}], 'Data': [{'state_code': '04', 'county_code': '013', 'site_number': '0019', 'parameter_code': '44201', 'poc': 1, 'latitude': 33.48378, 'longitude': -112.14256, 'datum': 'NAD83', 'parameter': 'Ozone', 'date_local': '2019-01-01', 'time_local': '00:00', 'date_gmt': '2019-01-01', 'time_gmt': '07:00', 'sample_measurement': 0.016, 'units_of_measure': 'Parts per million', 'units_of_measure_code': '007', 'sample_duration': '1 HOUR', 'sample_duration_code': '1', 'sample_frequency': 'HOURLY', 'detection_limit': 0.005, 'uncertainty': None, 'qualifier': None, 'method_type': 'FEM', 'method': 'INSTRUMENTAL - ULTRA VIOLET ABSORPTION', 'method_code': '087', 'state': 'Arizona', 'county': 'Maricopa', 'date_of_last_change': '2019-03-14', 'cbs

In [55]:
#What are the keys in dictionary: MaricopaCounty?
MaricopaCounty.keys()
#"Header", "Data"... data is stored in "Data"

dict_keys(['Header', 'Data'])

In [58]:
#What are the values? How to extract the relevant data from the values
#print(MaricopaCounty.values()) lots of values...

#Extracting only the "Data" keys from dictionary
MaricopaData1 = {key:MaricopaCounty[key] for key in MaricopaCounty.keys() & {"Data"} }
print(MaricopaData1)

{'Data': [{'state_code': '04', 'county_code': '013', 'site_number': '0019', 'parameter_code': '44201', 'poc': 1, 'latitude': 33.48378, 'longitude': -112.14256, 'datum': 'NAD83', 'parameter': 'Ozone', 'date_local': '2019-01-01', 'time_local': '00:00', 'date_gmt': '2019-01-01', 'time_gmt': '07:00', 'sample_measurement': 0.016, 'units_of_measure': 'Parts per million', 'units_of_measure_code': '007', 'sample_duration': '1 HOUR', 'sample_duration_code': '1', 'sample_frequency': 'HOURLY', 'detection_limit': 0.005, 'uncertainty': None, 'qualifier': None, 'method_type': 'FEM', 'method': 'INSTRUMENTAL - ULTRA VIOLET ABSORPTION', 'method_code': '087', 'state': 'Arizona', 'county': 'Maricopa', 'date_of_last_change': '2019-03-14', 'cbsa_code': '38060'}, {'state_code': '04', 'county_code': '013', 'site_number': '0019', 'parameter_code': '44201', 'poc': 1, 'latitude': 33.48378, 'longitude': -112.14256, 'datum': 'NAD83', 'parameter': 'Ozone', 'date_local': '2019-01-01', 'time_local': '01:00', 'date_g

In [62]:
#Extracting key-value data from MaricopaData1
key,val = next(iter(MaricopaData1.items()))

print("First key of dictionary is:"+ str(key))
print("First value of dictionary is: " + str(val))
#

First key of dictionary is:Data
First value of dictionary is: [{'state_code': '04', 'county_code': '013', 'site_number': '0019', 'parameter_code': '44201', 'poc': 1, 'latitude': 33.48378, 'longitude': -112.14256, 'datum': 'NAD83', 'parameter': 'Ozone', 'date_local': '2019-01-01', 'time_local': '00:00', 'date_gmt': '2019-01-01', 'time_gmt': '07:00', 'sample_measurement': 0.016, 'units_of_measure': 'Parts per million', 'units_of_measure_code': '007', 'sample_duration': '1 HOUR', 'sample_duration_code': '1', 'sample_frequency': 'HOURLY', 'detection_limit': 0.005, 'uncertainty': None, 'qualifier': None, 'method_type': 'FEM', 'method': 'INSTRUMENTAL - ULTRA VIOLET ABSORPTION', 'method_code': '087', 'state': 'Arizona', 'county': 'Maricopa', 'date_of_last_change': '2019-03-14', 'cbsa_code': '38060'}, {'state_code': '04', 'county_code': '013', 'site_number': '0019', 'parameter_code': '44201', 'poc': 1, 'latitude': 33.48378, 'longitude': -112.14256, 'datum': 'NAD83', 'parameter': 'Ozone', 'date

In [80]:
#MaricopaData1... get values split out 
df1 = pd.concat({k: pd.DataFrame(v) for k, v in MaricopaData1.items()})
Maricopa_df = df1
Maricopa_df.head()
#df_test = pd.DataFrame.from_dict(MaricopaData1)
#df_test.loc[1:2]

Unnamed: 0,Unnamed: 1,state_code,county_code,site_number,parameter_code,poc,latitude,longitude,datum,parameter,date_local,...,detection_limit,uncertainty,qualifier,method_type,method,method_code,state,county,date_of_last_change,cbsa_code
Data,0,4,13,19,44201,1,33.48378,-112.14256,NAD83,Ozone,2019-01-01,...,0.005,,,FEM,INSTRUMENTAL - ULTRA VIOLET ABSORPTION,87,Arizona,Maricopa,2019-03-14,38060
Data,1,4,13,19,44201,1,33.48378,-112.14256,NAD83,Ozone,2019-01-01,...,0.005,,,FEM,INSTRUMENTAL - ULTRA VIOLET ABSORPTION,87,Arizona,Maricopa,2019-03-14,38060
Data,2,4,13,19,44201,1,33.48378,-112.14256,NAD83,Ozone,2019-01-01,...,0.005,,,FEM,INSTRUMENTAL - ULTRA VIOLET ABSORPTION,87,Arizona,Maricopa,2019-03-14,38060
Data,3,4,13,19,44201,1,33.48378,-112.14256,NAD83,Ozone,2019-01-01,...,0.005,,,FEM,INSTRUMENTAL - ULTRA VIOLET ABSORPTION,87,Arizona,Maricopa,2019-03-14,38060
Data,4,4,13,19,44201,1,33.48378,-112.14256,NAD83,Ozone,2019-01-01,...,0.005,,,FEM,INSTRUMENTAL - ULTRA VIOLET ABSORPTION,87,Arizona,Maricopa,2019-03-14,38060
