# AQS and USGS Data Set Retrieval:
## Dependencies and API setup
### Goals and Target:
    1. Load an earthquake event from the USGS Data Set into a data frame or JSON object
    2. Retrieve and load AQS data for a time window around the earthquake event from sensors determined by a bounding box around a (lat, long) pair.
    3. Retrieve only records for gases we are interested in.
    4. Set up helper functions and files to make scaling painless.

In [None]:
# Import libraries:

import datetime
from datetime import timedelta
import time
import pandas as pd
import io
import requests
import geojson
import json
# AQS API Things
from config import apiURL, apiUser, apiPassword, aqsParams
#USGS API Things:
from config import usgsURL, usgsStart, usgsFormat, usgsMinmag, usgsConus

# Comma-separated string of the values in the aqsParams config mapping,
# used as the `param` query argument of every AQS request.
params = ','.join(aqsParams.values())



## AQS URL Construction and helper functions:
1. Take gcs_coords and event_date from the USGS earthquake API
2. Format the date/time (USGS uses ISO 8601 time specs)

In [None]:
#  Define functions to create a bounding box and a time window for the API call.
#  bbox takes an argument `window`: the number of degrees added to and subtracted
#  from the latitude and longitude, so the box spans 2 * window degrees per side.
def date_coord_id(df):
    """Return the ``Date``, ``GCS`` and ``Identifier`` attributes of *df*.

    Args:
        df: Any object exposing ``Date``, ``GCS`` and ``Identifier``
            attributes (for example an earthquake DataFrame or one of
            its rows).

    Returns:
        A ``(date, coord, ueid)`` tuple, in that order.
    """
    return df.Date, df.GCS, df.Identifier

def bbox(window, gcs_coords):
    """Build the bounding-box part of an AQS query string.

    Args:
        window: Number of degrees to extend in each direction from the
            centre point.
        gcs_coords: Sequence with latitude at index 0 and longitude at
            index 1.

    Returns:
        A string of the form
        ``&minlat=..&maxlat=..&minlon=..&maxlon=..``.
    """
    lat = gcs_coords[0]
    lon = gcs_coords[1]
    south, north = lat - window, lat + window
    west, east = lon - window, lon + window
    return f'&minlat={south}&maxlat={north}&minlon={west}&maxlon={east}'
def t_window(event_date, window_length):
    """Build the date-range part of an AQS query string.

    The range starts ``window_length`` days before the event and ends
    ``window_length`` days after it.

    Args:
        event_date: Event date as a ``'YYYYMMDD'`` string.
        window_length: Number of days on each side of the event.

    Returns:
        A string of the form ``&bdate=YYYYMMDD&edate=YYYYMMDD``.
    """
    half_span = timedelta(days=window_length)
    centre = datetime.datetime.strptime(event_date, '%Y%m%d')
    begin = (centre - half_span).strftime("%Y%m%d")
    end = (centre + half_span).strftime("%Y%m%d")
    return f'&bdate={begin}&edate={end}'

def aqs_api_call(event_date, gcs_coords, quake_df):
    """Fetch EPA AQS records around a single event.

    The request covers a box extending 1 degree in every direction from
    the event (see ``bbox``) and 7 days before and after it (see
    ``t_window``).

    Args:
        event_date: Event date as a ``'YYYYMMDD'`` string.
        gcs_coords: Sequence with latitude first and longitude second.
        quake_df: Not used by this function; kept so existing callers
            that pass it keep working.

    Returns:
        The decoded JSON response, or ``None`` when the request fails,
        times out, returns a non-200 status, or the body is not valid
        JSON.
    """
    aqs_url = (f'{apiURL}param={params}&email={apiUser}&key={apiPassword}'
               f'{bbox(1.0, gcs_coords)}{t_window(event_date, 7)}')
    try:
        # (connect, read) timeouts: without them a single stalled
        # connection would block a multi-hour bulk run forever.
        response = requests.get(aqs_url, timeout=(10, 300))
    except requests.RequestException:
        # Treat network failures like a bad status so a bulk run keeps
        # the records it has already collected.
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # Body was not JSON (requests' JSON errors derive from ValueError).
        return None

def aqs_bulk_call(earthquake_df):
    """Call the AQS API once for every earthquake in *earthquake_df*.

    Args:
        earthquake_df: DataFrame with ``Identifier``, ``Date`` and ``GCS``
            columns, one row per earthquake.

    Returns:
        Dict mapping each earthquake identifier to whatever
        ``aqs_api_call`` returned for it (``None`` for failed requests).
    """
    eqaq_raw = {}
    # Iterate the column values directly: this visits every row (the old
    # index-based loop skipped the last one) and does not depend on the
    # frame having a 0..n-1 index.
    events = zip(earthquake_df['Identifier'],
                 earthquake_df['Date'],
                 earthquake_df['GCS'])
    for ueid, event_date, gcs_coords in events:
        print(f'Getting Records for: {ueid}')
        eqaq_raw[ueid] = aqs_api_call(event_date, gcs_coords, earthquake_df)
    return eqaq_raw


def aqs_scrub(aqs_json):
    """Flatten bulk AQS responses into a single DataFrame.

    Args:
        aqs_json: Mapping of event identifier to an AQS response, i.e. the
            structure built by ``aqs_bulk_call``. Each response is either
            ``None`` (skipped) or a dict whose ``'Data'`` entry is a list
            of record dicts.

    Returns:
        DataFrame with one row per record and the columns ``q_ueid``,
        ``Parameter``, ``P_id``, ``Station``, ``Date``, ``County``,
        ``Measurement`` and ``Unit``. Empty when there are no records.
    """
    target_data = []
    for ueid, response in aqs_json.items():
        if response is None:
            # No usable response was stored for this event.
            continue
        for record in response['Data']:
            # Only the fields that end up in the output are read, so a
            # record is not required to carry any other key.
            target_data.append({
                "q_ueid": ueid,
                "Parameter": record['parameter'],
                "P_id": record['parameter_code'],
                "Station": record['site_number'],
                "Date": record['date_local'],
                "County": record['county'],
                "Measurement": record['sample_measurement'],
                "Unit": record['units_of_measure'],
            })
    return pd.DataFrame(target_data)

## USGS URL Construction and helper functions:

In [None]:
# Define functions to query USGS for earthquakes in the US with mag > 7
# Return gcs_coords (epicenter in lat, long), event_date, magnitude, uid, depth
# In this case the parameters for the USGS data search were decided in advance and
# are specified and hardcoded in the config file. All we need is a function
# to create the API call from the specs, and reduce the data to our target parameters:
def usgs_api_call():
    """Query the USGS API using the search settings from the config file.

    The URL is the concatenation of the ``usgs*`` config values in a fixed
    order: base URL, format, start, minimum magnitude, then the
    ``minlat``/``maxlat``/``minlon``/``maxlon`` entries of ``usgsConus``.

    Returns:
        The decoded JSON response.

    Raises:
        requests.RequestException: If the request fails, times out, or
            the server answers with an HTTP error status.
    """
    usgs_url = (f"{usgsURL}{usgsFormat}{usgsStart}{usgsMinmag}"
                f"{usgsConus['minlat']}{usgsConus['maxlat']}"
                f"{usgsConus['minlon']}{usgsConus['maxlon']}")
    # (connect, read) timeouts so a stalled connection cannot hang forever.
    response = requests.get(usgs_url, timeout=(10, 120))
    # Surface HTTP errors directly instead of as a JSON decoding failure.
    response.raise_for_status()
    return response.json()

def usgs_scrub(usgs_geojson):
    """Extract the fields of interest from a USGS GeoJSON response.

    Args:
        usgs_geojson: Dict with a ``'features'`` list. Each feature needs
            an ``'id'``, ``'properties'`` with ``'mag'``, ``'time'``
            (epoch milliseconds) and ``'place'``, and ``'geometry'`` with
            ``'coordinates'`` ordered longitude, latitude, depth.

    Returns:
        DataFrame with the columns ``Identifier``, ``Location``, ``GCS``
        (``[lat, lon]``), ``Depth``, ``Magnitude``, ``Date`` (``YYYYMMDD``,
        UTC) and ``Time`` (``HH:MM:SS``, UTC). Rows whose location names
        Mexico (but not New Mexico) or contains ``MX`` are removed; the
        index is not reset afterwards.
    """
    columns = ["Identifier", "Location", "GCS", "Depth",
               "Magnitude", "Date", "Time"]
    target_data = []
    for feature in usgs_geojson['features']:
        properties = feature['properties']
        coordinates = feature['geometry']['coordinates']
        # The epoch is in milliseconds; convert once and format twice.
        event_utc = time.gmtime(properties['time'] / 1000)
        target_data.append({
            "Identifier": feature['id'],
            "Location": properties['place'],
            "GCS": [float(coordinates[1]), float(coordinates[0])],
            "Depth": coordinates[2],
            "Magnitude": properties['mag'],
            "Date": time.strftime('%Y%m%d', event_utc),
            "Time": time.strftime('%H:%M:%S', event_utc),
        })
    # Naming the columns keeps the schema stable when there are no features.
    target_df = pd.DataFrame(target_data, columns=columns)
    if target_df.empty:
        return target_df

    place = target_df['Location']
    # na=False keeps rows whose place is missing instead of raising on them.
    names_mexico = place.str.contains('Mexico', regex=False, na=False)
    # "New Mexico" is a US state and must not be dropped with Mexico.
    names_new_mexico = place.str.contains('New Mexico', regex=False, na=False)
    tagged_mx = place.str.contains('MX', regex=False, na=False)
    outside_us = (names_mexico & ~names_new_mexico) | tagged_mx
    return target_df[~outside_us]
    

    

## API Calls 
1. Call USGS api first to get coordinates and date to feed into the AQS API
2. Call AQS API with coordinates and date
3. Clean AQS Data

In [None]:
# Query USGS and reduce the response to a DataFrame; its Date and GCS
# columns supply the date and coordinates for the AQS calls.
earthquake_df = usgs_scrub(usgs_api_call())
# Bare expression so the notebook displays the DataFrame.
earthquake_df
                                    

In [None]:
# Renumber the rows 0..n-1: usgs_scrub filters rows out by location,
# which leaves gaps in the index.
earthquake_df = earthquake_df.reset_index(drop=True)

In [None]:
# AQS Call: Uncomment the call below the first time you run this notebook.
# Bulk call for all the events you are interested in:
# eqaq_json = aqs_bulk_call(earthquake_df)
# Don't run this again; it takes a long time (about 1h45m).

In [None]:
# save our json as a file for posterity
# also just run once: commented out for safety

#with open('epa_aq_mag5_7dbuff_1990_2019.json', 'w') as f:
#   json.dump(eqaq_json, f, indent=4, sort_keys=True)

In [None]:
# aqs_scrub walks every event in the bulk-call dictionary itself, so a single
# call flattens the whole result set. eqaq_json must already exist, either
# from aqs_bulk_call or from loading the saved JSON file.
aqs_df = aqs_scrub(eqaq_json).reset_index(drop=True)
aqs_df.to_csv('epa_aq_bbox_1990_2019.csv')

In [None]:
# Load the previously saved bulk AQS responses from the JSON file back
# into Python as eqaq_json.
with open("epa_aq_mag5_7dbuff_1990_2019.json", "r") as read_file:
    eqaq_json = json.load(read_file)

In [None]:
# Keep only the attributes we are interested in (see aqs_scrub above),
# producing one DataFrame row per AQS record.
eqaq_df = aqs_scrub(eqaq_json)

In [None]:
# Preview the first rows of the scrubbed data.
eqaq_df.head()

In [None]:
# Save the scraped data to a CSV file to be passed off to cleaning and viz.
eqaq_df.to_csv('final_aq.csv')