#### Import Dependencies & needed API Keys

In [1]:
import datetime
import os
from datetime import datetime as dt
from time import sleep

import pandas as pd
import requests

from config import airNowApiKeyShane, airNowApiKeyAudelia, airNowApiKeyCenez, airNowApiKeyJoseph, airNowApiKeyJoey
from largest_combined_statistical_areas import largest_combined_statistical_areas

#### Set up counters, an empty list to hold data, and API key variables to be used

In [2]:
# Request bookkeeping: total requests made, requests made with the key currently
# in use, and the 1-based number of the key currently in use
counterAttempts, counterAttemptsPerAPI = 0, 0
counterAPI = 1

# Accumulates every observation record returned by the API
apiData = list()

# Numbered aliases for the imported API keys, so a key can be chosen by number
(
    airNowApiKey1,
    airNowApiKey2,
    airNowApiKey3,
    airNowApiKey4,
    airNowApiKey5,
) = (
    airNowApiKeyShane,
    airNowApiKeyAudelia,
    airNowApiKeyCenez,
    airNowApiKeyJoseph,
    airNowApiKeyJoey,
)

#### Create a list of dates to iterate through (first 7 days of each month, 2015 to 2019)

In [3]:
# Inclusive bounds of the date window requested from the API
# (for a short trial run, use '2020-01-01' through '2020-01-31' instead)
startDate = '2015-01-01'
endDate = '2019-12-31'

# Every calendar day in the window, as YYYY-MM-DD strings
dateList = [stamp.strftime("%Y-%m-%d") for stamp in pd.date_range(startDate, endDate)]

# Keep only the dates whose day-of-month is 1 through 7; the day is the
# final two characters of each YYYY-MM-DD string
datesToUseList = [
    isoDate
    for isoDate in dateList
    if 1 <= int(isoDate[-2:]) <= 7
]

# Show how many dates were kept
len(datesToUseList)

420

#### Set up the API call function with the URL, API key, and variables to pass through in the requests.get() API call

In [4]:
def apiCallLatLongHistorical(cityRank, cityName, cityCSA, cityState, cityPopulation2018Estimate, cityPopulation2010Census, cityLat, cityLong, cityZip, obsDate, obsTime, apiKey, apiKeyUsed, counterAttempts, counterAttemptsPerAPI, counterAPI):
    """Fetch historical AirNow observations for one city/date and append them to ``apiData``.

    Exactly one HTTP GET is issued per call. Each observation in the JSON
    response has its nested "Category" dictionary flattened into
    "AQICategoryNumber" and "AQICategory", is tagged with the city values
    passed in, and is appended to the module-level ``apiData`` list.

    Parameters
    ----------
    cityRank, cityName, cityCSA, cityState, cityPopulation2018Estimate,
    cityPopulation2010Census, cityLat, cityLong, cityZip
        City values copied onto every record; ``cityLat`` and ``cityLong``
        are also sent as the query coordinates.
    obsDate
        Observation date placed in the request's ``date`` field (the caller
        in this notebook passes YYYY-MM-DD strings).
    obsTime
        Time suffix appended to the date after a "T". Falls back to
        "00-0000" when empty or None.
    apiKey
        API key sent with the request.
    apiKeyUsed
        Label for the key; only printed in the status message.
    counterAttempts, counterAttemptsPerAPI, counterAPI
        Caller-side counters; only printed in the status message.

    Returns
    -------
    None
        Results are delivered through the ``apiData`` side effect.

    Notes
    -----
    Any failure (network error, HTTP error status, non-JSON body, unexpected
    payload shape) is reported on stdout and swallowed so that a long
    collection loop keeps running. The API key is masked in the printed
    error text.
    """
    baseURL = "http://www.airnowapi.org/aq/observation/latLong/historical/?format=application/json"
    searchDistance = "25"
    requestTimeoutSeconds = 30

    # String forms of the query values; also reused in the log messages below
    lat = f"{cityLat}"
    long = f"{cityLong}"
    date = f"{obsDate}"
    time = f"{obsTime}" if obsTime else "00-0000"

    # Build API Request URL, passing through parameters
    requestURL = (
        f"{baseURL}"
        f"&latitude={lat}"
        f"&longitude={long}"
        f"&date={date}T{time}"
        f"&distance={searchDistance}"
        f"&API_KEY={apiKey}"
    )

    # Logger: Print status message
    print("--------------------------------------------------")
    print("Requesting AirNow API Data for...")
    print("     **************************************************")
    print(f"CITY: {cityName}, {cityState} | LAT: {lat} | LONG: {long} | DATE: {date} | TIME: {time}")
    print(f"ATTEMPT:{counterAttempts} | BATCH/API: {counterAPI} | ATTEMPT PER BATCH/API: {counterAttemptsPerAPI} | API Key Used: {apiKeyUsed}")
    print("     **************************************************")

    try:
        # One request only: every call counts against the key's request allowance
        httpResponse = requests.get(requestURL, timeout=requestTimeoutSeconds)
        httpResponse.raise_for_status()
        response = httpResponse.json()

        # A successful query yields a list of observation dictionaries; anything
        # else (e.g. an error object) is routed to the error handler below
        if not isinstance(response, list):
            raise ValueError(f"Unexpected API response (expected a list of observations): {response}")

        print(f"Number of results found: {len(response)}")
        print("--------------------------------------------------")

        for observation in response:

            # Flatten the nested "Category" dictionary into two plain values
            # (keeps the record easy to convert to a CSV row later)
            category = observation.pop("Category")
            observation["AQICategoryNumber"] = category["Number"]
            observation["AQICategory"] = category["Name"]

            # Tag the observation with the city values supplied by the caller
            observation["cityRank"] = cityRank
            observation["cityName"] = cityName
            observation["cityCSA"] = cityCSA
            observation["cityState"] = cityState
            observation["cityPopulation2018Estimate"] = cityPopulation2018Estimate
            observation["cityPopulation2010Census"] = cityPopulation2010Census
            observation["cityLat"] = cityLat
            observation["cityLong"] = cityLong
            observation["cityZip"] = cityZip

            apiData.append(observation)

    except Exception as e:
        # Exception text from the HTTP layer can contain the full request URL,
        # so mask the API key before it reaches the notebook output
        errorText = str(e)
        if apiKey:
            errorText = errorText.replace(str(apiKey), "<API_KEY>")

        print(f"ERROR: Unable to perform AirNow API request for CITY: {cityName}, {cityState}, LAT: {lat}, LONG: {long}, DATE: {date} & TIME: {time}.")
        print(errorText)
        print("--------------------------------------------------")
    
# if __name__ == "__apiCallLatLongHistorical__":
#     apiCallLatLongHistorical()
        

#### Loop through cities and dates, while updating variables and passing them through to the apiCallLatLongHistorical Function

In [5]:
# Request data for every city/date combination. The API key is rotated after 250
# requests, and once the fifth key has also made 250 requests the loop pauses
# for one hour before starting again with the first key.

# API key, plus the label shown in the log, for each value of counterAPI
apiKeysByBatch = {
    1: (airNowApiKey1, "airNowApiKey1"),
    2: (airNowApiKey2, "airNowApiKey2"),
    3: (airNowApiKey3, "airNowApiKey3"),
    4: (airNowApiKey4, "airNowApiKey4"),
    5: (airNowApiKey5, "airNowApiKey5"),
}

# Loop through cities in the largest_combined_statistical_areas dictionary / json object
for city in largest_combined_statistical_areas:

    # City values are the same for every date, so read them once per city
    cityRank = city["rank"]
    cityName = city["name"]
    cityCSA = city["csa_name"]
    cityState = city["state"]
    cityPopulation2018Estimate = city["population"]["2018_estimate"]
    cityPopulation2010Census = city["population"]["2010_census"]
    cityLat = city["location"]["lat"]
    cityLong = city["location"]["long"]
    cityZip = city["location"]["zip_code"]

    # Loop through datesToUseList, calling the apiCallLatLongHistorical function for each date
    for date in datesToUseList:

        # Update overall attempt counter
        counterAttempts += 1

        # All five keys have made 250 requests: start over with the first key
        # and sleep for one hour (3600 seconds)
        if counterAttemptsPerAPI >= 250 and counterAPI >= 5:
            # Announce the pause BEFORE sleeping so the notebook does not look hung
            print("*****  250 per API for all APIs avialable reached | RESET counterAttemptsPerAPI, counterAPI, apiKey & SLEEP  *****")
            counterAttemptsPerAPI = 1
            counterAPI = 1
            sleep(3600)

        # The current key has made 250 requests: move on to the next key
        elif counterAttemptsPerAPI >= 250:
            counterAPI += 1
            counterAttemptsPerAPI = 1
            print("*****  250 per current API reached | RESET counterAttemptsPerAPI; UPDATE counterAPI & apiKey  *****")

        else:
            counterAttemptsPerAPI += 1

        # Pick the key (and its log label) for the current batch number
        apiKey, apiKeyUsed = apiKeysByBatch[counterAPI]

        # Call the apiCallLatLongHistorical function, passing through updated variables as parameters
        apiCallLatLongHistorical(cityRank, cityName, cityCSA, cityState, cityPopulation2018Estimate, cityPopulation2010Census, cityLat, cityLong, cityZip, date, "00-0000", apiKey, apiKeyUsed, counterAttempts, counterAttemptsPerAPI, counterAPI)
        
        

--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
CITY: New York City, NY | LAT: 40.7128 | LONG: -74.006 | DATE: 2015-01-01 | TIME: 00-0000
ATTEMPT:1 | BATCH/API: 1 | ATTEMPT PER BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
Number of results found: 2
--------------------------------------------------
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
CITY: New York City, NY | LAT: 40.7128 | LONG: -74.006 | DATE: 2015-01-02 | TIME: 00-0000
ATTEMPT:2 | BATCH/API: 1 | ATTEMPT PER BATCH/API: 2 | API Key Used: airNowApiKey1
     **************************************************
Number of results found: 2
--------------------------------------------------
--------------------------------------------------
Requesting AirNow API Data for...
     ****************

KeyboardInterrupt: 

#### Check the number of records in the apiData list

In [6]:
# Number of observation records accumulated in the apiData list
len(apiData)

2730

#### Create DataFrame holding the values from the apiData list

In [7]:
# Build a DataFrame from the list of record dictionaries collected in apiData
df = pd.DataFrame(apiData)

# Visualize the DataFrame
df

Unnamed: 0,DateObserved,HourObserved,LocalTimeZone,ReportingArea,StateCode,Latitude,Longitude,ParameterName,AQI,AQICategoryNumber,AQICategory,cityRank,cityName,cityCSA,cityState,cityPopulation2018Estimate,cityPopulation2010Census,cityLat,cityLong,cityZip
0,2015-01-01,0,EST,Newark,NJ,40.7267,-74.1442,OZONE,30,1,Good,1,New York City,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",NY,22679948,22255491,40.7128,-74.0060,10001
1,2015-01-01,0,EST,Newark,NJ,40.7267,-74.1442,PM2.5,65,2,Moderate,1,New York City,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",NY,22679948,22255491,40.7128,-74.0060,10001
2,2015-01-02,0,EST,Newark,NJ,40.7267,-74.1442,OZONE,24,1,Good,1,New York City,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",NY,22679948,22255491,40.7128,-74.0060,10001
3,2015-01-02,0,EST,Newark,NJ,40.7267,-74.1442,PM2.5,56,2,Moderate,1,New York City,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",NY,22679948,22255491,40.7128,-74.0060,10001
4,2015-01-03,0,EST,Newark,NJ,40.7267,-74.1442,OZONE,8,1,Good,1,New York City,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",NY,22679948,22255491,40.7128,-74.0060,10001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2725,2016-10-05,0,EST,Boston,MA,42.3510,-71.0510,PM2.5,27,1,Good,6,Boston,"Boston-Worcester-Providence, MA-RI-NH-CT Combi...",MA,8285407,7893376,42.3601,-71.0589,2108
2726,2016-10-06,0,EST,Boston,MA,42.3510,-71.0510,OZONE,20,1,Good,6,Boston,"Boston-Worcester-Providence, MA-RI-NH-CT Combi...",MA,8285407,7893376,42.3601,-71.0589,2108
2727,2016-10-06,0,EST,Boston,MA,42.3510,-71.0510,PM2.5,44,1,Good,6,Boston,"Boston-Worcester-Providence, MA-RI-NH-CT Combi...",MA,8285407,7893376,42.3601,-71.0589,2108
2728,2016-10-07,0,EST,Boston,MA,42.3510,-71.0510,OZONE,29,1,Good,6,Boston,"Boston-Worcester-Providence, MA-RI-NH-CT Combi...",MA,8285407,7893376,42.3601,-71.0589,2108


#### Set output data file filepath variable

In [8]:
# Relative directory prefix (trailing slash included) that the export cell
# prepends to the generated file name
output_data_filepath = "output_data/"

#### Create variables to create the CSV file name

In [10]:
# Current date and time rendered as YYYYMMDD_HHMMSS, used as the file-name suffix
timestamp = format(dt.now(), "%Y%m%d_%H%M%S")

# Base name (no directory, no extension) shared by the exported files
output_file_name = "AirNowAPI_" + timestamp

#### Convert & export DataFrame to CSV and JSON files

In [11]:
# Create the output directory if it is missing, so the export cannot fail on a
# missing folder after a long collection run
if output_data_filepath:
    os.makedirs(output_data_filepath, exist_ok=True)

# Write the DataFrame to a UTF-8 CSV file without the index column
df.to_csv(f"{output_data_filepath}{output_file_name}.csv", encoding="utf-8", index=False)

In [None]:
# df.to_json(f"{output_data_filepath}{output_file_name}.json")
