#### Import Dependencies & needed API Keys

In [1]:
import datetime
import os
from datetime import datetime as dt
from time import sleep

import pandas as pd
import requests

from config import airNowApiKeyShane, airNowApiKeyAudelia, airNowApiKeyCenez, airNowApiKeyJoseph, airNowApiKeyJoey
# from largest_combined_statistical_areas import largest_combined_statistical_areas
# from largest_combined_statistical_areas_v2_t25 import largest_combined_statistical_areas
from largest_combined_statistical_areas_v3_t25 import largest_combined_statistical_areas

In [2]:
# Display the imported list of CSA records to confirm the data loaded as expected
largest_combined_statistical_areas

[{'csa_rank': 1,
  'csa_name': 'New York-Newark, NY-NJ-CT-PA Combined Statistical Area',
  'primary_city': 'New York City',
  'primary_city_state': 'NY',
  'capital': 'no',
  'capital_of': 'n/a',
  'population': {'2018_estimate': 22679948, '2010_census': 22255491},
  'primary_city_location': {'lat': 40.7128,
   'long': -74.006,
   'zip_code': '10001'},
  'timezone_params': {'timezone': 'Eastern',
   'utc_offset': {'standard_time': '-05:00',
    'daylight_savings_time': '-04:00'}},
  'search_params': {'closest_monitoring_station': {'lat': 40.8419,
    'long': -73.8359,
    'search_radius': 25}}},
 {'csa_rank': 2,
  'csa_name': 'Los Angeles-Long Beach, CA Combined Statistical Area',
  'primary_city': 'Los Angeles',
  'primary_city_state': 'CA',
  'capital': 'no',
  'capital_of': 'n/a',
  'population': {'2018_estimate': 18764814, '2010_census': 17877006},
  'primary_city_location': {'lat': 34.0522,
   'long': -118.2437,
   'zip_code': '90001'},
  'timezone_params': {'timezone': 'Pacific',

#### Set up counters, an empty list to hold data, and the API key variables to be used

In [3]:
# Request counters: total attempts, attempts made with the current key,
# and the 1-based number of the API key currently in use
counterAttempts, counterAttemptsPerAPI, counterAPI = 0, 0, 1

# Collected API records are appended to this list
apiData = list()

# Give each imported AirNow key a numbered alias
(
    airNowApiKey1,
    airNowApiKey2,
    airNowApiKey3,
    airNowApiKey4,
    airNowApiKey5,
) = (
    airNowApiKeyShane,
    airNowApiKeyAudelia,
    airNowApiKeyCenez,
    airNowApiKeyJoseph,
    airNowApiKeyJoey,
)

#### Create a list of dates to iterate through (first 7 days of each month within the configured date range; originally 2015 to 2019)

In [4]:
# Inclusive date window to request from the API
# (the commented-out values are the full 2015-2019 range)
# startDate = '2015-01-01'
startDate = '2020-04-07'
# endDate = '2019-12-31'
endDate = '2020-04-30'

# Every calendar day in the window, as YYYY-MM-DD strings
dateList = [stamp.strftime("%Y-%m-%d") for stamp in pd.date_range(startDate, endDate)]

# Keep only the dates whose day-of-month is 1 through 7
datesToUseList = [isoDate for isoDate in dateList if 1 <= int(isoDate.split("-")[2]) <= 7]

# Show how many dates will be requested
len(datesToUseList)

1

#### Set up the API call function with the URL, API key, and variables to pass through in the requests.get() API call

In [13]:
def apiCallLatLongHistorical(csaRank, csaFullName, csaPrimaryCity, csaPrimaryCityState, csaPopulation2018Estimate, csaPopulation2010Census, csaPrimaryCityLat, csaPrimaryCityLong, csaPrimaryCityZip, csaTimeZone, csaStandardTimeUtcOffset, csaDaylightSavingsTimeUtcOffset, csaSearchLat, csaSearchLong, csaSearchRadius, obsDate, obsTime, apiKey, apiKeyUsed, counterAttempts, counterAttemptsPerAPI, counterAPI):
    """Request historical AirNow observations for one CSA and date and store them in apiData.

    Despite the function name, the request goes to the zip-code historical
    endpoint and is keyed on csaPrimaryCityZip. Each record in the JSON
    response has its nested "Category" dict flattened into "AQICategoryNumber"
    and "AQICategory", is tagged with the CSA attributes passed in, and is
    appended to the notebook-level apiData list.

    Exactly one HTTP request is made per call, so one call corresponds to one
    attempt counted against the API key by the calling loop.

    Parameters:
        csaRank ... csaSearchRadius: CSA attributes copied onto every returned
            record under keys of the same name. csaPrimaryCityZip is also the
            zip code queried and csaSearchRadius the search distance sent.
        obsDate: observation date string placed in the request (YYYY-MM-DD in the caller).
        obsTime: accepted for interface compatibility only; the request always
            uses the fixed time "00-0000".
        apiKey: AirNow API key sent with the request.
        apiKeyUsed: label of the key, used only in the status message.
        counterAttempts, counterAttemptsPerAPI, counterAPI: counters from the
            calling loop, used only in the status message.

    Returns:
        None. Results are appended to apiData. Any failure (network error,
        HTTP error status, unexpected payload) is printed and swallowed so the
        calling loop can continue with the next CSA/date.
    """

    # Zip-code historical endpoint (the lat/long variant lives at .../observation/latLong/historical/)
    baseURL = "http://www.airnowapi.org/aq/observation/zipCode/historical/?format=application/json"
    zipCode = f"{csaPrimaryCityZip}"
    requestDate = f"{obsDate}"
    requestTime = "00-0000"  # fixed observation time; the obsTime argument is intentionally not used
    searchDistance = f"{csaSearchRadius}"

    # Build API Request URL, passing through parameters - by ZIP CODE
    requestURL = (
        baseURL
        + "&zipCode=" + zipCode
        + "&date=" + requestDate
        + "T" + requestTime
        + "&distance=" + searchDistance
        + "&API_KEY=" + apiKey
    )

    # Logger: Print status message
    print("--------------------------------------------------")
    print("Requesting AirNow API Data for...")
    print("     **************************************************")
    print(f"CSA: {csaFullName} | CITY: {csaPrimaryCity}, {csaPrimaryCityState} | ZIP: {zipCode} | DATE: {requestDate}")
    print(f"ATTEMPT:{counterAttempts} | ATTEMPT PER BATCH/API: {counterAttemptsPerAPI} | BATCH/API: {counterAPI} | API Key Used: {apiKeyUsed}")
    print("     **************************************************")

    # CSA attributes added to every record returned for this request
    csaFields = {
        "csaRank": csaRank,
        "csaFullName": csaFullName,
        "csaPrimaryCity": csaPrimaryCity,
        "csaPrimaryCityState": csaPrimaryCityState,
        "csaPopulation2018Estimate": csaPopulation2018Estimate,
        "csaPopulation2010Census": csaPopulation2010Census,
        "csaPrimaryCityLat": csaPrimaryCityLat,
        "csaPrimaryCityLong": csaPrimaryCityLong,
        "csaPrimaryCityZip": csaPrimaryCityZip,
        "csaTimeZone": csaTimeZone,
        "csaStandardTimeUtcOffset": csaStandardTimeUtcOffset,
        "csaDaylightSavingsTimeUtcOffset": csaDaylightSavingsTimeUtcOffset,
        "csaSearchLat": csaSearchLat,
        "csaSearchLong": csaSearchLong,
        "csaSearchRadius": csaSearchRadius,
    }

    # Set up error handling in the event there is an error in the API request
    try:

        # Issue the request ONCE (a second call would silently consume rate-limit quota);
        # the timeout keeps a stalled connection from hanging the whole collection loop
        httpResponse = requests.get(requestURL, timeout=30)
        httpResponse.raise_for_status()
        response = httpResponse.json()

        # A successful query yields a list of observation dicts; anything else is reported as an error
        if not isinstance(response, list):
            raise ValueError(f"Unexpected API response: {response}")

        print(f"Number of results found: {len(response)}")
        print("--------------------------------------------------")

        # Flatten, tag and store each observation
        for observation in response:

            # Replace the nested "Category" dict with two flat values (easier to export to CSV later)
            category = observation.pop("Category")
            observation["AQICategoryNumber"] = category["Number"]
            observation["AQICategory"] = category["Name"]

            # Add values brought in from the largest_combined_statistical_areas record
            observation.update(csaFields)

            # Append the observation to the apiData list
            apiData.append(observation)

    except Exception as e:
        # Best effort: report the failure and let the caller move on to the next request
        print(f"ERROR: Unable to perform AirNow API request for CITY: CSA: {csaFullName} | CITY: {csaPrimaryCity}, {csaPrimaryCityState} | ZIP: {zipCode} | DATE: {requestDate}.")
        print("%s" % e)
        print("--------------------------------------------------")
    
# if __name__ == "__apiCallLatLongHistorical__":
#     apiCallLatLongHistorical()
        

#### Loop through cities and dates, while updating variables and passing them through to the apiCallLatLongHistorical Function

In [14]:
# Request budget per API key and the pause (in seconds) once every key has used its budget
MAX_ATTEMPTS_PER_API_KEY = 250
SLEEP_SECONDS_WHEN_ALL_KEYS_USED = 3600

# API keys in rotation order; counterAPI is the 1-based position of the key in use
airNowApiKeys = [airNowApiKey1, airNowApiKey2, airNowApiKey3, airNowApiKey4, airNowApiKey5]

# Loop through cities in the largest_combined_statistical_areas dictionary / json object
for csa in largest_combined_statistical_areas:

    # These values depend only on the CSA, so they are looked up once per CSA rather than once per date
    csaRank = csa["csa_rank"]
    csaFullName = csa["csa_name"]
    csaPrimaryCity = csa["primary_city"]
    csaPrimaryCityState = csa["primary_city_state"]
    csaPopulation2018Estimate = csa["population"]["2018_estimate"]
    csaPopulation2010Census = csa["population"]["2010_census"]
    csaPrimaryCityLat = csa["primary_city_location"]["lat"]
    csaPrimaryCityLong = csa["primary_city_location"]["long"]
    csaPrimaryCityZip = csa["primary_city_location"]["zip_code"]

    csaTimeZone = csa["timezone_params"]["timezone"]
    csaStandardTimeUtcOffset = csa["timezone_params"]["utc_offset"]["standard_time"]
    csaDaylightSavingsTimeUtcOffset = csa["timezone_params"]["utc_offset"]["daylight_savings_time"]
    csaSearchLat = csa["search_params"]["closest_monitoring_station"]["lat"]
    csaSearchLong = csa["search_params"]["closest_monitoring_station"]["long"]
    csaSearchRadius = csa["search_params"]["closest_monitoring_station"]["search_radius"]

    # Loop through datesToUseList, calling the apiCallLatLongHistorical function, passing through variables
    for date in datesToUseList:

        # Update overall attempt counter
        counterAttempts += 1

        # Once the budget has been used for all API keys, go back to the first key and sleep before continuing
        if counterAttemptsPerAPI >= MAX_ATTEMPTS_PER_API_KEY and counterAPI >= len(airNowApiKeys):
            counterAttemptsPerAPI = 1
            counterAPI = 1
            print("*****  250 per API for all APIs avialable reached | RESET counterAttemptsPerAPI, counterAPI, apiKey & SLEEP  *****")
            sleep(SLEEP_SECONDS_WHEN_ALL_KEYS_USED)

        # Once the budget has been used for the current API key, move on to the next key
        elif counterAttemptsPerAPI >= MAX_ATTEMPTS_PER_API_KEY:
            counterAPI += 1
            counterAttemptsPerAPI = 1
            print("*****  250 per current API reached | RESET counterAttemptsPerAPI; UPDATE counterAPI & apiKey  *****")

        else:
            counterAttemptsPerAPI += 1

        # Select the key (and its label for logging) that matches counterAPI
        apiKey = airNowApiKeys[counterAPI - 1]
        apiKeyUsed = f"airNowApiKey{counterAPI}"

        # Call the apiCallLatLongHistorical function, passing through updated variables as parameters
        apiCallLatLongHistorical(csaRank, csaFullName, csaPrimaryCity, csaPrimaryCityState, csaPopulation2018Estimate, csaPopulation2010Census, csaPrimaryCityLat, csaPrimaryCityLong, csaPrimaryCityZip, csaTimeZone, csaStandardTimeUtcOffset, csaDaylightSavingsTimeUtcOffset, csaSearchLat, csaSearchLong, csaSearchRadius, date, "00-0000", apiKey, apiKeyUsed, counterAttempts, counterAttemptsPerAPI, counterAPI)
        
        

--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
CSA: New York-Newark, NY-NJ-CT-PA Combined Statistical Area | CITY: New York City, NY | ZIP: 10001 | DATE: 2020-04-07
ATTEMPT:26 | ATTEMPT PER BATCH/API: 26 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
Number of results found: 3
--------------------------------------------------
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
CSA: Los Angeles-Long Beach, CA Combined Statistical Area | CITY: Los Angeles, CA | ZIP: 90001 | DATE: 2020-04-07
ATTEMPT:27 | ATTEMPT PER BATCH/API: 27 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
Number of results found: 1
--------------------------------------------------
--------------------------------------------------

Number of results found: 3
--------------------------------------------------
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
CSA: Portland-Vancouver-Salem, OR-WA Combined Statistical Area | CITY: Portland, OR | ZIP: 97201 | DATE: 2020-04-07
ATTEMPT:44 | ATTEMPT PER BATCH/API: 44 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
Number of results found: 2
--------------------------------------------------
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
CSA: St. Louis-St. Charles-Farmington, MO-IL Combined Statistical Area | CITY: St. Louis, MO | ZIP: 63103 | DATE: 2020-04-07
ATTEMPT:45 | ATTEMPT PER BATCH/API: 45 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
Number of results found: 3
--------------

#### Check the number of records in the apiData list

In [15]:
# apiData
# Number of records currently held in apiData
len(apiData)

59

#### Create DataFrame holding the values from the apiData list

In [16]:
# Build a DataFrame from the records collected in apiData
df = pd.DataFrame(apiData)

# Visualize the DataFrame
df

Unnamed: 0,DateObserved,HourObserved,LocalTimeZone,ReportingArea,StateCode,Latitude,Longitude,ParameterName,AQI,AQICategoryNumber,...,csaPopulation2010Census,csaPrimaryCityLat,csaPrimaryCityLong,csaPrimaryCityZip,csaTimeZone,csaStandardTimeUtcOffset,csaDaylightSavingsTimeUtcOffset,csaSearchLat,csaSearchLong,csaSearchRadius
0,2020-04-07,0,EST,New York City Region,NY,40.8419,-73.8359,OZONE,47,1,...,22255491,40.7128,-74.006,10001,Eastern,-05:00,-04:00,40.8419,-73.8359,25
1,2020-04-07,0,EST,New York City Region,NY,40.8419,-73.8359,PM2.5,22,1,...,22255491,40.7128,-74.006,10001,Eastern,-05:00,-04:00,40.8419,-73.8359,25
2,2020-04-07,0,EST,New York City Region,NY,40.8419,-73.8359,PM10,14,1,...,22255491,40.7128,-74.006,10001,Eastern,-05:00,-04:00,40.8419,-73.8359,25
3,2020-04-07,0,PST,S Central LA CO,CA,33.9288,-118.2108,OZONE,31,1,...,17877006,34.0522,-118.2437,90001,Pacific,-08:00,-07:00,33.9288,-118.2108,25
4,2020-04-07,0,CST,Chicago,IL,41.964,-87.659,OZONE,43,1,...,9840929,41.8781,-87.6298,60601,Central,-06:00,-05:00,41.964,-87.659,25
5,2020-04-07,0,CST,Chicago,IL,41.964,-87.659,PM2.5,56,2,...,9840929,41.8781,-87.6298,60601,Central,-06:00,-05:00,41.964,-87.659,25
6,2020-04-07,0,EST,Metropolitan Washington,DC,38.919,-77.013,OZONE,42,1,...,9032651,38.9072,-77.0369,20001,Eastern,-05:00,-04:00,38.919,-77.013,25
7,2020-04-07,0,EST,Metropolitan Washington,DC,38.919,-77.013,PM2.5,40,1,...,9032651,38.9072,-77.0369,20001,Eastern,-05:00,-04:00,38.919,-77.013,25
8,2020-04-07,0,EST,Metropolitan Washington,DC,38.919,-77.013,PM10,17,1,...,9032651,38.9072,-77.0369,20001,Eastern,-05:00,-04:00,38.919,-77.013,25
9,2020-04-07,0,PST,San Francisco,CA,37.75,-122.43,OZONE,36,1,...,8923942,37.7749,-122.4194,94102,Pacific,-08:00,-07:00,37.75,-122.43,25


#### Set output data file filepath variable

In [None]:
# Folder prefix for exported files; the trailing slash matters because the file name is appended directly to this string
output_data_filepath = "output_data/"

#### Create variables to create the CSV file name

In [None]:
# Current local time as YYYYMMDD_HHMMSS, so each export gets its own file name
timestamp = f"{dt.now():%Y%m%d_%H%M%S}"

# Base file name (no extension) for the exported data
output_file_name = "AirNowAPI_" + timestamp
# print(output_file_name)

#### Convert & export DataFrame to CSV and JSON files

In [None]:
# Make sure the output folder exists so the export does not fail on a fresh checkout
os.makedirs(output_data_filepath, exist_ok=True)

# Write the DataFrame to a timestamped CSV file, leaving out the index column
df.to_csv(f"{output_data_filepath}{output_file_name}.csv", encoding="utf-8", index=False)

In [None]:
# df.to_json(f"{output_data_filepath}{output_file_name}.json")


In [17]:
# Display the raw collected records
# NOTE(review): this prints the entire list; consider showing only a slice when the list is large
apiData

[{'DateObserved': '2020-04-07 ',
  'HourObserved': 0,
  'LocalTimeZone': 'EST',
  'ReportingArea': 'New York City Region',
  'StateCode': 'NY',
  'Latitude': 40.8419,
  'Longitude': -73.8359,
  'ParameterName': 'OZONE',
  'AQI': 47,
  'AQICategoryNumber': 1,
  'AQICategory': 'Good',
  'csaRank': 1,
  'csaFullName': 'New York-Newark, NY-NJ-CT-PA Combined Statistical Area',
  'csaPrimaryCity': 'New York City',
  'csaPrimaryCityState': 'NY',
  'csaPopulation2018Estimate': 22679948,
  'csaPopulation2010Census': 22255491,
  'csaPrimaryCityLat': 40.7128,
  'csaPrimaryCityLong': -74.006,
  'csaPrimaryCityZip': '10001',
  'csaTimeZone': 'Eastern',
  'csaStandardTimeUtcOffset': '-05:00',
  'csaDaylightSavingsTimeUtcOffset': '-04:00',
  'csaSearchLat': 40.8419,
  'csaSearchLong': -73.8359,
  'csaSearchRadius': 25},
 {'DateObserved': '2020-04-07 ',
  'HourObserved': 0,
  'LocalTimeZone': 'EST',
  'ReportingArea': 'New York City Region',
  'StateCode': 'NY',
  'Latitude': 40.8419,
  'Longitude': -