#### Import Dependencies & needed API Keys

In [1]:
import pandas as pd
import requests
import json
from datetime import datetime, timedelta
from time import sleep, time

from config import airNowApiKeyShane, airNowApiKeyAudelia, airNowApiKeyCenez, airNowApiKeyJoseph, airNowApiKeyJoey

from largest_combined_statistical_areas_v4_t25 import largest_combined_statistical_areas

# from daylight_savings_time_dates_v1 import daylight_savings_time_dates
from daylight_savings_time_dates_v1_2015_to_2020 import daylight_savings_time_dates


#### Set output data file filepath variable

In [2]:
# Relative directory (with trailing slash) used as the output data file path
output_data_filepath = 'output_data/'

#### Set up counters, an empty list to hold data, and the API key variables to be used

In [3]:
# Request counters: total attempts, attempts made with the current API key,
# and the 1-based number of the API key currently in use
counterAttempts = counterAttemptsPerAPI = 0
counterAPI = 1

# List that the API call function appends each returned record to
apiData = list()

# Numbered aliases for the API keys imported from config
(
    airNowApiKey1,
    airNowApiKey2,
    airNowApiKey3,
    airNowApiKey4,
    airNowApiKey5,
) = (
    airNowApiKeyShane,
    airNowApiKeyAudelia,
    airNowApiKeyCenez,
    airNowApiKeyJoseph,
    airNowApiKeyJoey,
)

#### Create a list of date ranges to iterate through (due to API limitations, list for each month: [monthStartDate, monthEndDate] -or- quarter [quarterStartDate, quarterEndDate]; January 2015 to April 2020)

In [4]:
# # Set Date Range to use for API
# # startDate = "2015-01-01"
# startDate = "2019-12-01" # Used for testing
# # endDate = "2020-04-30"
# endDate = "2019-12-31" # Used for testing

# # Create empty list to start
# datesToUseList = []
# monthStartDateList = pd.date_range(startDate,endDate, freq = '1M') - pd.offsets.MonthBegin(1)
# monthEndDateList = pd.date_range(startDate,endDate, freq = '1M')

# for date in monthEndDateList:
#     datesToUseList.append([(date - pd.offsets.MonthBegin(1)).strftime("%Y-%m-%d"), date.strftime("%Y-%m-%d")])

# Build the quarterly [startDate, endDate] ranges for every year of data we need
# (first quarter of 2015 through the quarter starting 2020-04-01)
quarterBounds = (
    ("01-01", "03-31"),
    ("04-01", "06-30"),
    ("07-01", "09-30"),
    ("10-01", "12-31"),
)
finalQuarterStart = "2020-04-01"

# ISO formatted dates sort chronologically as strings, so a plain string
# comparison is enough to stop after the final quarter
datesToUseList = [
    [f"{year}-{quarterStart}", f"{year}-{quarterEnd}"]
    for year in range(2015, 2021)
    for quarterStart, quarterEnd in quarterBounds
    if f"{year}-{quarterStart}" <= finalQuarterStart
]


# datesToUseList
len(datesToUseList)

22

#### Create a list of dates that fall within the Daylight Saving Time periods, used to determine which UTC offset to use for each city (January 2015 to December 2020)

In [5]:
# Collect every calendar day (as "YYYY-MM-DD") between each year's DST start and end dates
dstDateList = []
for dstYear in daylight_savings_time_dates:
    dstPeriod = dstYear["dates"]
    dstDateList.extend(
        pd.date_range(dstPeriod["start"], dstPeriod["end"]).strftime("%Y-%m-%d").tolist()
    )

# dstDateList
len(dstDateList)

1434

#### Set up the API call function with the URL, API key, and variables to pass through in the requests.get() API call

In [6]:
def apiAirNowObsByMonitoringSite(csaRank, csaName, csaPrimaryCity, csaPrimaryCityState, csaPopulation2018Estimate, csaPopulation2010Census, csaPrimaryCityLat, csaPrimaryCityLong, csaPrimaryCityZip, csaTimeZone, csaStandardTimeUtcOffset, csaDaylightSavingsTimeUtcOffset, csaMonitoringStationLat, csaMonitoringStationLong, csaSearchRadius, csaBboxVar, obsStartDate, obsStartHour, obsEndDate, ObsEndHour, apiKey, apiKeyUsed, counterAttempts, counterAttemptsPerAPI, counterAPI):
    """Request AirNow observations for one CSA and date range and store them in ``apiData``.

    A bounding box is built around the monitoring station coordinates
    (station lat/long +/- ``csaBboxVar``, rounded to 3 decimals), a single GET
    request is sent to the AirNow data endpoint, and every returned record is
    tagged with the CSA attributes before being appended to the module-level
    ``apiData`` list.

    Args:
        csaRank, csaName, csaPrimaryCity, csaPrimaryCityState,
        csaPopulation2018Estimate, csaPopulation2010Census,
        csaPrimaryCityLat, csaPrimaryCityLong, csaPrimaryCityZip,
        csaTimeZone, csaStandardTimeUtcOffset,
        csaDaylightSavingsTimeUtcOffset, csaSearchRadius:
            CSA attributes copied unchanged onto every returned record.
        csaMonitoringStationLat, csaMonitoringStationLong: Centre of the
            bounding box; also copied onto every record.
        csaBboxVar: Half-width of the bounding box, added to / subtracted
            from the station latitude and longitude.
        obsStartDate, obsStartHour: Start of the observation window, joined
            as ``<date>T<hour>`` in the request.
        obsEndDate, ObsEndHour: End of the observation window. The
            capitalised ``ObsEndHour`` spelling is kept so the signature stays
            unchanged for existing callers.
        apiKey: AirNow API key sent with the request.
        apiKeyUsed: Label of the key in use; only printed in the status log.
        counterAttempts, counterAttemptsPerAPI, counterAPI: Progress
            counters; only printed in the status log.

    Returns:
        None. Results are appended to ``apiData``; failures are printed and
        swallowed so one bad request does not abort the caller's loop.
    """
    dashLine = "--------------------------------------------------"
    starLine = "     **************************************************"

    # Pollutants to request (the wider set "ozone,pm25,pm10,co,no2,so2" is not requested)
    particulates = "ozone,pm25,pm10"

    # Bounding box around the monitoring station
    longMin = round(csaMonitoringStationLong - csaBboxVar, 3)
    longMax = round(csaMonitoringStationLong + csaBboxVar, 3)
    latMin = round(csaMonitoringStationLat - csaBboxVar, 3)
    latMax = round(csaMonitoringStationLat + csaBboxVar, 3)

    # Query string fields, in the order they appear in the request URL
    queryFields = [
        ("startDate", f"{obsStartDate}T{obsStartHour}"),
        # Use the function's own parameter rather than a same-named notebook global
        ("endDate", f"{obsEndDate}T{ObsEndHour}"),
        ("parameters", particulates),
        ("BBOX", f"{longMin},{latMin},{longMax},{latMax}"),
        ("dataType", "A"),  # A = AQI (C = Concentrations; B = AQI & Concentrations)
        ("format", "application/json"),
        ("verbose", "1"),  # 1 = True; adds Site Name, Agency Name, AQS ID, and Full AQS ID
        ("nowcastonly", "0"),  # 0 = False; 1 = always provide Nowcast concentrations and AQI
        ("includerawconcentrations", "0"),  # 0 = False; 1 = add a raw concentration field
        ("API_KEY", apiKey),
    ]

    # Build API Request URL - by GEOGRAPHIC BOUNDING BOX
    requestURL = "http://www.airnowapi.org/aq/data/?" + "&".join(
        f"{fieldName}={fieldValue}" for fieldName, fieldValue in queryFields
    )

    # CSA attributes added to every record returned by the API
    csaFields = {
        "csaRank": csaRank,
        "csaName": csaName,
        "csaPrimaryCity": csaPrimaryCity,
        "csaPrimaryCityState": csaPrimaryCityState,
        "csaPopulation2018Estimate": csaPopulation2018Estimate,
        "csaPopulation2010Census": csaPopulation2010Census,
        "csaPrimaryCityLat": csaPrimaryCityLat,
        "csaPrimaryCityLong": csaPrimaryCityLong,
        "csaPrimaryCityZip": csaPrimaryCityZip,
        "csaTimeZone": csaTimeZone,
        "csaStandardTimeUtcOffset": csaStandardTimeUtcOffset,
        "csaDaylightSavingsTimeUtcOffset": csaDaylightSavingsTimeUtcOffset,
        "csaMonitoringStationLat": csaMonitoringStationLat,
        "csaMonitoringStationLong": csaMonitoringStationLong,
        "csaSearchRadius": csaSearchRadius,
        "csaBboxVar": csaBboxVar,
    }

    # Logger: Print status message
    print(dashLine)
    print("Requesting AirNow API Data for...")
    print(starLine)
    print(datetime.now().strftime('%Y-%m-%d.%H.%M.%S'))
    print(f"CSA: {csaName} | CITY: {csaPrimaryCity}, {csaPrimaryCityState} | minLatLong: [{latMin},{longMin}] | maxLatLong: [{latMax},{longMax}] | DATE RANGE: {obsStartDate} - {obsEndDate}")
    print(f"ATTEMPT:{counterAttempts} | ATTEMPT PER BATCH/API: {counterAttemptsPerAPI} | BATCH/API: {counterAPI} | API Key Used: {apiKeyUsed}")
    print(starLine)

    # Defined before the try so the error handler can always print it,
    # even when the request itself fails before a response exists
    responseCode = None

    # Error handling in the event there is an error in the API requests.get()
    try:

        # Send the request ONCE and reuse the response object for both the
        # JSON payload and the logged response code (a second request would
        # double the usage counted against the API key)
        responseCode = requests.get(requestURL)
        response = responseCode.json()

        # Anything other than a list of records is reported as a failure below
        # NOTE(review): presumably API-level errors arrive as a non-list JSON payload - confirm
        if not isinstance(response, list):
            raise ValueError(f"Unexpected API payload: {str(response)[:200]}")

        # Results sum and response Code:
        print(starLine)
        print(f"Number of results found: {len(response)}")
        print(f"Response Code: {responseCode}")
        print(starLine)

        # Tag each returned record with the CSA attributes and store it
        for record in response:
            record.update(csaFields)
            apiData.append(record)

    except Exception as e:
        # Best-effort: report the failure and return so the caller can continue
        print(starLine)
        print(f"Response Code: {responseCode}")
        print(starLine)
        print(f"ERROR: Unable to perform AirNow API request for CSA: {csaName} | CITY: {csaPrimaryCity}, {csaPrimaryCityState} | minLatLong: [{latMin},{longMin}] | maxLatLong: [{latMax},{longMax}] | DATE RANGE: {obsStartDate} - {obsEndDate}")
        print(e)
        print(dashLine)


#### Loop through cities and dates, while updating variables and passing them through to the apiAirNowObsByMonitoringSite Function

In [7]:
# Number of requests made with one API key before switching to the next key
maxAttemptsPerApiKey = 250

# Seconds to pause once every API key has used up its batch of requests (one hour)
allKeysExhaustedSleepSeconds = 3600

# API keys in rotation order; counterAPI (1-based) selects the key in use
apiKeyRotation = [airNowApiKey1, airNowApiKey2, airNowApiKey3, airNowApiKey4, airNowApiKey5]

# Loop through cities in the largest_combined_statistical_areas dictionary / json object
for csa in largest_combined_statistical_areas:

    # The CSA attributes do not change between date ranges, so read them once per CSA
    csaRank = csa["csa_rank"]
    csaName = csa["csa_name"]
    csaPrimaryCity = csa["primary_city"]
    csaPrimaryCityState = csa["primary_city_state"]
    csaPopulation2018Estimate = csa["population"]["2018_estimate"]
    csaPopulation2010Census = csa["population"]["2010_census"]
    csaPrimaryCityLat = csa["primary_city_location"]["lat"]
    csaPrimaryCityLong = csa["primary_city_location"]["long"]
    csaPrimaryCityZip = csa["primary_city_location"]["zip_code"]
    csaTimeZone = csa["timezone_params"]["timezone"]
    csaStandardTimeUtcOffset = csa["timezone_params"]["utc_offset"]["standard_time"]
    csaDaylightSavingsTimeUtcOffset = csa["timezone_params"]["utc_offset"]["daylight_savings_time"]
    csaMonitoringStationLat = csa["search_params"]["closest_monitoring_station"]["lat"]
    csaMonitoringStationLong = csa["search_params"]["closest_monitoring_station"]["long"]
    csaSearchRadius = csa["search_params"]["closest_monitoring_station"]["search_radius"]
    csaBboxVar = csa["search_params"]["closest_monitoring_station"]["bbox_latlong_var"]

    # Loop through datesToUseList, calling the apiAirNowObsByMonitoringSite function, passing through variables
    for date in datesToUseList:

        # Observation window: whole days from the first to the last date of the range
        obsStartDate = date[0]
        obsStartHour = "00"
        obsEndDate = date[1]
        obsEndHour = "23"

        # Update overall attempt counter
        counterAttempts += 1

        # Once the per-key limit has been reached on the last API key, start over
        # with the first key and sleep for one hour (3600 seconds)
        if counterAttemptsPerAPI >= maxAttemptsPerApiKey and counterAPI >= len(apiKeyRotation):
            counterAttemptsPerAPI = 1
            counterAPI = 1
            print("*****  250 per API for all APIs avialable reached | RESET counterAttemptsPerAPI, counterAPI, apiKey & SLEEP  *****")
            sleep(allKeysExhaustedSleepSeconds)

        # Once the per-key limit has been reached on the current API key, move on
        # to the next key and restart the per-key counter
        elif counterAttemptsPerAPI >= maxAttemptsPerApiKey:
            counterAPI += 1
            counterAttemptsPerAPI = 1
            print("*****  250 per current API reached | RESET counterAttemptsPerAPI; UPDATE counterAPI & apiKey  *****")

        else:
            counterAttemptsPerAPI += 1

        # Set apiKey (and its label for the log) based on the counterAPI variable
        apiKey = apiKeyRotation[counterAPI - 1]
        apiKeyUsed = f"airNowApiKey{counterAPI}"

        # Call the apiAirNowObsByMonitoringSite function, passing through updated variables as parameters
        apiAirNowObsByMonitoringSite(csaRank, csaName, csaPrimaryCity, csaPrimaryCityState, csaPopulation2018Estimate, csaPopulation2010Census, csaPrimaryCityLat, csaPrimaryCityLong, csaPrimaryCityZip, csaTimeZone, csaStandardTimeUtcOffset, csaDaylightSavingsTimeUtcOffset, csaMonitoringStationLat, csaMonitoringStationLong, csaSearchRadius, csaBboxVar, obsStartDate, obsStartHour, obsEndDate, obsEndHour, apiKey, apiKeyUsed, counterAttempts, counterAttemptsPerAPI, counterAPI)

--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.02.18.01
CSA: New York-Newark, NY-NJ-CT-PA Combined Statistical Area | CITY: New York City, NY | minLatLong: [40.692,-73.986] | maxLatLong: [40.992,-73.686] | DATE RANGE: 2015-01-01 - 2015-03-31
ATTEMPT:1 | ATTEMPT PER BATCH/API: 1 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 22488
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.02.19.23
CSA: New York-Newark, NY-NJ-CT-PA Combined Statistical Area | CITY: New York City, NY | minLatLong: [40.692,-73.986] | maxLatLong: [40.992,-73.686] | DATE RANGE: 2015-04-01 - 2015

     **************************************************
Number of results found: 28156
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.02.39.56
CSA: New York-Newark, NY-NJ-CT-PA Combined Statistical Area | CITY: New York City, NY | minLatLong: [40.692,-73.986] | maxLatLong: [40.992,-73.686] | DATE RANGE: 2018-04-01 - 2018-06-30
ATTEMPT:14 | ATTEMPT PER BATCH/API: 14 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 25649
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.02.41.29
CS

     **************************************************
Number of results found: 3401
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.02.54.38
CSA: Los Angeles-Long Beach, CA Combined Statistical Area | CITY: Los Angeles, CA | minLatLong: [33.779,-118.361] | maxLatLong: [34.079,-118.061] | DATE RANGE: 2016-01-01 - 2016-03-31
ATTEMPT:27 | ATTEMPT PER BATCH/API: 27 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 3389
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.02.55.13
CSA: L

     **************************************************
Number of results found: 12680
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.04.25
CSA: Los Angeles-Long Beach, CA Combined Statistical Area | CITY: Los Angeles, CA | minLatLong: [33.779,-118.361] | maxLatLong: [34.079,-118.061] | DATE RANGE: 2019-04-01 - 2019-06-30
ATTEMPT:40 | ATTEMPT PER BATCH/API: 40 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 12950
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.05.14
CSA:

     **************************************************
Number of results found: 7686
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.13.29
CSA: Chicago-Naperville, IL-IN-WI Combined Statistical Area | CITY: Chicago, IL | minLatLong: [41.814,-87.809] | maxLatLong: [42.114,-87.509] | DATE RANGE: 2017-01-01 - 2017-03-31
ATTEMPT:53 | ATTEMPT PER BATCH/API: 53 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 8236
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.14.07
CSA: Chica

     **************************************************
Number of results found: 4303
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.21.11
CSA: Chicago-Naperville, IL-IN-WI Combined Statistical Area | CITY: Chicago, IL | minLatLong: [41.814,-87.809] | maxLatLong: [42.114,-87.509] | DATE RANGE: 2020-04-01 - 2020-06-30
ATTEMPT:66 | ATTEMPT PER BATCH/API: 66 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 1639
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.21.21
CSA: Washi

     **************************************************
Number of results found: 6194
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.24.50
CSA: Washington-Baltimore-Arlington, DC-MD-VA-WV-PA Combined Statistical Area | CITY: Washington, DC | minLatLong: [38.869,-77.063] | maxLatLong: [38.969,-76.963] | DATE RANGE: 2018-01-01 - 2018-03-31
ATTEMPT:79 | ATTEMPT PER BATCH/API: 79 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 5488
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-1

     **************************************************
Number of results found: 8802
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.31.46
CSA: San Jose-San Francisco-Oakland, CA Combined Statistical Area | CITY: San Francisco, CA | minLatLong: [37.6,-122.58] | maxLatLong: [37.9,-122.28] | DATE RANGE: 2015-10-01 - 2015-12-31
ATTEMPT:92 | ATTEMPT PER BATCH/API: 92 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 8556
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.32.36
CS

     **************************************************
Number of results found: 12897
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.41.36
CSA: San Jose-San Francisco-Oakland, CA Combined Statistical Area | CITY: San Francisco, CA | minLatLong: [37.6,-122.58] | maxLatLong: [37.9,-122.28] | DATE RANGE: 2019-01-01 - 2019-03-31
ATTEMPT:105 | ATTEMPT PER BATCH/API: 105 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 12717
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.42.2

     **************************************************
Number of results found: 12548
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.51.21
CSA: Boston-Worcester-Providence, MA-RI-NH-CT Combined Statistical Area | CITY: Boston, MA | minLatLong: [42.201,-71.201] | maxLatLong: [42.501,-70.901] | DATE RANGE: 2016-10-01 - 2016-12-31
ATTEMPT:118 | ATTEMPT PER BATCH/API: 118 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 12895
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.03.5

     **************************************************
Number of results found: 12989
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04.02.12
CSA: Boston-Worcester-Providence, MA-RI-NH-CT Combined Statistical Area | CITY: Boston, MA | minLatLong: [42.201,-71.201] | maxLatLong: [42.501,-70.901] | DATE RANGE: 2020-01-01 - 2020-03-31
ATTEMPT:131 | ATTEMPT PER BATCH/API: 131 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 14347
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04.0

     **************************************************
Number of results found: 4166
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04.07.41
CSA: Dallas-Fort Worth, TX-OK Combined Statistical Area | CITY: Dallas, TX | minLatLong: [32.617,-96.933] | maxLatLong: [32.917,-96.633] | DATE RANGE: 2017-10-01 - 2017-12-31
ATTEMPT:144 | ATTEMPT PER BATCH/API: 144 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 6456
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04.08.06
CSA: Dallas-F

     **************************************************
Number of results found: 12791
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04.13.24
CSA: Philadelphia-Reading-Camden, PA-NJ-DE-MD Combined Statistical Area | CITY: Philadelphia, PA | minLatLong: [39.8,-75.301] | maxLatLong: [40.1,-75.001] | DATE RANGE: 2015-07-01 - 2015-09-30
ATTEMPT:157 | ATTEMPT PER BATCH/API: 157 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 13133
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04

     **************************************************
Number of results found: 16533
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04.26.00
CSA: Philadelphia-Reading-Camden, PA-NJ-DE-MD Combined Statistical Area | CITY: Philadelphia, PA | minLatLong: [39.8,-75.301] | maxLatLong: [40.1,-75.001] | DATE RANGE: 2018-10-01 - 2018-12-31
ATTEMPT:170 | ATTEMPT PER BATCH/API: 170 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 14818
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04

     **************************************************
Number of results found: 19059
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04.41.00
CSA: Houston-The Woodlands, TX Combined Statistical Area | CITY: Houston, TX | minLatLong: [29.601,-95.501] | maxLatLong: [29.901,-95.201] | DATE RANGE: 2016-07-01 - 2016-09-30
ATTEMPT:183 | ATTEMPT PER BATCH/API: 183 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 19559
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04.42.13
CSA: Hous

     **************************************************
Number of results found: 21878
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04.57.54
CSA: Houston-The Woodlands, TX Combined Statistical Area | CITY: Houston, TX | minLatLong: [29.601,-95.501] | maxLatLong: [29.901,-95.201] | DATE RANGE: 2019-10-01 - 2019-12-31
ATTEMPT:196 | ATTEMPT PER BATCH/API: 196 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 21376
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.04.59.16
CSA: Hous

     **************************************************
Number of results found: 2198
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.03.36
CSA: Miami-Port St. Lucie-Fort Lauderdale, FL Combined Statistical Area | CITY: Miami, FL | minLatLong: [25.626,-80.361] | maxLatLong: [25.926,-80.061] | DATE RANGE: 2017-07-01 - 2017-09-30
ATTEMPT:209 | ATTEMPT PER BATCH/API: 209 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 3743
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.03.5

     **************************************************
Number of results found: 866
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.06.00
CSA: Atlanta–Athens-Clarke County–Sandy Springs, GA-AL Combined Statistical Area | CITY: Atlanta, GA | minLatLong: [33.5,-84.58] | maxLatLong: [33.8,-84.28] | DATE RANGE: 2015-04-01 - 2015-06-30
ATTEMPT:222 | ATTEMPT PER BATCH/API: 222 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 2184
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.

     **************************************************
Number of results found: 4360
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.09.05
CSA: Atlanta–Athens-Clarke County–Sandy Springs, GA-AL Combined Statistical Area | CITY: Atlanta, GA | minLatLong: [33.5,-84.58] | maxLatLong: [33.8,-84.28] | DATE RANGE: 2018-07-01 - 2018-09-30
ATTEMPT:235 | ATTEMPT PER BATCH/API: 235 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 4368
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05

     **************************************************
Number of results found: 4227
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.13.38
CSA: Detroit-Warren-Ann Arbor, MI Combined Statistical Area | CITY: Detroit, MI | minLatLong: [42.083,-83.483] | maxLatLong: [42.383,-83.183] | DATE RANGE: 2016-04-01 - 2016-06-30
ATTEMPT:248 | ATTEMPT PER BATCH/API: 248 | BATCH/API: 1 | API Key Used: airNowApiKey1
     **************************************************
     **************************************************
Number of results found: 4352
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.13.56
CSA: Det

     **************************************************
Number of results found: 4332
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.17.31
CSA: Detroit-Warren-Ann Arbor, MI Combined Statistical Area | CITY: Detroit, MI | minLatLong: [42.083,-83.483] | maxLatLong: [42.383,-83.183] | DATE RANGE: 2019-07-01 - 2019-09-30
ATTEMPT:261 | ATTEMPT PER BATCH/API: 11 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 4331
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.17.48
CSA: Detr

     **************************************************
Number of results found: 12881
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.26.00
CSA: Phoenix-Mesa, AZ Combined Statistical Area | CITY: Phoenix, AZ | minLatLong: [33.493,-112.121] | maxLatLong: [33.593,-112.021] | DATE RANGE: 2017-04-01 - 2017-06-30
ATTEMPT:274 | ATTEMPT PER BATCH/API: 24 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 12642
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.26.48
CSA: Phoenix-Mesa

     **************************************************
Number of results found: 2297
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.36.06
CSA: Seattle-Tacoma, WA Combined Statistical Area | CITY: Seattle, WA | minLatLong: [47.412,-122.491] | maxLatLong: [47.712,-122.191] | DATE RANGE: 2015-01-01 - 2015-03-31
ATTEMPT:287 | ATTEMPT PER BATCH/API: 37 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 4115
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.36.23
CSA: Seattle-Taco

     **************************************************
Number of results found: 6299
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.40.19
CSA: Seattle-Tacoma, WA Combined Statistical Area | CITY: Seattle, WA | minLatLong: [47.412,-122.491] | maxLatLong: [47.712,-122.191] | DATE RANGE: 2018-04-01 - 2018-06-30
ATTEMPT:300 | ATTEMPT PER BATCH/API: 50 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 7067
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.40.47
CSA: Seattle-Taco

     **************************************************
Number of results found: 4383
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.46.29
CSA: Orlando-Lakeland-Deltona, FL Combined Statistical Area | CITY: Orlando, FL | minLatLong: [28.355,-81.524] | maxLatLong: [28.655,-81.224] | DATE RANGE: 2016-01-01 - 2016-03-31
ATTEMPT:313 | ATTEMPT PER BATCH/API: 63 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 4083
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.46.44
CSA: Orla

     **************************************************
Number of results found: 4290
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.49.51
CSA: Orlando-Lakeland-Deltona, FL Combined Statistical Area | CITY: Orlando, FL | minLatLong: [28.355,-81.524] | maxLatLong: [28.655,-81.224] | DATE RANGE: 2019-04-01 - 2019-06-30
ATTEMPT:326 | ATTEMPT PER BATCH/API: 76 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 4338
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.50.07
CSA: Orla

     **************************************************
Number of results found: 15252
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.58.17
CSA: Minneapolis-St. Paul, MN-WI Combined Statistical Area | CITY: Minneapolis, MN | minLatLong: [44.805,-93.335] | maxLatLong: [45.105,-93.035] | DATE RANGE: 2017-01-01 - 2017-03-31
ATTEMPT:339 | ATTEMPT PER BATCH/API: 89 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 14826
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.05.59.11
CSA:

     **************************************************
Number of results found: 15964
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.11.23
CSA: Minneapolis-St. Paul, MN-WI Combined Statistical Area | CITY: Minneapolis, MN | minLatLong: [44.805,-93.335] | maxLatLong: [45.105,-93.035] | DATE RANGE: 2020-04-01 - 2020-06-30
ATTEMPT:352 | ATTEMPT PER BATCH/API: 102 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 3166
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.11.36
CSA:

     **************************************************
Number of results found: 3011
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.14.41
CSA: Cleveland-Akron-Canton, OH Combined Statistical Area | CITY: Cleveland, OH | minLatLong: [40.931,-81.669] | maxLatLong: [41.231,-81.369] | DATE RANGE: 2018-01-01 - 2018-03-31
ATTEMPT:365 | ATTEMPT PER BATCH/API: 115 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 2915
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.14.58
CSA: Cle

     **************************************************
Number of results found: 14723
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.19.58
CSA: Denver-Aurora, CO Combined Statistical Area | CITY: Denver, CO | minLatLong: [39.618,-105.023] | maxLatLong: [39.918,-104.723] | DATE RANGE: 2015-10-01 - 2015-12-31
ATTEMPT:378 | ATTEMPT PER BATCH/API: 128 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 16797
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.21.01
CSA: Denver-Auro

     **************************************************
Number of results found: 21546
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.35.57
CSA: Denver-Aurora, CO Combined Statistical Area | CITY: Denver, CO | minLatLong: [39.618,-105.023] | maxLatLong: [39.918,-104.723] | DATE RANGE: 2019-01-01 - 2019-03-31
ATTEMPT:391 | ATTEMPT PER BATCH/API: 141 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 21101
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.37.16
CSA: Denver-Auro

     **************************************************
Number of results found: 9645
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.47.01
CSA: Portland-Vancouver-Salem, OR-WA Combined Statistical Area | CITY: Portland, OR | minLatLong: [45.388,-122.806] | maxLatLong: [45.688,-122.506] | DATE RANGE: 2016-10-01 - 2016-12-31
ATTEMPT:404 | ATTEMPT PER BATCH/API: 154 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 8979
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.47.38
CS

     **************************************************
Number of results found: 11372
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.56.57
CSA: Portland-Vancouver-Salem, OR-WA Combined Statistical Area | CITY: Portland, OR | minLatLong: [45.388,-122.806] | maxLatLong: [45.688,-122.506] | DATE RANGE: 2020-01-01 - 2020-03-31
ATTEMPT:417 | ATTEMPT PER BATCH/API: 167 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 10851
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.06.57.45


     **************************************************
Number of results found: 6567
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.04.24
CSA: St. Louis-St. Charles-Farmington, MO-IL Combined Statistical Area | CITY: St. Louis, MO | minLatLong: [38.6,-90.533] | maxLatLong: [38.9,-90.233] | DATE RANGE: 2017-10-01 - 2017-12-31
ATTEMPT:430 | ATTEMPT PER BATCH/API: 180 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 3925
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.04.53

     **************************************************
Number of results found: 4333
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.10.00
CSA: Charlotte-Concord, NC-SC Combined Statistical Area | CITY: Charlotte, NC | minLatLong: [35.077,-80.993] | maxLatLong: [35.377,-80.693] | DATE RANGE: 2015-07-01 - 2015-09-30
ATTEMPT:443 | ATTEMPT PER BATCH/API: 193 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 4375
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.10.31
CSA: Charl

     **************************************************
Number of results found: 4115
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.15.08
CSA: Charlotte-Concord, NC-SC Combined Statistical Area | CITY: Charlotte, NC | minLatLong: [35.077,-80.993] | maxLatLong: [35.377,-80.693] | DATE RANGE: 2018-10-01 - 2018-12-31
ATTEMPT:456 | ATTEMPT PER BATCH/API: 206 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 4400
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.15.30
CSA: Charl

     **************************************************
Number of results found: 6576
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.20.54
CSA: Sacramento-Roseville, CA Combined Statistical Area | CITY: Sacramento, CA | minLatLong: [38.517,-121.517] | maxLatLong: [38.617,-121.417] | DATE RANGE: 2016-07-01 - 2016-09-30
ATTEMPT:469 | ATTEMPT PER BATCH/API: 219 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 10705
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.21.41
CSA: S

     **************************************************
Number of results found: 10610
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.29.15
CSA: Sacramento-Roseville, CA Combined Statistical Area | CITY: Sacramento, CA | minLatLong: [38.517,-121.517] | maxLatLong: [38.617,-121.417] | DATE RANGE: 2019-10-01 - 2019-12-31
ATTEMPT:482 | ATTEMPT PER BATCH/API: 232 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 12042
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.30.11
CSA: 

     **************************************************
Number of results found: 8599
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.35.34
CSA: Pittsburgh-New Castle-Weirton, PA-OH-WV Combined Statistical Area | CITY: Pittsburgh, PA | minLatLong: [40.284,-80.134] | maxLatLong: [40.584,-79.834] | DATE RANGE: 2017-07-01 - 2017-09-30
ATTEMPT:495 | ATTEMPT PER BATCH/API: 245 | BATCH/API: 2 | API Key Used: airNowApiKey2
     **************************************************
     **************************************************
Number of results found: 8811
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.

     **************************************************
Number of results found: 4312
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.42.00
CSA: Salt Lake City-Provo-Orem, UT Combined Statistical Area | CITY: Salt Lake City, UT | minLatLong: [40.627,-112.08] | maxLatLong: [40.927,-111.78] | DATE RANGE: 2015-04-01 - 2015-06-30
ATTEMPT:508 | ATTEMPT PER BATCH/API: 8 | BATCH/API: 3 | API Key Used: airNowApiKey3
     **************************************************
     **************************************************
Number of results found: 4188
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.42.35
CS

     **************************************************
Number of results found: 8477
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.52.29
CSA: Salt Lake City-Provo-Orem, UT Combined Statistical Area | CITY: Salt Lake City, UT | minLatLong: [40.627,-112.08] | maxLatLong: [40.927,-111.78] | DATE RANGE: 2018-07-01 - 2018-09-30
ATTEMPT:521 | ATTEMPT PER BATCH/API: 21 | BATCH/API: 3 | API Key Used: airNowApiKey3
     **************************************************
     **************************************************
Number of results found: 8939
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.07.53.21
C

     **************************************************
Number of results found: 6481
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.08.02.01
CSA: San Antonio-New Braunfels-Pearsall, TX Combined Statistical Area | CITY: San Antonio, TX | minLatLong: [29.267,-98.634] | maxLatLong: [29.567,-98.334] | DATE RANGE: 2016-04-01 - 2016-06-30
ATTEMPT:534 | ATTEMPT PER BATCH/API: 34 | BATCH/API: 3 | API Key Used: airNowApiKey3
     **************************************************
     **************************************************
Number of results found: 6436
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.08.0

     **************************************************
Number of results found: 10777
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.08.08.41
CSA: San Antonio-New Braunfels-Pearsall, TX Combined Statistical Area | CITY: San Antonio, TX | minLatLong: [29.267,-98.634] | maxLatLong: [29.567,-98.334] | DATE RANGE: 2019-07-01 - 2019-09-30
ATTEMPT:547 | ATTEMPT PER BATCH/API: 47 | BATCH/API: 3 | API Key Used: airNowApiKey3
     **************************************************
     **************************************************
Number of results found: 10981
Response Code: <Response [200]>
     **************************************************
--------------------------------------------------
Requesting AirNow API Data for...
     **************************************************
2020-04-17.08

#### Check the number of records in the apiData list

In [8]:
# Count the records accumulated in the apiData list
# (uncomment the next line to display the records themselves)
# apiData
len(apiData)

5045520

#### Loop through the results and keep only the records for a chosen datetime (noon local time for each city), checking whether the date falls within a Standard Time (ST) or Daylight Savings Time (DST) period and then using the correct UTC offset

In [9]:
# Keep only the observations taken at the desired local time (12:00 noon) for each city.
# Every record carries its CSA's Standard Time and Daylight Savings Time UTC offsets; the one
# that applies on the record's date converts "noon local" into the UTC stamp to match against.

# Local time of day (HH:MM) whose observation is kept for every city
desiredLocalTime = "12:00"

# Layout of the "UTC" field in each record (e.g. "2015-01-01T17:00").
# The previous format string used "%:" which is not a valid strftime directive and only
# produced ":" by accident on some platforms; a literal ":" is portable.
obsDateTimeFormat = "%Y-%m-%dT%H:%M"

# Build the DST lookup once so the membership test does not rescan the list for every record.
# NOTE(review): assumes dstDateList holds "YYYY-MM-DD" strings (it is compared against one below) - confirm.
dstDateSet = set(dstDateList)


def _utcOffsetToTimedelta(utcOffset):
    """Convert a UTC offset string such as "-05:00" into a signed timedelta.

    The sign, hour and (optional) minute fields are read separately, so offsets
    whose digits contain a zero (e.g. "-10:00" or "+00:00") are parsed correctly.
    """
    text = utcOffset.strip()
    sign = -1 if text.startswith("-") else 1
    hoursText, _, minutesText = text.lstrip("+-").partition(":")
    return sign * timedelta(hours=int(hoursText), minutes=int(minutesText or 0))


# Set an empty list to hold the data being passed through
selectedDateTimeApiData = []

# Loop through the apiData
for record in apiData:

    # Isolate just the date of the "UTC" datetime stamp string
    recordDate = pd.Timestamp(record["UTC"]).strftime("%Y-%m-%d")

    # Create variable for the city's local datetime we want to use
    desiredLocalDateTime = pd.Timestamp(f"{recordDate}T{desiredLocalTime}")

    # Pick the offset that applies on this date: DST when the date is in a DST period, ST otherwise
    if recordDate in dstDateSet:
        stDstLabel = "DaylightSavings"
        utcOffset = _utcOffsetToTimedelta(record["csaDaylightSavingsTimeUtcOffset"])
    else:
        stDstLabel = "Standard"
        utcOffset = _utcOffsetToTimedelta(record["csaStandardTimeUtcOffset"])

    # UTC = local time - UTC offset (e.g. 12:00 at -05:00 is 17:00 UTC)
    desiredUTCDateTimeStr = (desiredLocalDateTime - utcOffset).strftime(obsDateTimeFormat)

    # Ignore the record unless its "UTC" stamp is exactly the desired local time
    if record["UTC"] != desiredUTCDateTimeStr:
        continue

    # Tag the record with the local observation fields and keep it.
    # The record dict is updated in place, so the same object inside apiData gains these keys too.
    record["st_dst"] = stDstLabel
    record["obsDateTime"] = desiredLocalDateTime.strftime(obsDateTimeFormat)
    record["dateObserved"] = desiredLocalDateTime.strftime("%Y-%m-%d")
    record["timeObserved"] = desiredLocalDateTime.strftime("%H:%M")
    selectedDateTimeApiData.append(record)

# See how many records/result are included
# selectedDateTimeApiData
len(selectedDateTimeApiData)

209215

#### Create DataFrame holding the values from the selectedDateTimeApiData list

In [10]:
# Build a DataFrame from the filtered records held in selectedDateTimeApiData
selectedDateTimeApiData_df = pd.DataFrame(selectedDateTimeApiData)

# Visualize the DataFrame
selectedDateTimeApiData_df

Unnamed: 0,Latitude,Longitude,UTC,Parameter,Unit,AQI,Category,SiteName,AgencyName,FullAQSCode,...,csaStandardTimeUtcOffset,csaDaylightSavingsTimeUtcOffset,csaMonitoringStationLat,csaMonitoringStationLong,csaSearchRadius,csaBboxVar,st_dst,obsDateTime,dateObserved,timeObserved
0,40.853550,-73.966100,2015-01-01T17:00,PM2.5,UG/M3,51,2,Fort Lee Near Road,New Jersey Dept. of Environmental Protection,340030010,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
1,40.819700,-73.948100,2015-01-01T17:00,PM2.5,UG/M3,50,1,CCNY,New York Dept. of Environmental Conservation,360610135,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
2,40.819700,-73.948100,2015-01-01T17:00,OZONE,PPB,21,1,CCNY,New York Dept. of Environmental Conservation,360610135,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
3,40.849200,-73.931900,2015-01-01T17:00,PM2.5,UG/M3,63,2,Manhattan/IS143,New York Dept. of Environmental Conservation,360610115,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
4,40.694401,-73.928596,2015-01-01T17:00,PM2.5,UG/M3,44,1,Bklyn - PS274,New York Dept. of Environmental Conservation,360470118,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209210,29.514999,-98.620003,2020-04-16T17:00,OZONE,PPB,45,1,San Antonio Northwest C23,Texas Commission on Environmental Quality,480290032,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209211,29.514999,-98.620003,2020-04-16T17:00,PM2.5,UG/M3,36,1,San Antonio Northwest C23,Texas Commission on Environmental Quality,480290032,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209212,29.423933,-98.580505,2020-04-16T17:00,PM2.5,UG/M3,38,1,San Antonio Old Hwy90 C677,Texas Commission on Environmental Quality,480290677,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209213,29.332803,-98.551396,2020-04-16T17:00,PM2.5,UG/M3,27,1,San Antonio Palo Alto C676,Texas Commission on Environmental Quality,480290676,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00


#### Convert & export raw DataFrame to CSV files (in the event we need to update/manipulate later from this point)

In [13]:
# Stamp the export with the current date and time (YYYYMMDD_HHMM) so the file name records when it was written
timestamp = datetime.now().strftime("%Y%m%d_%H%M")

# Write the selected records to a UTF-8 CSV in the output folder, leaving out the DataFrame index
rawDataCsvPath = f"{output_data_filepath}AirNowApiRawData_{timestamp}.csv"
selectedDateTimeApiData_df.to_csv(rawDataCsvPath, index=False, encoding="utf-8")


In [14]:
# Start the cleaning steps from the selected records.
# NOTE(review): plain assignment binds a second name to the same DataFrame object (no copy is made),
# so an in-place change made through either name is visible through both - use .copy() if an
# independent frame is intended.
cleanedApiData_df = selectedDateTimeApiData_df

# Visualize the DataFrame
cleanedApiData_df

Unnamed: 0,Latitude,Longitude,UTC,Parameter,Unit,AQI,Category,SiteName,AgencyName,FullAQSCode,...,csaStandardTimeUtcOffset,csaDaylightSavingsTimeUtcOffset,csaMonitoringStationLat,csaMonitoringStationLong,csaSearchRadius,csaBboxVar,st_dst,obsDateTime,dateObserved,timeObserved
0,40.853550,-73.966100,2015-01-01T17:00,PM2.5,UG/M3,51,2,Fort Lee Near Road,New Jersey Dept. of Environmental Protection,340030010,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
1,40.819700,-73.948100,2015-01-01T17:00,PM2.5,UG/M3,50,1,CCNY,New York Dept. of Environmental Conservation,360610135,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
2,40.819700,-73.948100,2015-01-01T17:00,OZONE,PPB,21,1,CCNY,New York Dept. of Environmental Conservation,360610135,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
3,40.849200,-73.931900,2015-01-01T17:00,PM2.5,UG/M3,63,2,Manhattan/IS143,New York Dept. of Environmental Conservation,360610115,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
4,40.694401,-73.928596,2015-01-01T17:00,PM2.5,UG/M3,44,1,Bklyn - PS274,New York Dept. of Environmental Conservation,360470118,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209210,29.514999,-98.620003,2020-04-16T17:00,OZONE,PPB,45,1,San Antonio Northwest C23,Texas Commission on Environmental Quality,480290032,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209211,29.514999,-98.620003,2020-04-16T17:00,PM2.5,UG/M3,36,1,San Antonio Northwest C23,Texas Commission on Environmental Quality,480290032,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209212,29.423933,-98.580505,2020-04-16T17:00,PM2.5,UG/M3,38,1,San Antonio Old Hwy90 C677,Texas Commission on Environmental Quality,480290677,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209213,29.332803,-98.551396,2020-04-16T17:00,PM2.5,UG/M3,27,1,San Antonio Palo Alto C676,Texas Commission on Environmental Quality,480290676,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00


#### Drop reporting results that have Category of 0 (AQI = -999; null/bad values)

In [15]:
# Keep only the rows whose Category is not 0
# (per the section heading, Category 0 marks null/bad readings reported as AQI -999)
cleanedApiData_df = cleanedApiData_df.loc[cleanedApiData_df["Category"] != 0]

# Visualize the DataFrame
cleanedApiData_df

Unnamed: 0,Latitude,Longitude,UTC,Parameter,Unit,AQI,Category,SiteName,AgencyName,FullAQSCode,...,csaStandardTimeUtcOffset,csaDaylightSavingsTimeUtcOffset,csaMonitoringStationLat,csaMonitoringStationLong,csaSearchRadius,csaBboxVar,st_dst,obsDateTime,dateObserved,timeObserved
0,40.853550,-73.966100,2015-01-01T17:00,PM2.5,UG/M3,51,2,Fort Lee Near Road,New Jersey Dept. of Environmental Protection,340030010,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
1,40.819700,-73.948100,2015-01-01T17:00,PM2.5,UG/M3,50,1,CCNY,New York Dept. of Environmental Conservation,360610135,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
2,40.819700,-73.948100,2015-01-01T17:00,OZONE,PPB,21,1,CCNY,New York Dept. of Environmental Conservation,360610135,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
3,40.849200,-73.931900,2015-01-01T17:00,PM2.5,UG/M3,63,2,Manhattan/IS143,New York Dept. of Environmental Conservation,360610115,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
4,40.694401,-73.928596,2015-01-01T17:00,PM2.5,UG/M3,44,1,Bklyn - PS274,New York Dept. of Environmental Conservation,360470118,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209210,29.514999,-98.620003,2020-04-16T17:00,OZONE,PPB,45,1,San Antonio Northwest C23,Texas Commission on Environmental Quality,480290032,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209211,29.514999,-98.620003,2020-04-16T17:00,PM2.5,UG/M3,36,1,San Antonio Northwest C23,Texas Commission on Environmental Quality,480290032,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209212,29.423933,-98.580505,2020-04-16T17:00,PM2.5,UG/M3,38,1,San Antonio Old Hwy90 C677,Texas Commission on Environmental Quality,480290677,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209213,29.332803,-98.551396,2020-04-16T17:00,PM2.5,UG/M3,27,1,San Antonio Palo Alto C676,Texas Commission on Environmental Quality,480290676,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00


In [16]:
# Show the column labels currently in the DataFrame, in order
cleanedApiData_df.columns.tolist()

['Latitude',
 'Longitude',
 'UTC',
 'Parameter',
 'Unit',
 'AQI',
 'Category',
 'SiteName',
 'AgencyName',
 'FullAQSCode',
 'IntlAQSCode',
 'csaRank',
 'csaName',
 'csaPrimaryCity',
 'csaPrimaryCityState',
 'csaPopulation2018Estimate',
 'csaPopulation2010Census',
 'csaPrimaryCityLat',
 'csaPrimaryCityLong',
 'csaPrimaryCityZip',
 'csaTimeZone',
 'csaStandardTimeUtcOffset',
 'csaDaylightSavingsTimeUtcOffset',
 'csaMonitoringStationLat',
 'csaMonitoringStationLong',
 'csaSearchRadius',
 'csaBboxVar',
 'st_dst',
 'obsDateTime',
 'dateObserved',
 'timeObserved']

#### Rename Columns (for relevancy downstream)

In [17]:
# Give the API columns more descriptive names; columns not listed keep their name
cleanedApiData_df = cleanedApiData_df.rename(
    columns={
        "Latitude": "SiteLatitude",
        "Longitude": "SiteLongitude",
        "UTC": "DateTimeObservedUTC",
        "Parameter": "ParameterName",
        "Category": "AQICategoryNumber",
        "AgencyName": "SiteAgencyName",
        "FullAQSCode": "SiteAQSCode",
        "IntlAQSCode": "SiteIntlAQSCode",
        "st_dst": "TimeMode",
        "obsDateTime": "DateTimeObserved",
        "dateObserved": "DateObserved",
        "timeObserved": "TimeObserved",
    }
)

# Visualize the updated DataFrame
cleanedApiData_df

Unnamed: 0,SiteLatitude,SiteLongitude,DateTimeObservedUTC,ParameterName,Unit,AQI,AQICategoryNumber,SiteName,SiteAgencyName,SiteAQSCode,...,csaStandardTimeUtcOffset,csaDaylightSavingsTimeUtcOffset,csaMonitoringStationLat,csaMonitoringStationLong,csaSearchRadius,csaBboxVar,TimeMode,DateTimeObserved,DateObserved,TimeObserved
0,40.853550,-73.966100,2015-01-01T17:00,PM2.5,UG/M3,51,2,Fort Lee Near Road,New Jersey Dept. of Environmental Protection,340030010,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
1,40.819700,-73.948100,2015-01-01T17:00,PM2.5,UG/M3,50,1,CCNY,New York Dept. of Environmental Conservation,360610135,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
2,40.819700,-73.948100,2015-01-01T17:00,OZONE,PPB,21,1,CCNY,New York Dept. of Environmental Conservation,360610135,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
3,40.849200,-73.931900,2015-01-01T17:00,PM2.5,UG/M3,63,2,Manhattan/IS143,New York Dept. of Environmental Conservation,360610115,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
4,40.694401,-73.928596,2015-01-01T17:00,PM2.5,UG/M3,44,1,Bklyn - PS274,New York Dept. of Environmental Conservation,360470118,...,-05:00,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209210,29.514999,-98.620003,2020-04-16T17:00,OZONE,PPB,45,1,San Antonio Northwest C23,Texas Commission on Environmental Quality,480290032,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209211,29.514999,-98.620003,2020-04-16T17:00,PM2.5,UG/M3,36,1,San Antonio Northwest C23,Texas Commission on Environmental Quality,480290032,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209212,29.423933,-98.580505,2020-04-16T17:00,PM2.5,UG/M3,38,1,San Antonio Old Hwy90 C677,Texas Commission on Environmental Quality,480290677,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00
209213,29.332803,-98.551396,2020-04-16T17:00,PM2.5,UG/M3,27,1,San Antonio Palo Alto C676,Texas Commission on Environmental Quality,480290676,...,-06:00,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00


#### Add AQICategory (Good, Moderate, Unhealthy for Sensitive Groups, Unhealthy, Very Unhealthy, Hazardous; based on AQICategoryNumber)

In [18]:
# Create function to determine AQICategory value based on AQICategoryNumber
def fAQICategory(row):
    """Translate a row's AQICategoryNumber into its descriptive AQI category label.

    Args:
        row: Mapping-like object (for example a DataFrame row) that provides
            the key "AQICategoryNumber".

    Returns:
        The label for category numbers 1 through 6 ("Good" ... "Hazardous"),
        or "(invalid)" for any other value.
    """
    categoryLabels = (
        (1, "Good"),
        (2, "Moderate"),
        (3, "Unhealthy for Sensitive Groups"),
        (4, "Unhealthy"),
        (5, "Very Unhealthy"),
        (6, "Hazardous"),
    )

    categoryNumber = row["AQICategoryNumber"]

    # Compare with == (rather than a dict lookup) so equal values of other
    # numeric types, such as 2.0, resolve to the same label
    for number, label in categoryLabels:
        if categoryNumber == number:
            return label

    return "(invalid)"

# Build the AQICategory column by passing every row to fAQICategory
cleanedApiData_df["AQICategory"] = cleanedApiData_df.apply(fAQICategory, axis="columns")

# Visualize the DataFrame
cleanedApiData_df

Unnamed: 0,SiteLatitude,SiteLongitude,DateTimeObservedUTC,ParameterName,Unit,AQI,AQICategoryNumber,SiteName,SiteAgencyName,SiteAQSCode,...,csaDaylightSavingsTimeUtcOffset,csaMonitoringStationLat,csaMonitoringStationLong,csaSearchRadius,csaBboxVar,TimeMode,DateTimeObserved,DateObserved,TimeObserved,AQICategory
0,40.853550,-73.966100,2015-01-01T17:00,PM2.5,UG/M3,51,2,Fort Lee Near Road,New Jersey Dept. of Environmental Protection,340030010,...,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00,Moderate
1,40.819700,-73.948100,2015-01-01T17:00,PM2.5,UG/M3,50,1,CCNY,New York Dept. of Environmental Conservation,360610135,...,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00,Good
2,40.819700,-73.948100,2015-01-01T17:00,OZONE,PPB,21,1,CCNY,New York Dept. of Environmental Conservation,360610135,...,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00,Good
3,40.849200,-73.931900,2015-01-01T17:00,PM2.5,UG/M3,63,2,Manhattan/IS143,New York Dept. of Environmental Conservation,360610115,...,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00,Moderate
4,40.694401,-73.928596,2015-01-01T17:00,PM2.5,UG/M3,44,1,Bklyn - PS274,New York Dept. of Environmental Conservation,360470118,...,-04:00,40.8419,-73.8359,25,0.15,Standard,2015-01-01T12:00,2015-01-01,12:00,Good
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209210,29.514999,-98.620003,2020-04-16T17:00,OZONE,PPB,45,1,San Antonio Northwest C23,Texas Commission on Environmental Quality,480290032,...,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00,Good
209211,29.514999,-98.620003,2020-04-16T17:00,PM2.5,UG/M3,36,1,San Antonio Northwest C23,Texas Commission on Environmental Quality,480290032,...,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00,Good
209212,29.423933,-98.580505,2020-04-16T17:00,PM2.5,UG/M3,38,1,San Antonio Old Hwy90 C677,Texas Commission on Environmental Quality,480290677,...,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00,Good
209213,29.332803,-98.551396,2020-04-16T17:00,PM2.5,UG/M3,27,1,San Antonio Palo Alto C676,Texas Commission on Environmental Quality,480290676,...,-05:00,29.4170,-98.4840,25,0.15,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00,Good


In [19]:
# Show the column labels currently in the DataFrame, in order
cleanedApiData_df.columns.tolist()

['SiteLatitude',
 'SiteLongitude',
 'DateTimeObservedUTC',
 'ParameterName',
 'Unit',
 'AQI',
 'AQICategoryNumber',
 'SiteName',
 'SiteAgencyName',
 'SiteAQSCode',
 'SiteIntlAQSCode',
 'csaRank',
 'csaName',
 'csaPrimaryCity',
 'csaPrimaryCityState',
 'csaPopulation2018Estimate',
 'csaPopulation2010Census',
 'csaPrimaryCityLat',
 'csaPrimaryCityLong',
 'csaPrimaryCityZip',
 'csaTimeZone',
 'csaStandardTimeUtcOffset',
 'csaDaylightSavingsTimeUtcOffset',
 'csaMonitoringStationLat',
 'csaMonitoringStationLong',
 'csaSearchRadius',
 'csaBboxVar',
 'TimeMode',
 'DateTimeObserved',
 'DateObserved',
 'TimeObserved',
 'AQICategory']

#### Drop unneeded columns | Reorder columns (for efficiency downstream)

In [20]:
# Keep only the listed columns, in this order; every other column is dropped
cleanedApiData_df = cleanedApiData_df[
    [
        "csaRank",
        "csaName",
        "csaPrimaryCity",
        "csaPrimaryCityState",
        "csaPrimaryCityLat",
        "csaPrimaryCityLong",
        "csaTimeZone",
        "TimeMode",
        "DateTimeObserved",
        "DateObserved",
        "TimeObserved",
        "DateTimeObservedUTC",
        "ParameterName",
        "Unit",
        "AQI",
        "AQICategoryNumber",
        "AQICategory",
        "SiteName",
        "SiteAgencyName",
    ]
]

# Visualize the updated DataFrame
cleanedApiData_df

Unnamed: 0,csaRank,csaName,csaPrimaryCity,csaPrimaryCityState,csaPrimaryCityLat,csaPrimaryCityLong,csaTimeZone,TimeMode,DateTimeObserved,DateObserved,TimeObserved,DateTimeObservedUTC,ParameterName,Unit,AQI,AQICategoryNumber,AQICategory,SiteName,SiteAgencyName
0,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-01T12:00,2015-01-01,12:00,2015-01-01T17:00,PM2.5,UG/M3,51,2,Moderate,Fort Lee Near Road,New Jersey Dept. of Environmental Protection
1,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-01T12:00,2015-01-01,12:00,2015-01-01T17:00,PM2.5,UG/M3,50,1,Good,CCNY,New York Dept. of Environmental Conservation
2,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-01T12:00,2015-01-01,12:00,2015-01-01T17:00,OZONE,PPB,21,1,Good,CCNY,New York Dept. of Environmental Conservation
3,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-01T12:00,2015-01-01,12:00,2015-01-01T17:00,PM2.5,UG/M3,63,2,Moderate,Manhattan/IS143,New York Dept. of Environmental Conservation
4,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-01T12:00,2015-01-01,12:00,2015-01-01T17:00,PM2.5,UG/M3,44,1,Good,Bklyn - PS274,New York Dept. of Environmental Conservation
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209210,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00,2020-04-16T17:00,OZONE,PPB,45,1,Good,San Antonio Northwest C23,Texas Commission on Environmental Quality
209211,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00,2020-04-16T17:00,PM2.5,UG/M3,36,1,Good,San Antonio Northwest C23,Texas Commission on Environmental Quality
209212,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00,2020-04-16T17:00,PM2.5,UG/M3,38,1,Good,San Antonio Old Hwy90 C677,Texas Commission on Environmental Quality
209213,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-16T12:00,2020-04-16,12:00,2020-04-16T17:00,PM2.5,UG/M3,27,1,Good,San Antonio Palo Alto C676,Texas Commission on Environmental Quality


#### Convert & export cleaned DataFrame to CSV files

In [21]:
# Stamp the file name with the export time (YYYYMMDD_HHMM) so exports made at
# different minutes do not overwrite each other
timestamp = datetime.now().strftime("%Y%m%d_%H%M")

# Save the cleaned data as a UTF-8 CSV file, leaving out the DataFrame index
cleanedApiData_df.to_csv(
    f"{output_data_filepath}AirNowApiData_{timestamp}.csv",
    index=False,
    encoding="utf-8",
)

#### See the column names we are working with

In [22]:
# Show the column labels that are available for grouping and aggregation
cleanedApiData_df.columns.tolist()

['csaRank',
 'csaName',
 'csaPrimaryCity',
 'csaPrimaryCityState',
 'csaPrimaryCityLat',
 'csaPrimaryCityLong',
 'csaTimeZone',
 'TimeMode',
 'DateTimeObserved',
 'DateObserved',
 'TimeObserved',
 'DateTimeObservedUTC',
 'ParameterName',
 'Unit',
 'AQI',
 'AQICategoryNumber',
 'AQICategory',
 'SiteName',
 'SiteAgencyName']

#### Create Grouped DataFrame (that groups all columns except values that are aggregated)

In [31]:
# Group the readings by every descriptive column; AQI, AQICategoryNumber,
# AQICategory, SiteName and SiteAgencyName are left out so they can be aggregated
groupedApiData_df = cleanedApiData_df.groupby(
    [
        "csaRank",
        "csaName",
        "csaPrimaryCity",
        "csaPrimaryCityState",
        "csaPrimaryCityLat",
        "csaPrimaryCityLong",
        "csaTimeZone",
        "TimeMode",
        "DateTimeObserved",
        "DateObserved",
        "TimeObserved",
        "DateTimeObservedUTC",
        "ParameterName",
        "Unit",
    ]
)

# Visualize the grouping: .count() shows each group with its number of non-null values per column
groupedApiData_df.count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,AQI,AQICategoryNumber,AQICategory,SiteName,SiteAgencyName
csaRank,csaName,csaPrimaryCity,csaPrimaryCityState,csaPrimaryCityLat,csaPrimaryCityLong,csaTimeZone,TimeMode,DateTimeObserved,DateObserved,TimeObserved,DateTimeObservedUTC,ParameterName,Unit,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,"New York-Newark, NY-NJ-CT-PA Combined Statistical Area",New York City,NY,40.7128,-74.0060,Eastern,DaylightSavings,2015-03-08T12:00,2015-03-08,12:00,2015-03-08T16:00,OZONE,PPB,4,4,4,4,4
1,"New York-Newark, NY-NJ-CT-PA Combined Statistical Area",New York City,NY,40.7128,-74.0060,Eastern,DaylightSavings,2015-03-08T12:00,2015-03-08,12:00,2015-03-08T16:00,PM10,UG/M3,1,1,1,1,1
1,"New York-Newark, NY-NJ-CT-PA Combined Statistical Area",New York City,NY,40.7128,-74.0060,Eastern,DaylightSavings,2015-03-08T12:00,2015-03-08,12:00,2015-03-08T16:00,PM2.5,UG/M3,5,5,5,5,5
1,"New York-Newark, NY-NJ-CT-PA Combined Statistical Area",New York City,NY,40.7128,-74.0060,Eastern,DaylightSavings,2015-03-09T12:00,2015-03-09,12:00,2015-03-09T16:00,OZONE,PPB,4,4,4,4,4
1,"New York-Newark, NY-NJ-CT-PA Combined Statistical Area",New York City,NY,40.7128,-74.0060,Eastern,DaylightSavings,2015-03-09T12:00,2015-03-09,12:00,2015-03-09T16:00,PM10,UG/M3,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25,"San Antonio-New Braunfels-Pearsall, TX Combined Statistical Area",San Antonio,TX,29.4241,-98.4936,Central,Standard,2020-03-05T12:00,2020-03-05,12:00,2020-03-05T18:00,PM2.5,UG/M3,3,3,3,3,3
25,"San Antonio-New Braunfels-Pearsall, TX Combined Statistical Area",San Antonio,TX,29.4241,-98.4936,Central,Standard,2020-03-06T12:00,2020-03-06,12:00,2020-03-06T18:00,OZONE,PPB,1,1,1,1,1
25,"San Antonio-New Braunfels-Pearsall, TX Combined Statistical Area",San Antonio,TX,29.4241,-98.4936,Central,Standard,2020-03-06T12:00,2020-03-06,12:00,2020-03-06T18:00,PM2.5,UG/M3,4,4,4,4,4
25,"San Antonio-New Braunfels-Pearsall, TX Combined Statistical Area",San Antonio,TX,29.4241,-98.4936,Central,Standard,2020-03-07T12:00,2020-03-07,12:00,2020-03-07T18:00,OZONE,PPB,1,1,1,1,1


#### Aggregate values (aqiCount, aqiAvg, etc...)

In [32]:
# AQI per group: number of readings, then their mean, lowest and highest value
aqiCount, aqiAvg, aqiMin, aqiMax = (
    groupedApiData_df["AQI"].agg(statistic)
    for statistic in ("count", "mean", "min", "max")
)

# AQI Category Number per group: mean, lowest and highest value
aqiCategoryNumberAvg, aqiCategoryNumberMin, aqiCategoryNumberMax = (
    groupedApiData_df["AQICategoryNumber"].agg(statistic)
    for statistic in ("mean", "min", "max")
)

# Number of distinct AQI categories per group
aqiCategoryCount = groupedApiData_df["AQICategory"].nunique()

# Number of distinct reporting sites per group
sitesCount = groupedApiData_df["SiteName"].nunique()

# Number of distinct reporting agencies per group
agenciesCount = groupedApiData_df["SiteAgencyName"].nunique()


#### Create Summary DataFrame (that holds the grouped and aggregated values)

In [33]:
# Combine the aggregates into one table (the group keys form its index), turn the
# keys back into regular columns, sort by CSA, parameter and observation date,
# and finally renumber the rows from 0 without keeping the old row numbers
summaryApiData_df = (
    pd.DataFrame(
        {
            "aqiCount": aqiCount,
            "aqiAvg": aqiAvg,
            "aqiMin": aqiMin,
            "aqiMax": aqiMax,
            "aqiCategoryNumberAvg": aqiCategoryNumberAvg,
            "aqiCategoryNumberMin": aqiCategoryNumberMin,
            "aqiCategoryNumberMax": aqiCategoryNumberMax,
            "aqiCategoryCount": aqiCategoryCount,
            "sitesCount": sitesCount,
            "agenciesCount": agenciesCount,
        }
    )
    .reset_index()
    .sort_values(["csaRank", "ParameterName", "DateObserved"], ascending=True)
    .reset_index(drop=True)
)

# Visualize the DataFrame
summaryApiData_df


Unnamed: 0,csaRank,csaName,csaPrimaryCity,csaPrimaryCityState,csaPrimaryCityLat,csaPrimaryCityLong,csaTimeZone,TimeMode,DateTimeObserved,DateObserved,...,aqiCount,aqiAvg,aqiMin,aqiMax,aqiCategoryNumberAvg,aqiCategoryNumberMin,aqiCategoryNumberMax,aqiCategoryCount,sitesCount,agenciesCount
0,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-01T12:00,2015-01-01,...,4,23.250000,21,26,1.0,1,1,1,4,1
1,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-02T12:00,2015-01-02,...,4,18.500000,17,20,1.0,1,1,1,4,1
2,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-03T12:00,2015-01-03,...,4,15.250000,13,20,1.0,1,1,1,4,1
3,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-04T12:00,2015-01-04,...,4,3.000000,2,5,1.0,1,1,1,4,1
4,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-05T12:00,2015-01-05,...,3,24.666667,23,26,1.0,1,1,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98844,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-12T12:00,2020-04-12,...,4,49.000000,44,55,1.5,1,2,2,4,1
98845,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-13T12:00,2020-04-13,...,4,25.750000,14,49,1.0,1,1,1,4,1
98846,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-14T12:00,2020-04-14,...,4,25.000000,20,33,1.0,1,1,1,4,1
98847,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-15T12:00,2020-04-15,...,3,28.666667,24,32,1.0,1,1,1,3,1


#### Add AQICategoryNumber & AQICategory (1 = Good, 2 = Moderate, 3 = Unhealthy for Sensitive Groups, 4 = Unhealthy, 5 = Very Unhealthy, 6 = Hazardous; based on aqiAvg)

In [34]:
# Create function to determine AQICategoryNumber value based on aqiAvg
def faqiAvgAQICategoryNumber(row):
    """Return the AQI category number for a row's average AQI.

    The value in row["aqiAvg"] is rounded to zero decimal places with round()
    and then matched against these ranges:

        0-50 -> 1, 51-100 -> 2, 101-150 -> 3, 151-200 -> 4, 201-300 -> 5, 301+ -> 6

    Args:
        row: Mapping-like object (for example a DataFrame row) that provides
            the key "aqiAvg".

    Returns:
        The category number 1 through 6, or 0 when the rounded average is
        negative or NaN. Such a value used to match no branch and ended in an
        UnboundLocalError; 0 is the Category value this notebook treats as
        null/bad data.
    """
    # Round once instead of repeating the rounding in every comparison
    roundedAqiAvg = round(row["aqiAvg"], 0)

    # "not >= 0" is also true for NaN, which fails every ordered comparison
    if not roundedAqiAvg >= 0:
        return 0

    # Exclusive upper bounds of categories 1 through 5, in ascending order
    upperBounds = (51, 101, 151, 201, 301)
    for categoryNumber, upperBound in enumerate(upperBounds, start=1):
        if roundedAqiAvg < upperBound:
            return categoryNumber

    # Everything from 301 upward
    return 6

# Build the AQICategoryNumber column by passing every row to faqiAvgAQICategoryNumber
summaryApiData_df["AQICategoryNumber"] = summaryApiData_df.apply(
    faqiAvgAQICategoryNumber, axis="columns"
)

# Create function to determine AQICategory value based on aqiAvg
def faqiAvgAQICategory(row):
    """Return the descriptive AQI category label for a row's average AQI.

    The value in row["aqiAvg"] is rounded to zero decimal places with round()
    and then matched against these ranges:

        0-50 Good, 51-100 Moderate, 101-150 Unhealthy for Sensitive Groups,
        151-200 Unhealthy, 201-300 Very Unhealthy, 301+ Hazardous

    Args:
        row: Mapping-like object (for example a DataFrame row) that provides
            the key "aqiAvg".

    Returns:
        The category label, or "(invalid)" when the rounded average is negative
        or NaN. Such a value used to match no branch and ended in an
        UnboundLocalError; "(invalid)" is the same fallback fAQICategory uses.
    """
    # Round once instead of repeating the rounding in every comparison
    roundedAqiAvg = round(row["aqiAvg"], 0)

    # "not >= 0" is also true for NaN, which fails every ordered comparison
    if not roundedAqiAvg >= 0:
        return "(invalid)"

    # (exclusive upper bound, label) pairs in ascending order
    breakpoints = (
        (51, "Good"),
        (101, "Moderate"),
        (151, "Unhealthy for Sensitive Groups"),
        (201, "Unhealthy"),
        (301, "Very Unhealthy"),
    )
    for upperBound, label in breakpoints:
        if roundedAqiAvg < upperBound:
            return label

    # Everything from 301 upward
    return "Hazardous"

# Build the AQICategory column by passing every row to faqiAvgAQICategory
summaryApiData_df["AQICategory"] = summaryApiData_df.apply(
    faqiAvgAQICategory, axis="columns"
)

# Visualize the DataFrame
summaryApiData_df

Unnamed: 0,csaRank,csaName,csaPrimaryCity,csaPrimaryCityState,csaPrimaryCityLat,csaPrimaryCityLong,csaTimeZone,TimeMode,DateTimeObserved,DateObserved,...,aqiMin,aqiMax,aqiCategoryNumberAvg,aqiCategoryNumberMin,aqiCategoryNumberMax,aqiCategoryCount,sitesCount,agenciesCount,AQICategoryNumber,AQICategory
0,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-01T12:00,2015-01-01,...,21,26,1.0,1,1,1,4,1,1,Good
1,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-02T12:00,2015-01-02,...,17,20,1.0,1,1,1,4,1,1,Good
2,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-03T12:00,2015-01-03,...,13,20,1.0,1,1,1,4,1,1,Good
3,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-04T12:00,2015-01-04,...,2,5,1.0,1,1,1,4,1,1,Good
4,1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",New York City,NY,40.7128,-74.0060,Eastern,Standard,2015-01-05T12:00,2015-01-05,...,23,26,1.0,1,1,1,3,1,1,Good
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98844,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-12T12:00,2020-04-12,...,44,55,1.5,1,2,2,4,1,1,Good
98845,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-13T12:00,2020-04-13,...,14,49,1.0,1,1,1,4,1,1,Good
98846,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-14T12:00,2020-04-14,...,20,33,1.0,1,1,1,4,1,1,Good
98847,25,"San Antonio-New Braunfels-Pearsall, TX Combine...",San Antonio,TX,29.4241,-98.4936,Central,DaylightSavings,2020-04-15T12:00,2020-04-15,...,24,32,1.0,1,1,1,3,1,1,Good


#### Convert & export summary DataFrame to CSV files

In [35]:
# Stamp the file name with the export time (YYYYMMDD_HHMM) so exports made at
# different minutes do not overwrite each other
timestamp = datetime.now().strftime("%Y%m%d_%H%M")

# Save the summary data as a UTF-8 CSV file, leaving out the DataFrame index
summaryApiData_df.to_csv(
    f"{output_data_filepath}AirNowApiData_summary_{timestamp}.csv",
    index=False,
    encoding="utf-8",
)