# OpenAQ Thailand PM 2.5 Data Collection

In [1]:
import json
import time
from urllib.parse import quote, urlencode

import numpy as np
import pandas as pd
import requests

In [2]:
# Set server URL as global variable.
# get_measurements() reads this global and passes it to construct_url() as the
# base of every request URL. Presumably the OpenAQ v2 API endpoint (the path
# ends in /v2) -- the host may change over time, hence the date note below.
server_url = 'https://u50g7n0cbj.execute-api.us-east-1.amazonaws.com/v2' # working as of May 27, 2022

## Functions

In [3]:
def construct_url(server_url, method, parameters):
    """Build the full request URL for an API method and its query parameters.

    Parameters
    ----------
    server_url : str
        Base URL of the API (no trailing slash).
    method : str
        Endpoint name appended to the base URL, e.g. ``'measurements'``.
    parameters : dict
        Query parameters. Keys and values are converted to text and
        percent-encoded, so pass raw (not pre-encoded) values.

    Returns
    -------
    str
        A URL of the form ``<server_url>/<method>?<key>=<value>&...``.
        An empty ``parameters`` dict yields a URL ending in ``?``.
    """
    # Spaces in the base part are escaped exactly as they were before.
    base = (server_url + '/' + method).replace(" ", "%20")

    # urlencode joins the pairs with '&' and percent-encodes reserved
    # characters ('&', '=', '#', '+', ...) that previously went through raw and
    # could corrupt the query string (e.g. '+' in a '+07:00' UTC offset).
    # quote (instead of the default quote_plus) encodes spaces as %20, and
    # ':' is kept literal so timestamps stay readable.
    url_params = urlencode(parameters, safe=':', quote_via=quote)

    return base + '?' + url_params

In [4]:
def openaq_get(url):
    """Fetch ``url`` and return the ``'results'`` part of its JSON body as a DataFrame.

    Parameters
    ----------
    url : str
        Fully constructed request URL.

    Returns
    -------
    pandas.DataFrame
        The ``'results'`` payload flattened with ``pd.json_normalize``.

    Raises
    ------
    KeyError
        If the decoded JSON object has no ``'results'`` key. The collection
        loop in this notebook treats that as a signal to retry.
    """
    # Perform the GET request and decode the JSON body in one go
    payload = requests.get(url).json()

    # Only the 'results' entry is of interest; flatten nested fields into columns
    return pd.json_normalize(payload['results'])

In [5]:
def get_measurements(country_id, start_date, end_date, parameter='pm25', sensor_type=None, limit=10000, include_mobile=False):
    """Query the ``measurements`` endpoint and return the results as a DataFrame.

    Parameters
    ----------
    country_id : str
        Sent as the ``country_id`` query parameter (the notebook passes ``'TH'``).
    start_date, end_date
        Sent as ``date_from`` and ``date_to``; converted to text when the URL is built.
    parameter : str, default 'pm25'
        Sent as the ``parameter`` query parameter.
    sensor_type : optional
        When truthy, sent as ``sensorType``; otherwise the parameter is omitted.
    limit : int, default 10000
        Sent as the ``limit`` query parameter.
    include_mobile : bool, default False
        Sent as the ``isMobile`` query parameter.

    Returns
    -------
    pandas.DataFrame
        Whatever ``openaq_get`` returns for the constructed URL.
    """
    # Only include sensorType when the caller explicitly asked for one
    optional = {"sensorType": sensor_type} if sensor_type else {}

    # Key order is kept stable because it determines the order in the URL
    query = {
        "date_from": start_date,
        "date_to": end_date,
        "has_geo": True,
        "parameter": parameter,
        "country_id": country_id,
        "limit": limit,
        "isMobile": include_mobile,
        **optional,
    }

    # Uses the module-level server_url as the base of the request
    return openaq_get(construct_url(server_url, 'measurements', query))

## Data Collection

In [8]:
# Set date range (one API request per calendar day)
date_range = pd.date_range('2021-01-01','2021-12-31')

# Collect each day's DataFrame in a list and concatenate once at the end.
# Growing a DataFrame with pd.concat inside the loop re-copies every
# previously collected row on each iteration.
daily_frames = []

# Iterate through date range
for d in date_range:

    # start_date is a plain date; end_date is the following midnight as a Timestamp
    start_date = d.date()
    end_date = d + pd.Timedelta(days=1)

    # Indicator for when data collection for a new month starts
    if d.day == 1:
        print(f'Collecting data for Month {start_date.month}...')

    # Reset the per-day result so that a day whose retries are all exhausted
    # can never silently re-append the previous day's data (or hit a
    # NameError on the very first day).
    temp_df = None

    # Linear back-off: wait 5s, 10s, 15s, ... between attempts (up to 99 attempts)
    t = 5
    for attempt in range(1,100):
        try:
            # Call API
            temp_df = get_measurements('TH', start_date, end_date)
        except KeyError:
            # openaq_get raises KeyError when the response has no 'results'
            # key; this is treated as the API rate limit having been hit.
            print(f'API limit exceeded for {start_date}. Trying again in {t} seconds... Retry attempt: {attempt}')
            time.sleep(t)
            print(f'Trying again for {start_date}.')
            t += 5
        else:
            break
    else:
        # Runs only when the loop finished without a successful break
        print(f'Error collecting data for {start_date}. Skipping to next date...')

    # Keep the day's data only if the request actually succeeded
    if temp_df is not None:
        daily_frames.append(temp_df)

    # When the month is finished, sleep for 3 mins --workaround for API limits
    if start_date.month != end_date.month:
        print(f'Month {start_date.month} done. Please wait for the next month to start... Time: 3 mins')
        time.sleep(180)

# Create base table (pd.concat raises on an empty list, so fall back to an empty frame)
openaq_df = pd.concat(daily_frames) if daily_frames else pd.DataFrame()

print('Data collection complete.')
openaq_df

Collecting data for Month 1...
API limit exceeded for 2021-01-22. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-01-22.
API limit exceeded for 2021-01-23. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-01-23.
API limit exceeded for 2021-01-23. Trying again in 10 seconds... Retry attempt: 2
Trying again for 2021-01-23.
API limit exceeded for 2021-01-24. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-01-24.
API limit exceeded for 2021-01-24. Trying again in 10 seconds... Retry attempt: 2
Trying again for 2021-01-24.
API limit exceeded for 2021-01-25. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-01-25.
API limit exceeded for 2021-01-26. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-01-26.
API limit exceeded for 2021-01-26. Trying again in 10 seconds... Retry attempt: 2
Trying again for 2021-01-26.
API limit exceeded for 2021-01-27. Trying again in 5 seconds... Retry attempt: 1
Tryin

Trying again for 2021-03-18.
API limit exceeded for 2021-03-23. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-03-23.
API limit exceeded for 2021-03-24. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-03-24.
API limit exceeded for 2021-03-24. Trying again in 10 seconds... Retry attempt: 2
Trying again for 2021-03-24.
API limit exceeded for 2021-03-25. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-03-25.
API limit exceeded for 2021-03-26. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-03-26.
API limit exceeded for 2021-03-27. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-03-27.
API limit exceeded for 2021-03-28. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-03-28.
API limit exceeded for 2021-03-29. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-03-29.
Month 3 done. Please wait for the next month to start... Time: 3 mins
Collecting data for 

Collecting data for Month 7...
API limit exceeded for 2021-07-01. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-07-01.
API limit exceeded for 2021-07-01. Trying again in 10 seconds... Retry attempt: 2
Trying again for 2021-07-01.
API limit exceeded for 2021-07-01. Trying again in 15 seconds... Retry attempt: 3
Trying again for 2021-07-01.
API limit exceeded for 2021-07-01. Trying again in 20 seconds... Retry attempt: 4
Trying again for 2021-07-01.
API limit exceeded for 2021-07-01. Trying again in 25 seconds... Retry attempt: 5
Trying again for 2021-07-01.
API limit exceeded for 2021-07-01. Trying again in 30 seconds... Retry attempt: 6
Trying again for 2021-07-01.
API limit exceeded for 2021-07-02. Trying again in 5 seconds... Retry attempt: 1
Trying again for 2021-07-02.
API limit exceeded for 2021-07-02. Trying again in 10 seconds... Retry attempt: 2
Trying again for 2021-07-02.
API limit exceeded for 2021-07-03. Trying again in 5 seconds... Retry attempt: 1
Tr

Unnamed: 0,locationId,location,parameter,value,unit,country,city,isMobile,isAnalysis,entity,sensorType,date.utc,date.local,coordinates.latitude,coordinates.longitude
0,65397,Sansiri - d'Vieng Santitham,pm25,69.7,µg/m³,TH,,False,False,community,low-cost sensor,2021-01-19T23:59:57+00:00,2021-01-20T06:59:57+07:00,18.803404,98.982210
1,65176,Sansiri - Saransiri Tiwanon-Chaengwattana 2,pm25,123.3,µg/m³,TH,,False,False,community,low-cost sensor,2021-01-19T23:59:55+00:00,2021-01-20T06:59:55+07:00,13.961660,100.555150
2,69950,Sansiri - Siri Place Westgate,pm25,77.6,µg/m³,TH,,False,False,community,low-cost sensor,2021-01-19T23:59:54+00:00,2021-01-20T06:59:54+07:00,13.872132,100.387390
3,72114,Sansiri - The Base Height Udonthani,pm25,119.0,µg/m³,TH,,False,False,community,low-cost sensor,2021-01-19T23:59:53+00:00,2021-01-20T06:59:53+07:00,17.411478,102.795340
4,70059,Sansiri - dcondo sign,pm25,57.4,µg/m³,TH,,False,False,community,low-cost sensor,2021-01-19T23:59:49+00:00,2021-01-20T06:59:49+07:00,18.809105,99.015884
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11699,72914,Sansiri - Setthasiri Charan Pinklao 2,pm25,30.4,µg/m³,TH,,False,False,community,low-cost sensor,2021-12-31T14:12:25+00:00,2021-12-31T21:12:25+07:00,13.767839,100.458540
11700,70066,Sansiri - dcondo nim,pm25,47.1,µg/m³,TH,,False,False,community,low-cost sensor,2021-12-31T14:11:57+00:00,2021-12-31T21:11:57+07:00,18.809770,99.015940
11701,72911,Sansiri - The Base Phetkasem,pm25,25.7,µg/m³,TH,,False,False,community,low-cost sensor,2021-12-31T14:11:53+00:00,2021-12-31T21:11:53+07:00,13.715885,100.447395
11702,65631,E23BMY-Air-Station,pm25,0.8,µg/m³,TH,,False,False,community,low-cost sensor,2021-12-31T14:11:48+00:00,2021-12-31T21:11:48+07:00,13.793413,100.570020


In [10]:
# Write the collected measurements to disk; the DataFrame index is not
# written (index=False), only the data columns.
raw_csv_path = '20220527_openaq_raw_data.csv'
openaq_df.to_csv(raw_csv_path, index=False)
print('Raw data saved as CSV.')

Raw data saved as CSV.
