In [1]:
import requests
import pandas as pd
import datetime as datetime
from pyspark.sql import SparkSession

In [81]:
def extract_json_from_weather_api(url, params, timeout=30):
    """Send a GET request to a weather API endpoint and return the decoded JSON body.

    Args:
        url: Endpoint URL to request.
        params: Query-string parameters forwarded to ``requests.get``. The
            original note on this cell states that the date value is formatted
            as either YYYY-MM-DD or YYYY-MM-DDTHH:mm:ss.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        The decoded JSON payload when the response status is 200. In every
        other case an error message is printed and ``None`` is returned.

    Raises:
        requests.exceptions.RequestException: connection problems and timeouts
            are not handled here and propagate to the caller.
    """
    weather_response = requests.get(url, params=params, timeout=timeout)
    response_status = weather_response.status_code
    try:
        weather_json = weather_response.json()
    except ValueError:
        # JSON decoding errors raised by ``Response.json()`` are ValueError
        # subclasses; catching only those keeps KeyboardInterrupt and genuine
        # programming errors visible.
        print('Error: Response cannot be converted to JSON object.')
        return None

    if response_status == 200:
        return weather_json

    if response_status in (400, 404):
        # The error body is expected to carry 'errorMsg', but do not crash
        # with KeyError/TypeError if it is missing or the body is not a dict.
        error_msg = weather_json.get('errorMsg') if isinstance(weather_json, dict) else None
        if error_msg is not None:
            print(f'Error {response_status}: {error_msg}')
            return None

    print(f'Error {response_status}: check your inputs')
    return None

def process_weather_json_per_page(json):
    """Split one page of weather JSON into a station table and a readings table.

    Args:
        json: Decoded API response. ``json['data']['stations']`` is a list of
            station records whose ``'location'`` entry is a dict with
            ``'latitude'`` and ``'longitude'`` keys, and
            ``json['data']['readings']`` is a list of dicts each holding a
            ``'timestamp'`` and a ``'data'`` list of per-station readings.

    Returns:
        A ``(station_info, station_id_and_reading)`` tuple of DataFrames.
        ``station_info`` has the station fields with ``'location'`` replaced
        by trailing ``latitude`` / ``longitude`` columns.
        ``station_id_and_reading`` holds every reading on the page with its
        ``timestamp`` attached; it is an empty DataFrame when the page has no
        readings.
    """
    data = json['data']

    station_info = pd.DataFrame(data['stations'])
    if 'location' in station_info.columns:
        # Unpack the coordinates by key name rather than by position, so the
        # result does not depend on the key order inside each location dict.
        coordinates = pd.DataFrame(station_info['location'].tolist(), index=station_info.index)
        station_info = station_info.drop(columns='location').assign(
            latitude=coordinates['latitude'],
            longitude=coordinates['longitude'],
        )

    # One frame per timestamp, each row tagged with that timestamp.
    reading_frames = [
        pd.DataFrame(reading['data']).assign(timestamp=reading['timestamp'])
        for reading in data['readings']
    ]
    if not reading_frames:
        # pd.concat raises ValueError on an empty list; a page without
        # readings yields an empty table instead.
        return station_info, pd.DataFrame()

    station_id_and_reading = pd.concat(reading_frames)
    return station_info, station_id_and_reading

def convert_weather_api_data_to_df(url, params):
    """Call a weather API endpoint, follow its pagination, and return the data as DataFrames.

    Args:
        url: Endpoint URL passed to ``extract_json_from_weather_api``.
        params: Query parameters for the first request. The dict is copied, so
            the caller's object is never modified when a ``'paginationToken'``
            is added for later pages.

    Returns:
        A ``(stn_info, stn_id_with_reading)`` tuple of DataFrames: the
        de-duplicated station table and the readings from every page.
        If the first request yields no usable JSON, a message is printed and
        two empty DataFrames are returned so callers can still unpack the
        result. If a later page fails, a message is printed and the pages
        collected up to that point are returned.
    """
    # Copy so that adding 'paginationToken' does not leak into the caller's dict.
    request_params = dict(params or {})

    weather_json = extract_json_from_weather_api(url, request_params)
    if not weather_json:
        print('API call did not provide valid response to be converted to JSON')
        return pd.DataFrame(), pd.DataFrame()

    # Collect per-page frames and concatenate once at the end rather than
    # re-copying the accumulated frame on every page.
    stn_info_pages = []
    reading_pages = []
    while True:
        page_stn_info, page_readings = process_weather_json_per_page(weather_json)
        stn_info_pages.append(page_stn_info)
        reading_pages.append(page_readings)

        # A missing or empty token means this was the last page.
        pagination_token = weather_json['data'].get('paginationToken')
        if not pagination_token:
            break

        request_params['paginationToken'] = pagination_token
        weather_json = extract_json_from_weather_api(url, request_params)
        if not weather_json:
            print('API call for a subsequent page did not provide valid response; '
                  'returning the pages collected so far')
            break

    stn_info = pd.concat(stn_info_pages)
    stn_id_with_reading = pd.concat(reading_pages)
    return stn_info.drop_duplicates(), stn_id_with_reading

In [None]:
# Wind direction: station table and readings for 2025-01-01.
(wind_dir_stn_info, stn_id_with_wind_dir_reading) = convert_weather_api_data_to_df(
    'https://api-open.data.gov.sg/v2/real-time/api/wind-direction',
    params={'date': '2025-01-01'},
)

In [84]:
# Air temperature: station table and readings for 2025-01-01.
(air_temp_stn_info, stn_id_with_air_temp_reading) = convert_weather_api_data_to_df(
    'https://api-open.data.gov.sg/v2/real-time/api/air-temperature',
    params={'date': '2025-01-01'},
)

In [83]:
# Relative humidity: station table and readings for 2025-01-01.
(rel_humidity_stn_info, stn_id_with_rel_humidity_reading) = convert_weather_api_data_to_df(
    'https://api-open.data.gov.sg/v2/real-time/api/relative-humidity',
    params={'date': '2025-01-01'},
)

In [85]:
# Rainfall: station table and readings for 2025-01-01.
(rainfall_stn_info, stn_id_with_rainfall_reading) = convert_weather_api_data_to_df(
    'https://api-open.data.gov.sg/v2/real-time/api/rainfall',
    params={'date': '2025-01-01'},
)

In [91]:
# Wind speed: station table and readings for 2025-01-01.
(windspeed_stn_info, stn_id_with_windspeed_reading) = convert_weather_api_data_to_df(
    'https://api-open.data.gov.sg/v2/real-time/api/wind-speed',
    params={'date': '2025-01-01'},
)

In [101]:
# Stack the five per-metric station tables into one frame. Stations that
# appear under more than one metric are kept as repeated rows.
all_stn_info = pd.concat([
    wind_dir_stn_info,
    air_temp_stn_info,
    rel_humidity_stn_info,
    rainfall_stn_info,
    windspeed_stn_info,
])
# Stack the five per-metric reading tables in the same order.
all_readings = pd.concat([
    stn_id_with_wind_dir_reading,
    stn_id_with_air_temp_reading,
    stn_id_with_rel_humidity_reading,
    stn_id_with_rainfall_reading,
    stn_id_with_windspeed_reading,
])


In [100]:
# Show station rows whose 'id' already appeared earlier in the combined table.
all_stn_info.loc[all_stn_info.duplicated(subset=['id'])]

Unnamed: 0,id,deviceId,name,latitude,longitude
0,S109,S109,Ang Mo Kio Avenue 5,1.3764,103.8492
1,S106,S106,Pulau Ubin,1.4168,103.9673
2,S117,S117,Banyan Road,1.256,103.679
3,S107,S107,East Coast Parkway,1.3135,103.9625
4,S104,S104,Woodlands Avenue 9,1.44387,103.78538
5,S115,S115,Tuas South Avenue 3,1.29377,103.61843
6,S116,S116,West Coast Highway,1.281,103.754
7,S60,S60,Sentosa,1.25,103.8279
8,S50,S50,Clementi Road,1.3337,103.7768
9,S43,S43,Kim Chuan Road,1.3399,103.8878
