In [28]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import time
from pathlib import Path

In [None]:
# Config
start_date = datetime(2023, 1, 15)
end_date = datetime(2024, 1, 1)
base_url = "https://api-open.data.gov.sg/v2/real-time/api/air-temperature"

headers = {
    "x-api-key": ""  # i dont have an api :c
}

# Function to fetch one day's readings
def fetch_temp_for_date(date_str):
    """
    Fetch temperature readings for a single date (YYYY-MM-DD)
    Returns a DataFrame with columns: ['timestamp', 'stationId', 'value']
    """
    params = {"date": date_str}
    resp = requests.get(base_url, params=params, headers=headers)
    data = resp.json()
    
    readings_list = []
    
    for reading in data['data']['readings']:
        ts = reading['timestamp']
        for s in reading['data']:
            readings_list.append({
                "timestamp": ts,
                "stationId": s['stationId'],
                "value": s['value']
            })
    
    return pd.DataFrame(readings_list)


Fetching 2023-01-15 ...
Fetching 2023-01-16 ...
Fetching 2023-01-17 ...
Fetching 2023-01-18 ...
Fetching 2023-01-19 ...
Fetching 2023-01-20 ...
Fetching 2023-01-21 ...
Fetching 2023-01-22 ...
Fetching 2023-01-23 ...
Fetching 2023-01-24 ...
Fetching 2023-01-25 ...
Fetching 2023-01-26 ...
Fetching 2023-01-27 ...
Fetching 2023-01-28 ...
Fetching 2023-01-29 ...
Fetching 2023-01-30 ...
Fetching 2023-01-31 ...
Fetching 2023-02-01 ...
Fetching 2023-02-02 ...
Fetching 2023-02-03 ...
Fetching 2023-02-04 ...
Fetching 2023-02-05 ...
Fetching 2023-02-06 ...
Fetching 2023-02-07 ...
Fetching 2023-02-08 ...
Fetching 2023-02-09 ...
Fetching 2023-02-10 ...
Fetching 2023-02-11 ...
Fetching 2023-02-12 ...
Fetching 2023-02-13 ...
Fetching 2023-02-14 ...
Fetching 2023-02-15 ...
Fetching 2023-02-16 ...
Fetching 2023-02-17 ...
Fetching 2023-02-18 ...
Fetching 2023-02-19 ...
Fetching 2023-02-20 ...
Fetching 2023-02-21 ...
Fetching 2023-02-22 ...
Fetching 2023-02-23 ...
Fetching 2023-02-24 ...
Fetching 2023-02

In [None]:
# Loop over all days
all_days = pd.date_range(start_date, end_date, freq='D')
df_list = []

for day in all_days:
    day_str = day.strftime('%Y-%m-%d')
    print(f"Fetching {day_str} ...")
    df_day = fetch_temp_for_date(day_str)
    df_list.append(df_day)
    time.sleep(0.2)  # please dont hit rate limit

# Combine all days
df_temp = pd.concat(df_list, ignore_index=True)

In [18]:
df_temp

Unnamed: 0,timestamp,stationId,value
0,2023-01-15T23:59:00+08:00,S109,26.2
1,2023-01-15T23:59:00+08:00,S50,26.4
2,2023-01-15T23:59:00+08:00,S107,27.3
3,2023-01-15T23:59:00+08:00,S43,26.4
4,2023-01-15T23:59:00+08:00,S108,26.7
...,...,...,...
119701,2024-01-01T23:35:00+08:00,S60,25.6
119702,2024-01-01T23:35:00+08:00,S115,26.0
119703,2024-01-01T23:35:00+08:00,S24,25.2
119704,2024-01-01T23:35:00+08:00,S116,25.8


In [None]:
# Convert to datetime
df_temp['timestamp'] = pd.to_datetime(df_temp['timestamp'])
df_temp.set_index('timestamp', inplace=True)

# Make index also datetime
df_temp.index = pd.to_datetime(df_temp.index)

# Average across stations per timestamp
df_avg = df_temp.groupby(df_temp.index).value.mean().to_frame(name='temp_C')

# 30 min intervals
df_30min = df_avg.resample('30T').mean().interpolate()

df_30min = df_30min.reset_index()
print(df_30min.head())

                  timestamp     temp_C
0 2023-01-15 23:30:00+08:00  26.456928
1 2023-01-16 00:00:00+08:00  26.459149
2 2023-01-16 00:30:00+08:00  26.461370
3 2023-01-16 01:00:00+08:00  26.463591
4 2023-01-16 01:30:00+08:00  26.465813


In [None]:
df_30min.to_csv("temperature_30min.csv", index=False)

# Combining Temp Data

In [45]:
data_dir = Path("data/temperature")
files = sorted(data_dir.glob("*temperature_30min.csv"))
df_temp = pd.concat(
    [pd.read_csv(f) for f in files],
    ignore_index=True
)

df_temp["timestamp"] = pd.to_datetime(df_temp["timestamp"], errors="coerce")
df_temp["timestamp"] = df_temp["timestamp"].dt.tz_localize(None)

In [46]:
df_temp["Date"] = df_temp["timestamp"]
df_temp["Temperature"] = df_temp["temp_C"]
df_temp = df_temp[["Date", "Temperature"]]

In [47]:
df_temp.to_csv("temperature_data.csv", index=None)