In [1]:
import numpy as np
import pandas as pd

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

import time

In [2]:
# Optional: run headless (no browser window)
options = Options()
options.headless = True

# Set up ChromeDriver
driver = webdriver.Chrome(options=options)

columns = np.array([ 
    "temperature_max", "temperature_avg", "temperature_min",
    "dew_point_max", "dew_point_avg", "dew_point_min",
    "humidity_max", "humidity_avg", "humidity_min",
    "wind_speed_max", "wind_speed_avg", "wind_speed_min",
    "pressure_max", "pressure_avg", "pressure_min",
    "precipitation",
])

for y in range(1997, 2026)[::-1]:
    record_count = 0
    data = []
    first = True
    for m in range(1, 13)[::-1]:

        url = f'https://www.wunderground.com/history/monthly/th/bangkok/VTBD/date/{y}-{m}'
        print(f"Fetching data for {y}-{m}...")
        driver.get(url)
        time.sleep(5)  # Adjust as needed for content to load

        try:
            print(f"Extracting data for {y}-{m}...")
            table = driver.find_element(By.CSS_SELECTOR, "table.days")
            ttable = table.find_elements(By.CSS_SELECTOR, "table")

            for i, t in enumerate(ttable):
                rows = t.find_elements(By.CSS_SELECTOR, "tr")
                for j, r in enumerate(rows):
                    cells = r.find_elements(By.CSS_SELECTOR, "td")
                    if i == 0:
                        data.append([f'{c.text}-{m}-{y}' for c in cells])
                    else:
                        data[record_count + j].extend([c.text for c in cells])

            record_count += len(rows)     
            print(f'Successfully Extracted: {y}-{m}\n--------------------------------')
        except Exception as e:
            print(f"!!! Failed to extract {y}-{m}: {e}")

    temp_df = pd.DataFrame(data)
    temp_df = temp_df[temp_df[16] != 'Total']

    df = pd.DataFrame(
        data=temp_df.iloc[0:, 1:].values,
        index=temp_df[0],
        columns=columns,
        dtype=np.float64,
    )
    df.index = pd.to_datetime(df.index, format='%d-%m-%Y')
    df.index.name = 'date'

    df.to_csv("../data/weather/bangkok_weather.csv", mode='a', header=False, index=True)

driver.quit()

Fetching data for 2025-12...
Extracting data for 2025-12...
!!! Failed to extract 2025-12: Message: no such element: Unable to locate element: {"method":"css selector","selector":"table.days"}
  (Session info: chrome=129.0.6668.101); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF68BAEB095+29557]
	(No symbol) [0x00007FF68BA5FA50]
	(No symbol) [0x00007FF68B91B56A]
	(No symbol) [0x00007FF68B96F695]
	(No symbol) [0x00007FF68B96F8EC]
	(No symbol) [0x00007FF68B9BB777]
	(No symbol) [0x00007FF68B9971CF]
	(No symbol) [0x00007FF68B9B851C]
	(No symbol) [0x00007FF68B996F33]
	(No symbol) [0x00007FF68B96116F]
	(No symbol) [0x00007FF68B9622D1]
	GetHandleVerifier [0x00007FF68BE1C96D+3378253]
	GetHandleVerifier [0x00007FF68BE68497+3688311]
	GetHandleVerifier [0x00007FF68BE5D1CB+3642539]
	GetHandleVerifier [0x00007FF68BBAA6B6+813462]
	(No symbol) [0x00007FF68BA6AB5F

In [6]:
weather_df = pd.read_csv("../data/weather/bangkok_weather.csv", index_col=0, parse_dates=True)  

In [8]:
weather_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 9767 entries, 2025-04-01 to 1997-01-31
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   temperature_max  9767 non-null   float64
 1   temperature_avg  9767 non-null   float64
 2   temperature_min  9767 non-null   float64
 3   dew_point_max    9767 non-null   float64
 4   dew_point_avg    9767 non-null   float64
 5   dew_point_min    9767 non-null   float64
 6   humidity_max     9767 non-null   float64
 7   humidity_avg     9767 non-null   float64
 8   humidity_min     9767 non-null   float64
 9   wind_speed_max   9767 non-null   float64
 10  wind_speed_avg   9767 non-null   float64
 11  wind_speed_min   9767 non-null   float64
 12  pressure_max     9767 non-null   float64
 13  pressure_avg     9767 non-null   float64
 14  pressure_min     9767 non-null   float64
 15  precipitation    9767 non-null   float64
dtypes: float64(16)
memory usage: 1.3 MB
