In [6]:
!pip install pandas

Collecting pandas
  Using cached pandas-3.0.0-cp314-cp314-win_amd64.whl.metadata (19 kB)
Collecting numpy>=2.3.3 (from pandas)
  Using cached numpy-2.4.2-cp314-cp314-win_amd64.whl.metadata (6.6 kB)
Collecting tzdata (from pandas)
  Using cached tzdata-2025.3-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-3.0.0-cp314-cp314-win_amd64.whl (9.9 MB)
Using cached numpy-2.4.2-cp314-cp314-win_amd64.whl (12.4 MB)
Using cached tzdata-2025.3-py2.py3-none-any.whl (348 kB)
Installing collected packages: tzdata, numpy, pandas

   ---------------------------------------- 0/3 [tzdata]
   ------------- -------------------------- 1/3 [numpy]
   ------------- -------------------------- 1/3 [numpy]
   ------------- -------------------------- 1/3 [numpy]
   ------------- -------------------------- 1/3 [numpy]
   ------------- -------------------------- 1/3 [numpy]
   ------------- -------------------------- 1/3 [numpy]
   ------------- -------------------------- 1/3 [numpy]
   ------------- ---

In [7]:
!pip install requests

Collecting requests
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting charset_normalizer<4,>=2 (from requests)
  Using cached charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl.metadata (38 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.11-py3-none-any.whl.metadata (8.4 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Using cached urllib3-2.6.3-py3-none-any.whl.metadata (6.9 kB)
Collecting certifi>=2017.4.17 (from requests)
  Using cached certifi-2026.1.4-py3-none-any.whl.metadata (2.5 kB)
Using cached requests-2.32.5-py3-none-any.whl (64 kB)
Using cached charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl (107 kB)
Using cached idna-3.11-py3-none-any.whl (71 kB)
Using cached urllib3-2.6.3-py3-none-any.whl (131 kB)
Using cached certifi-2026.1.4-py3-none-any.whl (152 kB)
Installing collected packages: urllib3, idna, charset_normalizer, certifi, requests

   ---------------------------------------- 0/5 [urllib3]
   --------------------------

In [8]:
import requests
import pandas as pd

# Fetch the station catalogue from the ThaiWater public API, reduce it to the
# identifying columns, and save it as thaiwater_station_list.csv.
load_url = "https://api-v3.thaiwater.net/api/v1/thaiwater30/public/waterlevel_load"
headers = {"User-Agent": "Mozilla/5.0", "Accept": "application/json"}

response = requests.get(load_url, headers=headers, timeout=30)
# Fail here on an HTTP error instead of trying to parse an error body as JSON.
response.raise_for_status()
payload = response.json()

stations = payload["waterlevel_data"]["data"]
df_stations = pd.json_normalize(stations)

# "id" and "station_type" are indexed unconditionally by the history download
# cell, so report their absence now rather than failing there with a bare
# KeyError. The Thai station name is optional (read with a default later).
required = ["id", "station_type"]
missing = [c for c in required if c not in df_stations.columns]
if missing:
    raise KeyError(f"Station payload is missing required fields: {missing}")

# Keep only what we need
cols = ["id", "station_type", "station.tele_station_name.th"]
keep = [c for c in cols if c in df_stations.columns]
df_stations = df_stations[keep].drop_duplicates()

print(df_stations.head())
# utf-8-sig writes a BOM, which lets Excel detect UTF-8 for the Thai names.
df_stations.to_csv("thaiwater_station_list.csv", index=False, encoding="utf-8-sig")
print("Saved thaiwater_station_list.csv, rows:", len(df_stations))

           id     station_type station.tele_station_name.th
0  1203439728  tele_waterlevel              บ้านขนงพระเหนือ
1  1203467836  tele_waterlevel                        ปากรอ
2  1203467504  tele_waterlevel   สะพานธรรมจักร(วัดธรรมามูล)
3  1203467319  tele_waterlevel           สะพานแม่น้ำแควใหญ่
4  1203467477  tele_waterlevel                    คลองชะอวด
Saved thaiwater_station_list.csv, rows: 733


In [None]:
import requests
import pandas as pd
from time import sleep

# Download the yearly water-level series for every station in df_stations and
# combine them into one long table written to
# thaiwater_all_stations_history.csv.
graph_url = "https://api-v3.thaiwater.net/api/v1/thaiwater30/public/waterlevel_graph_year"
headers = {"User-Agent": "Mozilla/5.0", "Accept": "application/json"}

years = range(2018, 2027)  # adjust range (end is exclusive, so 2018..2026)
OUTPUT_COLUMNS = ["station_id", "station_type", "station_name_th", "ds", "y", "year"]
REQUEST_PAUSE_S = 0.1  # pause after every request, in seconds
all_rows = []

# A single Session reuses the HTTP connection across the
# (number of stations x number of years) requests made below.
with requests.Session() as session:
    session.headers.update(headers)

    for _, row in df_stations.iterrows():
        station_id = str(row["id"])
        station_type = row["station_type"]
        station_name = row.get("station.tele_station_name.th", "")

        for year in years:
            params = {
                "station_type": station_type,
                "station_id": station_id,
                "year": str(year)
            }

            try:
                r = session.get(graph_url, params=params, timeout=30)
                if r.status_code != 200:
                    continue

                p = r.json()
                gd = p.get("data", {}).get("graph_data", [])
                if not gd:
                    continue

                # graph_data is a list of per-year entries; take the one whose
                # "year" matches the request (compared as strings).
                year_entry = next((x for x in gd if str(x.get("year")) == str(year)), None)
                if not year_entry:
                    continue

                df = pd.DataFrame(year_entry.get("data", []))
                if df.empty:
                    continue

                # Unparseable timestamps / levels become NaT / NaN and are
                # dropped below.
                df["ds"] = pd.to_datetime(df["datetime"], errors="coerce")
                df["y"] = pd.to_numeric(df.get("waterlevel_msl"), errors="coerce")

                df["station_id"] = station_id
                df["station_type"] = station_type
                df["station_name_th"] = station_name
                df["year"] = year

                df = df.dropna(subset=["ds", "y"])
                # Do not collect frames that ended up with no usable rows.
                if df.empty:
                    continue

                all_rows.append(df[OUTPUT_COLUMNS])

            except Exception as e:
                # Best effort: report the failing station/year and carry on.
                print("Error:", station_id, station_type, year, e)

            finally:
                # be nice to the API: pause after every request, including
                # the ones that failed or returned nothing.
                sleep(REQUEST_PAUSE_S)

if all_rows:
    ts_all = pd.concat(all_rows, ignore_index=True)
    ts_all = ts_all.sort_values(["station_id", "ds"]).reset_index(drop=True)

    ts_all.to_csv("thaiwater_all_stations_history.csv", index=False, encoding="utf-8-sig")
    print("Saved thaiwater_all_stations_history.csv, rows:", len(ts_all))
else:
    # pd.concat([]) raises ValueError; keep ts_all defined and leave any
    # existing CSV untouched when nothing was retrieved.
    ts_all = pd.DataFrame(columns=OUTPUT_COLUMNS)
    print("No data retrieved; thaiwater_all_stations_history.csv was not written")