-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
Related to issue #5
Code for the list of stations (and their metadata, see #1):
Important: It seems that for a given spot, there are different station ids depending on the variable. So 894 refers to discharge, and there may be another id at the same place for temperature and stage! If one just holds 894 while looking for temperature, they will retrieve an empty df...
import requests
import pandas as pd
import numpy as np
from pyproj import Transformer
def get_finnish_hydro_metadata() -> pd.DataFrame:
    """
    Download and return Finnish Environment Institute (SYKE)
    hydrological station metadata.

    - Keeps ALL original columns from the API.
    - Renames only:
        Paikka_Id    -> gauge_id
        Nimi         -> station_name
        PaaVesalNimi -> river
        NimiEng      -> type
      plus computed latitude / longitude columns.
    - Ensures the standardized fields exist (NaN-filled when missing):
        ['gauge_id', 'station_name', 'river', 'latitude', 'longitude',
         'altitude', 'area', 'country', 'source', 'type']
    - Adds constants: country='Finland', source='SYKE Hydrologiarajapinta'.
    - Coordinates are converted from ETRS-TM35FIN (EPSG:3067) to WGS84
      (EPSG:4326), falling back to DMS-style KoordLat/KoordLong if needed.

    Returns
    -------
    pd.DataFrame
        One row per (station, variable) pair. NOTE: the same physical spot
        can appear under several gauge_ids, one per measured variable.

    Raises
    ------
    requests.HTTPError
        If any API request returns an error status.
    """
    base_url = "http://rajapinnat.ymparisto.fi/api/Hydrologiarajapinta/1.1/odata/"
    headers = {"Accept": "application/json"}

    # --- Fetch station metadata (Paikka), following OData pagination ---
    url = base_url + "Paikka"
    all_records = []
    while url:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        data = r.json()
        all_records.extend(data["value"])
        # 'odata.nextLink' is absent on the last page, which ends the loop.
        url = data.get("odata.nextLink")
    df_paikka = pd.DataFrame(all_records)

    # --- Fetch variable (Suure) metadata ---
    r = requests.get(base_url + "Suure", headers=headers, timeout=30)
    r.raise_for_status()
    df_suure = pd.DataFrame(r.json()["value"])[["Suure_Id", "NimiEng", "Yksikko"]]

    # --- Merge station info with variable names ---
    df = df_paikka.merge(df_suure, on="Suure_Id", how="left")

    # --- Coordinate conversion setup ---
    transformer = Transformer.from_crs(3067, 4326, always_xy=True)

    def dms_str_to_decimal(s):
        """Convert a DMS-like digit string (e.g. '622536' = 62d 25m 36s) to decimal degrees."""
        s = str(s).strip()
        # Need at least 5 digits (D mm ss); a shorter string would make
        # int(s[:-4]) fail on an empty slice.
        if len(s) < 5 or not s.isdigit():
            return None
        deg = int(s[:-4])
        minutes = int(s[-4:-2])
        seconds = int(s[-2:])
        return deg + minutes / 60 + seconds / 3600

    def get_coords(row):
        """Return (lat, lon) in WGS84 for a station row, or (None, None)."""
        e, n = row.get("KoordErTmIta"), row.get("KoordErTmPohj")
        if pd.notna(e) and pd.notna(n):
            try:
                lon, lat = transformer.transform(e, n)
                return lat, lon
            except Exception:
                pass  # fall through to the DMS fallback below
        # fallback: parse DMS-style KoordLat/KoordLong
        lat = dms_str_to_decimal(row.get("KoordLat"))
        lon = dms_str_to_decimal(row.get("KoordLong"))
        return lat, lon

    coords = df.apply(get_coords, axis=1)
    df["latitude"], df["longitude"] = zip(*coords)

    # --- Rename standardized columns (keep all others) ---
    rename_map = {
        "Paikka_Id": "gauge_id",
        "Nimi": "station_name",
        "PaaVesalNimi": "river",
        "NimiEng": "type",
    }
    df = df.rename(columns=rename_map)

    # --- Ensure standardized columns exist ---
    std_cols = [
        "gauge_id",
        "station_name",
        "river",
        "latitude",
        "longitude",
        "altitude",
        "area",
        "country",
        "source",
        "type",
    ]
    for col in std_cols:
        if col not in df.columns:
            df[col] = np.nan

    # --- Add constants / overwrite ---
    df["country"] = "Finland"
    df["source"] = "SYKE Hydrologiarajapinta"

    # --- Convert coordinates to numeric safely ---
    # (Both columns are guaranteed to exist after the std_cols loop above.)
    df["latitude"] = pd.to_numeric(df["latitude"], errors="coerce")
    df["longitude"] = pd.to_numeric(df["longitude"], errors="coerce")

    return df.reset_index(drop=True)
Example usage:
# Example usage: download the full station metadata table (requires network).
df = get_finnish_hydro_metadata()
print(df)
Code for downloading the data
import requests
import pandas as pd
from datetime import datetime
from typing import Optional
def get_syke_data(
    station_code: str,
    variable: str = "discharge",
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    verbose: bool = False,
) -> pd.DataFrame:
    """
    Download and parse hydrological data from the SYKE Hydrologiarajapinta API.

    Parameters
    ----------
    station_code : str
        Station identifier (Paikka_Id), e.g., "894". An int is also accepted
        (it is interpolated into the OData filter). NOTE: the same physical
        spot may use a different id for each variable.
    variable : str
        One of: 'discharge', 'stage', 'temperature'
    start_date : str, optional
        Start date in 'YYYY-MM-DD' format (inclusive, from midnight)
    end_date : str, optional
        End date in 'YYYY-MM-DD' format (inclusive up to midnight of that day)
    verbose : bool
        Print progress info.

    Returns
    -------
    pd.DataFrame
        Columns: ['time', variable]. Empty (best-effort contract) when the
        station has no data or a request fails.

    Raises
    ------
    ValueError
        If `variable` is not one of the supported names.
    """
    variable = variable.lower()
    var_map = {
        "discharge": "Virtaama",
        "stage": "Vedenkorkeus",
        "temperature": "LampoPintavesi",
    }
    if variable not in var_map:
        raise ValueError("Variable must be 'discharge', 'stage', or 'temperature'.")

    def _empty() -> pd.DataFrame:
        # Uniform "no data" result shared by every failure/empty path.
        return pd.DataFrame(columns=["time", variable])

    base_url = f"http://rajapinnat.ymparisto.fi/api/Hydrologiarajapinta/1.1/odata/{var_map[variable]}"
    headers = {"Accept": "application/json"}

    # Build the OData $filter. Each date bound is added independently so that
    # a single bound still filters server-side (previously both were required,
    # forcing a full-series download when only one was supplied).
    clauses = [f"Paikka_Id eq {station_code}"]
    if start_date:
        clauses.append(f"Aika ge datetime'{start_date}T00:00:00'")
    if end_date:
        clauses.append(f"Aika le datetime'{end_date}T00:00:00'")
    url = base_url + "?$filter=" + " and ".join(clauses)

    all_records = []
    pages = 0
    if verbose:
        print(f"Fetching {variable} data for station {station_code}...")
    try:
        while url:
            if verbose:
                print(f"→ Requesting: {url}")
            r = requests.get(url, headers=headers, timeout=30)
            if r.status_code == 404:
                if verbose:
                    print("No data found for this station.")
                return _empty()
            r.raise_for_status()
            data = r.json()
            records = data.get("value", [])
            if not records:
                break
            all_records.extend(records)
            url = data.get("odata.nextLink")  # absent on the last page
            pages += 1

        if not all_records:
            if verbose:
                print("No records returned for the specified period.")
            return _empty()

        # Convert to DataFrame
        df = pd.DataFrame(all_records)
        if "Aika" not in df or "Arvo" not in df:
            if verbose:
                print("Missing expected fields in API response.")
            return _empty()

        df["time"] = pd.to_datetime(df["Aika"], errors="coerce")
        df[variable] = pd.to_numeric(df["Arvo"], errors="coerce")
        df = df[["time", variable]].dropna(subset=["time"]).sort_values("time")

        # Client-side safety filter in case the server ignored the bounds.
        if start_date:
            df = df[df["time"] >= start_date]
        if end_date:
            df = df[df["time"] <= end_date]

        if verbose:
            print(f"Retrieved {len(df)} records in {pages} page(s).")
        return df.reset_index(drop=True)

    except requests.exceptions.RequestException as e:
        # Best-effort contract: network problems yield an empty frame.
        if verbose:
            print(f"Request failed: {e}")
        return _empty()
    except Exception as e:
        # NOTE(review): this broad catch keeps the best-effort contract but can
        # mask programming errors; consider narrowing it in the future.
        if verbose:
            print(f"Unexpected error: {e}")
        return _empty()
Example usage
# Example: get discharge data for station 894 in year 2000
# (station_code is passed as an int here; it is interpolated into the
# OData filter string, so both int and str work — requires network)
df = get_syke_data(
station_code=894,
variable="discharge",
start_date="2000-01-01",
end_date="2001-01-01",
)
print(df)
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels