Skip to content

Finland (add more countries) #65

@thiagovmdon

Description

@thiagovmdon

Related to issue #5

Code for the list of stations (and metadata, part 1):

Important: it seems that for a given spot there are different station IDs depending on the variable. So 894 refers to discharge, and there may be another ID at the same location for temperature and stage. If one simply uses 894 while looking for temperature, they will retrieve an empty DataFrame...

import requests
import pandas as pd
import numpy as np
from pyproj import Transformer

def get_finnish_hydro_metadata() -> pd.DataFrame:
    """
    Download and return Finnish Environment Institute (SYKE)
    hydrological station metadata.

    - Keeps ALL original columns from the API.
    - Renames only:
        Paikka_Id → gauge_id
        Nimi → station_name
        PaaVesalNimi → river
        (computed) latitude / longitude
        NimiEng → type
    - Adds or fills standardized fields if missing:
        ['gauge_id', 'station_name', 'river', 'latitude', 'longitude',
         'altitude', 'area', 'country', 'source', 'type']
    - Adds constants:
        country='Finland', source='SYKE Hydrologiarajapinta'
    - Coordinates are converted from ETRS-TM35FIN (EPSG:3067) to WGS84 (EPSG:4326),
      falling back to DMS-style KoordLat/KoordLong if needed.

    Returns
    -------
    pd.DataFrame
        One row per (station, variable) pair. NOTE: the same physical
        location can appear under several Paikka_Id values, one per
        measured variable.
    """

    base_url = "http://rajapinnat.ymparisto.fi/api/Hydrologiarajapinta/1.1/odata/"
    headers = {"Accept": "application/json"}

    # --- Fetch station metadata (Paikka), following OData pagination ---
    url = base_url + "Paikka"

    all_records = []
    while url:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        data = r.json()
        all_records.extend(data["value"])
        # "odata.nextLink" is absent on the last page, which ends the loop.
        url = data.get("odata.nextLink")
    df_paikka = pd.DataFrame(all_records)

    # --- Fetch variable (Suure) metadata ---
    var_url = base_url + "Suure"

    r = requests.get(var_url, headers=headers, timeout=30)
    r.raise_for_status()
    df_suure = pd.DataFrame(r.json()["value"])[["Suure_Id", "NimiEng", "Yksikko"]]

    # --- Merge station info with variable names ---
    df = df_paikka.merge(df_suure, on="Suure_Id", how="left")

    # --- Coordinate conversion setup ---
    # always_xy=True: transform() takes (easting, northing) and returns (lon, lat).
    transformer = Transformer.from_crs(3067, 4326, always_xy=True)

    def dms_str_to_decimal(s):
        """Convert a DMS-like digit string (e.g., '622536' = 62°25'36") to decimal degrees.

        Returns None for empty, non-numeric, or too-short values.
        """
        s = str(s).strip()
        # Need at least DDMMSS-style content: anything shorter than 5 digits
        # would make the degrees slice empty and int('') would raise.
        if not s or not s.isdigit() or len(s) < 5:
            return None
        deg = int(s[:-4])
        minutes = int(s[-4:-2])
        seconds = int(s[-2:])
        return deg + minutes / 60 + seconds / 3600

    def get_coords(row):
        """Return (lat, lon) in WGS84, preferring projected ETRS-TM35FIN fields."""
        e, n = row.get("KoordErTmIta"), row.get("KoordErTmPohj")
        if pd.notna(e) and pd.notna(n):
            try:
                lon, lat = transformer.transform(e, n)
                return lat, lon
            except Exception:
                # Fall through to the DMS fallback on any projection failure.
                pass
        # fallback: parse DMS-style KoordLat/KoordLong
        lat = dms_str_to_decimal(row.get("KoordLat"))
        lon = dms_str_to_decimal(row.get("KoordLong"))
        return lat, lon

    coords = df.apply(get_coords, axis=1)
    df["latitude"], df["longitude"] = zip(*coords)

    # --- Rename standardized columns (keep all others) ---
    rename_map = {
        "Paikka_Id": "gauge_id",
        "Nimi": "station_name",
        "PaaVesalNimi": "river",
        "NimiEng": "type",
    }
    df = df.rename(columns=rename_map)

    # --- Ensure standardized columns exist ---
    std_cols = [
        "gauge_id",
        "station_name",
        "river",
        "latitude",
        "longitude",
        "altitude",
        "area",
        "country",
        "source",
        "type",
    ]
    for col in std_cols:
        if col not in df.columns:
            df[col] = np.nan

    # --- Add constants / overwrite ---
    df["country"] = "Finland"
    df["source"] = "SYKE Hydrologiarajapinta"

    # --- Convert numeric safely (coerce any stray strings to NaN) ---
    if "latitude" in df.columns:
        df["latitude"] = pd.to_numeric(df["latitude"], errors="coerce")
    if "longitude" in df.columns:
        df["longitude"] = pd.to_numeric(df["longitude"], errors="coerce")

    return df.reset_index(drop=True)

Example usage:

# Fetch the full SYKE station metadata table (one row per station/variable pair).
df = get_finnish_hydro_metadata()

print(df)

Code for downloading the data

import requests
import pandas as pd
from datetime import datetime
from typing import Optional

def get_syke_data(
    station_code: str,
    variable: str = "discharge",
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    verbose: bool = False,
) -> pd.DataFrame:
    """
    Download and parse hydrological data from the SYKE Hydrologiarajapinta API.

    Best-effort contract: network or parsing failures return an EMPTY
    DataFrame (optionally logged when ``verbose``) rather than raising.

    Parameters
    ----------
    station_code : str
        Station identifier (Paikka_Id), e.g., "894". Note that each
        variable may have its own Paikka_Id for the same location.
    variable : str
        One of: 'discharge', 'stage', 'temperature'
    start_date : str, optional
        Start date in 'YYYY-MM-DD' format (inclusive).
    end_date : str, optional
        End date in 'YYYY-MM-DD' format (inclusive of that day's midnight).
    verbose : bool
        Print progress info.

    Returns
    -------
    pd.DataFrame
        Columns: ['time', variable], sorted by time.

    Raises
    ------
    ValueError
        If ``variable`` is not one of the supported names.
    """

    variable = variable.lower()
    # Map the public variable names to the API's OData entity sets.
    var_map = {
        "discharge": "Virtaama",
        "stage": "Vedenkorkeus",
        "temperature": "LampoPintavesi",
    }
    if variable not in var_map:
        raise ValueError("Variable must be 'discharge', 'stage', or 'temperature'.")

    base_url = f"http://rajapinnat.ymparisto.fi/api/Hydrologiarajapinta/1.1/odata/{var_map[variable]}"
    headers = {"Accept": "application/json"}

    # Build the OData $filter: station id, optionally bounded by midnight
    # timestamps (server-side filtering keeps the transfer small).
    filter_q = f"?$filter=Paikka_Id eq {station_code}"
    if start_date and end_date:
        start_iso = f"{start_date}T00:00:00"
        end_iso = f"{end_date}T00:00:00"
        filter_q += f" and Aika ge datetime'{start_iso}' and Aika le datetime'{end_iso}'"

    url = base_url + filter_q
    all_records = []
    pages = 0

    if verbose:
        print(f"Fetching {variable} data for station {station_code}...")

    try:
        # Follow OData pagination until no "odata.nextLink" is returned.
        while url:
            if verbose:
                print(f"→ Requesting: {url}")
            r = requests.get(url, headers=headers, timeout=30)
            if r.status_code == 404:
                if verbose:
                    print("No data found for this station.")
                return pd.DataFrame(columns=["time", variable])

            r.raise_for_status()
            data = r.json()
            records = data.get("value", [])
            if not records:
                break

            all_records.extend(records)
            url = data.get("odata.nextLink")
            pages += 1

        if not all_records:
            if verbose:
                print("No records returned for the specified period.")
            return pd.DataFrame(columns=["time", variable])

        # Convert to DataFrame; "Aika" is the timestamp, "Arvo" the value.
        df = pd.DataFrame(all_records)
        if "Aika" not in df or "Arvo" not in df:
            if verbose:
                print("Missing expected fields in API response.")
            return pd.DataFrame(columns=["time", variable])

        df["time"] = pd.to_datetime(df["Aika"], errors="coerce")
        df[variable] = pd.to_numeric(df["Arvo"], errors="coerce")
        df = df[["time", variable]].dropna(subset=["time"]).sort_values("time")

        # Client-side re-filtering: also applies when only one bound was
        # given (the server-side filter requires both). Compare against
        # explicit Timestamps rather than raw strings.
        if start_date:
            df = df[df["time"] >= pd.Timestamp(start_date)]
        if end_date:
            df = df[df["time"] <= pd.Timestamp(end_date)]
        if verbose:
            print(f"Retrieved {len(df)} records in {pages} page(s).")

        return df.reset_index(drop=True)

    except requests.exceptions.RequestException as e:
        if verbose:
            print(f"Request failed: {e}")
        return pd.DataFrame(columns=["time", variable])
    except (ValueError, KeyError, TypeError) as e:
        # Malformed/unexpected API payloads stay best-effort; genuine
        # programming errors are no longer silently swallowed.
        if verbose:
            print(f"Unexpected error: {e}")
        return pd.DataFrame(columns=["time", variable])

Example usage

# Example: get discharge data for station 894 in year 2000.
# station_code is documented as str — pass "894"; the OData filter is
# built with an f-string, so the resulting request is identical.
df = get_syke_data(
    station_code="894",
    variable="discharge",
    start_date="2000-01-01",
    end_date="2001-01-01",
)

print(df)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions