Skip to content

Sweden (add more countries) #30

@thiagovmdon

Description

@thiagovmdon

Sweden

Related to issue #5

This code is built on the official SMHI API and simply wraps its structure in reusable functions. It covers not only streamflow but also other variables.

It includes code for listing stations (with their metadata, see #1) and for downloading time series.

The time series function also offers real-time data (#4) through the "latest-day" period.

import requests
import pandas as pd
import io
from typing import Union, List, Optional

# Client code for the SMHI HydroObs open-data API: fetches station metadata
# and time series data.
# API documentation (in Swedish): https://opendata.smhi.se/hydroobs/api

# Root of every request URL; endpoint paths are appended directly to it,
# so the trailing slash is significant.
BASE_URL = "https://opendata-download-hydroobs.smhi.se/api/version/latest/"

# Friendly parameter name -> numeric HydroObs parameter ID.
#
# | ID | Swedish Name              | English Name             | Unit   |
# |----|---------------------------|--------------------------|--------|
# | 1  | Vattenföring (Dygn)       | Discharge (Daily)        | m³/s   |
# | 2  | Vattenföring (15 min)     | Discharge (15 min)       | m³/s   |
# | 3  | Vattenstånd               | Water level              | cm     |
# | 4  | Vattendragstemperatur     | Water temperature        | °C     |
# | 5  | Isläggning                | Ice formation date       | —      |
# | 6  | Islossning                | Ice breakup date         | —      |
# | 7  | Istjocklek                | Ice thickness            | cm     |
# | 8  | Snödensitet               | Snow density             | g/cm³  |
# | 9  | Vatteninnehåll            | Water content (SWE)      | mm     |
# | 10 | Vattenföring (Månad)      | Discharge (Monthly)      | m³/s   |
SUPPORTED_PARAMETERS = {
    "discharge_daily": 1,
    "discharge_15min": 2,
    "water_level": 3,
    "water_temperature": 4,
    "ice_formation": 5,
    "ice_breakup": 6,
    "ice_thickness": 7,
    "snow_density": 8,
    "water_content": 9,
    "discharge_monthly": 10,
}

# Values accepted for the "period" segment of a data URL.
SUPPORTED_PERIODS = ["latest-day", "latest-months", "corrected-archive", "observations"]


def _get_parameters(timeout: float = 30.0) -> pd.DataFrame:
    """Return the available HydroObs parameters as a DataFrame.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame built from the ``"parameter"`` entry of the JSON
        response of ``parameter.json``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If the response has no ``"parameter"`` entry.
    """
    url = f"{BASE_URL}parameter.json"
    # Without an explicit timeout, requests can block forever on a stalled
    # connection.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return pd.DataFrame(response.json()["parameter"])


def _get_station_metadata(parameter_id: int, timeout: float = 30.0) -> pd.DataFrame:
    """Get metadata for all stations measuring a given parameter.

    Args:
        parameter_id: Numeric HydroObs parameter ID (e.g. 1 for daily discharge).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with one row per station and the columns ``id``,
        ``name``, ``latitude``, ``longitude``, ``from``, ``to`` (the last two
        converted from epoch milliseconds to timestamps), plus the derived
        columns ``years`` (record length in years) and ``active`` (whether
        ``to`` lies after the current local time). An empty station list
        yields an empty DataFrame with the same columns.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If the response has no ``"station"`` entry or a station
            lacks one of the expected fields.
    """
    url = f"{BASE_URL}parameter/{parameter_id}.json"
    # Without an explicit timeout, requests can block forever on a stalled
    # connection.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    stations = response.json()["station"]

    columns = ["id", "name", "latitude", "longitude", "from", "to"]
    records = [{key: station[key] for key in columns} for station in stations]
    # Passing the column names explicitly keeps the frame well-formed even
    # when the station list is empty.
    df = pd.DataFrame(records, columns=columns)

    # The API reports the record start/end as epoch milliseconds.
    for column in ("from", "to"):
        df[column] = pd.to_datetime(df[column], unit="ms")

    df["years"] = (df["to"] - df["from"]).dt.days / 365.25
    df["active"] = df["to"] > pd.Timestamp.today()
    return df


def get_metadata(parameter: str = "discharge_daily") -> pd.DataFrame:
    """Return station metadata for one named parameter.

    Args:
        parameter: A key of ``SUPPORTED_PARAMETERS`` (e.g. ``"discharge_daily"``
            or ``"water_temperature"``).

    Returns:
        The station table produced by ``_get_station_metadata`` with two extra
        columns: ``parameter_id`` and ``parameter_name``.

    Raises:
        ValueError: If ``parameter`` is not a supported parameter name.
    """
    param_id = SUPPORTED_PARAMETERS.get(parameter)
    if param_id is None:
        raise ValueError(f"Unsupported parameter: {parameter}")

    stations = _get_station_metadata(param_id)
    # Tag every row with the parameter it was queried for.
    return stations.assign(parameter_id=param_id, parameter_name=parameter)



import re

def get_data(
    station_id: int,
    parameter: Union[str, int] = "discharge_daily",
    period: str = "corrected-archive",
    start: Optional[str] = None,
    end: Optional[str] = None,
    verbose: bool = True,
    timeout: float = 30.0,
) -> pd.DataFrame:
    """Download a time series for one SMHI station and parameter.

    The CSV returned by the API mixes metadata, comments and notes with the
    data rows, so only lines that start with a date (``YYYY-MM-DD``) are
    parsed; everything else is ignored.

    Args:
        station_id: SMHI station ID.
        parameter: Either a key of ``SUPPORTED_PARAMETERS`` or a numeric
            parameter ID, which is used as given without validation.
        period: One of ``SUPPORTED_PERIODS``.
        start: Optional lower bound (inclusive), parsed with
            ``pd.to_datetime``.
        end: Optional upper bound (inclusive), parsed with
            ``pd.to_datetime``.
        verbose: If true, print the request URL and a summary of the result.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with the columns ``date``, ``value`` and ``quality``,
        sorted by date and re-indexed from zero. Rows whose date or value
        cannot be parsed are dropped.

    Raises:
        ValueError: If ``parameter`` is an unsupported name or ``period`` is
            not one of ``SUPPORTED_PERIODS``.
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # --- parameter setup
    if isinstance(parameter, str):
        if parameter not in SUPPORTED_PARAMETERS:
            raise ValueError(f"Unsupported parameter: {parameter}")
        parameter_id = SUPPORTED_PARAMETERS[parameter]
    else:
        parameter_id = parameter

    if period not in SUPPORTED_PERIODS:
        raise ValueError(f"Invalid period: must be one of {SUPPORTED_PERIODS}")

    url = f"{BASE_URL}parameter/{parameter_id}/station/{station_id}/period/{period}/data.csv"
    if verbose:
        print(f"Downloading data from {url}")

    # Without an explicit timeout, requests can block forever on a stalled
    # connection.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # --- keep only lines that start with a date like "1984-08-17"
    date_pattern = re.compile(r"^\d{4}-\d{2}-\d{2}")

    records = []
    for line in response.text.splitlines():
        if not date_pattern.match(line):
            continue
        fields = [field.strip() for field in line.split(";")]
        # Short rows are padded so value/quality default to None.
        fields += [None] * (3 - len(fields))
        records.append(tuple(fields[:3]))

    df = pd.DataFrame(records, columns=["date", "value", "quality"])

    # --- clean and convert; unparsable dates/values become NaT/NaN and are dropped
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df["value"] = pd.to_numeric(df["value"], errors="coerce")
    df = df.dropna(subset=["date", "value"]).sort_values("date")

    if start:
        df = df[df["date"] >= pd.to_datetime(start)]
    if end:
        df = df[df["date"] <= pd.to_datetime(end)]

    if verbose:
        if not df.empty:
            print(f"Final dataset: {len(df)} rows from {df['date'].min().date()} to {df['date'].max().date()}")
        else:
            print("No valid numeric rows found after cleanup")

    return df.reset_index(drop=True)

Example usage:

# Station metadata for daily discharge (the default parameter of get_metadata)
meta_q = get_metadata()
print(meta_q.head())

# Station metadata for water temperature
meta_temp = get_metadata("water_temperature")
print(meta_temp.head())

# Full time series for station 2357 from the "corrected-archive" period
df = get_data(2357, "discharge_daily", period="corrected-archive")

# Latest day only (period "latest-day"); the real-time use case from issue #4
df = get_data(2357, "discharge_daily", period="latest-day")

# Default period ("corrected-archive") restricted to a given date interval
df = get_data(2357, "discharge_daily", start="2015-01-01", end="2024-12-31")


Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions