In [None]:
import datetime
import pprint

import requests

import pandas as pd
import matplotlib.pyplot as plt

In [None]:
def fetch_datastream(sta_url, datastream_id, start_datetime=None, end_datetime=None, timeout=60):
    """Return a pandas dataframe with the data and a dictionary with the metadata.

    Parameters
    ----------
    sta_url : str
        Base url of the STA server. Something like https://sta.gfz-potsdam.de
        or https://service.tereno.net/eifelrur.sta.stamplate
        A trailing slash is tolerated.
    datastream_id : int
        The id of the datastream to fetch.
    start_datetime : Optional[datetime.datetime]
        The optional start date of the query. If specified, no earlier data
        will be fetched. It is sent to the server as ``start_datetime.isoformat()``.
        NOTE(review): a timezone-aware value is presumably required by the
        server - confirm before passing naive datetimes.
    end_datetime : Optional[datetime.datetime]
        The optional end date of the query. If set, no later data will be fetched.
        The same remark about timezone-aware values applies.
    timeout : Optional[float]
        Timeout in seconds that is handed to every ``requests.get`` call.
        ``None`` disables the timeout and waits indefinitely.

    Returns
    -------
    df : pd.DataFrame
        Dataframe that contains a `date` column and a column with the data.
        The `date` column is parsed from the resultTime value of each observation
        (missing when the server reports no resultTime for an observation).
        The data column is taken from the result value and is named after the
        observed property. This way we return "Air temperature", "Soil moisture", etc.
        Both columns are present even if the query matched no observations.
    metadata : dict
        The JSON document of the datastream as returned by the STA server,
        requested with the Thing (including its Locations), the Sensor and
        the ObservedProperty expanded.

    Raises
    ------
    requests.HTTPError
        If the server answers one of the requests with an error status.
    """
    # Avoid a double slash in the request urls if the base url ends with "/".
    datastream_url = f"{sta_url.rstrip('/')}/v1.1/Datastreams({datastream_id})"

    metadata_response = requests.get(
        datastream_url,
        params={"$expand": "Thing($expand=Locations),Sensor,ObservedProperty"},
        timeout=timeout,
    )
    metadata_response.raise_for_status()
    metadata = metadata_response.json()
    output_column = metadata["ObservedProperty"]["name"]

    filters = []
    if start_datetime is not None:
        filters.append(f"resultTime ge {start_datetime.isoformat()}")
    if end_datetime is not None:
        filters.append(f"resultTime le {end_datetime.isoformat()}")

    # https://fraunhoferiosb.github.io/FROST-Server/sensorthingsapi/requestingData/STA-Tailoring-Responses.html
    # $top is the page size we ask for; the server is free to return fewer entries.
    common_params_for_data_queries = {"$top": 1000}
    if filters:
        common_params_for_data_queries["$filter"] = " and ".join(filters)

    data_url = f"{datastream_url}/Observations"
    collected_data = []
    skip = 0
    while True:
        data_response = requests.get(
            data_url,
            params={"$skip": skip, **common_params_for_data_queries},
            timeout=timeout,
        )
        data_response.raise_for_status()
        page = data_response.json()["value"]
        if not page:
            # An empty page means that we have read everything.
            break
        for data_point in page:
            result_time = data_point["resultTime"]
            if result_time is not None:
                # Older python versions can't parse values like 2023-11-16T00:00:00Z
                # However, they can parse 2023-11-16T00:00:00+00:00 - which is equivalent.
                result_time = datetime.datetime.fromisoformat(result_time.replace("Z", "+00:00"))
            collected_data.append({
                "date": result_time,
                output_column: data_point["result"],
            })
        # Advance by the number of entries we actually received. The server may cap
        # the page size below the requested $top; skipping by $top would then
        # silently drop observations.
        skip += len(page)

    # Name the columns explicitly so that an empty result still has them.
    # dict.fromkeys drops the duplicate if the observed property is called "date".
    columns = list(dict.fromkeys(["date", output_column]))
    df = pd.DataFrame(collected_data, columns=columns)

    return df, metadata

In [None]:
# Base url of the STA server that is queried by the cells below.
sta_url = "https://sta.gfz-potsdam.de"
# The datastream_link_id for the alaska mountains in SMS is 610.
# I looked that up in the MaTS database.
# NOTE(review): the comment above mentions 610, but the id used here is 607 -
# confirm which value is intended (they may be ids of different systems).
datastream_id = 607

In [None]:
# Fetch all observations of the configured datastream together with its metadata.
# No time window is given, so the complete time series is requested page by page.
df, metadata = fetch_datastream(sta_url, datastream_id)

# To fetch only a subset, pass a start and/or end datetime instead:
# df, metadata = fetch_datastream(
#     sta_url,
#     datastream_id,
#     start_datetime=datetime.datetime(2015, 1, 1, 0, 0, 0, 0, datetime.timezone.utc),
#     end_datetime=datetime.datetime(2016, 1, 1, 0, 0, 0, 0, datetime.timezone.utc),
# )

In [None]:
# Pretty-print the datastream metadata returned by fetch_datastream.
pprint.pprint(metadata)

In [None]:
# Preview the first rows of the fetched observations.
df.head()

In [None]:
# fetch_datastream names the data column after the observed property, so read that
# name from the metadata instead of hard-coding it. This keeps the plot working
# when datastream_id points to a different variable.
value_column = metadata["ObservedProperty"]["name"]
# Unit symbol for the axis label, if the metadata provides one.
unit_symbol = (metadata.get("unitOfMeasurement") or {}).get("symbol")

fig, ax = plt.subplots()
ax.plot(df["date"], df[value_column])
ax.set_xlabel("date")
ax.set_ylabel(f"{value_column} [{unit_symbol}]" if unit_symbol else value_column)
# The trailing semicolon suppresses the repr of the returned Text object.
ax.set_title(f"Datastream {datastream_id}: {value_column}");