In [1]:
# Pull every public SymPy name into the notebook namespace and enable
# SymPy's pretty-printed output for expressions.
# NOTE(review): none of the code visible below uses SymPy - confirm this
# cell is still needed before keeping the star-import.
from sympy import *
init_printing()

In [None]:
import os
from time import sleep

import pandas as pd
import requests

# ---------------------------------------------
# 1) CONFIGURATION
# ---------------------------------------------
# The key can be supplied through the DMI_API_KEY environment variable so it
# does not have to live in the notebook.
# NOTE(review): the literal fallback keeps existing runs working, but it is a
# credential committed to source - rotate it and drop the fallback when possible.
API_KEY = os.environ.get("DMI_API_KEY") or "56642815-d535-418b-bda2-30a8a8ef4999"
DMI_URL = "https://dmigw.govcloud.dk/v2/metObs/collections/observation/items"
STATION = "06102"

# Inclusive download window; both ends are timezone-aware UTC timestamps.
START = pd.Timestamp(2002, 1, 1, tz="UTC")
END = pd.Timestamp(2023, 12, 31, 23, tz="UTC")

# Values sent as `parameterId`, one download per entry.
PARAMETERS = [
    "temp_min_past1h",
    "temp_max_past1h",
    "temp_mean_past1h",
    "humidity_past1h",
    "pressure",
    "wind_speed_past1h",
    "wind_gust_always_past1h",
    "wind_dir_past1h",
    "precip_past1h",
    "precip_dur_past1h",
    "radia_glob_past1h",
    "sun_last1h_glob",
]

# ---------------------------------------------
# 2) FETCH WEATHER WITH PAGINATION
# ---------------------------------------------
def fetch_paginated_param(session, param_id, max_retries=4, timeout=600, page_limit=10000):
    """Download every observation of one parameter, following pagination.

    The first request is built from the module-level ``DMI_URL``, ``API_KEY``,
    ``STATION``, ``START`` and ``END``. Every later request uses the ``next``
    link of the previous response as-is, with no extra query parameters.

    Args:
        session: Object exposing a ``requests.Session``-style ``get`` method.
        param_id: Value sent as ``parameterId``.
        max_retries: Consecutive timeouts tolerated on a single page.
        timeout: Per-request timeout in seconds handed to ``session.get``.
        page_limit: Value sent as ``limit`` on the first request.

    Returns:
        A list with the ``features`` entries of all pages, in arrival order.

    Raises:
        RuntimeError: If one page times out more than ``max_retries`` times
            in a row. The last timeout is attached as ``__cause__``.
        requests.exceptions.HTTPError: On an error HTTP status (not retried).
    """
    print(f"Henter {param_id} ...")
    url = DMI_URL
    params = {
        "api-key": API_KEY,
        "stationId": STATION,
        "parameterId": param_id,
        "datetime": f"{START.isoformat()}/{END.isoformat()}",
        "limit": page_limit,
    }

    all_feats = []
    retries = 0

    while url:
        try:
            r = session.get(url, params=params, timeout=timeout)
        except requests.exceptions.Timeout as exc:
            # Timeout is the parent of both ConnectTimeout and ReadTimeout, so
            # a slow connect is retried just like a slow read.
            retries += 1
            if retries > max_retries:
                raise RuntimeError(
                    f"Timeout gentaget {max_retries} gange for {param_id}"
                ) from exc
            print(f"⚠ Timeout – prøver igen ({retries}/{max_retries}) ...")
            sleep(2 * retries)  # back off a little longer on each retry
            continue  # same url/params: the failed page is requested again

        r.raise_for_status()
        js = r.json()
        all_feats.extend(js.get("features", []))

        # Follow the first link marked rel="next"; stop when there is none.
        url = next(
            (link.get("href") for link in js.get("links", []) if link.get("rel") == "next"),
            None,
        )
        params = None  # query parameters are only sent with the first request
        retries = 0  # reset after a successful page
        sleep(0.1)

    print(f"  → {len(all_feats):,} rækker hentet")
    return all_feats


# Download each parameter into its own single-column frame indexed by
# observation time, then align all of them side by side.
dfs = []

# The context manager releases the session's pooled connections when done.
with requests.Session() as session:
    for param in PARAMETERS:
        feats = fetch_paginated_param(session, param)
        if not feats:
            continue  # nothing returned for this parameter

        df = pd.json_normalize(feats)
        df["time"] = pd.to_datetime(df["properties.observed"])
        df = (
            df[["time", "properties.value"]]
            .rename(columns={"properties.value": param})
            .set_index("time")
        )
        # Drop repeated timestamps per parameter *before* aligning: a
        # column-wise concat cannot reindex frames with duplicate labels.
        df = df[~df.index.duplicated(keep="first")]
        dfs.append(df)

if not dfs:
    # pd.concat would raise a less helpful ValueError on an empty list.
    raise ValueError("Ingen vejrdata hentet for nogen parameter")

# Every input index is unique, so the aligned result is unique as well.
weather = pd.concat(dfs, axis=1).sort_index()

print("SAMLET VEJR‑DATA:", weather.shape)

# ---------------------------------------------
# 3) READ WATER LEVEL + CONVERT TO UTC
# ---------------------------------------------
# Load the CSV with its "time" column parsed as the index, ordered by time.
water_raw = pd.read_csv(
    "/Users/nicolaigarderhansen/Desktop/Bachelorprojekt/Data/28.02_Vandstand (DMP)_Time.csv",
    index_col="time",
    parse_dates=["time"],
)
water_raw = water_raw.sort_index()

# Interpret the timestamps as Copenhagen local time, then express them in UTC.
# NOTE(review): tz_localize is called with its default DST handling, which
# raises on ambiguous or non-existent local times - confirm the file's
# timestamps are unaffected by the DST transitions.
water = water_raw.tz_localize("Europe/Copenhagen")
water = water.tz_convert("UTC")

# Keep only the download window and name every data column "vandstand".
water = water.loc[START:END]
water = water.rename(columns=lambda _name: "vandstand")

print(f"VANDSTAND‑DATA: {water.shape}")

# ---------------------------------------------
# 4) MERGE & SAVE
# ---------------------------------------------
# Outer join on the index: a row is kept if either table has that index value.
data = weather.join(other=water, how="outer")
print(f"SAMLET DATA: {data.shape}")

# Write the combined table to the current working directory.
data.to_csv(path_or_buf="model_data_hourly_raw_UTC.csv")
print("✔ Gemte: model_data_hourly_raw_UTC.csv")

# Show the first rows as a quick sanity check.
print(data.head(n=5))

Henter temp_min_past1h ...
