In [0]:
from pyspark.sql.functions import *
from datetime import datetime
import time
import pandas as pd
import requests

In [0]:
# --- Configuration -----------------------------------------------------------
# The catalog name contains hyphens, so it is kept wrapped in backticks for SQL.
catalog_dev = "`land_auto-gen-kart_dev`"
schema_dev = "dl_bildesegmentering"
bronze_table = "endepunkt_bronze"

# IDs of the municipalities (kommuner) to fetch data for.
kommune_ider = ["4621", "4618", "3411", "3422"]

# Select the catalog, create the schema if it is missing, and make it the
# current schema so that later cells can use unqualified table names.
for statement in (
    f"USE CATALOG {catalog_dev}",
    f"CREATE SCHEMA IF NOT EXISTS {schema_dev}",
    f"USE SCHEMA {schema_dev}",
):
    spark.sql(statement)

In [0]:
# Create the bronze Delta table if it does not already exist. The table name is
# unqualified, so it is resolved against the current catalog/schema.
# Columns: node id, x/y coordinates, the WKT geometry string, the municipality
# id the row was fetched for, and the timestamp of the fetch.
q = f"""
CREATE TABLE IF NOT EXISTS {bronze_table} (
    nodeid STRING,
    x DOUBLE,
    y DOUBLE,
    wkt STRING,
    kommune_id STRING,
    hentet_tid TIMESTAMP
) USING DELTA
"""
spark.sql(q)

In [0]:
def _hent_alle_sider(url, headers, params, kommune_id, max_retries):
    """Yield every entry of "objekter" from all pages of a paginated endpoint.

    Pages are followed through ``metadata.neste.start`` in the JSON response
    until no continuation token is returned.

    Each page is requested up to ``max_retries`` times. Failed attempts are
    followed by an exponential backoff (1s, 2s, 4s, ...). The attempt counter
    is reset after every successfully fetched page, so transient errors on
    different pages do not add up to a failure.

    Args:
        url: Endpoint URL.
        headers: HTTP headers sent with every request.
        params: Base query parameters; copied, never mutated.
        kommune_id: Only used as a prefix in the retry log message.
        max_retries: Maximum number of attempts per page.

    Raises:
        requests.exceptions.RequestException: when a page still fails on its
            last allowed attempt.
    """
    side_params = dict(params)  # private copy; "start" is added while paging
    attempt = 0
    while True:
        try:
            response = requests.get(
                url, headers=headers, params=side_params, timeout=10
            )
            # Short pause after every request (presumably to go easy on the API).
            time.sleep(0.5)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            attempt += 1
            if attempt >= max_retries:
                # Out of attempts: fail right away instead of sleeping first.
                raise
            wait = 2 ** (attempt - 1)
            print(
                f"[{kommune_id}] ‚ö†Ô∏è Feil ved henting ({e}), pr√∏ver igjen om {wait}s..."
            )
            time.sleep(wait)
            continue

        attempt = 0  # this page succeeded; the next page gets a fresh budget
        yield from data.get("objekter") or []

        next_start = (data.get("metadata") or {}).get("neste", {}).get("start")
        if not next_start:
            return
        side_params["start"] = next_start


def _parse_point_z(wkt):
    """Return ``(x, y)`` as floats from a ``POINT Z`` WKT string.

    Returns ``(None, None)`` when ``wkt`` is empty or does not start with
    ``"POINT Z"``. Any third (z) value is ignored.
    """
    if not wkt or not wkt.startswith("POINT Z"):
        return None, None
    coords = wkt.replace("POINT Z", "").replace("(", "").replace(")", "").split()
    return float(coords[0]), float(coords[1])


def hent_wkt_koordinater(kommune_id: str, srid="UTM33", max_retries=10):
    """
    Fetch WKT geometry and coordinates for road-network nodes in a municipality.

    Works in two passes against the NVDB "vegnett" v4 API:

    1. Page through ``veglenkesekvenser`` (filtered with
       ``vegsystemreferanse=S``; NOTE(review): presumably a road-category
       filter, confirm against the NVDB documentation) and collect the node
       id of every port.
    2. Page through ``noder`` and keep only the nodes collected in pass 1.

    Args:
        kommune_id: Municipality id passed as the ``kommune`` query parameter.
        srid: Value passed as the ``srid`` query parameter.
        max_retries: Maximum number of attempts per page request.

    Returns:
        dict mapping node id to ``{"x", "y", "wkt", "real"}`` where

        * ``wkt`` is the node's WKT string (``None`` if absent),
        * ``x``/``y`` are floats parsed from a ``POINT Z`` WKT, else ``None``,
        * ``real`` is ``True`` only when the node has exactly one port and
          that port's ``portnummer`` is 1 or 2.

    Raises:
        requests.exceptions.RequestException: when a page cannot be fetched
            within ``max_retries`` attempts.
    """
    node_url = "https://nvdbapiles.atlas.vegvesen.no/vegnett/api/v4/noder"
    veglenkesekvens_url = (
        "https://nvdbapiles.atlas.vegvesen.no/vegnett/api/v4/veglenkesekvenser"
    )
    headers = {
        "Accept": "application/json",
        "X-Client": "Systemet for vegobjekter",
    }
    node_params = {"srid": srid, "kommune": kommune_id}
    veglenkesekvens_params = {
        "srid": srid,
        "kommune": kommune_id,
        "vegsystemreferanse": "S",
    }

    # Pass 1: ids of all nodes referenced by the selected link sequences.
    # A set gives O(1) membership tests in pass 2.
    nodes = {
        (port.get("tilkobling") or {}).get("nodeid")
        for obj in _hent_alle_sider(
            veglenkesekvens_url,
            headers,
            veglenkesekvens_params,
            kommune_id,
            max_retries,
        )
        for port in obj.get("porter") or []
    }

    # Pass 2: geometry and port information for exactly those nodes.
    nodeid_dict = {}
    for obj in _hent_alle_sider(
        node_url, headers, node_params, kommune_id, max_retries
    ):
        if obj["id"] not in nodes:
            continue

        porter = obj.get("porter") or []
        if len(porter) == 1:
            portnummer = (porter[0].get("tilkobling") or {}).get("portnummer")
            real = portnummer in (1, 2)
        else:
            real = False

        # "geometri" may be missing or null; both yield wkt=None.
        wkt = (obj.get("geometri") or {}).get("wkt")
        x, y = _parse_point_z(wkt)

        nodeid_dict[obj["id"]] = {
            "x": x,
            "y": y,
            "wkt": wkt,
            "real": real,
        }

    return nodeid_dict

In [0]:
# Load end-point nodes for each configured municipality into the bronze table.
# Only node ids not already stored for that municipality are appended.

# The target schema is the same for every municipality, so look it up once.
bronze_table_schema = spark.table(bronze_table).schema

for kommune_id in kommune_ider:
    print(f"üì° Henter data for kommune {kommune_id}")

    nodeid_dict = hent_wkt_koordinater(kommune_id)

    # One timestamp per fetched batch (naive local time of the driver), so all
    # rows from the same fetch share the same `hentet_tid`.
    hentet_tid = datetime.now()

    # Keep only nodes flagged as "real" that have a geometry and parsed
    # coordinates. Compare against None explicitly: a coordinate of 0.0 is
    # valid and must not be dropped by a truthiness check.
    rows = [
        {
            "nodeid": nodeid,
            "x": row["x"],
            "y": row["y"],
            "wkt": row["wkt"],
            "kommune_id": kommune_id,
            "hentet_tid": hentet_tid,
        }
        for nodeid, row in nodeid_dict.items()
        if row["real"]
        and row["wkt"]
        and row["x"] is not None
        and row["y"] is not None
    ]

    if not rows:
        print(f"‚ö†Ô∏è Ingen gyldige rader for kommune {kommune_id}")
        continue

    # Let Spark infer the types, then cast and reorder the columns to match
    # the bronze table's schema.
    df_bronze = spark.createDataFrame(rows).select(
        [
            col(field.name).cast(field.dataType).alias(field.name)
            for field in bronze_table_schema.fields
        ]
    )

    existing = (
        spark.read.table(bronze_table)
        .filter(col("kommune_id") == kommune_id)
        .select("nodeid")
    )

    # Anti-join: drop every node id already stored for this municipality.
    df_new = df_bronze.join(existing, on="nodeid", how="left_anti")

    new_rows = df_new.count()
    if new_rows > 0:
        df_new.write.format("delta").mode("append").saveAsTable(bronze_table)
        print(
            f"‚úÖ Skrev {new_rows} nye rader for kommune {kommune_id} til bronze-tabellen"
        )
    else:
        print(f"‚ö†Ô∏è Ingen nye rader for kommune {kommune_id}")