In [0]:
%pip install python-dotenv

In [0]:
import os
import time
import requests
from pyspark.sql.functions import lit
from delta.tables import DeltaTable
from dotenv import load_dotenv

In [0]:
# Presumably the image size in pixels (width, height) — TODO confirm; it is not
# referenced in the cells visible here.
IMAGE_SIZE = [512, 512]
# The catalog name is wrapped in backticks, presumably because it contains a hyphen.
catalog_dev = "`land_topografisk-gdb_dev`"
schema_dev = "ai2025"
# Make the dev catalog and schema the session defaults so the table name below
# can be used unqualified.
spark.sql(f"USE CATALOG {catalog_dev}")
spark.sql(f"USE SCHEMA {schema_dev}")

# Table read and updated further down (columns used: nodeid, image_wms, image_status).
nyendepunkt_silver = "utenfotottid_nyendepunkt_silver"

In [0]:
# Directory the downloaded images are written to; created if it does not exist.
# NOTE(review): the path already ends with "/", and the download loop adds another
# "/" when building file paths.
endepunkt_image_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/storreendepunkt_images/"
os.makedirs(endepunkt_image_output_dir, exist_ok=True)

In [0]:
# Load variables from a .env file into the process environment.
load_dotenv()
# GeoNorge credentials used by get_token(); os.getenv returns None when a
# variable is not set.
BRUKERID = os.getenv("GEONORGE_BRUKERID")
PASSORD  = os.getenv("GEONORGE_PASSORD")

def get_token():
    """
    Fetch an access token from GeoNorge and return it.

    Requests a token for the ``wms.nib`` service from the GeoNorge token
    endpoint, using the module-level ``BRUKERID`` and ``PASSORD`` credentials.

    Returns:
        str: The response body with any leading/trailing backticks removed.

    Raises:
        RuntimeError: If either credential is missing or empty.
        requests.RequestException: If the request fails, times out, or the
            service answers with an HTTP error status.
    """
    # Fail early instead of sending the literal string "None" as a credential.
    if not BRUKERID or not PASSORD:
        raise RuntimeError(
            "GEONORGE_BRUKERID and GEONORGE_PASSORD must be set to fetch a token"
        )

    # Passing the values via ``params`` lets requests URL-encode them, so
    # characters such as "&", "#" or "+" in the password cannot corrupt the query.
    response = requests.get(
        "https://baat.geonorge.no/skbaatts/req",
        params={
            "brukerid": BRUKERID,
            "passord": PASSORD,
            "tjenesteid": "wms.nib",
            "retformat": "s",
        },
        timeout=30,
    )
    # Without this check an HTTP error page would be returned as if it were a token.
    response.raise_for_status()
    raw_token = response.text.strip("`")
    return raw_token

# Fetch an initial token and record when it was obtained.
token = get_token()
token_start_time = time.time()
token_lifetime = 55 * 60  # seconds (55 minutes); older tokens are renewed by refresh_token_if_needed()

def refresh_token_if_needed():
    """
    Fetch a new token when the current one is older than ``token_lifetime``.

    Reads and rebinds the module-level ``token`` and ``token_start_time``;
    does nothing while the current token is still within its lifetime.
    """
    global token, token_start_time

    token_age = time.time() - token_start_time
    if token_age <= token_lifetime:
        # Still valid: keep the current token.
        return

    print("🔄 Fornyer token...")
    token = get_token()
    token_start_time = time.time()

In [0]:
def download_image(
    url: str, out_path: str, retries: int = 3, backoff: float = 2.0
) -> bool:
    """
    Download an image from a URL and store it at ``out_path``.

    The request is attempted up to ``retries`` times. Between failed attempts
    the function sleeps with exponential backoff: ``backoff`` seconds after the
    first failure, doubling after every further failure.

    Args:
        url: Address fetched with an HTTP GET (30 second timeout).
        out_path: File path the response body is written to.
        retries: Maximum number of attempts.
        backoff: Wait in seconds after the first failed attempt.

    Returns:
        True when a response with status 200 was written to ``out_path``,
        False when every attempt failed.
    """
    for attempt in range(1, retries + 1):
        try:
            r = requests.get(url, timeout=30)
            if r.status_code == 200:
                with open(out_path, "wb") as f:
                    f.write(r.content)
                return True
            else:
                print(
                    f"⚠️ Nedlasting feilet (status {r.status_code}) på forsøk {attempt}"
                )
        except Exception as e:
            # Best effort: report the problem and fall through to the retry logic.
            print(f"❌ Feil ved nedlasting på forsøk {attempt}: {e}")

        # Retry after a failure. The wait doubles per attempt (backoff, 2*backoff,
        # 4*backoff, ...); for the default retries=3 this equals the earlier
        # ``backoff * attempt`` schedule (2 s, then 4 s).
        if attempt < retries:
            sleep_time = backoff * 2 ** (attempt - 1)
            time.sleep(sleep_time)

    # NOTE(review): the URL is printed verbatim; callers that append an access
    # token to it will expose that token in the notebook output.
    print(f"❌ Alle {retries} forsøk mislyktes for {url}")
    return False

In [0]:
# Pull the rows still marked PENDING into pandas, keeping only the id and the
# WMS base URL needed by the download loop.
incomplete_df = (
    spark.read.table(nyendepunkt_silver)
    .where("image_status = 'PENDING'")
    .select(["nodeid", "image_wms"])
    .toPandas()
)

In [0]:
# Look the Delta table up once instead of once per downloaded image.
silver_table = DeltaTable.forName(spark, nyendepunkt_silver)

for _, row in incomplete_df.iterrows():
    nodeid = row["nodeid"]
    filename = f"{nodeid}.png"

    image_base = row["image_wms"]
    # Skip rows without a usable WMS URL (missing value or not starting with "http").
    if not (isinstance(image_base, str) and image_base.startswith("http")):
        continue

    # os.path.join avoids the doubled "/" that plain string concatenation
    # produces when the directory already ends with a separator.
    image_out_path = os.path.join(endepunkt_image_output_dir, f"image_{filename}")

    # A non-empty file already on disk (e.g. from a run that stopped before the
    # status update) counts as downloaded. Previously such rows were skipped
    # entirely and stayed PENDING on every later run.
    image_available = (
        os.path.isfile(image_out_path) and os.path.getsize(image_out_path) > 0
    )
    if not image_available:
        # Only renew the token when a request is actually going to be made.
        refresh_token_if_needed()
        image_url = f"{image_base}{token}"
        image_available = download_image(image_url, image_out_path)

    if image_available:
        silver_table.update(
            condition=f"nodeid = '{nodeid}'",
            set={"image_status": lit("DOWNLOADED")},
        )