In [0]:
%pip install python-dotenv

In [0]:
import os
import time
import requests

from pyspark.sql import functions as F
from pyspark.sql.functions import col, lit
from pyspark.sql.types import StringType, ArrayType, DoubleType
from delta.tables import DeltaTable

from dotenv import load_dotenv

from PIL import Image, ImageDraw
import numpy as np
from shapely.geometry import MultiPolygon, Polygon
import ast 

import geopandas as gpd

from pyspark.sql import SparkSession

In [0]:
# Notebook configuration. IMAGE_SIZE is not referenced by the cells shown in this notebook.
IMAGE_SIZE = [512, 512]

# Select the catalog and schema once so the table names below can be used unqualified.
catalog_dev = "`land_topografisk-gdb_dev`"
schema_dev = "ai2025"
spark.sql(f"USE CATALOG {catalog_dev}")
spark.sql(f"USE SCHEMA {schema_dev}")

silver_table = "polygons_silver"
# The polygon cells further down read and update `snuplass_silver`, but no cell
# defined that name, so a fresh "Run All" stopped with a NameError.
# NOTE(review): assumes `snuplass_silver` is the polygon table named by
# `silver_table` - confirm the table name before running against real data.
snuplass_silver = silver_table
endepunkt_silver = "endepunkt_silver"

In [0]:
# Load variables from a .env file (if any), then read the Geonorge credentials
# from the environment. Either value is None when its variable is not set.
load_dotenv()
brukerid = os.environ.get("GEONORGE_BRUKERID")
passord = os.environ.get("GEONORGE_PASSORD")

def get_token():
    """
    Request a token for the ``wms.nib`` service from the Geonorge BAAT endpoint.

    The credentials come from the module-level ``brukerid`` and ``passord``.
    They are passed through ``params`` so that requests URL-encodes them; a
    password containing ``&``, ``#`` or ``+`` would otherwise corrupt the query.

    Returns:
        str: The response body with surrounding whitespace and backticks removed.

    Raises:
        RuntimeError: If either credential is missing.
        requests.RequestException: On network failure, timeout, or an HTTP
            error status (an error page must never be used as a token).
    """
    if not brukerid or not passord:
        raise RuntimeError("GEONORGE_BRUKERID og GEONORGE_PASSORD må være satt")

    response = requests.get(
        "https://baat.geonorge.no/skbaatts/req",
        params={
            "brukerid": brukerid,
            "passord": passord,
            "tjenesteid": "wms.nib",
            "retformat": "s",
        },
        timeout=30,  # without a timeout a stalled connection blocks the notebook forever
    )
    response.raise_for_status()
    # Whitespace is stripped as well as backticks: the token is later appended
    # verbatim to image URLs, where a trailing newline would break the request.
    return response.text.strip().strip("`")

# Fetch the first token and remember when it was obtained (wall-clock seconds).
token, token_start_time = get_token(), time.time()
# A token is treated as stale after 55 minutes (value in seconds).
token_lifetime = 3300

def refresh_token_if_needed():
    """
    Replace the module-level ``token`` once it is older than ``token_lifetime``.

    Age is measured with ``time.time()`` against ``token_start_time``. When the
    token is still fresh nothing happens; otherwise a new token is fetched with
    ``get_token()`` and ``token_start_time`` is reset.
    """
    global token, token_start_time

    token_age = time.time() - token_start_time
    if token_age <= token_lifetime:
        return

    print("🔄 Fornyer token...")
    token = get_token()
    token_start_time = time.time()

In [0]:
def generate_binary_mask(geom, out_path, bbox, width=512, height=512):
    """
    Rasterize a Polygon or MultiPolygon into a binary mask and save it.

    The mask is a single-channel ("L") image of ``width`` x ``height`` pixels:
    255 inside the geometry and 0 everywhere else. Interior rings (holes) are
    left at 0. ``bbox`` is mandatory and defines the coordinate frame that is
    stretched over the image.

    Args:
        geom: A shapely ``Polygon`` or ``MultiPolygon`` expressed in the same
            coordinates as ``bbox``.
        out_path: Destination handed to ``Image.save``.
        bbox: List or tuple ``(minx, miny, maxx, maxy)`` with a positive extent
            on both axes.
        width: Mask width in pixels.
        height: Mask height in pixels.

    Returns:
        bool: True when the mask was written. False for an invalid bbox, an
        unsupported geometry type, or any error while drawing or saving;
        problems are printed and never raised.
    """

    try:
        if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
            print(f"❌ Ugyldig bbox: {bbox}")
            return False

        minx, miny, maxx, maxy = bbox
        # Explicit check instead of `assert`: asserts vanish under `python -O`,
        # and a caught AssertionError printed an empty error message.
        if not (maxx > minx and maxy > miny):
            print(f"❌ Ugyldig bbox (ingen utstrekning): {bbox}")
            return False

        mask = Image.new("L", (width, height), 0)
        draw = ImageDraw.Draw(mask)

        def world_to_pixel(x, y):
            # Image rows grow downwards, so the y axis is flipped.
            px = int((x - minx) / (maxx - minx) * width)
            py = int((maxy - y) / (maxy - miny) * height)
            return (px, py)

        def ring_to_pixels(ring):
            # Index rather than unpack so coordinates carrying a Z value work too.
            return [world_to_pixel(c[0], c[1]) for c in ring.coords]

        def draw_single_polygon(polygon):
            holes = list(polygon.interiors)
            if not holes:
                draw.polygon(ring_to_pixels(polygon.exterior), outline=255, fill=255)
                return

            # Render polygons with holes on their own layer and merge the layer
            # into the mask. Clearing the holes directly on the shared mask could
            # erase another part of a MultiPolygon that lies inside the hole.
            layer = Image.new("L", (width, height), 0)
            layer_draw = ImageDraw.Draw(layer)
            layer_draw.polygon(ring_to_pixels(polygon.exterior), outline=255, fill=255)
            for hole in holes:
                layer_draw.polygon(ring_to_pixels(hole), fill=0)
            mask.paste(255, mask=layer)

        # Dispatch on geometry type
        if isinstance(geom, Polygon):
            draw_single_polygon(geom)
        elif isinstance(geom, MultiPolygon):
            for poly in geom.geoms:
                draw_single_polygon(poly)
        else:
            print(f"⚠️ Ukjent geometri-type: {type(geom)}")
            return False

        mask.save(out_path)
        return True

    except Exception as e:
        # Deliberately best-effort: one bad row must not stop the whole batch.
        print(f"❌ Feil ved maskegenerering: {e}")
        return False


In [0]:
def parse_bbox(value):
    """
    Convert a stored bbox value into a tuple of four floats.

    Accepts a list, tuple or numpy array with four numbers, or a string holding
    a Python literal of such a sequence (e.g. ``"[1, 2, 3, 4]"``).

    Args:
        value: The raw bbox value.

    Returns:
        tuple[float, float, float, float]: The four coordinates in input order.

    Raises:
        ValueError: If the value is None/NaN, cannot be parsed, is not a
            sequence of exactly four items, or contains a non-finite number.
            (Previously a wrongly shaped value silently produced ``None``.)
    """
    if value is None or (isinstance(value, float) and np.isnan(value)):
        raise ValueError("Verdi er tom eller NaN")

    if isinstance(value, str):
        try:
            # literal_eval only evaluates literals; it does not execute code.
            value = ast.literal_eval(value)
        except (ValueError, SyntaxError) as exc:
            raise ValueError(f"Kan ikke tolke bbox: {value!r}") from exc

    if not isinstance(value, (list, tuple, np.ndarray)) or len(value) != 4:
        raise ValueError(f"Forventet 4 koordinater, fikk: {value!r}")

    coords = tuple(float(v) for v in value)
    if not all(np.isfinite(c) for c in coords):
        raise ValueError(f"bbox inneholder ugyldige tall: {coords}")
    return coords

In [0]:
def download_image(url: str, out_path: str, retries: int = 3, backoff: float = 2.0) -> bool:
    """
    Download an image from ``url`` and write it to ``out_path``.

    Failed attempts are retried with exponential backoff: the wait before the
    next attempt is ``backoff * 2 ** (attempt - 1)`` seconds.

    An attempt counts as failed when the request raises, the status is not 200,
    the body is empty, or the Content-Type marks the body as text/XML/HTML/JSON.
    The last case matters because map services can answer HTTP 200 with an
    error document, which must not be stored as an image.

    Args:
        url: Address to fetch.
        out_path: File to write the response body to.
        retries: Maximum number of attempts.
        backoff: Base wait in seconds between attempts.

    Returns:
        bool: True when a file was written, False when every attempt failed.
    """
    for attempt in range(1, retries + 1):
        try:
            r = requests.get(url, timeout=30)
            content_type = (r.headers.get("Content-Type") or "").lower()
            is_error_document = not content_type.startswith("image/") and (
                content_type.startswith("text/")
                or any(marker in content_type for marker in ("xml", "html", "json"))
            )

            if r.status_code != 200:
                print(f"⚠️ Nedlasting feilet (status {r.status_code}) på forsøk {attempt}")
            elif not r.content:
                print(f"⚠️ Nedlasting feilet (tomt svar) på forsøk {attempt}")
            elif is_error_document:
                print(f"⚠️ Nedlasting feilet (svaret er ikke et bilde: {content_type}) på forsøk {attempt}")
            else:
                with open(out_path, "wb") as f:
                    f.write(r.content)
                return True
        except Exception as e:
            # NOTE: exception text from the HTTP library can include the full URL.
            print(f"❌ Feil ved nedlasting på forsøk {attempt}: {e}")

        if attempt < retries:
            # Exponential wait; for the defaults this gives 2 s, then 4 s.
            time.sleep(backoff * 2 ** (attempt - 1))

    # Callers append an access token to the URL, so the query string is left out
    # of the log; the output path identifies which download failed.
    safe_url = str(url).split("?", 1)[0]
    print(f"❌ Alle {retries} forsøk mislyktes for {safe_url} ({out_path})")
    return False

# Polygons Nedlastning

In [0]:
# Output folders for the polygon data: dom files, images and label masks.
snuplass_dom_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/storredom/"
snuplass_image_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/storreimage/"
snuplass_mask_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/storrelabel/"

# Create the folders up front, in the same order as they are declared;
# folders that already exist are left alone.
for _snuplass_dir in (snuplass_dom_output_dir, snuplass_image_output_dir, snuplass_mask_dir):
    os.makedirs(_snuplass_dir, exist_ok=True)

In [0]:
# Pull every polygon row that still needs a dom or image download into pandas.
df_poly_pending = (
    spark.read.table(snuplass_silver)
    .filter("dom_status = 'PENDING' OR image_status = 'PENDING'")
    .toPandas()
)

In [0]:
# Download the pending dom and image file for each polygon row and mark the
# row as DOWNLOADED in the silver table once the file has been written.

# Resolve the Delta table once; the handle is reused for every update instead
# of being looked up again for each row.
_snuplass_delta = DeltaTable.forName(spark, snuplass_silver)

for _, row in df_poly_pending.iterrows():
    row_hash = row["row_hash"]
    row_condition = f"row_hash = '{row_hash}'"

    if row["dom_status"] == "PENDING":
        # os.path.join avoids the double slash produced by the trailing "/" on the folder.
        dom_out = os.path.join(snuplass_dom_output_dir, f"dom_{row_hash}.png")
        if download_image(row["dom_path"], dom_out):
            _snuplass_delta.update(
                condition=row_condition,
                set={"dom_status": lit("DOWNLOADED")}
            )

    if row["image_status"] == "PENDING":
        # The token expires, so check its age before every image request.
        refresh_token_if_needed()
        base = row["image_path"]
        # The token is appended only to http(s) URLs; anything else is passed on unchanged.
        image_url = f"{base}{token}" if isinstance(base, str) and base.startswith("http") else base
        image_out = os.path.join(snuplass_image_output_dir, f"image_{row_hash}.png")

        if download_image(image_url, image_out):
            _snuplass_delta.update(
                condition=row_condition,
                set={"image_status": lit("DOWNLOADED")}
            )

In [0]:
# Polygon rows that do not have a mask yet, collected into pandas.
df_mask_pending = (
    spark.read.table(snuplass_silver)
    .filter("mask_status = 'PENDING'")
    .toPandas()
)

# Wrap the rows in a GeoDataFrame with "geometry" as the active geometry column.
# NOTE(review): assumes that column already holds geometry objects after
# toPandas(), and that they are in EPSG:25833 - confirm.
gdf = gpd.GeoDataFrame(
    df_mask_pending,
    geometry="geometry",
    crs="EPSG:25833",
)

In [0]:
# GENERATE MASKS
def _bbox_is_missing(value):
    """Return True for None or a float NaN (a missing cell can arrive as either)."""
    return value is None or (isinstance(value, float) and np.isnan(value))


# Resolve the Delta table once instead of once per generated mask.
_mask_delta = DeltaTable.forName(spark, snuplass_silver)

for _, row in gdf.iterrows():
    row_hash = row["row_hash"]
    filename = f"{row_hash}.png"
    out_path = os.path.join(snuplass_mask_dir, f"mask_{filename}")

    # Prefer Adjusted_bbox and fall back to bbox when it is missing.
    # A NaN Adjusted_bbox used to pass the `is not None` test, fail parsing and
    # skip the row without ever trying bbox. A value that is present but cannot
    # be parsed still skips the row, as before.
    adjusted_bbox = row.get("Adjusted_bbox")
    plain_bbox = row.get("bbox")
    if not _bbox_is_missing(adjusted_bbox):
        raw_bbox = adjusted_bbox
    elif not _bbox_is_missing(plain_bbox):
        raw_bbox = plain_bbox
    else:
        print(f"❌ Mangler gyldig bbox for {row_hash}")
        continue

    try:
        bbox = parse_bbox(raw_bbox)
    except Exception as e:
        print(f"❌ Feil ved parsing av bbox for {row_hash}: {e}")
        continue

    # Generate the binary mask and record success in the silver table.
    success = generate_binary_mask(row["geometry"], out_path, bbox)
    if success:
        _mask_delta.update(
            condition=f"row_hash = '{row_hash}'",
            set={"mask_status": lit("GENERATED")}
        )
        print(f"✅ Maske generert for {row_hash}")
    else:
        print(f"⚠️ Maske-generering feilet for {row_hash}")


# Endepunkts Nedlastning

In [0]:
# Output folders for the endpoint data: dom files and images.
endepunkt_dom_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/storreendepunkt_dom/"
endepunkt_image_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/storreendepunkt_images/"

# Create both folders; folders that already exist are left alone.
for _endepunkt_dir in (endepunkt_dom_output_dir, endepunkt_image_output_dir):
    os.makedirs(_endepunkt_dir, exist_ok=True)

In [0]:
# Pull every endpoint row that still needs a dom or image download into pandas.
df_ende_pending = (
    spark.read.table(endepunkt_silver)
    .filter("(dom_status = 'PENDING') OR (image_status = 'PENDING')")
    .toPandas()
)

In [0]:
# DOWNLOAD ENDPOINT DOM AND IMAGE
# Each pending file is fetched and the row is marked DOWNLOADED in the endpoint
# silver table once the file has been written.

# Resolve the Delta table once; the handle is reused for every update instead
# of being looked up again for each row.
_endepunkt_delta = DeltaTable.forName(spark, endepunkt_silver)

for _, row in df_ende_pending.iterrows():
    nodeid = row["nodeid"]
    node_condition = f"nodeid = '{nodeid}'"

    if row["dom_status"] == "PENDING":
        # os.path.join avoids the double slash produced by the trailing "/" on the folder.
        dom_out = os.path.join(endepunkt_dom_output_dir, f"dom_{nodeid}.png")
        if download_image(row["dom_path"], dom_out):
            _endepunkt_delta.update(
                condition=node_condition,
                set={"dom_status": lit("DOWNLOADED")}
            )

    if row["image_status"] == "PENDING":
        # The token expires, so check its age before every image request.
        refresh_token_if_needed()
        base = row["image_path"]
        # The token is appended only to http(s) URLs; anything else is passed on unchanged.
        image_url = f"{base}{token}" if isinstance(base, str) and base.startswith("http") else base
        image_out = os.path.join(endepunkt_image_output_dir, f"image_{nodeid}.png")

        if download_image(image_url, image_out):
            _endepunkt_delta.update(
                condition=node_condition,
                set={"image_status": lit("DOWNLOADED")}
            )