In [0]:
%pip install python-dotenv

In [0]:
import os
import time
import requests
from PIL import Image, ImageDraw
import numpy as np
from shapely.geometry import MultiPolygon, Polygon
from pyspark.sql.functions import col, lit
from delta.tables import DeltaTable
import ast
import geopandas as gpd
from dotenv import load_dotenv
from shapely import wkb

In [0]:
# Image dimensions as [width, height] -- presumably pixels; not referenced by
# the other cells visible in this notebook (TODO confirm it is still needed).
IMAGE_SIZE = [512, 512]

# Names of the silver tables processed further down in the notebook.
silver_table = "polygons_silver"
endepunkt_table = "endepunkt_silver"
hospital_table = "hospitals_silver"
helicopter_table = "helicopters_silver"

# Select the catalog and schema so the unqualified table names above resolve.
# The catalog name is back-quoted because it contains a hyphen.
catalog_dev = "`land_topografisk-gdb_dev`"
schema_dev = "ai2025"
spark.sql(f"USE CATALOG {catalog_dev}")
spark.sql(f"USE SCHEMA {schema_dev}")

In [0]:
# Let python-dotenv populate the process environment, then read the Geonorge
# credentials from it. Either value is None when the variable is not set.
load_dotenv()
brukerid = os.environ.get("GEONORGE_BRUKERID")
passord = os.environ.get("GEONORGE_PASSORD")

def get_token():
    """
    Request a token from the Geonorge token service and return it.

    The credentials are taken from the module-level ``brukerid`` and
    ``passord`` values. They are sent as query parameters through
    ``params=`` so that characters such as ``&``, ``#`` or ``+`` in the
    password are URL-encoded instead of corrupting the query string.

    Returns:
        str: The response body with leading/trailing backticks removed.

    Raises:
        ValueError: If ``brukerid`` or ``passord`` is missing.
        requests.RequestException: If the request fails, times out, or the
            service answers with an HTTP error status.
    """
    if not brukerid or not passord:
        # Without this check the literal text "None" would be sent as a credential.
        raise ValueError("GEONORGE_BRUKERID og GEONORGE_PASSORD mangler")

    response = requests.get(
        "https://baat.geonorge.no/skbaatts/req",
        params={
            "brukerid": brukerid,
            "passord": passord,
            "tjenesteid": "wms.nib",
            "retformat": "s",
        },
        timeout=30,  # never hang the notebook on an unresponsive service
    )
    # Fail loudly rather than returning an error page as if it were a token.
    response.raise_for_status()
    return response.text.strip("`")

# Age in seconds after which refresh_token_if_needed() fetches a new token.
token_lifetime = 55 * 60  # seconds

# Fetch the first token and record when it was obtained.
token = get_token()
token_start_time = time.time()

def refresh_token_if_needed():
    """
    Fetch a new token once the current one is older than ``token_lifetime``.

    Rebinds the module-level ``token`` and ``token_start_time``; does nothing
    while the current token is still within its lifetime.
    """
    global token, token_start_time
    token_age = time.time() - token_start_time
    if token_age <= token_lifetime:
        return

    print("üîÑ Fornyer token...")
    token = get_token()
    token_start_time = time.time()

In [0]:
def generate_binary_mask(geom, out_path, bbox, width=512, height=512):
    """
    Rasterize a Polygon or MultiPolygon into a binary mask and save it.

    ``bbox`` must always be given and is interpreted as the coordinate frame
    ``(minx, miny, maxx, maxy)`` that is stretched over the whole
    ``width`` x ``height`` image. The y axis is flipped, so ``maxy`` maps to
    the top row. Pixels covered by the geometry are written as 255 and all
    other pixels, including interior rings (holes), as 0.

    Args:
        geom: A shapely ``Polygon`` or ``MultiPolygon``.
        out_path: Destination path handed to ``Image.save``.
        bbox: List or tuple ``(minx, miny, maxx, maxy)``.
        width: Mask width in pixels.
        height: Mask height in pixels.

    Returns:
        bool: True if the mask was saved; False on invalid input or on any
        error (a message is printed in both cases).
    """
    try:
        if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
            print(f"‚ùå Ugyldig bbox: {bbox}")
            return False

        minx, miny, maxx, maxy = bbox
        # Explicit check instead of `assert`, which is removed under `python -O`
        # and would then let a zero-size bbox reach the divisions below.
        if not (maxx > minx and maxy > miny):
            print(f"‚ùå Ugyldig bbox: {bbox}")
            return False

        # Handle geometry
        if isinstance(geom, Polygon):
            polygons = [geom]
        elif isinstance(geom, MultiPolygon):
            polygons = list(geom.geoms)
        else:
            print(f"‚ö†Ô∏è Ukjent geometritype: {type(geom)}")
            return False

        span_x = maxx - minx
        span_y = maxy - miny

        def world_to_pixel(x, y):
            px = int((x - minx) / span_x * width)
            py = int((maxy - y) / span_y * height)
            return (px, py)

        def ring_to_pixels(ring):
            # Index instead of unpacking so coordinates carrying a Z value work too.
            return [world_to_pixel(c[0], c[1]) for c in ring.coords]

        mask = Image.new("L", (width, height), 0)
        for polygon in polygons:
            # Each polygon is drawn on its own layer so that carving out its
            # holes cannot erase another polygon lying inside such a hole.
            layer = Image.new("L", (width, height), 0)
            draw = ImageDraw.Draw(layer)
            draw.polygon(ring_to_pixels(polygon.exterior), outline=255, fill=255)
            for hole in polygon.interiors:
                draw.polygon(ring_to_pixels(hole), outline=0, fill=0)
            # Copy the foreground pixels of this layer onto the final mask.
            mask.paste(255, mask=layer)

        mask.save(out_path)
        return True

    except Exception as e:
        print(f"‚ùå Feil ved maskegenerering: {e}")
        return False


In [0]:
def parse_bbox(value):
    """
    Convert a bounding-box value into a tuple of four floats.

    Accepts a list, tuple or NumPy array with exactly four elements, or a
    string holding the Python literal of such a sequence (for example
    ``"[1, 2, 3, 4]"``).

    Args:
        value: The raw bounding box.

    Returns:
        tuple[float, float, float, float]: The four values converted with ``float``.

    Raises:
        ValueError: If ``value`` is None or NaN, is a string that cannot be
            parsed as a literal, or is not a sequence of exactly four elements.
    """
    if value is None or (isinstance(value, float) and np.isnan(value)):
        raise ValueError("Verdi er tom eller NaN")
    if isinstance(value, str):
        try:
            value = ast.literal_eval(value)
        except (ValueError, SyntaxError) as exc:
            raise ValueError(f"Kan ikke tolke bbox-streng: {value!r}") from exc
    if isinstance(value, (list, tuple, np.ndarray)) and len(value) == 4:
        return tuple(float(v) for v in value)
    # Previously this fell through and returned None, hiding the bad value
    # from callers that only guard against exceptions.
    raise ValueError(f"Forventet 4 koordinater, fikk: {value!r}")

In [0]:
def download_image(url: str, out_path: str, retries: int = 3, backoff: float = 2.0) -> bool:
    """
    Try to download an image from a URL and store it locally.

    Args:
        url: Address to fetch. Anything that is not a non-empty string is
            rejected immediately without retrying.
        out_path: File path the response body is written to.
        retries: Maximum number of attempts.
        backoff: Wait in seconds after the first failed attempt; the wait
            doubles after every further failure.

    Returns:
        bool: True once a non-empty body with status 200 has been written to
        ``out_path``; False if the URL is invalid or every attempt failed.
    """
    if not isinstance(url, str) or not url:
        # Retrying cannot help here, so skip the attempts and their sleeps.
        print(f"Ugyldig URL, hopper over nedlasting: {url!r}")
        return False

    for attempt in range(1, retries + 1):
        try:
            r = requests.get(url, timeout=30)
            # An empty body would only produce an unreadable image file,
            # so it is handled as a failed attempt.
            if r.status_code == 200 and r.content:
                with open(out_path, "wb") as f:
                    f.write(r.content)
                return True
            print(f"‚ö†Ô∏è Nedlasting feilet (status {r.status_code}) p√• fors√∏k {attempt}")
        except Exception as e:
            print(f"‚ùå Feil ved nedlasting p√• fors√∏k {attempt}: {e}")

        # Retry on failure with exponential backoff: backoff, 2*backoff, 4*backoff, ...
        if attempt < retries:
            time.sleep(backoff * 2 ** (attempt - 1))

    print(f"‚ùå Alle {retries} fors√∏k mislyktes for {url}")
    return False

In [0]:
def download_pending_images(table: str, id_col: str, image_output_dir: str, dom_output_dir: str = None):
    """
    Download every image marked 'PENDING' in a table and mark it 'DOWNLOADED'.

    Rows are selected when ``dom_status`` and/or ``image_status`` equals
    'PENDING'; the table does not need to have both columns. For each
    successful download the matching status column of that row is updated
    in the Delta table.

    Args:
        table: Name of the table to read and update.
        id_col: Column identifying a row; also used in the output file names.
        image_output_dir: Directory for files downloaded from ``image_wms``.
        dom_output_dir: Directory for files downloaded from ``dom_wms``. When
            omitted, DOM downloads are skipped.
    """
    df = spark.read.table(table)
    cols = df.columns

    # The table does not need to have both dom_status and image_status
    conditions = []
    if "dom_status" in cols:
        conditions.append("dom_status = 'PENDING'")
    if "image_status" in cols:
        conditions.append("image_status = 'PENDING'")

    if not conditions:
        # An empty filter expression cannot be parsed by Spark, so stop here.
        print(f"Ingen statuskolonner (dom_status/image_status) i {table}; ingenting lastes ned")
        return

    filter_expr = " OR ".join(conditions)
    df_pending = df.filter(filter_expr).toPandas()

    delta_tbl = DeltaTable.forName(spark, table)

    for _, row in df_pending.iterrows():
        id_value = row[id_col]
        # Column expression instead of an interpolated SQL string, so quotes
        # in the id cannot break the condition. The id is compared as a string
        # literal, exactly as the SQL text did.
        row_match = col(id_col) == lit(str(id_value))

        # Download DOM images
        if dom_output_dir and "dom_status" in cols and row.get("dom_status") == "PENDING":
            dom_out = f"{dom_output_dir}/dom_{id_value}.png"
            if download_image(row["dom_wms"], dom_out):
                delta_tbl.update(
                    condition=row_match,
                    set={"dom_status": lit("DOWNLOADED")}
                )

        # Download images
        if "image_status" in cols and row.get("image_status") == "PENDING":
            refresh_token_if_needed()
            base = row["image_wms"]
            # The ticket is only appended to values that look like HTTP(S) URLs.
            image_url = f"{base}&TICKET={token}" if isinstance(base, str) and base.startswith("http") else base
            image_out = f"{image_output_dir}/image_{id_value}.png"

            if download_image(image_url, image_out):
                delta_tbl.update(
                    condition=row_match,
                    set={"image_status": lit("DOWNLOADED")}
                )


In [0]:
def _first_present_bbox(row):
    """Return the raw ``Adjusted_bbox`` value if set, else ``bbox``, else None."""
    for key in ("Adjusted_bbox", "bbox"):
        if key in row:
            raw = row[key]
            # Missing values may come back from pandas as NaN rather than None.
            if raw is not None and not (isinstance(raw, float) and np.isnan(raw)):
                return raw
    return None


def download_pending_labels(table: str, id_col: str, mask_output_dir: str):
    """
    Generate mask files for all rows marked 'PENDING' and mark them 'GENERATED'.

    Each row's WKB geometry is decoded and rasterized with
    ``generate_binary_mask`` into ``mask_output_dir``. The bounding box comes
    from ``Adjusted_bbox`` when that is set, otherwise from ``bbox``.

    Args:
        table: Name of the table to read and update.
        id_col: Column identifying a row; also used in the mask file name.
        mask_output_dir: Directory the mask PNG files are written to.
    """
    df_pending = spark.read.table(table) \
    .filter("mask_status = 'PENDING'") \
    .toPandas()
    df_pending["geometry"] = df_pending["geometry"].apply(lambda x: wkb.loads(bytes(x)) if x is not None else None)
    gdf = gpd.GeoDataFrame(df_pending, geometry="geometry", crs="EPSG:25833")

    # Look the table up once instead of once per generated mask.
    delta_tbl = DeltaTable.forName(spark, table)

    for _, row in gdf.iterrows():
        id_value = row[id_col]
        filename = f"{id_value}.png"
        out_path = f"{mask_output_dir}/mask_{filename}"

        # Try Adjusted_bbox first, then fall back to bbox
        raw_bbox = _first_present_bbox(row)
        if raw_bbox is None:
            print(f"‚ùå Mangler gyldig bbox for {id_value}")
            continue
        try:
            bbox = parse_bbox(raw_bbox)
        except Exception as e:
            print(f"‚ùå Feil ved parsing av bbox for {id_value}: {e}")
            continue

        # Generate binary mask
        success = generate_binary_mask(row["geometry"], out_path, bbox)
        if success:
            delta_tbl.update(
                # Column expression instead of interpolated SQL, so quotes in
                # the id cannot break the condition.
                condition=col(id_col) == lit(str(id_value)),
                set={"mask_status": lit("GENERATED")}
            )
            print(f"‚úÖ Maske generert for {id_value}")
        else:
            print(f"‚ö†Ô∏è Maske-generering feilet for {id_value}")


In [0]:
# Output directories for the polygons_silver run; each is created if missing.
snuplass_dom_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/storredom/"
os.makedirs(snuplass_dom_output_dir, exist_ok=True)

snuplass_image_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/storreimage/"
os.makedirs(snuplass_image_output_dir, exist_ok=True)

snuplass_mask_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/storrelabel/"
os.makedirs(snuplass_mask_dir, exist_ok=True)

# Download pending images and DOM files first, then generate pending masks.
download_pending_images(
    table=silver_table,
    id_col="row_hash",
    image_output_dir=snuplass_image_output_dir,
    dom_output_dir=snuplass_dom_output_dir,
)
download_pending_labels(
    table=silver_table,
    id_col="row_hash",
    mask_output_dir=snuplass_mask_dir,
)

In [0]:
# Output directories for the endepunkt_silver run; each is created if missing.
endepunkt_dom_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/storreendepunkt_dom/"
os.makedirs(endepunkt_dom_output_dir, exist_ok=True)

endepunkt_image_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/storreendepunkt_images/"
os.makedirs(endepunkt_image_output_dir, exist_ok=True)

# Only images and DOM files are downloaded for this table; no masks are generated.
download_pending_images(
    table=endepunkt_table,
    id_col="nodeid",
    image_output_dir=endepunkt_image_output_dir,
    dom_output_dir=endepunkt_dom_output_dir,
)

In [0]:
# Output directory for the hospitals_silver run; created if missing.
hospital_image_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/hospital_images/"
os.makedirs(hospital_image_output_dir, exist_ok=True)

# No DOM directory is passed, so only the image downloads run for this table.
download_pending_images(
    table=hospital_table,
    id_col="lokalid",
    image_output_dir=hospital_image_output_dir,
)

In [0]:
# Output directories for the helicopters_silver run; each is created if missing.
helicopter_image_output_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/helicopter_images/"
os.makedirs(helicopter_image_output_dir, exist_ok=True)

helicopter_mask_dir = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/helicopter_labels/"
os.makedirs(helicopter_mask_dir, exist_ok=True)

# Download pending images (no DOM directory is passed), then generate pending masks.
download_pending_images(
    table=helicopter_table,
    id_col="lokalid",
    image_output_dir=helicopter_image_output_dir,
)
download_pending_labels(
    table=helicopter_table,
    id_col="lokalid",
    mask_output_dir=helicopter_mask_dir,
)