In [0]:
%pip install python-dotenv

In [0]:
import os
import time
import requests
from PIL import Image, ImageDraw
import numpy as np
from shapely.geometry import MultiPolygon, Polygon
from pyspark.sql.functions import col, lit, expr
from delta.tables import DeltaTable
import ast
import geopandas as gpd
from dotenv import load_dotenv
from shapely import wkb

In [0]:
# Target image size in pixels.
IMAGE_SIZE = [512, 512]

# Unity Catalog location used by every table lookup below. The catalog name
# contains dashes, so it has to stay backtick-quoted inside SQL statements.
catalog_dev = "`land_auto-gen-kart_dev`"
schema_dev = "dl_bildesegmentering"
spark.sql(f"USE CATALOG {catalog_dev}")
spark.sql(f"USE SCHEMA {schema_dev}")

# Table names, resolved relative to the catalog/schema selected above.
silver_table = "polygons_silver"
utenS_table = "utensnuplass_silver"
endepunkt_table = "endepunkt_silver"
hospital_table = "hospitals_gold"
helipad_table = "helipads_silver"
# No trailing comma here: a stray comma turns the name into a 1-tuple, which
# spark.read.table() / DeltaTable.forName() cannot use as a table name.
noHelipad_table = "nohelipads"
blokkmark_table = "blokkmark_silver"

In [0]:
# Load variables from a .env file (if any), then read the Geonorge
# credentials from the process environment. Either value is None when unset.
load_dotenv()
brukerid = os.environ.get("GEONORGE_BRUKERID")
passord = os.environ.get("GEONORGE_PASSORD")


def get_token():
    """
    Fetch a token from the Geonorge BAAT token service and return it.

    Uses the module-level ``brukerid`` and ``passord`` credentials.

    Returns:
        str: the token text with surrounding whitespace and backticks removed.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            service answers with an HTTP error status.
    """
    response = requests.get(
        "https://baat.geonorge.no/skbaatts/req",
        # Passing the query through ``params`` lets requests URL-encode the
        # credentials, so characters such as "&", "#" or "+" in the password
        # no longer corrupt the query string.
        params={
            "brukerid": brukerid,
            "passord": passord,
            "tjenesteid": "wms.nib",
            "retformat": "s",
        },
        # Never hang the notebook forever on an unresponsive token service.
        timeout=30,
    )
    # Fail loudly instead of handing an HTML/plain-text error body back as a token.
    response.raise_for_status()
    return response.text.strip("` \t\r\n")


# How long a token is used before it is replaced, in seconds (55 minutes).
token_lifetime = 55 * 60

# Fetch the initial token and record when it was obtained.
token = get_token()
token_start_time = time.time()


def refresh_token_if_needed():
    """
    Replace the module-level token with a new one once it is older than
    ``token_lifetime`` seconds; otherwise do nothing.
    """
    global token, token_start_time

    token_age = time.time() - token_start_time
    if token_age <= token_lifetime:
        return

    print("üîÑ Fornyer token...")
    token = get_token()
    token_start_time = time.time()

In [0]:
def generate_binary_mask(geom, out_path, bbox, width=512, height=512):
    """
    Rasterise a Polygon or MultiPolygon into a binary mask and save it as PNG.

    Pixels covered by the geometry are set to 255, everything else to 0.
    Interior rings (holes) are kept as background.

    Args:
        geom: shapely ``Polygon`` or ``MultiPolygon``, expressed in the same
            coordinate frame as ``bbox``.
        out_path: destination path handed to ``Image.save``.
        bbox: list/tuple ``(minx, miny, maxx, maxy)`` that is mapped onto the
            full image; must have positive width and height.
        width: mask width in pixels.
        height: mask height in pixels.

    Returns:
        bool: True when the mask was written. False for an invalid bbox, an
        unsupported geometry type, or any exception (printed, not raised).
    """
    try:
        if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
            print(f"‚ùå Ugyldig bbox: {bbox}")
            return False

        minx, miny, maxx, maxy = bbox
        # Explicit check rather than ``assert``: asserts disappear under
        # ``python -O`` and an AssertionError has no message to log.
        if not (maxx > minx and maxy > miny):
            print(f"‚ùå Ugyldig bbox: {bbox}")
            return False

        mask = Image.new("L", (width, height), 0)

        def world_to_pixel(x, y):
            # Image rows grow downwards, so the y axis is flipped.
            px = int((x - minx) / (maxx - minx) * width)
            py = int((maxy - y) / (maxy - miny) * height)
            return (px, py)

        def ring_to_pixels(ring):
            # ``*_`` tolerates 3D coordinates (x, y, z) as well as plain 2D.
            return [world_to_pixel(x, y) for x, y, *_ in ring.coords]

        def draw_single_polygon(polygon):
            if polygon.is_empty:
                return
            # Each polygon is drawn on its own layer so that erasing its holes
            # can never wipe out another part of a MultiPolygon (for example
            # an island located inside a hole).
            layer = Image.new("L", (width, height), 0)
            layer_draw = ImageDraw.Draw(layer)
            layer_draw.polygon(
                ring_to_pixels(polygon.exterior), outline=255, fill=255
            )
            for hole in polygon.interiors:
                layer_draw.polygon(ring_to_pixels(hole), fill=0)
            # Paste solid 255 wherever the layer is set (acts as a logical OR).
            mask.paste(255, mask=layer)

        # Dispatch on geometry type
        if isinstance(geom, Polygon):
            draw_single_polygon(geom)
        elif isinstance(geom, MultiPolygon):
            for poly in geom.geoms:
                draw_single_polygon(poly)
        else:
            print(f"‚ö†Ô∏è Ukjent geometritype: {type(geom)}")
            return False

        mask.save(out_path)
        return True

    except Exception as e:
        print(f"‚ùå Feil ved maskegenerering: {e}")
        return False

In [0]:
def parse_bbox(value):
    """
    Normalise a bounding-box value into a tuple of four floats.

    Accepts a list, tuple or numpy array of four numbers, or a string holding
    a Python literal of such a sequence (for example ``"[1, 2, 3, 4]"``).
    The element order is preserved as given.

    Args:
        value: the raw bbox value.

    Returns:
        tuple[float, float, float, float]: the four values as floats.

    Raises:
        ValueError: if the value is None/NaN, cannot be parsed, does not hold
            exactly four elements, or holds non-numeric/non-finite elements.
    """
    if value is None or (isinstance(value, float) and np.isnan(value)):
        raise ValueError("Verdi er tom eller NaN")

    if isinstance(value, str):
        try:
            # literal_eval only evaluates literals, never arbitrary code.
            value = ast.literal_eval(value)
        except (ValueError, SyntaxError, TypeError) as exc:
            raise ValueError(f"Kan ikke tolke bbox-streng: {value!r}") from exc

    # Previously any other shape fell through and returned None silently;
    # raise so callers get one consistent failure signal.
    if not isinstance(value, (list, tuple, np.ndarray)) or len(value) != 4:
        raise ValueError(f"Ugyldig bbox (forventet 4 verdier): {value!r}")

    try:
        coords = tuple(float(v) for v in value)
    except (TypeError, ValueError) as exc:
        raise ValueError(f"Ugyldig bbox (ikke-numerisk verdi): {value!r}") from exc

    if not all(np.isfinite(c) for c in coords):
        raise ValueError(f"Ugyldig bbox (NaN eller uendelig verdi): {value!r}")

    return coords

In [0]:
def download_image(
    url: str, out_path: str, retries: int = 3, backoff: float = 2.0
) -> bool:
    """
    Try to download an image from a URL and store it locally.

    Args:
        url: address to fetch with an HTTP GET (30 second timeout).
        out_path: file path the response body is written to.
        retries: maximum number of attempts; 0 or less means no attempt.
        backoff: base of the exponential wait; the pause after attempt ``n``
            is ``backoff ** n`` seconds (2 s, then 4 s with the defaults).

    Returns:
        bool: True once a non-empty HTTP 200 response has been written to
        ``out_path``; False when every attempt failed.
    """
    for attempt in range(1, retries + 1):
        try:
            r = requests.get(url, timeout=30)
            # An empty 200 body can never be a valid image, so it counts as a
            # failed attempt and nothing is written to disk.
            # NOTE(review): the content type is not inspected; a service that
            # reports errors as a 200 response would still be saved - confirm.
            if r.status_code == 200 and r.content:
                with open(out_path, "wb") as f:
                    f.write(r.content)
                return True
            print(
                f"‚ö†Ô∏è Nedlasting feilet (status {r.status_code}) p√• fors√∏k {attempt}"
            )
        except Exception as e:
            # Best effort: log the error and fall through to the retry logic.
            print(f"‚ùå Feil ved nedlasting p√• fors√∏k {attempt}: {e}")

        # Retry with exponential backoff; no pause after the last attempt.
        if attempt < retries:
            time.sleep(backoff ** attempt)

    print(f"‚ùå Alle {retries} fors√∏k mislyktes for {url}")
    return False

In [0]:
def download_pending_images(
    table: str,
    id_col: str,
    image_output_dir: str,
    dom_output_dir: str = None,
    with_empty_geometry: bool = False,
):
    """
    Download every image marked 'PENDING' in ``table`` and mark it 'DOWNLOADED'.

    A row is processed when ``dom_status`` and/or ``image_status`` equals
    'PENDING'; a table only needs one of the two status columns. After each
    successful download the matching status column of that row is updated in
    the Delta table, so progress survives an interrupted run.

    Args:
        table: name of the Delta table to read and update.
        id_col: column identifying a row; used in file names and in the
            update condition.
        image_output_dir: directory for ``image_<id>.png`` files.
        dom_output_dir: directory for ``dom_<id>.png`` files; DOM downloads
            are skipped when this is falsy.
        with_empty_geometry: when False, rows whose ``geometry`` is NULL are
            excluded; when True they are included.
    """
    df = spark.read.table(table)
    cols = df.columns

    # Either status column is enough; build the filter from what exists.
    conditions = [
        f"{status_col} = 'PENDING'"
        for status_col in ("dom_status", "image_status")
        if status_col in cols
    ]
    if not conditions:
        # An empty filter expression would make Spark raise; nothing to do.
        print(f"Ingen statuskolonner (dom_status/image_status) i {table}")
        return

    filter_expr = " OR ".join(conditions)
    if not with_empty_geometry:
        # Must be a plain string: a trailing comma here would create a tuple,
        # which DataFrame.filter() rejects.
        filter_expr = f"({filter_expr}) AND geometry IS NOT NULL"

    # Only the id, status and URL columns are read below, so geometry is
    # neither converted nor collected to the driver.
    wanted_cols = [id_col, "dom_status", "dom_wms", "image_status", "image_wms"]
    df_pending_pd = (
        df.filter(filter_expr)
        .select(*[c for c in dict.fromkeys(wanted_cols) if c in cols])
        .toPandas()
    )

    delta_tbl = DeltaTable.forName(spark, table)

    for _, row in df_pending_pd.iterrows():
        id_value = row[id_col]
        # Column-based condition instead of an interpolated SQL string: ids
        # containing quotes cannot break it and numeric ids keep their type.
        # numpy scalars are unwrapped because lit() expects plain Python values.
        id_literal = id_value.item() if isinstance(id_value, np.generic) else id_value
        row_match = col(id_col) == lit(id_literal)

        # Download DOM images
        if (
            dom_output_dir
            and "dom_status" in cols
            and row.get("dom_status") == "PENDING"
        ):
            dom_out = f"{dom_output_dir}/dom_{id_value}.png"
            if download_image(row["dom_wms"], dom_out):
                delta_tbl.update(
                    condition=row_match,
                    set={"dom_status": lit("DOWNLOADED")},
                )

        # Download images
        if "image_status" in cols and row.get("image_status") == "PENDING":
            refresh_token_if_needed()
            base = row["image_wms"]
            if isinstance(base, str) and base.startswith("http"):
                # Start a query string when the base URL does not have one yet.
                separator = "&" if "?" in base else "?"
                image_url = f"{base}{separator}TICKET={token}"
            else:
                image_url = base
            image_out = f"{image_output_dir}/image_{id_value}.png"

            if download_image(image_url, image_out):
                delta_tbl.update(
                    condition=row_match,
                    set={"image_status": lit("DOWNLOADED")},
                )

In [0]:
def download_pending_labels(
    table: str,
    id_col: str,
    mask_output_dir: str,
    image_size: int = 512,
    black_label: bool = False,
    with_empty_geometry: bool = False,
):
    """
    Generate a label mask for every row whose ``mask_status`` is 'PENDING'
    and mark the row 'GENERATED'.

    Args:
        table: name of the Delta table to read and update.
        id_col: column identifying a row; used in file names and in the
            update condition.
        mask_output_dir: directory for ``mask_<id>.png`` files.
        image_size: width and height of the generated mask in pixels.
        black_label: when True an all-zero mask is written and the geometry
            is ignored; otherwise the geometry is rasterised inside the
            row's ``Adjusted_bbox`` (falling back to ``bbox``).
        with_empty_geometry: when False, rows whose ``geometry`` is NULL are
            excluded; when True they are included. This matches the meaning
            of the same flag in ``download_pending_images``.
    """
    query = "mask_status = 'PENDING'"
    if not with_empty_geometry:
        # The NULL filter applies when empty geometries are NOT wanted; the
        # two branches used to be swapped.
        query = f"{query} AND geometry IS NOT NULL"

    df_pending = spark.read.table(table).filter(query)

    df_pending = df_pending.withColumn(
        "geometry",
        expr("ST_SetSRID(ST_GeomFromWKB(geometry), 25833)"),
    )

    df_pending = df_pending.toPandas()

    # NOTE(review): assumes toPandas() yields values GeoDataFrame accepts as
    # geometries for the converted column - confirm on the target runtime.
    gdf = gpd.GeoDataFrame(df_pending, geometry="geometry", crs="EPSG:25833")

    # Resolve the Delta table once instead of once per row.
    delta_tbl = DeltaTable.forName(spark, table)

    def _is_missing(value):
        # pandas represents absent values as None or float NaN.
        return value is None or (isinstance(value, float) and np.isnan(value))

    for _, row in gdf.iterrows():
        id_value = row[id_col]
        filename = f"{id_value}.png"
        out_path = f"{mask_output_dir}/mask_{filename}"

        # Write an all-black mask when black_label is set, else a binary mask
        if black_label:
            mask = np.zeros((image_size, image_size), dtype=np.uint8)
            Image.fromarray(mask).save(out_path)
            success = True
        else:
            # Prefer Adjusted_bbox, fall back to bbox. NaN counts as missing
            # so that it no longer blocks the fallback.
            try:
                bbox = None
                if "Adjusted_bbox" in row and not _is_missing(row["Adjusted_bbox"]):
                    bbox = parse_bbox(row["Adjusted_bbox"])
                elif "bbox" in row and not _is_missing(row["bbox"]):
                    bbox = parse_bbox(row["bbox"])
                else:
                    print(f"‚ùå Mangler gyldig bbox for {id_value}")
                    continue
            except Exception as e:
                print(f"‚ùå Feil ved parsing av bbox for {id_value}: {e}")
                continue

            # Forward image_size so the mask matches the requested dimensions
            # instead of always being 512x512.
            success = generate_binary_mask(
                row["geometry"], out_path, bbox, width=image_size, height=image_size
            )

        if success:
            # Column-based condition instead of an interpolated SQL string;
            # numpy scalars are unwrapped because lit() expects Python values.
            id_literal = (
                id_value.item() if isinstance(id_value, np.generic) else id_value
            )
            delta_tbl.update(
                condition=col(id_col) == lit(id_literal),
                set={"mask_status": lit("GENERATED")},
            )
        else:
            print(f"‚ö†Ô∏è Maske-generering feilet for {id_value}")

In [0]:
# Output folders for the turning-place dataset (polygons_silver).
snuplass_dom_output_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/storredom/"
)
snuplass_image_output_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/storreimage/"
)
snuplass_mask_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/storrelabel/"
)
os.makedirs(name=snuplass_dom_output_dir, exist_ok=True)
os.makedirs(name=snuplass_image_output_dir, exist_ok=True)
os.makedirs(name=snuplass_mask_dir, exist_ok=True)

# Fetch pending DOM/image tiles, then rasterise the pending masks.
download_pending_images(
    table=silver_table,
    id_col="row_hash",
    image_output_dir=snuplass_image_output_dir,
    dom_output_dir=snuplass_dom_output_dir,
)
download_pending_labels(
    table=silver_table,
    id_col="row_hash",
    mask_output_dir=snuplass_mask_dir,
)

In [0]:
# Output folders for the utensnuplass_silver dataset.
utenS_dom_output_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/utenSdom/"
)
utenS_image_output_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/utenSimage/"
)
utenS_mask_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/utenSlabel/"
)
os.makedirs(name=utenS_dom_output_dir, exist_ok=True)
os.makedirs(name=utenS_image_output_dir, exist_ok=True)
os.makedirs(name=utenS_mask_dir, exist_ok=True)

# Rows without geometry are included and all-black masks are written.
download_pending_images(
    table=utenS_table,
    id_col="row_hash",
    image_output_dir=utenS_image_output_dir,
    dom_output_dir=utenS_dom_output_dir,
    with_empty_geometry=True,
)
download_pending_labels(
    table=utenS_table,
    id_col="row_hash",
    mask_output_dir=utenS_mask_dir,
    black_label=True,
    with_empty_geometry=True,
)

In [0]:
# Output folders for the endepunkt_silver dataset (images only, no masks).
endepunkt_dom_output_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/storreendepunkt_dom/"
)
endepunkt_image_output_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/storreendepunkt_images/"
)
os.makedirs(name=endepunkt_dom_output_dir, exist_ok=True)
os.makedirs(name=endepunkt_image_output_dir, exist_ok=True)

# Rows in this table are identified by "nodeid" rather than "row_hash".
download_pending_images(
    table=endepunkt_table,
    id_col="nodeid",
    image_output_dir=endepunkt_image_output_dir,
    dom_output_dir=endepunkt_dom_output_dir,
)

In [0]:
# Output folder for the hospitals_gold dataset (images only).
hospital_image_output_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/hospital_images/"
)
os.makedirs(name=hospital_image_output_dir, exist_ok=True)

download_pending_images(
    table=hospital_table,
    id_col="row_hash",
    image_output_dir=hospital_image_output_dir,
)

In [0]:
# Output folders for the helipads_silver dataset.
helipad_image_output_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/helipad_images/"
)
helipad_mask_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/helipad_labels/"
)
os.makedirs(name=helipad_image_output_dir, exist_ok=True)
os.makedirs(name=helipad_mask_dir, exist_ok=True)

# Fetch pending images, then rasterise the pending masks.
download_pending_images(
    table=helipad_table,
    id_col="row_hash",
    image_output_dir=helipad_image_output_dir,
)
download_pending_labels(
    table=helipad_table,
    id_col="row_hash",
    mask_output_dir=helipad_mask_dir,
)

In [0]:
# Output folders for the nohelipads dataset.
noHelipad_image_output_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/noHelipad_images/"
)
noHelipad_mask_dir = (
    "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/"
    "DL_bildesegmentering/noHelipad_labels/"
)
os.makedirs(name=noHelipad_image_output_dir, exist_ok=True)
os.makedirs(name=noHelipad_mask_dir, exist_ok=True)

# Rows without geometry are included and all-black masks are written.
download_pending_images(
    table=noHelipad_table,
    id_col="row_hash",
    image_output_dir=noHelipad_image_output_dir,
    with_empty_geometry=True,
)
download_pending_labels(
    table=noHelipad_table,
    id_col="row_hash",
    mask_output_dir=noHelipad_mask_dir,
    black_label=True,
    with_empty_geometry=True,
)

In [0]:
# Output folders for the blokkmark_silver dataset.
blokkmark_image_output_dir = "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/DL_bildesegmentering/blokkmark_images/"
blokkmark_mask_dir = "/Volumes/land_auto-gen-kart_dev/external_dev/static_data/DL_bildesegmentering/blokkmark_labels/"
# Create the blokkmark folders themselves; this cell used to create the
# noHelipad folders, so writes here depended on the directories already
# existing.
os.makedirs(blokkmark_image_output_dir, exist_ok=True)
os.makedirs(blokkmark_mask_dir, exist_ok=True)

# Rows without geometry are included and all-black masks are written.
download_pending_images(
    blokkmark_table, "row_hash", image_output_dir=blokkmark_image_output_dir, with_empty_geometry=True
)
download_pending_labels(
    blokkmark_table,
    "row_hash",
    mask_output_dir=blokkmark_mask_dir,
    black_label=True,
    with_empty_geometry=True
)