In [0]:
%pip install python-dotenv


In [0]:
from pyspark.sql import functions as F
from pyspark.sql import DataFrame
from pyspark.sql.types import ArrayType, DoubleType, StringType, StructType, StructField, DateType
from delta.tables import DeltaTable
from sedona.spark import *
import random
import geopandas as gpd
import time
import os
from dotenv import load_dotenv
import requests
from bs4 import BeautifulSoup
from datetime import datetime


In [0]:
# Unity Catalog location used by this notebook. The catalog name contains a
# hyphen, so it is stored backtick-quoted for direct use inside SQL statements.
catalog_dev = "`land_topografisk-gdb_dev`"
schema_dev = "ai2025"

# Input (bronze) and output (silver) table names, resolved against the schema
# selected below.
bronze_table = "utensnuplass_bronze"
silver_table = "utensnuplass_silver"

# Root directory for the PNG files and the subdirectory used per data type.
BASE_PATH = "/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER"
SUBDIR = {"image": "utenSimage", "dom": "utenSdom", "mask": "utenSlabel"}

# Select the catalog, create the schema if it is missing, then make it current.
spark.sql(f"USE CATALOG {catalog_dev}")
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_dev}")
spark.sql(f"USE SCHEMA {schema_dev}")

In [0]:
def read_table_to_wkt():
    """
    Load the bronze table with its ``geometry`` column parsed from WKT.

    Returns:
        A Spark DataFrame read from ``bronze_table`` in which the ``geometry``
        column has been replaced by ``ST_GeomFromWKT(geometry)``.
    """
    parsed_geometry = F.expr("ST_GeomFromWKT(geometry)")
    bronze_rows = spark.read.table(bronze_table)
    return bronze_rows.withColumn("geometry", parsed_geometry)

In [0]:
def make_envelope(df: DataFrame) -> DataFrame:
    """
    Add an ``envelope`` column holding the bounding rectangle of each geometry.

    Args:
        df: DataFrame with a ``geometry`` column.

    Returns:
        ``df`` with an extra ``envelope`` column computed as
        ``ST_Envelope(geometry)``.
    """
    envelope_expr = F.expr("ST_Envelope(geometry)")
    return df.withColumn("envelope", envelope_expr)

In [0]:
def random_adjusted_bbox_centered(
    envelope: list, bbox_size: int = 128, max_offset: float = 50
) -> tuple:
    """
    Build a square box of side ``bbox_size`` around a randomly shifted centre.

    The centre of ``envelope`` is moved by an independent uniform random offset
    in ``[-max_offset, max_offset]`` along each axis, and a square of side
    ``bbox_size`` is placed around the shifted centre. If the envelope is wider
    or taller than ``bbox_size``, a notice is printed and no offset is applied,
    so the box stays centred on the envelope.

    Args:
        envelope: ``[xmin, ymin, xmax, ymax]`` of the input extent.
        bbox_size: Side length of the generated square.
        max_offset: Largest absolute shift applied to the centre per axis.

    Returns:
        A ``(bbox, bbox_str)`` tuple where ``bbox`` is
        ``[xmin, ymin, xmax, ymax]`` of the generated square and ``bbox_str`` is
        the same four values with six decimals joined by ``_``. Returns ``None``
        when ``envelope`` is ``None`` or contains a ``None`` coordinate.
    """
    # A null geometry reaches a UDF as None (or as an array of Nones); yield a
    # null result for that row instead of raising a TypeError.
    if envelope is None or any(value is None for value in envelope):
        return None

    xmin, ymin, xmax, ymax = envelope

    if (xmax - xmin) > bbox_size or (ymax - ymin) > bbox_size:
        print("OBS: polygon er større enn bbox")
        max_offset = 0

    half_size = bbox_size / 2

    center_x = (xmin + xmax) / 2 + random.uniform(-max_offset, max_offset)
    center_y = (ymin + ymax) / 2 + random.uniform(-max_offset, max_offset)

    bbox = [
        center_x - half_size,
        center_y - half_size,
        center_x + half_size,
        center_y + half_size,
    ]
    bbox_str = "_".join(f"{v:.6f}" for v in bbox)
    return bbox, bbox_str

In [0]:
def make_bbox(df: DataFrame, column_name: str) -> DataFrame:
    """
    Derive fixed and adjusted bounding boxes from a geometry column.

    Columns added:
        bbox: ``[x - 256, y - 256, x + 256, y + 256]`` around the centroid of
            ``column_name``.
        Polygons: ``ST_MakeEnvelope`` built from ``bbox``.
        adjusted_struct: result of ``adjusted_bbox_udf`` applied to ``bbox``.
        Adjusted_bbox: the ``bbox`` field of ``adjusted_struct``.
        bbox_str: the ``bbox_str`` field of ``adjusted_struct``.

    ``column_name`` itself is dropped from the result. ``adjusted_bbox_udf`` is
    a module-level name that must be defined before this function is called.

    Args:
        df: Input DataFrame containing ``column_name``.
        column_name: Name of the geometry column whose centroid is used.

    Returns:
        The DataFrame with the columns above added and ``column_name`` removed.
    """
    centroid_box_sql = f"""
        array(
            ST_X(ST_Centroid({column_name})) - 256,
            ST_Y(ST_Centroid({column_name})) - 256,
            ST_X(ST_Centroid({column_name})) + 256,
            ST_Y(ST_Centroid({column_name})) + 256
        )
        """
    with_fixed_bbox = df.withColumn("bbox", F.expr(centroid_box_sql))

    with_polygon = with_fixed_bbox.withColumn(
        "Polygons", F.expr("ST_MakeEnvelope(bbox[0], bbox[1], bbox[2], bbox[3])")
    )
    with_struct = with_polygon.withColumn(
        "adjusted_struct", adjusted_bbox_udf(F.col("bbox"))
    )
    with_adjusted = with_struct.withColumn(
        "Adjusted_bbox", F.col("adjusted_struct.bbox")
    ).withColumn("bbox_str", F.col("adjusted_struct.bbox_str"))

    return with_adjusted.drop(column_name)

In [0]:
def generate_dom_url(bbox_str: str):
    """
    Build the WMS GetMap URL for the DOM hillshade image of a bounding box.

    Args:
        bbox_str: The bounding box, either already formatted as the text to put
            in the ``BBOX`` parameter, or as a sequence of four numbers
            ``[xmin, ymin, xmax, ymax]``. A sequence is rendered as
            comma-separated values with six decimals.

    Returns:
        The request URL for a 512x512 PNG, or ``None`` when ``bbox_str`` is
        ``None``.
    """
    if bbox_str is None:
        return None
    if not isinstance(bbox_str, str):
        # An array column arrives in a Python UDF as a list; interpolating it
        # directly would put the list repr ("[1.0, 2.0, ...]") into the URL.
        bbox_str = ",".join(f"{v:.6f}" for v in bbox_str)

    width, height = 512, 512
    return (
        f"https://wms.geonorge.no/skwms1/wms.hoyde-dom-nhm-25833?request=GetMap&Format=image/png&"
        f"GetFeatureInfo=text/plain&CRS=EPSG:25833&Layers=NHM_DOM_25833:skyggerelieff&"
        f"BBOX={bbox_str}&width={width}&height={height}"
    )


def generate_image_url(bbox_str: str):
    """
    Build the WMS GetMap URL for the orthophoto image of a bounding box.

    The URL ends with an empty ``TICKET=`` parameter; the caller is expected to
    append the ticket value before issuing the request.

    Args:
        bbox_str: The bounding box, either already formatted as the text to put
            in the ``BBox`` parameter, or as a sequence of four numbers
            ``[xmin, ymin, xmax, ymax]``. A sequence is rendered as
            comma-separated values with six decimals.

    Returns:
        The request URL for a 512x512 PNG, or ``None`` when ``bbox_str`` is
        ``None``.
    """
    if bbox_str is None:
        return None
    if not isinstance(bbox_str, str):
        # An array column arrives in a Python UDF as a list; interpolating it
        # directly would put the list repr ("[1.0, 2.0, ...]") into the URL.
        bbox_str = ",".join(f"{v:.6f}" for v in bbox_str)

    width, height = 512, 512
    return (
        f"https://wms.geonorge.no/skwms1/wms.nib?VERSION=1.3.0"
        f"&service=WMS&request=GetMap&Format=image/png&"
        f"GetFeatureInfo=text/plain&CRS=EPSG:25833&Layers=ortofoto&"
        f"BBox={bbox_str}&width={width}&height={height}&TICKET="
    )


def dom_file_exists(id: str, directory: str = None) -> str:
    """
    Report whether the DOM PNG for a given ID exists on disk.

    Args:
        id: Identifier used in the file name ``dom_{id}.png``.
        directory: Directory to look in. Defaults to the DOM subdirectory of
            ``BASE_PATH`` (``SUBDIR["dom"]``), i.e. the same directory the
            notebook uses when it builds the ``dom_path`` column.

    Returns:
        ``"DOWNLOADED"`` if the file exists, otherwise ``"PENDING"``.
    """
    if directory is None:
        # Keep the status check pointed at the same directory as ``dom_path``;
        # the previous hard-coded ``storredom`` directory did not match it.
        directory = f"{BASE_PATH}/{SUBDIR['dom']}"
    path = f"{directory}/dom_{id}.png"
    return "DOWNLOADED" if os.path.exists(path) else "PENDING"


def image_file_exists(id: str, directory: str = None) -> str:
    """
    Report whether the orthophoto PNG for a given ID exists on disk.

    Args:
        id: Identifier used in the file name ``image_{id}.png``.
        directory: Directory to look in. Defaults to the image subdirectory of
            ``BASE_PATH`` (``SUBDIR["image"]``), i.e. the same directory the
            notebook uses when it builds the ``image_path`` column.

    Returns:
        ``"DOWNLOADED"`` if the file exists, otherwise ``"PENDING"``.
    """
    if directory is None:
        # Keep the status check pointed at the same directory as ``image_path``;
        # the previous hard-coded ``storreimage`` directory did not match it.
        directory = f"{BASE_PATH}/{SUBDIR['image']}"
    path = f"{directory}/image_{id}.png"
    return "DOWNLOADED" if os.path.exists(path) else "PENDING"


def mask_file_exists(id: str, directory: str = None) -> str:
    """
    Report whether the mask PNG for a given ID exists on disk.

    Args:
        id: Identifier used in the file name ``mask_{id}.png``.
        directory: Directory to look in. Defaults to the mask subdirectory of
            ``BASE_PATH`` (``SUBDIR["mask"]``), i.e. the same directory the
            notebook uses when it builds the ``mask_path`` column.

    Returns:
        ``"GENERATED"`` if the file exists, otherwise ``"PENDING"``.
    """
    if directory is None:
        # Keep the status check pointed at the same directory as ``mask_path``;
        # the previous hard-coded ``storrelabel`` directory did not match it.
        directory = f"{BASE_PATH}/{SUBDIR['mask']}"
    path = f"{directory}/mask_{id}.png"
    return "GENERATED" if os.path.exists(path) else "PENDING"

In [0]:
# Load variables from a .env file (if one is found) into the process
# environment, then read the GeoNorge credentials from it. Either value is
# None when the corresponding variable is not set.
load_dotenv()
BRUKERID = os.environ.get("GEONORGE_BRUKERID")
PASSORD = os.environ.get("GEONORGE_PASSORD")

def get_token():
    """
    Request a ticket for the ``wms.nib`` service from the GeoNorge token endpoint.

    The credentials are taken from the module-level ``BRUKERID`` and ``PASSORD``.

    Returns:
        The response body with any leading/trailing backticks removed.

    Raises:
        RuntimeError: If either credential is missing or empty.
        requests.HTTPError: If the endpoint answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    if not BRUKERID or not PASSORD:
        # Fail early instead of sending the literal text "None" as a credential.
        raise RuntimeError(
            "GEONORGE_BRUKERID and GEONORGE_PASSORD must be set to request a token"
        )

    response = requests.get(
        "https://baat.geonorge.no/skbaatts/req",
        # Passing the values as params lets requests URL-encode special
        # characters in the password.
        params={
            "brukerid": BRUKERID,
            "passord": PASSORD,
            "tjenesteid": "wms.nib",
            "retformat": "s",
        },
        timeout=30,
    )
    # Without this check an HTTP error page would be returned as if it were a token.
    response.raise_for_status()
    return response.text.strip("`")

# Age in seconds (55 minutes) after which refresh_token_if_needed fetches a new token.
token_lifetime = 55 * 60

# Fetch the initial token and remember when it was obtained.
token = get_token()
token_start_time = time.time()

def refresh_token_if_needed():
    """
    Replace the module-level token once it is older than ``token_lifetime``.

    Rebinds the globals ``token`` and ``token_start_time``; does nothing while
    the current token is still within its lifetime. Values of ``token`` that
    were already passed to other code are not updated by this call.
    """
    global token, token_start_time

    token_age = time.time() - token_start_time
    if token_age <= token_lifetime:
        return

    print("🔄 Fornyer token...")
    token = get_token()
    token_start_time = time.time()

In [0]:
def get_fotodato(bbox: str, token: str):
    """
    Look up the photo date of the orthophoto at the centre of a bounding box.

    Issues a WMS GetFeatureInfo request for pixel (256, 256) of a 512x512 map
    over ``bbox`` and reads the ``Fotodato`` row of the first HTML table in the
    response.

    Args:
        bbox: Bounding box text placed in the ``BBOX`` request parameter.
        token: Ticket placed in the ``TICKET`` request parameter.

    Returns:
        The photo date as a ``datetime.date``, or ``None`` when the response has
        no table, no ``Fotodato`` row, or a value not in ``dd.mm.yyyy`` format.

    Raises:
        requests.RequestException: On connection problems or timeout.
    """
    url = f"https://wms.geonorge.no/skwms1/wms.nib?SERVICE=WMS&VERSION=1.3.0&REQUEST=GetFeatureInfo&CRS=EPSG:25833&BBOX={bbox}&WIDTH=512&HEIGHT=512&LAYERS=ortofoto&QUERY_LAYERS=ortofoto&INFO_FORMAT=text/html&I=256&J=256&TICKET={token}"

    # A timeout keeps one unresponsive request from blocking the caller forever.
    response = requests.get(url, timeout=30)
    table = BeautifulSoup(response.text, "html.parser").find("table")
    if table is None:
        return None

    for row in table.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) >= 2 and cells[0].text.strip() == "Fotodato":
            try:
                return datetime.strptime(cells[1].text.strip(), "%d.%m.%Y").date()
            except ValueError:
                # Empty or unexpectedly formatted value: treat as "no date".
                return None

    return None

In [0]:
def write_delta_table(sdf: DataFrame, mode: str = "merge") -> None:
    """
    Write ``sdf`` to the silver Delta table, replacing it or merging on ``nodeid``.

    NOTE(review): a later cell defines ``write_delta_table`` again with a
    different signature and merge key; when the notebook is run top to bottom
    that later definition replaces this one.

    Args:
        sdf: Rows to write.
        mode: ``"overwrite"`` replaces the table contents (with ``mergeSchema``
            enabled). Any other value merges into the existing table: rows
            matching on ``nodeid`` are updated when the target's ``hentet_tid``
            is older than the source's or the target's ``image_path`` is null,
            and unmatched source rows are inserted.
    """
    if mode == "overwrite":
        writer = sdf.write.format("delta").option("mergeSchema", "true")
        writer.mode("overwrite").saveAsTable(silver_table)
        return

    # Every column of the source maps onto the column of the same name.
    column_map = {name: f"source.{name}" for name in sdf.columns}
    target = DeltaTable.forName(spark, silver_table).alias("target")

    (
        target.merge(sdf.alias("source"), condition="target.nodeid = source.nodeid")
        .whenMatchedUpdate(
            condition="target.hentet_tid < source.hentet_tid OR target.image_path IS NULL",
            set=column_map,
        )
        .whenNotMatchedInsert(values=column_map)
        .execute()
    )

In [0]:
def write_delta_table(sdf: DataFrame):
    """
    Upsert ``sdf`` into the silver Delta table, creating the table on first use.

    This redefines the ``write_delta_table`` from the previous cell.

    If ``silver_table`` does not exist it is created from ``sdf``. Otherwise
    ``sdf`` is merged into it on ``row_hash``: matching rows are updated with
    all source columns and unmatched source rows are inserted.

    Args:
        sdf: Rows to write; must contain a ``row_hash`` column for the merge.
    """
    if spark.catalog.tableExists(silver_table):
        merge_builder = (
            DeltaTable.forName(spark, silver_table)
            .alias("target")
            .merge(
                source=sdf.alias("source"),
                condition="target.row_hash = source.row_hash",
            )
        )
        merge_builder.whenMatchedUpdateAll().whenNotMatchedInsertAll().execute()
        return

    (
        sdf.write.format("delta")
        .option("mergeSchema", "true")
        .mode("overwrite")
        .saveAsTable(silver_table)
    )


In [0]:
# Return type of the adjusted-bbox UDF: the box coordinates plus their string form.
adjusted_bbox_schema = StructType(
    [
        StructField("bbox", ArrayType(DoubleType())),
        StructField("bbox_str", StringType()),
    ]
)

# The function draws random offsets, so the UDF is marked nondeterministic.
# Otherwise Spark may evaluate it more than once per row when several columns
# are extracted from the returned struct, and the fields could come from
# different random draws.
adjusted_bbox_udf = F.udf(
    random_adjusted_bbox_centered, adjusted_bbox_schema
).asNondeterministic()

# String-returning UDF wrappers around the URL builders and file checks.
generate_dom_url_udf = F.udf(generate_dom_url, StringType())
generate_image_url_udf = F.udf(generate_image_url, StringType())
dom_file_exists_udf = F.udf(dom_file_exists, StringType())
image_file_exists_udf = F.udf(image_file_exists, StringType())
mask_file_exists_udf = F.udf(mask_file_exists, StringType())

# Read the bronze polygons and derive the envelope and bounding-box columns.
df = read_table_to_wkt()
df = make_envelope(df)
df = make_bbox(df, "envelope")

# One target file path per data type: <BASE_PATH>/<subdir>/<type>_<row_hash>.png
for dt in ["image", "dom", "mask"]:
    sub = SUBDIR[dt]
    df = df.withColumn(
        f"{dt}_path",
        F.concat(F.lit(f"{BASE_PATH}/{sub}/{dt}_"), F.col("row_hash"), F.lit(".png")),
    )

# The URL builders take the BBOX as text. Passing the array column directly
# would embed a Python list repr in the URL, so format it as
# "xmin,ymin,xmax,ymax" first (left null when the adjusted box is null).
wms_bbox = F.when(
    F.col("Adjusted_bbox").isNotNull(),
    F.format_string(
        "%.6f,%.6f,%.6f,%.6f", *[F.col("Adjusted_bbox")[i] for i in range(4)]
    ),
)

df = (
    df.withColumn("dom_wms", generate_dom_url_udf(wms_bbox))
    .withColumn("image_wms", generate_image_url_udf(wms_bbox))
    .withColumn("dom_status", dom_file_exists_udf("row_hash"))
    .withColumn("image_status", image_file_exists_udf("row_hash"))
    .withColumn("mask_status", mask_file_exists_udf("row_hash"))
    .withColumn("lastet_tid", F.current_timestamp())
)

# NOTE(review): add_ortofoto_date is not defined in any cell visible here —
# confirm it is defined before this cell runs on a fresh kernel.
df = add_ortofoto_date(df, token)

write_delta_table(df)