In [0]:
%pip install python-dotenv

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql import DataFrame
from pyspark.sql.functions import min as spark_min, max as spark_max
from pyspark.sql.types import ArrayType, DoubleType, StringType

from delta.tables import DeltaTable

from sedona.spark import *

import random
import geopandas as gpd
from pathlib import Path
import time
import os
import numpy as np
from PIL import Image, ImageDraw
from dotenv import load_dotenv
import requests

In [0]:
# config
# Catalog and schema that hold every table used in this notebook.
# The catalog name is backtick-quoted (it contains a hyphen).
catalog_dev = "`land_topografisk-gdb_dev`"
schema_dev = "ai2025"
# Fully qualified table names built from the catalog and schema above.
log_table = f"{catalog_dev}.{schema_dev}.logs_processed_gdbs"
bronze_table = f"{catalog_dev}.{schema_dev}.polygons_bronze"  # input read by read_table_to_wkt
silver_table = f"{catalog_dev}.{schema_dev}.polygons_silver"  # output written by write_delta_table
# Padding interpolated into the bbox SQL in make_bbox: added to each half-side of the square bbox.
buffer = 20

In [0]:
# Preview a few bronze rows. The row cap is applied explicitly with limit(),
# because a positional argument to display() is not a row limit.
df_bronze = spark.read.table(bronze_table)
df_bronze.limit(5).display()

In [0]:
def read_table_to_wkt() -> DataFrame:
    """
    Load the bronze table and parse its WKT ``geometry`` column.

    Returns:
        The bronze table as a Spark DataFrame in which the ``geometry`` column
        has been replaced by the result of ``ST_GeomFromWKT(geometry)``.
    """
    bronze = spark.read.table(bronze_table)
    parsed_geometry = F.expr("ST_GeomFromWKT(geometry)")
    return bronze.withColumn("geometry", parsed_geometry)

In [0]:
def make_envelope(df: DataFrame) -> DataFrame:
    """
    Add an ``envelope`` column holding the bounding rectangle of ``geometry``.

    ST_Envelope is used rather than ST_Boundary: the boundary is the outline
    of the shape, so its centroid is generally not the centre of the bounding
    box. Code that centres a box on ``ST_Centroid(envelope)`` needs the real
    envelope to get the box centre.

    Args:
        df: Spark DataFrame with a ``geometry`` column of geometry type.

    Returns:
        ``df`` with an additional ``envelope`` column.
    """
    return df.withColumn("envelope", F.expr("ST_Envelope(geometry)"))

In [0]:
# def random_adjusted_bbox(envelope: list, output_size: int=512) -> list:
#     """
#     Generate a random bbox based on the envelope.
#     """
#     xmin, ymin, xmax, ymax = envelope
#     poly_width = xmax - xmin
#     poly_height = ymax - ymin

#     if poly_width > output_size or poly_height > output_size:
#         return None

#     max_dx = output_size - poly_width
#     max_dy = output_size - poly_height

#     dx = random.uniform(0, max_dx)
#     dy = random.uniform(0, max_dy)

#     adjusted_xmin = xmin - dx
#     adjusted_ymin = ymin - dy
#     adjusted_xmax = adjusted_xmin + output_size
#     adjusted_ymax = adjusted_ymin + output_size

#     return [adjusted_xmin, adjusted_ymin, adjusted_xmax, adjusted_ymax]

In [0]:
def random_adjusted_bbox_centered(
    envelope: list,
    min_size: int = 256,
    max_size: int = 512,
    margin: int = 20,
    max_offset: float = 20
) -> list:
    """
    Build a square bbox roughly centred on ``envelope`` with a random shift.

    The side length is the larger envelope extent plus ``2 * margin``, clamped
    to ``[min_size, max_size]``. The centre of the envelope is then moved by an
    independent uniform random offset in ``[-max_offset, max_offset]`` on each
    axis.

    The envelope is only guaranteed to lie fully inside the result when its
    larger extent plus ``2 * margin`` does not exceed ``max_size`` and
    ``max_offset <= margin``; otherwise parts of it can fall outside.

    Args:
        envelope: ``[xmin, ymin, xmax, ymax]``. ``None`` (or a list containing
            ``None``) is accepted so that null rows survive when this function
            is used as a Spark UDF.
        min_size: Minimum side length of the bbox (e.g. 256).
        max_size: Maximum side length of the bbox (e.g. 512).
        margin: Padding added on every side of the envelope (e.g. 20).
        max_offset: Maximum absolute random shift of the centre per axis.

    Returns:
        ``[adjusted_xmin, adjusted_ymin, adjusted_xmax, adjusted_ymax]``, or
        ``None`` when ``envelope`` is ``None`` or contains ``None``.
    """
    # Null geometries arrive here as None / [None, ...]; propagate null
    # instead of failing the whole Spark job with a TypeError.
    if envelope is None or any(value is None for value in envelope):
        return None

    xmin, ymin, xmax, ymax = envelope
    poly_width = xmax - xmin
    poly_height = ymax - ymin

    # Desired side: larger extent plus margin on both sides, clamped to the allowed range.
    bbox_size = max(poly_width, poly_height) + margin * 2
    bbox_size = min(max(bbox_size, min_size), max_size)
    half_size = bbox_size / 2

    # Centre of the envelope, shifted by a random offset on each axis.
    center_x = (xmin + xmax) / 2 + random.uniform(-max_offset, max_offset)
    center_y = (ymin + ymax) / 2 + random.uniform(-max_offset, max_offset)

    return [
        center_x - half_size,
        center_y - half_size,
        center_x + half_size,
        center_y + half_size,
    ]

In [0]:
def make_bbox(df: DataFrame, column_name: str) -> DataFrame:
    """
    Derive bounding-box columns from a geometry column.

    Adds three columns and then drops the input column:

    * ``bbox``: ``array(xmin, ymin, xmax, ymax)`` of a square centred on
      ``ST_Centroid`` of the input column. Its half-side is half of the larger
      of the column's X and Y extents plus the module-level ``buffer``.
    * ``Polygons``: ``ST_MakeEnvelope`` built from ``bbox``.
    * ``Adjusted_bbox``: result of ``adjusted_bbox_udf`` applied to ``bbox``.

    Args:
        df: Spark DataFrame containing the geometry column ``column_name``.
        column_name: Name of the top-level geometry column to build the bbox
            from. It is dropped from the result. (The name used to be ignored
            and ``envelope`` was hard-coded; passing ``"envelope"`` gives the
            same result as before.)

    Returns:
        ``df`` with ``bbox``, ``Polygons`` and ``Adjusted_bbox`` added and
        ``column_name`` removed.
    """
    # Backtick-quote the identifier so names with special characters stay valid SQL.
    geom = "`" + column_name.replace("`", "``") + "`"

    # Build each SQL fragment once instead of repeating it for all four corners.
    half_side = (
        f"(GREATEST(ST_XMax({geom}) - ST_XMin({geom}), "
        f"ST_YMax({geom}) - ST_YMin({geom})) / 2 + {buffer})"
    )
    center_x = f"ST_X(ST_Centroid({geom}))"
    center_y = f"ST_Y(ST_Centroid({geom}))"
    bbox_sql = (
        "array("
        f"{center_x} - {half_side}, "
        f"{center_y} - {half_side}, "
        f"{center_x} + {half_side}, "
        f"{center_y} + {half_side}"
        ")"
    )

    return (
        # bbox column that can be fed straight into the WMS requests.
        df.withColumn("bbox", F.expr(bbox_sql))
        # Polygon version of the bbox.
        .withColumn("Polygons", F.expr("ST_MakeEnvelope(bbox[0], bbox[1], bbox[2], bbox[3])"))
        .withColumn("Adjusted_bbox", adjusted_bbox_udf(F.col("bbox")))
        # The source geometry column is only a helper for building the bbox.
        .drop(column_name)
    )

In [0]:
def generate_dom_url(bbox, width: int = 512, height: int = 512):
    """
    Build the WMS GetMap URL for the DOM hillshade layer covering ``bbox``.

    Args:
        bbox: Sequence of coordinates, joined with commas into the ``BBOX``
            query parameter. ``None`` (or a sequence containing ``None``) is
            accepted so null rows survive when used as a Spark UDF.
        width: Requested image width in pixels.
        height: Requested image height in pixels.

    Returns:
        The request URL as a string, or ``None`` when ``bbox`` is ``None`` or
        contains ``None`` (instead of raising, or emitting ``BBOX=None,...``).
    """
    if bbox is None or any(coord is None for coord in bbox):
        return None

    bbox_str = ",".join(map(str, bbox))
    return (
        f"https://wms.geonorge.no/skwms1/wms.hoyde-dom-nhm-25833?request=GetMap&Format=image/png&"
        f"GetFeatureInfo=text/plain&CRS=EPSG:25833&Layers=NHM_DOM_25833:skyggerelieff&"
        f"BBOX={bbox_str}&width={width}&height={height}"
    )

def generate_image_url(bbox, width: int = 512, height: int = 512):
    """
    Build the WMS GetMap URL for the ``ortofoto`` layer covering ``bbox``.

    The URL ends with an empty ``TICKET=`` parameter; no ticket value is
    appended by this function.

    Args:
        bbox: Sequence of coordinates, joined with commas into the ``BBox``
            query parameter. ``None`` (or a sequence containing ``None``) is
            accepted so null rows survive when used as a Spark UDF.
        width: Requested image width in pixels.
        height: Requested image height in pixels.

    Returns:
        The request URL as a string, or ``None`` when ``bbox`` is ``None`` or
        contains ``None`` (instead of raising, or emitting ``BBox=None,...``).
    """
    if bbox is None or any(coord is None for coord in bbox):
        return None

    bbox_str = ",".join(map(str, bbox))
    return (
        f"https://wms.geonorge.no/skwms1/wms.nib?VERSION=1.3.0"
        f"&service=WMS&request=GetMap&Format=image/png&"
        f"GetFeatureInfo=text/plain&CRS=EPSG:25833&Layers=ortofoto&"
        f"BBox={bbox_str}&width={width}&height={height}&TICKET="
    )

def dom_file_exists(id: str) -> str:
    """
    Report whether the DOM image for ``id`` is already on the volume.

    Returns:
        ``"DOWNLOADED"`` if ``<polygon_dom dir>/<id>.png`` exists, otherwise
        ``"PENDING"``.
    """
    dom_png = f"/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/polygon_dom/{id}.png"
    if os.path.exists(dom_png):
        return "DOWNLOADED"
    return "PENDING"

def image_file_exists(id: str) -> str:
    """
    Report whether the orthophoto image for ``id`` is already on the volume.

    Returns:
        ``"DOWNLOADED"`` if ``<polygon_image dir>/<id>.png`` exists, otherwise
        ``"PENDING"``.
    """
    image_png = f"/Volumes/land_topografisk-gdb_dev/external_dev/static_data/DL_SNUPLASSER/polygon_image/{id}.png"
    if os.path.exists(image_png):
        return "DOWNLOADED"
    return "PENDING"

In [0]:
def write_delta_table(sdf: DataFrame):
    """
    Persist ``sdf`` to the silver Delta table.

    If the table already exists, ``sdf`` is merged into it on ``row_hash``:
    matching rows are updated with all source columns and non-matching rows
    are inserted. If the table does not exist yet, it is created from ``sdf``
    with an overwrite-mode write.

    Args:
        sdf: Spark DataFrame to write; must contain a ``row_hash`` column for
            the merge path.
    """
    if spark.catalog.tableExists(silver_table):
        # Upsert path: the table is already there, so merge on the row hash.
        upsert = (
            DeltaTable.forName(spark, silver_table)
            .alias("target")
            .merge(
                source=sdf.alias("source"),
                condition="target.row_hash = source.row_hash",
            )
            .whenMatchedUpdateAll()
            .whenNotMatchedInsertAll()
        )
        upsert.execute()
        return

    # First run: create the table from scratch.
    writer = (
        sdf.write.format("delta")
        .option("mergeSchema", "true")
        .mode("overwrite")
    )
    writer.saveAsTable(silver_table)

In [0]:
def plot(df: DataFrame, column_name: str, plot_column: str):
    """
    Convert a Spark DataFrame into a GeoDataFrame ready for plotting.

    The whole DataFrame is collected with ``toPandas()``. No plot is drawn
    here; the caller plots the returned object.

    Args:
        df: Spark DataFrame to convert.
        column_name: Column to use as the GeoDataFrame geometry.
        plot_column: Not used by this function.

    Returns:
        A ``geopandas.GeoDataFrame`` with CRS ``EPSG:25833``.
    """
    pandas_df = df.toPandas()
    # ETRS89 / UTM zone 33N (Norway)
    return gpd.GeoDataFrame(pandas_df, geometry=column_name, crs="EPSG:25833")

In [0]:
# Wrap the plain-Python helpers as Spark UDFs.
# The bbox helper draws random offsets, so it is flagged non-deterministic:
# Spark treats UDFs as deterministic by default and may otherwise eliminate
# or repeat invocations during optimisation.
adjusted_bbox_udf = F.udf(
    random_adjusted_bbox_centered, ArrayType(DoubleType())
).asNondeterministic()
generate_dom_url_udf = F.udf(generate_dom_url, StringType())
generate_image_url_udf = F.udf(generate_image_url, StringType())
dom_file_status_udf = F.udf(dom_file_exists, StringType())
image_file_status_udf = F.udf(image_file_exists, StringType())

# Bronze -> geometry -> envelope -> bbox columns.
df = read_table_to_wkt()
df = make_envelope(df)
df = make_bbox(df, "envelope")

# Add WMS request URLs, download status flags and a load timestamp.
df = (
    df.withColumn("image_path", generate_image_url_udf("Adjusted_bbox"))
    .withColumn("dom_path", generate_dom_url_udf("Adjusted_bbox"))
    .withColumn("image_status", image_file_status_udf("row_hash"))
    .withColumn("dom_status", dom_file_status_udf("row_hash"))
    # F.lit: a bare `lit` is never imported in this notebook.
    .withColumn("mask_status", F.lit("PENDING"))
    .withColumn("lastet_tid", F.current_timestamp())
)

gdf = plot(df, "Polygons", "Vegtyper")
write_delta_table(df)


In [0]:
# NOTE(review): df is lazy and not cached, so this presumably re-runs the plan,
# including the random bbox UDF. The Adjusted_bbox / URL values shown here may
# differ from those written to the silver table. Confirm, or display the table.
df.display()

In [0]:
# Uncomment the line below to view the plot
# gdf.explore(column="Vegtyper", tooltip="Vegtyper", popup=True, cmap="Set1")

In [0]:
# spark.sql(f"DROP TABLE IF EXISTS {silver_table}")