In [0]:
from pyspark.sql.functions import *
from pyspark.sql import DataFrame
from delta.tables import DeltaTable

In [0]:
# Unity Catalog location used by this notebook. The catalog name contains a
# hyphen, so it is kept wrapped in backticks for use inside SQL statements.
catalog_dev = "`land_topografisk-gdb_dev`"
schema_dev = "ai2025"

# Table names, resolved relative to the catalog/schema selected below.
log_table = "logs_predicted_masks"
silver_table = "predicted_silver"
gold_table = "predicted_gold"

# Point the session at the catalog, create the schema if needed, then select
# it. Order matters: the schema is created inside the catalog chosen first.
for statement in (
    f"USE CATALOG {catalog_dev}",
    f"CREATE SCHEMA IF NOT EXISTS {schema_dev}",
    f"USE SCHEMA {schema_dev}",
):
    spark.sql(statement)

In [0]:
# Load the silver-layer predictions.
df_silver = spark.read.table(silver_table)

# Build a point geometry from the centroid coordinates, then drop the raw
# coordinate columns since the geometry column replaces them.
df_geom = (
    df_silver
    .withColumn("geometry", expr("ST_Point(centroid_x, centroid_y)"))
    .drop("centroid_x", "centroid_y")
)

# Bucket each row by diameter. Rows that fall in no bucket (diameter below 20
# or null) get a null category and are removed by the final filter.
df_category = (
    df_geom
    .withColumn(
        "category",
        when((col("diameter") >= 20) & (col("diameter") < 25), "20-25")
        .when((col("diameter") >= 25) & (col("diameter") < 30), "25-30")
        .when((col("diameter") >= 30) & (col("diameter") < 35), "30-35")
        .when(col("diameter") >= 35, "35+"),
    )
    .filter(col("category").isNotNull())
)

In [0]:
def write_delta_table(sdf: DataFrame):
    """
    Write ``sdf`` to the gold Delta table, upserting on ``row_hash``.

    If the table does not exist yet, it is created from ``sdf``. Otherwise
    ``sdf`` is merged into the existing table: rows whose ``row_hash`` is
    already present are overwritten with the incoming values, and all other
    rows are inserted.

    Args:
        sdf: Spark DataFrame to persist. It must contain a ``row_hash``
            column. When merging, only one source row per ``row_hash`` is
            kept (see the note in the body).

    Returns:
        None. The function is called for its side effect on ``gold_table``.
    """
    if not spark.catalog.tableExists(gold_table):
        # First run: create the table from the DataFrame as-is.
        sdf.write.format("delta").option("mergeSchema", "true").mode("overwrite").saveAsTable(gold_table)
        return

    # Named `name` rather than `col` so the comprehension does not shadow
    # pyspark's col() function.
    assignments = {name: f"source.{name}" for name in sdf.columns}

    # Delta aborts a merge when several source rows try to update the same
    # target row, so keep a single source row per merge key.
    # NOTE(review): assumes rows sharing a row_hash are interchangeable - confirm.
    source = sdf.dropDuplicates(["row_hash"]).alias("source")

    (
        DeltaTable.forName(spark, gold_table)
        .alias("target")
        .merge(source=source, condition="target.row_hash = source.row_hash")
        # No extra clause condition here: the previous
        # "target.row_hash != source.row_hash" contradicted the merge
        # condition, so the update branch could never run.
        .whenMatchedUpdate(set=assignments)
        .whenNotMatchedInsert(values=assignments)
        .execute()
    )

In [0]:
# Persist the categorized rows to the gold table: created on the first run,
# merged on row_hash on later runs.
write_delta_table(df_category)