In [0]:
from pyspark.sql.functions import col
from pyspark.sql import DataFrame
from delta.tables import DeltaTable

In [0]:
# Names of the tables this notebook works with; they are resolved against the
# catalog and schema selected below.
log_table = "logs_predicted_masks"
silver_table = "predicted_silver"
gold_table = "predicted_gold_35m"

# Target catalog and schema. The catalog name is back-ticked because it
# contains a hyphen.
catalog_dev = "`land_topografisk-gdb_dev`"
schema_dev = "ai2025"

# Select the catalog, create the schema if it is missing, then make it the
# session default. Order matters: the schema is created inside the catalog
# chosen by the first statement.
for statement in (
    f"USE CATALOG {catalog_dev}",
    f"CREATE SCHEMA IF NOT EXISTS {schema_dev}",
    f"USE SCHEMA {schema_dev}",
):
    spark.sql(statement)

In [0]:
# Load the silver table from the current catalog/schema.
df_silver = spark.read.table(silver_table)

# Keep only rows whose diameter is at least 35.
# NOTE(review): the gold table name suggests the unit is metres — confirm.
df_filtered = df_silver.where(col("diameter") >= 35)

In [0]:
def write_delta_table(sdf: DataFrame, table_name: "str | None" = None) -> None:
    """
    Write a Spark dataframe to a Delta table, inserting only rows that are new.

    If the target table does not exist it is created from ``sdf``. Otherwise
    ``sdf`` is merged into it on ``row_hash``: source rows whose ``row_hash``
    is not yet present in the target are inserted, and rows whose hash already
    exists are left untouched. Existing target rows are never updated or
    deleted.

    Args:
        sdf: Dataframe to persist. The merge path requires it to contain a
            ``row_hash`` column.
        table_name: Name of the target table. Defaults to the notebook-level
            ``gold_table`` so existing callers keep their behaviour.
    """
    target_table = gold_table if table_name is None else table_name

    if not spark.catalog.tableExists(target_table):
        # First run: create the table from the dataframe.
        sdf.write.format("delta").option("mergeSchema", "true").mode("overwrite").saveAsTable(target_table)
        return

    # Loop variable is named `column` so it does not shadow the imported
    # pyspark.sql.functions.col.
    insert_values = {column: f"source.{column}" for column in sdf.columns}

    # The merge key is row_hash itself, so a matched pair always has identical
    # hashes. The previous whenMatchedUpdate(condition="target.row_hash !=
    # source.row_hash") clause could therefore never fire and has been removed;
    # the resulting table contents are unchanged.
    # NOTE(review): if changed rows are meant to overwrite their older version,
    # the merge must be keyed on a stable identifier instead of row_hash —
    # confirm the intended key.
    (
        DeltaTable.forName(spark, target_table)
        .alias("target")
        .merge(
            source=sdf.alias("source"),
            condition="target.row_hash = source.row_hash",
        )
        .whenNotMatchedInsert(values=insert_values)
        .execute()
    )

In [0]:
# Persist the filtered predictions to the gold table.
write_delta_table(sdf=df_filtered)