In [0]:
from pyspark.sql.functions import *
from pyspark.sql.window import Window

In [0]:
# Target location of the bronze table. The catalog name contains hyphens, so
# it is stored already wrapped in backticks for use inside SQL statements.
catalog_dev = "`land_auto-gen-kart_dev`"
schema_dev = "dl_bildesegmentering"
bronze_table = "helipads_bronze"

# Select the catalog, create the schema if it is missing, then switch to it.
# The statements run in exactly this order.
for _statement in (
    f"USE CATALOG {catalog_dev}",
    f"CREATE SCHEMA IF NOT EXISTS {schema_dev}",
    f"USE SCHEMA {schema_dev}",
):
    spark.sql(_statement)

In [0]:
# Create the bronze Delta table if it does not exist yet; IF NOT EXISTS keeps
# this cell safe to re-run. The table name is unqualified, so it resolves
# against whichever catalog/schema is current when the statement executes.
q = f"""
CREATE TABLE IF NOT EXISTS {bronze_table} (
    lokalid STRING,
    geometry BINARY,
    oppdateringsdato TIMESTAMP,
    ingest_time TIMESTAMP
) USING DELTA
"""
spark.sql(q)

In [0]:
def read_from_table(
    source_table: str = "`land_ngis_dev`.silver_fkblufthavn.helikopterlandingsplass",
) -> "DataFrame":
    """
    Read the helicopter landing sites, keeping only the fields we need.

    Args:
        source_table: Name of the table to read from. Defaults to the
            helicopter landing site table in the ``land_ngis_dev`` catalog;
            pass another name to read the same layout from elsewhere.

    Returns:
        A DataFrame with the columns ``lokalid``, ``geometry`` and
        ``oppdateringsdato``, where ``oppdateringsdato`` has been passed
        through ``to_timestamp``.
    """
    return (
        spark.read.table(source_table)
        # Convert before selecting so the column keeps its name and position.
        .withColumn("oppdateringsdato", to_timestamp(col("oppdateringsdato")))
        .select("lokalid", "geometry", "oppdateringsdato")
    )

In [0]:
def write_delta_table(sdf: "DataFrame", mode: str = "merge") -> None:
    """
    Write ``sdf`` to the bronze Delta table, replacing it or upserting into it.

    Args:
        sdf: Rows to write. For a merge it must contain ``lokalid`` (the merge
            key) and ``ingest_time`` (compared against the target row to
            decide whether a matched row is updated).
        mode: ``"overwrite"`` writes the frame with Spark's overwrite save
            mode and the ``mergeSchema`` option enabled. ``"merge"`` (the
            default) updates rows whose ``lokalid`` already exists and inserts
            the rest.

    Raises:
        ValueError: If ``mode`` is neither ``"overwrite"`` nor ``"merge"``.
    """
    # Validate first: previously any unknown value (e.g. a typo of
    # "overwrite") silently fell through to the merge branch.
    if mode not in ("overwrite", "merge"):
        raise ValueError(
            f"Unsupported mode {mode!r}; expected 'overwrite' or 'merge'."
        )

    if mode == "overwrite":
        (
            sdf.write.format("delta")
            .option("mergeSchema", "true")
            .mode("overwrite")
            .saveAsTable(bronze_table)
        )
        return

    # Imported lazily so the overwrite path does not need the delta package.
    from delta.tables import DeltaTable

    # Every source column is copied to the target column of the same name.
    # The loop variable is deliberately not called `col`, which is also the
    # name of the PySpark column function used elsewhere in this notebook.
    assignments = {name: f"source.{name}" for name in sdf.columns}

    (
        DeltaTable.forName(spark, bronze_table)
        .alias("target")
        .merge(sdf.alias("source"), condition="target.lokalid = source.lokalid")
        # NOTE(review): if ingest_time is stamped at load time this holds for
        # practically every matched row (and never when the target value is
        # NULL). Comparing oppdateringsdato may have been intended - confirm.
        .whenMatchedUpdate(
            condition="target.ingest_time < source.ingest_time",
            set=assignments,
        )
        .whenNotMatchedInsert(values=assignments)
        .execute()
    )

In [0]:
def main():
    """
    Read the source rows, stamp them with the current time and write them.

    The stamped frame is handed to ``write_delta_table`` with its default
    mode.
    """
    stamped = read_from_table().withColumn("ingest_time", current_timestamp())
    write_delta_table(stamped)

In [0]:
# Entry point of the notebook: run the ingestion defined by main().
main()