In [0]:
from pyspark.sql.functions import current_timestamp
def add_ingestion_date(df):
    """Return ``df`` with an ``ingestion_date`` column added.

    The column is populated with ``current_timestamp()`` from
    ``pyspark.sql.functions``. The input DataFrame is not modified; the
    result of ``withColumn`` is returned.

    Args:
        df: DataFrame to stamp.

    Returns:
        The DataFrame produced by ``df.withColumn("ingestion_date", ...)``.
    """
    return df.withColumn("ingestion_date", current_timestamp())

In [0]:
from pyspark.sql.functions import col
def rearrange_partition_columns(df, partition_col):
    """Return ``df`` re-selected so that ``partition_col`` is the last column.

    Every other column keeps the relative order it has in
    ``df.schema.names``; the partition column is appended at the end as a
    ``col(partition_col)`` expression.

    Args:
        df: DataFrame whose columns are reordered.
        partition_col: Name of the column to move to the end.

    Returns:
        The DataFrame produced by ``df.select`` on the reordered column list.
    """
    # Keep every non-partition column in its existing order.
    leading_columns = [name for name in df.schema.names if name != partition_col]
    ordered_columns = leading_columns + [col(partition_col)]
    return df.select(ordered_columns)


In [0]:
def overwrite_partition(df, partition_col, database, table):
    """Write ``df`` into ``database.table``, overwriting by partition.

    If the table already exists, the data is written with
    ``mode("overwrite").insertInto(...)`` under dynamic partition overwrite
    mode. Otherwise the table is created as a parquet table partitioned by
    ``partition_col``.

    Args:
        df: DataFrame to write.
        partition_col: Name of the partition column; it is moved to the last
            position via ``rearrange_partition_columns`` before writing.
        database: Target database name.
        table: Target table name.

    Returns:
        None.
    """
    full_name = f"{database}.{table}"
    # insertInto resolves columns by position, so the partition column is
    # moved to the end before writing.
    df = rearrange_partition_columns(df, partition_col)
    # The setting that controls INSERT OVERWRITE partition behaviour is
    # spark.sql.sources.partitionOverwriteMode. With any other key the session
    # stays in the default "static" mode and the overwrite below can drop
    # partitions that are not present in df.
    spark.conf.set("spark.sql.sources.partitionOverwriteMode", "dynamic")
    if spark.catalog.tableExists(full_name):
        df.write.mode("overwrite").insertInto(full_name)
    else:
        # First load: create the table partitioned by partition_col.
        df.write.mode("overwrite").partitionBy(partition_col).format("parquet").saveAsTable(full_name)

In [0]:
def merge_data(df, path, database, table, partition_col, merge_condition):
    """Upsert ``df`` into the Delta table ``database.table``.

    If the table exists, the Delta table at ``{path}/{table}`` is merged with
    ``df`` (aliased ``tgt`` and ``src`` respectively) on ``merge_condition``:
    matched rows are updated with all source columns and unmatched rows are
    inserted. If the table does not exist, it is created as a Delta table
    partitioned by ``partition_col``.

    Args:
        df: Source DataFrame.
        path: Base location; the existing table is opened at
            ``{path}/{table}``.
        database: Target database name.
        table: Target table name.
        partition_col: Partition column used when the table is first created.
        merge_condition: Join condition for the merge, written against the
            ``tgt`` and ``src`` aliases.

    Returns:
        None.
    """
    spark.conf.set("spark.databricks.optimizer.dynamicPartitionPruning", True)
    from delta.tables import DeltaTable

    qualified_name = f"{database}.{table}"

    if not spark.catalog.tableExists(qualified_name):
        # First load: nothing to merge into, so create the table.
        writer = df.write.mode("overwrite").partitionBy(partition_col).format("delta")
        writer.saveAsTable(qualified_name)
        return

    # NOTE(review): existence is checked by catalog name but the table is
    # opened by location; this assumes database.table is stored at
    # {path}/{table} - confirm against how the database was created.
    target = DeltaTable.forPath(spark, f"{path}/{table}").alias("tgt")
    merge_builder = target.merge(df.alias("src"), merge_condition)
    merge_builder.whenMatchedUpdateAll().whenNotMatchedInsertAll().execute()