# In [0]:

from delta.tables import *

# Create a DeltaTable instance for an existing Delta table, using its path.
delta_table = DeltaTable.forPath(spark, "path/to/table")

# Convert an existing Parquet table in place into a Delta table.
# The table is identified as "parquet.`<path>`"; the call used for this is
# DeltaTable.convertToDelta (there is no DeltaTable.spark method).
delta_table = DeltaTable.convertToDelta(spark, "parquet.`path/to/table`")

# Delete the rows of the table that match a predicate.
# NOTE: `col` comes from pyspark.sql.functions and must be in scope.
delta_table.delete("date < '2017-01-01'")  # predicate as a SQL formatted string
delta_table.delete(col("date") < "2017-01-01")  # predicate using Spark SQL functions

# Update the rows that match `condition`, applying the assignments given in `set`.
# Keys of `set` are always column names as plain strings; the values are either
# SQL expression strings or Column objects.

# 1. condition and value as SQL formatted strings
#    (string literals need their own single quotes inside the SQL text,
#    otherwise the word is read as a column name)
delta_table.update(
    condition="dt = 'existing_date'",
    set={"dt": "'your_date'"},
)

# 2. condition and value using Spark SQL functions
#    (`col` and `lit` come from pyspark.sql.functions and must be in scope)
delta_table.update(
    condition=col("dt") == "existing_date",
    set={"dt": lit("your_date")},
)

# merge command (upsert)
# merge() is called on the DeltaTable (the merge target); `source_df` is
# assumed to be a DataFrame holding the incoming rows - it is not defined in
# this snippet. Nothing is written until execute() is called.

# 1. Update all columns of matched rows and insert unmatched source rows as-is.
(
    delta_table.alias("trgt")
    .merge(source_df.alias("src"), "trgt.id = src.id")
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
    .execute()
)

# 2. Update / insert only selected columns.
#    Keys are target column names; "value1" ... "value4" are placeholders for
#    the expressions to assign (e.g. "src.some_column").
(
    delta_table.alias("trgt")
    .merge(source_df.alias("src"), "trgt.id = src.id")
    .whenMatchedUpdate(
        set={
            "key1": "value1",
            "key2": "value2",
        }
    )
    .whenNotMatchedInsert(
        values={
            "key3": "value3",
            "key4": "value4",
        }
    )
    .execute()
)

# vacuum, physically delete files
# vacuum is a method of the table; it must be called on delta_table, and its
# result must not be assigned back to delta_table.
delta_table.vacuum()  # vacuum files not required by versions more than 7 days old
delta_table.vacuum(100)  # vacuum files not required by versions more than 100 hours old

# history command
# Without an argument the full history is requested; an integer argument
# limits the result to that many most recent operations.
full_table_history = delta_table.history()      # get the full history of the table
specific_table_history = delta_table.history(5) # get the last 5 operations

# Get the details of a Delta table such as the format, name, and size.
# NOTE: the return value is not stored here; assign it to a variable to reuse it.
delta_table.detail()

# restore / rollback
# Both restore calls are methods of the table and must be called on delta_table.
delta_table.restoreToVersion(2)  # roll back to table version 2
delta_table.restoreToTimestamp('2021-01-01')  # roll back to the state at a date
delta_table.restoreToTimestamp('2021-01-01 01:01:01')  # roll back to the state at a date and time

# optimize, small file compaction
# First call: compaction with no filter.
# Second call: compaction restricted by the where() predicate
# (presumably `date` is a partition column of the table - confirm).
delta_table.optimize().executeCompaction()
delta_table.optimize().where("date='2021-11-18'").executeCompaction()

# SCD (slowly changing dimensions) - further reading
# Type 1: https://github.com/delta-io/delta-examples/blob/master/notebooks/pyspark/delta-merge.ipynb
# Type 2: https://iterationinsights.com/article/how-to-implement-slowly-changing-dimensions-scd-type-2-using-delta-table/