In [0]:
from pyspark.sql import SparkSession
from delta.tables import *

# Extra PySpark helpers used below to de-duplicate the staging batch.
from pyspark.sql import Window
from pyspark.sql import functions as F

# Upsert job: copy the latest order-tracking rows from the staging Delta table
# into the target Delta table (keyed on tracking_num), then register the target
# table in the metastore.

# Initialize Spark Session
spark = SparkSession.builder.appName("Upsert into Target Delta Table").getOrCreate()

# Paths for the staging and target Delta tables
staging_table_path = "dbfs:/tmp/staging_order_tracking"
target_table_path = "dbfs:/tmp/target_order_tracking"

# Read from the staging Delta table
staging_df = spark.read.format("delta").load(staging_table_path)
staging_df.show()
print("Data read from staging table completed")

# Keep only the newest row per tracking_num. Delta's MERGE raises an error when
# several source rows match the same target row, and unmatched duplicates would
# all be inserted, so the source must be unique on the merge key.
# NOTE(review): rows sharing the same tracking_num AND last_update_timestamp are
# tie-broken arbitrarily - confirm that is acceptable for this feed.
newest_first = Window.partitionBy("tracking_num").orderBy(
    F.col("last_update_timestamp").desc()
)
source_df = (
    staging_df
    .withColumn("_row_rank", F.row_number().over(newest_first))
    .filter(F.col("_row_rank") == 1)
    .drop("_row_rank")
)

if not DeltaTable.isDeltaTable(spark, target_table_path):
    # First run: the target does not exist yet, so seeding it with the staging
    # rows already is the upsert. Merging the same rows again would only
    # rewrite every row with identical values.
    source_df.write.format("delta").save(target_table_path)
else:
    # Create DeltaTable object for the target table
    target_delta_table = DeltaTable.forPath(spark, target_table_path)

    # Perform upsert from staging to target table using tracking_num as key:
    # matching rows are overwritten column-by-column, new rows are inserted.
    (
        target_delta_table.alias("target")
        .merge(
            source_df.alias("staging"),
            "target.tracking_num = staging.tracking_num",
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )

print("Data upserted in target table")

# Register the target table in the Hive Metastore (Optional)
spark.sql("CREATE DATABASE IF NOT EXISTS hive_metastore.target")
spark.sql(
    "CREATE TABLE IF NOT EXISTS hive_metastore.target.target_order_tracking "
    f"USING DELTA LOCATION '{target_table_path}'"
)

+---------+------------+-----------------+--------------------+----------------+-------------+---------------------+
|order_num|tracking_num|pck_recieved_date|package_deliver_date|          status|      address|last_update_timestamp|
+---------+------------+-----------------+--------------------+----------------+-------------+---------------------+
|     1000|     TRK1000|       2023-01-01|          2023-01-06|        Returned|   456 Oak St|  2023-01-01 05:41:55|
|     1001|     TRK1001|       2023-01-01|          2023-01-06|      In Transit|  789 Pine St|  2023-01-01 08:43:50|
|     1002|     TRK1002|       2023-01-01|          2023-01-05|Out for Delivery|   123 Elm St|  2023-01-01 17:44:29|
|     1003|     TRK1003|       2023-01-01|          2023-01-04|      In Transit|  789 Pine St|  2023-01-01 05:29:16|
|     1004|     TRK1004|       2023-01-01|          2023-01-02|         Delayed| 202 Birch Rd|  2023-01-01 13:12:49|
|     1005|     TRK1005|       2023-01-01|          2023-01-03| 

DataFrame[]