In [0]:
from delta.tables import DeltaTable
from pyspark.sql.functions import lit ,col

# Load the Delta table
delta_table = DeltaTable.forName(spark, "tbl_sal")

# Get the history as a DataFrame
history_df = delta_table.history()  # This is a DataFrame


# Step 3: Extract latest and previous versions
versions = history_df.select("version").orderBy("version", ascending=False).limit(2).collect()
latest_version = versions[0]["version"]
previous_version = versions[1]["version"]

# Step 4: Read both versions of the Delta table
current_df = spark.read.format("delta").option("versionAsOf", latest_version).table("tbl_sal").withColumn("version", lit(latest_version))
prev_df = spark.read.format("delta").option("versionAsOf", previous_version).table("tbl_sal").withColumn("version", lit(previous_version))

# Step 5: Find Inserted rows
inserted_rows = current_df.exceptAll(prev_df)
print("✅ Inserted Rows:")
inserted_rows.show()

# Step 6: Find Deleted rows
deleted_rows = prev_df.exceptAll(current_df)
print("❌ Deleted Rows:")
deleted_rows.show()

join_cond = current_df["id"] == prev_df["id"]
updated_rows = current_df.alias("curr").join(
    prev_df.alias("prev"),
    on=join_cond,
    how="inner"
).where(
    (col("curr.name") != col("prev.name")) |
    (col("curr.age") != col("prev.age")) |
    (col("curr.salary") != col("prev.salary"))
).select("curr.*")

print("🔁 Updated Rows:")
updated_rows.show()


✅ Inserted Rows:
+---+------+---+------+-------+
| id|  name|age|salary|version|
+---+------+---+------+-------+
|  8|vishnu| 55| 30000|      1|
|  5| radha| 20|140000|      1|
|  3| shyam| 25|130000|      1|
|  4|  sita| 30|135000|      1|
|  1|  hari| 45|120000|      1|
|  2|   ram| 35|125000|      1|
+---+------+---+------+-------+

❌ Deleted Rows:
+---+-----+---+------+-------+
| id| name|age|salary|version|
+---+-----+---+------+-------+
|  5|radha| 20|140000|      0|
|  3|shyam| 25|130000|      0|
|  4| sita| 30|135000|      0|
|  1| hari| 45|120000|      0|
|  2|  ram| 35|125000|      0|
+---+-----+---+------+-------+

🔁 Updated Rows:
+---+----+---+------+-------+
| id|name|age|salary|version|
+---+----+---+------+-------+
+---+----+---+------+-------+

