In [0]:
# Databricks Notebook: 04_CDC_Tracking_EV
# Track changes in EV Silver table over time

from pyspark.sql import functions as F
from datetime import datetime, timedelta

silver_path = "/mnt/cti/silver/ev_data"
cdc_path    = "/mnt/cti/gold/ev_data/cdc_ev_changes"

print("✅ Setup complete")

# Read CDC for last 1 hour
print("\n🔍 Reading changes from last 1 hour...")

start_time = (datetime.now() - timedelta(hours=1)).strftime("%Y-%m-%d %H:%M:%S")

try:
    df_changes = (
        spark.read
        .format("delta")
        .option("readChangeFeed", "true")
        .option("startingTimestamp", start_time)
        .load(silver_path)
    )

    change_count = df_changes.count()
    print(f"✅ Found {change_count} changes")

    if change_count > 0:
        print("\n📊 Change Types:")
        df_changes.groupBy("_change_type").count().show()

        print("\n📋 Sample Changes (EV fields):")
        df_changes.select(
            "_change_type",
            "record_id",
            "Make",
            "Model",
            "Electric_Range",
            "ingestion_timestamp"
        ).show(20, truncate=False)

        df_changes.write.format("delta").mode("append").save(cdc_path)
        print("💾 Saved to CDC history table")
    else:
        print("ℹ️ No EV changes in the last hour")

except Exception as e:
    print(f"⚠️ CDC not enabled or no changes: {e}")

print("\n🎉 EV CDC Tracking Complete!")


✅ Setup complete

🔍 Reading changes from last 1 hour...
⚠️ CDC not enabled or no changes: [DELTA_MISSING_CHANGE_DATA] Error getting change data for range [0 , 1] as change data was not
recorded for version [0]. If you've enabled change data feed on this table,
use `DESCRIBE HISTORY` to see when it was first enabled.
Otherwise, to start recording change data, use `ALTER TABLE table_name SET TBLPROPERTIES
(delta.enableChangeDataFeed=true)`.

🎉 EV CDC Tracking Complete!
