In [None]:
%md
# Kardiaflow reset

Drops Delta tables, deletes storage and checkpoints (bronze, silver, raw/source, enriched),
removes databases, and clears the Spark catalog cache to provide a clean slate.

In [None]:
from kflow.config import bronze_paths, silver_paths
from kflow.auth_adls import ensure_adls_oauth

# Use the Hive metastore (no Unity Catalog).
# NOTE(review): `spark` is not defined in this file; presumably it is the
# session object injected by the notebook runtime — confirm before running
# this outside a notebook.
spark.sql("USE CATALOG hive_metastore")

# 1. Configure Spark with ADLS OAuth credentials.
# NOTE(review): the helper's behaviour lives in kflow.auth_adls; presumably it
# must run before the storage deletions below — confirm.
ensure_adls_oauth()


def safe_rm(path: str, description: str) -> None:
    """Recursively delete ``path`` via ``dbutils.fs.rm`` on a best-effort basis.

    Args:
        path: Storage location to delete; removal is recursive.
        description: Human-readable label used in the printed log lines.

    Errors raised by ``dbutils.fs.rm`` are printed and swallowed so that the
    rest of the reset keeps running.
    """
    try:
        removed = dbutils.fs.rm(path, recurse=True)
    except Exception as e:
        print(f"  (ignore) failed removing {description} {path}: {e}")
        return

    # dbutils.fs.rm returns a boolean. An explicit False means nothing was
    # deleted (e.g. the path did not exist), so do not claim a removal.
    # Any other result keeps the original "Removed" message.
    if removed is False:
        print(f"Nothing to remove for {description}: {path}")
    else:
        print(f"Removed {description}: {path}")


def safe_drop_table(full_name: str) -> None:
    """Drop a table on a best-effort basis: try PURGE first, then a plain DROP.

    Args:
        full_name: Qualified table name (``database.table``) interpolated
            directly into the SQL statement; callers pass trusted constants.

    Nothing is raised to the caller: every failure is printed with an
    "(ignore)" prefix and the function moves on.
    """
    # Clear Spark's cache before dropping. A failure here is non-fatal, but it
    # is logged rather than silently discarded so it can be diagnosed.
    try:
        spark.catalog.clearCache()
    except Exception as e:
        print(f"  (ignore) failed clearing cache before dropping {full_name}: {e}")

    print(f"Dropping table {full_name}")

    # First attempt: DROP ... PURGE (intended to remove data and metadata).
    try:
        spark.sql(f"DROP TABLE IF EXISTS {full_name} PURGE")
    except Exception as e:
        print(f"  (ignore) PURGE failed for {full_name}: {e}")
    else:
        print(f"Dropped table {full_name} with PURGE")
        return

    # Fallback: regular DROP when the PURGE variant was rejected.
    try:
        spark.sql(f"DROP TABLE IF EXISTS {full_name}")
    except Exception as e2:
        print(f"  (ignore) failed dropping {full_name}: {e2}")
    else:
        print(f"Dropped table {full_name} without PURGE")


# 2. Drop known tables from Bronze/Silver
# Names follow kardia_<layer>.<layer>_<dataset>; all bronze tables come first,
# then all silver tables, then the silver-only enriched encounters table.
to_drop = [
    f"kardia_{layer}.{layer}_{dataset}"
    for layer in ("bronze", "silver")
    for dataset in ("encounters", "claims", "patients", "providers", "feedback")
]
to_drop.append("kardia_silver.silver_encounters_enriched")

for table_name in to_drop:
    safe_drop_table(table_name)


# 3. Remove storage and checkpoints per dataset
datasets = ("encounters", "claims", "patients", "providers", "feedback")
for dataset in datasets:
    # Bronze: table data plus its streaming checkpoint
    bronze_layout = bronze_paths(dataset)
    print(f"\nCleaning bronze layer for '{dataset}':")
    for location, label in (
        (bronze_layout.bronze, f"bronze data for {dataset}"),
        (bronze_layout.checkpoint, f"bronze checkpoint for {dataset}"),
    ):
        safe_rm(location, label)

    # Raw / source: the landing location exposed by the bronze path bundle
    print(f"Cleaning raw/source layer for '{dataset}':")
    safe_rm(bronze_layout.raw, f"raw/source data for {dataset}")

    # Silver: resolved only after the bronze/raw removals, as before
    silver_layout = silver_paths(dataset)
    print(f"Cleaning silver layer for '{dataset}':")
    for location, label in (
        (silver_layout.path, f"silver data for {dataset}"),
        (silver_layout.checkpoint, f"silver checkpoint for {dataset}"),
    ):
        safe_rm(location, label)


# 4. Remove enriched target
print("\nCleaning enriched target 'encounters_enriched':")
enriched_layout = silver_paths("encounters_enriched")
# Data path first, then its checkpoint.
for location, label in (
    (enriched_layout.path, "enriched path"),
    (enriched_layout.checkpoint, "enriched checkpoint"),
):
    safe_rm(location, label)


# 5. Drop databases to remove namespaces
print("\nDropping databases (if they exist):")
for database in ("kardia_bronze", "kardia_silver", "kardia_gold"):
    drop_statement = f"DROP DATABASE IF EXISTS {database} CASCADE"
    # Best-effort: a failure on one database must not stop the others.
    try:
        spark.sql(drop_statement)
    except Exception as err:
        print(f" (ignore) failed dropping database {database}: {err}")
    else:
        print(f"Dropped database {database}")


# 6. Clear Spark catalog cache (current session only).
# Best-effort: any failure is printed and ignored so the final
# "Reset complete." message is still reached.
print("\nClearing Spark catalog cache.")
try:
    spark.catalog.clearCache()
except Exception as e:
    print(f" (ignore) failed clearing cache: {e}")

print("\nReset complete.")