In [0]:
%pip install databricks-labs-dqx
# NOTE(review): the package version is not pinned, so the DQX API used by the
# cells below may differ between runs — consider pinning a known-good version.
# Restart the Python process so the package installed above can be imported.
dbutils.library.restartPython()

In [0]:
from databricks.labs.dqx.engine import DQEngine
from databricks.labs.dqx.profiler.profiler import DQProfiler
from databricks.labs.dqx.profiler.generator import DQGenerator
from databricks.sdk import WorkspaceClient

In [0]:
# Short names of the staged Warcraft Logs tables. Each one is read from
# 02_silver.staging.warcraftlogs_<short name>.
_staged_table_names = (
    "tables_abilities",
    "tables_damage_abilities",
    "tables_damage_abilities_nested",
    "tables_damage_sources",
    "tables_events",
    "tables_gear",
    "tables_healing_abilities",
    "tables_pets",
    "tables_healing_sources",
    "tables_summary",
    "tables_talents",
    "tables_targets",
)

# Map every short name to the result of reading its staging table.
tables = {
    short_name: spark.read.table(f"02_silver.staging.warcraftlogs_{short_name}")
    for short_name in _staged_table_names
}

In [0]:
# A single workspace client is shared by every DQX component in this notebook.
ws = WorkspaceClient()

# Profiler and generator are used later to derive candidate checks per table.
profiler = DQProfiler(ws)
generator = DQGenerator(ws)

# DQEngine takes the WorkspaceClient as its first argument, exactly like the
# profiler and the generator. The original passed the SparkSession there.
engine = DQEngine(ws)

In [0]:
for name, df in tables.items():
    # 1. Profile the staged data and generate candidate checks from the
    #    profiles (the first element returned by the profiler is unused).
    _, profiles = profiler.profile(df)
    all_checks = generator.generate_dq_rules(profiles)

    # 2. Keep only the checks whose function is neither "is_in_range" nor
    #    "is_in_list"; those two kinds are treated as problematic here.
    excluded_functions = ("is_in_range", "is_in_list")
    checks = [
        rule
        for rule in all_checks
        if rule.get("check", {}).get("function") not in excluded_functions
    ]

    # 3. Apply the remaining checks; the engine returns two DataFrames, one
    #    kept as output and one sent to quarantine.
    valid_df, quarantine_df = engine.apply_checks_by_metadata_and_split(df, checks)

    # 4. Persist both DataFrames to their per-table destinations.
    target_table = f"02_silver.warcraftlogs.{name}"
    quarantine_target = f"02_silver.dq_monitoring.warcraftlogs_quarantine_{name}"
    engine.save_results_in_table(
        output_df=valid_df,
        quarantine_df=quarantine_df,
        output_table=target_table,
        quarantine_table=quarantine_target,
    )

    # 5. Drop the staging table once the save call above has returned.
    spark.sql(f"DROP TABLE IF EXISTS 02_silver.staging.warcraftlogs_{name}")