In [0]:
# Databricks notebook source
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.catalog import (
    MonitorInferenceLog,
    MonitorInferenceLogProblemType,
)
from pyspark.sql import SparkSession

from utils import load_config
from pyspark.sql.types import ArrayType, DoubleType, StringType, StructField, StructType, TimestampType

In [0]:
# Load project settings; the catalog and schema decide where the monitoring table lives.
config = load_config("../project_config.yml")
catalog_name = config.catalog_name
schema_name = config.schema_name

# Databricks notebooks inject a global `spark`, but resolving the session explicitly
# keeps this cell runnable on a fresh kernel as well. getOrCreate() returns the
# already-active session when one exists, so nothing changes inside Databricks.
spark = SparkSession.builder.getOrCreate()

# Fully qualified name of the table that will be monitored (built once, used twice).
monitoring_table = f"{catalog_name}.{schema_name}.model_monitoring"

# Request/response metadata columns, followed by the prediction, the model identifier
# and the "default" column (used as the label column by the quality monitor).
metadata_fields = [
    StructField("timestamp", TimestampType(), True),
    StructField("timestamp_ms", DoubleType(), True),
    StructField("databricks_request_id", StringType(), True),
    StructField("execution_time_ms", DoubleType(), True),
    StructField("Id", StringType(), True),
    StructField("prediction", DoubleType(), True),
    StructField("model_name", StringType(), True),
    StructField("default", DoubleType(), True),
]

# Model feature columns, all stored as nullable doubles. The order is significant for
# the table layout; note that the payment-status series goes Pay_0, Pay_2, ... (no Pay_1).
feature_names = [
    "Limit_bal",
    "Sex",
    "Education",
    "Marriage",
    "Age",
    "Pay_0",
    "Pay_2",
    "Pay_3",
    "Pay_4",
    "Pay_5",
    "Pay_6",
    "Bill_amt1",
    "Bill_amt2",
    "Bill_amt3",
    "Bill_amt4",
    "Bill_amt5",
    "Bill_amt6",
    "Pay_amt1",
    "Pay_amt2",
    "Pay_amt3",
    "Pay_amt4",
    "Pay_amt5",
    "Pay_amt6",
]
feature_fields = [StructField(name, DoubleType(), True) for name in feature_names]

# Create new monitoring table with complete schema
monitoring_schema = StructType(metadata_fields + feature_fields)
empty_monitoring_df = spark.createDataFrame([], monitoring_schema)

# mode("ignore") makes the cell safe to re-run: if the table already exists the write
# is skipped instead of raising. NOTE(review): an existing table with a different
# schema is left untouched — drop it first if the schema above has changed.
empty_monitoring_df.write.format("delta").mode("ignore").saveAsTable(monitoring_table)

# Enable Change Data Feed for the table (presumably so the monitor can process
# changes incrementally — confirm against the Lakehouse Monitoring requirements).
spark.sql(f"ALTER TABLE {monitoring_table} SET TBLPROPERTIES (delta.enableChangeDataFeed = true);")

[32m2025-04-04 20:52:12.642[0m | [1mINFO    [0m | [36mutils[0m:[36mload_config[0m:[36m66[0m - [1mLoaded configuration from ../project_config.yml[0m


DataFrame[]

In [0]:
# SDK client, constructed with no arguments so it relies on the SDK's default configuration.
workspace = WorkspaceClient()

# Fully qualified name of the inference table the monitor is attached to.
monitoring_table = f"{catalog_name}.{schema_name}.model_monitoring"

# Inference-log profile: classification problem, with metrics computed over
# 30-minute windows of the "timestamp" column and grouped by "model_name".
inference_log_config = MonitorInferenceLog(
    problem_type=MonitorInferenceLogProblemType.PROBLEM_TYPE_CLASSIFICATION,
    prediction_col="prediction",
    label_col="default",
    model_id_col="model_name",
    timestamp_col="timestamp",
    granularities=["30 minutes"],
)

# Create quality monitor for the table with the right schema.
# Kept as the final expression so the returned MonitorInfo is displayed as the cell output.
workspace.quality_monitors.create(
    table_name=monitoring_table,
    assets_dir=f"/Workspace/Shared/lakehouse_monitoring/{monitoring_table}",
    output_schema_name=f"{catalog_name}.{schema_name}",
    inference_log=inference_log_config,
)

MonitorInfo(table_name='credit.default.model_monitoring', status=<MonitorInfoStatus.MONITOR_STATUS_PENDING: 'MONITOR_STATUS_PENDING'>, monitor_version=0, profile_metrics_table_name='credit.default.model_monitoring_profile_metrics', drift_metrics_table_name='credit.default.model_monitoring_drift_metrics', assets_dir='/Workspace/Shared/lakehouse_monitoring/credit.default.model_monitoring', baseline_table_name=None, custom_metrics=[], dashboard_id=None, data_classification_config=None, inference_log=MonitorInferenceLog(timestamp_col='timestamp', granularities=['30 minutes'], model_id_col='model_name', problem_type=<MonitorInferenceLogProblemType.PROBLEM_TYPE_CLASSIFICATION: 'PROBLEM_TYPE_CLASSIFICATION'>, prediction_col='prediction', label_col='default', prediction_proba_col=None), latest_monitor_failure_msg=None, notifications=None, output_schema_name='credit.default', schedule=None, slicing_exprs=None, snapshot=None, time_series=None)

In [0]:
# Manual cleanup: uncomment and run only when the monitoring table should be removed.
# spark.sql(f"DROP TABLE IF EXISTS {catalog_name}.{schema_name}.model_monitoring")

DataFrame[]

In [0]:
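## How to delete a monitor
# Manual cleanup: uncomment and run only when the monitor should be removed.
# A saved run of this cell raised NotFound, presumably because the monitor
# (or its table) no longer existed at that point.
# workspace.quality_monitors.delete(
#     table_name=f"{catalog_name}.{schema_name}.model_monitoring"
# )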

[0;31m---------------------------------------------------------------------------[0m
[0;31mNotFound[0m                                  Traceback (most recent call last)
File [0;32m<command-7746108655932792>, line 2[0m
[1;32m      1[0m [38;5;66;03m## How to delete a monitor[39;00m
[0;32m----> 2[0m workspace[38;5;241m.[39mquality_monitors[38;5;241m.[39mdelete(
[1;32m      3[0m     table_name[38;5;241m=[39m[38;5;124mf[39m[38;5;124m"[39m[38;5;132;01m{[39;00mcatalog_name[38;5;132;01m}[39;00m[38;5;124m.[39m[38;5;132;01m{[39;00mschema_name[38;5;132;01m}[39;00m[38;5;124m.model_monitoring[39m[38;5;124m"[39m
[1;32m      4[0m )

File [0;32m/local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.11/site-packages/databricks/sdk/service/catalog.py:7887[0m, in [0;36mQualityMonitorsAPI.delete[0;34m(self, table_name)[0m
[1;32m   7865[0m [38;5;250m[39m[38;5;124;03m"""Delete a table monitor.[39;00m
[1;32m   7866[0m [38;5;124;03m[39;00m
[1;