In [None]:
# DDL for the `datamodel_db` schema and its three Delta tables.
# Each statement is bound to its own name so it can be read in isolation;
# `ddl_statements` then lists them with the schema statement first, followed
# by the tables.
#
# NOTE(review): Databricks' CREATE TABLE reference describes adding CHECK
# constraints via ALTER TABLE ... ADD CONSTRAINT -- confirm that the inline
# `CONSTRAINT ... CHECK` clauses below are accepted on the target runtime.

_CREATE_SCHEMA_SQL = "CREATE SCHEMA IF NOT EXISTS datamodel_db"

# Episode table: id/version, status (restricted by a CHECK to
# 'completed' / 'failed' / 'partial'), `*_json` columns typed STRING,
# and BIGINT token counts.
_CREATE_EPISODE_SQL = """
CREATE TABLE IF NOT EXISTS datamodel_db.episode (
  episode_id STRING NOT NULL,
  episode_version INT NOT NULL,
  created_at TIMESTAMP,
  status STRING NOT NULL,
  source STRING,
  source_ref STRING,
  inputs_json STRING,
  expected_outputs_json STRING,
  actual_outputs_json STRING,
  metadata_json STRING,
  input_tokens BIGINT,
  output_tokens BIGINT,
  total_tokens BIGINT,
  CONSTRAINT episode_status_check CHECK (status IN ('completed', 'failed', 'partial'))
)
USING DELTA
"""

# Episode steps table: carries episode_id / episode_version plus a
# step_index that a CHECK requires to be >= 0.
_CREATE_EPISODE_STEPS_SQL = """
CREATE TABLE IF NOT EXISTS datamodel_db.episode_steps (
  episode_id STRING NOT NULL,
  episode_version INT NOT NULL,
  step_index INT NOT NULL,
  step_type STRING,
  content STRING,
  created_at TIMESTAMP,
  metadata_json STRING,
  CONSTRAINT step_index_nonnegative CHECK (step_index >= 0)
)
USING DELTA
"""

# Episode evaluation table: evaluator details, DOUBLE score columns, and a
# match_outcome restricted by a CHECK to 'match' / 'mismatch' / 'undetermined'.
_CREATE_EPISODE_EVALUATION_SQL = """
CREATE TABLE IF NOT EXISTS datamodel_db.episode_evaluation (
  evaluation_id STRING NOT NULL,
  episode_id STRING NOT NULL,
  episode_version INT NOT NULL,
  evaluated_at TIMESTAMP,
  evaluator_name STRING,
  evaluator_version STRING,
  mlflow_run_id STRING,
  match_outcome STRING,
  overall_score DOUBLE,
  drift_score DOUBLE,
  coherence_score DOUBLE,
  idempotency_score DOUBLE,
  metrics_json STRING,
  CONSTRAINT match_outcome_check CHECK (match_outcome IN ('match', 'mismatch', 'undetermined'))
)
USING DELTA
"""

ddl_statements = [
    _CREATE_SCHEMA_SQL,
    _CREATE_EPISODE_SQL,
    _CREATE_EPISODE_STEPS_SQL,
    _CREATE_EPISODE_EVALUATION_SQL,
]

# Fail fast with an actionable message when no `spark` name is in scope.
# Evaluating the bare name raises NameError if it is undefined; that error is
# chained onto the RuntimeError so the original cause stays in the traceback.
try:
    spark
except NameError as missing_session:
    raise RuntimeError(
        'This notebook must be run on a Spark cluster (Databricks) with an active `spark` session.'
    ) from missing_session

# Run the DDL in list order: the schema statement is first, so the schema
# exists before the tables that live in it are created.
for stmt in ddl_statements:
    spark.sql(stmt)

# The check mark is written as an escape (U+2713) so the message stays correct
# even if this file is re-saved with a different text encoding.
print('\u2713 Created schema and tables in datamodel_db')

In [None]:
# Show what was created: first the tables in the schema, then the column
# layout of the episode table. truncate=False keeps long values from being cut.
for _inspect_sql in (
    'SHOW TABLES IN datamodel_db',
    'DESCRIBE TABLE datamodel_db.episode',
):
    spark.sql(_inspect_sql).show(truncate=False)