In [0]:
# Databricks notebook source
import dlt
from pyspark.sql.functions import *
from pyspark.sql.types import *
 
# ============================================
# DATA QUALITY RULES FOR SILVER
# ============================================
# Maps expectation name -> SQL boolean constraint. The whole mapping is handed
# to dlt.expect_all_or_drop on silver_table, so each key is an expectation name
# and each value is the condition a row must satisfy.
dataset_rules = {
    # --- Required fields -------------------------------------------------
    "R1_valid_dba_name": (
        "DBA_Name IS NOT NULL AND DBA_Name != ''"
    ),
    "R2_valid_inspection_date": (
        "Inspection_Date IS NOT NULL"
    ),
    "R3_valid_inspection_type": (
        "Inspection_Type IS NOT NULL AND Inspection_Type != ''"
    ),
    "R4_valid_zip_code": (
        "Zip_Code IS NOT NULL"
    ),
    "R5_valid_inspection_result": (
        "Inspection_Results IS NOT NULL AND Inspection_Results != ''"
    ),
    # --- Score range: 0 through 100 inclusive ----------------------------
    "R6_valid_score_max": (
        "Inspection_Score <= 100 AND Inspection_Score >= 0"
    ),
    # --- Cross-column consistency ----------------------------------------
    # A score of 90 or more may not be paired with more than 3 violations.
    "R7_valid_high_score_violations": (
        "NOT (Inspection_Score >= 90 AND Violation_Count > 3)"
    ),
    # A 'PASS' result may not carry a critical or urgent violation flag.
    "R8_no_critical_on_pass": (
        "NOT (Inspection_Results = 'PASS' AND "
        "(is_violation_critical = true OR is_violation_urgent = true))"
    ),
}
 
# ============================================
# BRONZE TABLE: Read from raw_table
# ============================================
@dlt.table(
    name="bronze_table",
    comment="Bronze layer - Raw food inspection data from raw_table with CDC enabled",
)
def bronze_table():
    """Load the raw food inspection rows into the Bronze layer.

    Performs a batch read of ``midterm.source1_layer.raw_table`` and returns
    the resulting DataFrame as-is: no transformations and no data quality
    expectations are applied at this layer.

    NOTE(review): the source table is presumably created outside this
    pipeline -- confirm. The table comment mentions CDC, but no table
    properties are set in this decorator -- confirm where CDC is enabled.
    """
    raw_source = "midterm.source1_layer.raw_table"
    reader = spark.read
    return reader.table(raw_source)
 
 
# ============================================
# SILVER TABLE: Cleansed data from Bronze
# ============================================
@dlt.table(
    name="silver_table",
    comment="Silver layer - Cleansed food inspection data with quality rules applied",
)
@dlt.expect_all_or_drop(dataset_rules)  # every rule in dataset_rules is enforced; failing rows are dropped
def silver_table():
    """Build the Silver layer from the Bronze table.

    Performs a batch read of ``LIVE.bronze_table`` and returns it unchanged.
    Cleansing happens entirely through the ``expect_all_or_drop`` decorator,
    which is given the module-level ``dataset_rules`` mapping.
    """
    bronze_source = "LIVE.bronze_table"
    bronze_rows = spark.read.table(bronze_source)
    return bronze_rows
 