## Pipeline Parameters

In [0]:
# Expose the load date as a text widget so it can be supplied per run, then
# read back whatever value is currently set. The default is only a format
# placeholder, not a usable date.
dbutils.widgets.text(
    name="input_load_date",
    defaultValue="YYYY-MM-DD",
    label="Input Load Date",
)
input_load_date = dbutils.widgets.get("input_load_date")

In [0]:
# Standard library imports
import os

# Third-party library imports
from dotenv import load_dotenv
from pyspark.testing import assertDataFrameEqual

In [0]:
# Pull settings from a .env file (python-dotenv) into the process environment
# before reading them.
load_dotenv()

catalog_name = os.getenv('DATABRICKS_CATALOG_NAME')
schema_name = os.getenv('DATABRICKS_SCHEMA_NAME')

# os.getenv returns None for an unset variable, and the validation query
# interpolates these names into the table identifier. Without this guard a
# missing variable would surface later as a lookup of the table
# "None.None.kdayno_bronze_SP500_stock_prices" instead of a clear config error.
_missing_env_vars = [
    env_var
    for env_var, env_value in (
        ('DATABRICKS_CATALOG_NAME', catalog_name),
        ('DATABRICKS_SCHEMA_NAME', schema_name),
    )
    if not env_value
]
if _missing_env_vars:
    raise RuntimeError(
        'Missing required environment variable(s): ' + ', '.join(_missing_env_vars)
    )

In [0]:
print(f'Validating data for date: {input_load_date}')

# Build a one-row, one-column DataFrame (`all_dq_checks_passed`) summarising
# the data quality checks for the rows whose trading_date equals the load date.
#
# - The load date is bound through a named parameter marker (`:input_load_date`)
#   rather than spliced into the SQL text, so the widget value is always treated
#   as a string literal. Passing plain Python values via `args` needs
#   Spark >= 3.5, the same minimum as pyspark.testing.assertDataFrameEqual.
# - The catalog and schema are identifiers, which cannot be bound as parameter
#   markers, so they are still interpolated; they come from environment config.
# - `has_rows_check` guards against a vacuous pass: with zero matching rows
#   (e.g. the widget left at its "YYYY-MM-DD" placeholder, or a load that wrote
#   nothing) both remaining checks evaluate to `0 = 0` and would report success.
# - NOTE(review): a NULL price makes the `<= 0` comparisons evaluate to NULL, so
#   NULL prices are not counted as violations. Confirm whether that is intended.
dq_checks = spark.sql(
    f"""
    WITH data_quality_checks AS (
    SELECT
        -- At least one row must exist for the requested load date
        COUNT(*) > 0 AS has_rows_check
        -- Unique_identifier: ticker_symbol + trading_date
        , COUNT(DISTINCT ticker_symbol, trading_date) = COUNT(*) AS is_unique_check
        -- All price columns should contain positive values
        , COUNT(CASE WHEN open_price <= 0 OR close_price <= 0 OR highest_price <= 0 OR lowest_price <= 0 THEN 1 END) = 0 AS is_prices_positive_check
    FROM {catalog_name}.{schema_name}.kdayno_bronze_SP500_stock_prices
    WHERE trading_date = :input_load_date
    )

    SELECT
        has_rows_check AND is_unique_check AND is_prices_positive_check AS all_dq_checks_passed
    FROM data_quality_checks
    """,
    args={"input_load_date": input_load_date},
)

In [0]:
# A passing run is exactly one row whose single boolean column is True.
expected_results = spark.createDataFrame(data=[(True,)], schema=['all_dq_checks_passed'])

# assertDataFrameEqual's positional order is (actual, expected). Passing by
# keyword keeps the "actual"/"expected" labels in a failure diff pointing at
# the right DataFrame.
# Raises an assertion error if any data quality check failed; otherwise all passed.
assertDataFrameEqual(actual=dq_checks, expected=expected_results)