In [0]:
%run ../config/project_config

In [0]:
from pyspark.sql.functions import col
import pyspark.sql.functions as F

In [0]:
# Declare the "table_name" text widget (empty default); its value is read in
# a later cell and used as the table part of the Silver table identifier.
dbutils.widgets.text("table_name", "")

In [0]:
# Read the table to validate from the "table_name" widget.
# The widget defaults to an empty string, which would produce an identifier
# ending in "." and an opaque failure when the table is loaded, so fail fast
# here with an actionable message instead.
table = dbutils.widgets.get("table_name").strip()
if not table:
    raise ValueError(
        "Widget 'table_name' is empty: pass the name of the Silver table to validate."
    )

In [0]:
# Build the fully qualified identifier as <catalog>.<schema>.<table>; the
# catalog and schema names are expected to come from the project config
# notebook pulled in via %run at the top.
table_name = "{}.{}.{}".format(CATALOG_NAME, SCHEMA_SILVER, table)

# DataFrame under test for every check that follows.
df_silver = spark.table(table_name)

In [0]:
# TEST 1: Schema Standardization Check
# A header passes only if it is lowercase (str.islower also rejects empty names
# and names without any letters) AND consists solely of ASCII lowercase letters,
# digits and underscores. The character whitelist is what rejects spaces,
# hyphens, dots and other punctuation that a lowercase-only check would let
# through while still reporting "snake_case".
allowed_header_chars = set("abcdefghijklmnopqrstuvwxyz0123456789_")
invalid_cols = [
    c for c in df_silver.columns
    if not c.islower() or not set(c) <= allowed_header_chars
]
assert len(invalid_cols) == 0, f"❌ FAILED: Found non-standard headers: {invalid_cols}"
print("✅ PASSED: All headers are snake_case.")

✅ PASSED: All headers are snake_case.


In [0]:
# TEST 2: Null Check for Critical Keys
# Count NULLs per key column in a single aggregation. F.when(...) without an
# otherwise() yields NULL for rows that do not match, and F.count() ignores
# NULLs, so each aliased column ends up holding that key's NULL count.
critical_keys = ["region_name", "date"]
critical_nulls = df_silver.select([
    F.count(F.when(F.col(c).isNull(), c)).alias(c)
    for c in critical_keys
]).collect()[0].asDict()

print(f"Nulls in Critical Keys (Silver): {critical_nulls}")

# Fail the run if any key column contains NULLs, so this check can actually
# fail like the other tests in this notebook instead of only reporting counts.
keys_with_nulls = {c: n for c, n in critical_nulls.items() if n > 0}
assert not keys_with_nulls, f"❌ FAILED: Found nulls in critical keys: {keys_with_nulls}"
print("✅ PASSED: No nulls in critical keys.")

Nulls in Critical Keys (Silver): {'region_name': 0, 'date': 0}


In [0]:
# TEST 3: Data Uniqueness Check
# Group by the (region_name, date) key and count how many key combinations
# appear on more than one row. Note the result is the number of duplicated
# keys, not the number of surplus rows.
duplicate_count = (
    df_silver.groupBy("region_name", "date")
    .count()
    .filter("count > 1")
    .count()
)
assert duplicate_count == 0, (
    f"❌ FAILED: Found {duplicate_count} duplicated (region_name, date) keys!"
)
print("✅ PASSED: Data is unique by region_name and date.")

✅ PASSED: Data is unique by region_name and date.


In [0]:
# TEST 4: Comprehensive Null Summary (All Columns)
# Informational only (no assertion): one aggregate per column, each counting
# the rows where that column is NULL. The result is a single-row DataFrame
# whose columns mirror those of df_silver.
null_count_exprs = [
    F.count(F.when(F.col(column_name).isNull(), column_name)).alias(column_name)
    for column_name in df_silver.columns
]
null_summary = df_silver.select(null_count_exprs)
display(null_summary)

date,region_name,days_on_zillow_all_homes,inventory_seasonally_adjusted_all_homes,inventory_raw_all_homes,median_listing_price_per_sqft_1_bedroom,median_listing_price_per_sqft_2_bedroom,median_listing_price_per_sqft_3_bedroom,median_listing_price_per_sqft_4_bedroom,median_listing_price_per_sqft_5_bedroom_or_more,median_listing_price_per_sqft_all_homes,median_listing_price_per_sqft_condo_coop,median_listing_price_per_sqft_duplex_triplex,median_listing_price_per_sqft_single_family_residence,median_listing_price_1_bedroom,median_listing_price_2_bedroom,median_listing_price_3_bedroom,median_listing_price_4_bedroom,median_listing_price_5_bedroom_or_more,median_listing_price_all_homes,median_listing_price_condo_coop,median_listing_price_duplex_triplex,median_listing_price_single_family_residence,median_pct_of_price_reduction_all_homes,median_pct_of_price_reduction_condo_coop,median_pct_of_price_reduction_single_family_residence,median_price_cut_dollar_all_homes,median_price_cut_dollar_condo_coop,median_price_cut_dollar_single_family_residence,median_rental_price_per_sqft_1_bedroom,median_rental_price_per_sqft_2_bedroom,median_rental_price_per_sqft_3_bedroom,median_rental_price_per_sqft_4_bedroom,median_rental_price_per_sqft_5_bedroom_or_more,median_rental_price_per_sqft_all_homes,median_rental_price_per_sqft_condo_coop,median_rental_price_per_sqft_duplex_triplex,median_rental_price_per_sqft_multi_family_residence5_plus_units,median_rental_price_per_sqft_single_family_residence,median_rental_price_per_sqft_studio,median_rental_price_1_bedroom,median_rental_price_2_bedroom,median_rental_price_3_bedroom,median_rental_price_4_bedroom,median_rental_price_5_bedroom_or_more,median_rental_price_all_homes,median_rental_price_condo_coop,median_rental_price_duplex_triplex,median_rental_price_multi_family_residence5_plus_units,median_rental_price_single_family_residence,median_rental_price_studio,zhviper_sqft_all_homes,pct_of_homes_decreasing_in_values_all_homes,pct_of_homes_increasi
ng_in_values_all_homes,pct_of_homes_selling_for_gain_all_homes,pct_of_homes_selling_for_loss_all_homes,pct_of_listings_with_price_reductions_seas_adj_all_homes,pct_of_listings_with_price_reductions_seas_adj_condo_coop,pct_of_listings_with_price_reductions_seas_adj_single_family_residence,pct_of_listings_with_price_reductions_all_homes,pct_of_listings_with_price_reductions_condo_coop,pct_of_listings_with_price_reductions_single_family_residence,price_to_rent_ratio_all_homes,sale_counts,sale_counts_seas_adj,sale_prices,zhvi_1bedroom,zhvi_2bedroom,zhvi_3bedroom,zhvi_4bedroom,zhvi_5_bedroom_or_more,zhvi_all_homes,zhvi_bottom_tier,zhvi_condo_coop,zhvi_middle_tier,zhvi_single_family_residence,zhvi_top_tier,zri_all_homes,zri_all_homes_plus_multifamily,zri_per_sqft_all_homes,zri_multi_family_residence_rental,zri_single_family_residence_rental
0,0,129031,47091,47091,148409,108674,69577,100654,130972,36344,133898,146421,37155,149191,115173,75680,106571,134449,40191,136498,146660,40441,71043,142410,71633,71043,142410,71633,144786,136639,135429,147039,153423,126481,146676,150454,135178,128023,146300,143762,135791,134823,146991,153564,127270,147063,146833,133683,128032,144795,58602,89382,89382,155470,155470,57736,139521,58207,57736,139521,58207,69541,129615,129642,139369,110104,79580,64320,71281,91078,55194,66797,113723,55194,55381,52292,50609,49801,58295,87415,49978
