In [0]:
import time

In [0]:
# Fully qualified name of the scratch copy of the report.
test_table = "zillow.gold.benchmarking_temp"

# Drop any copy left over from an earlier run; saveAsTable is called below
# without an explicit mode, so the target must not already exist.
spark.sql(f"DROP TABLE IF EXISTS {test_table}")

# Snapshot the gold report into the scratch table.
(
    spark.table("zillow.gold.stagnant_inventory_report")
    .write
    .saveAsTable(test_table)
)

In [0]:
# Benchmark query: every column of the report for rows where state_name is
# 'Alaska', sorted by last_sale_date descending. It reads the gold report
# table directly, not the copy named by `test_table`.
query = """
        SELECT * FROM zillow.gold.stagnant_inventory_report 
        WHERE state_name = 'Alaska' 
        ORDER BY last_sale_date DESC
    """

In [0]:
def _timed_collect(sql_text):
    """Run `sql_text` via spark.sql, collect the rows, and return elapsed seconds.

    Uses time.perf_counter(), a monotonic high-resolution clock, so the
    measured interval is not affected by system clock adjustments.
    """
    started = time.perf_counter()
    spark.sql(sql_text).collect()
    return time.perf_counter() - started


# 1. Pre-optimization: time the query against the table in its current layout.
print("Running Pre-Optimization benchmark...")
pre_time = _timed_collect(query)

# 2. Apply optimization: Z-Order by the filter column (state_name), which is
#    the only column the OPTIMIZE statement below names.
# NOTE(review): OPTIMIZE and `query` both target
# zillow.gold.stagnant_inventory_report; the `test_table` copy created earlier
# is not used by either — confirm that touching the gold table is intended.
print("Applying Z-Order optimization on state_name...")
spark.sql("OPTIMIZE zillow.gold.stagnant_inventory_report ZORDER BY(state_name)")

# 3. Post-optimization: time the same query again after the OPTIMIZE.
# NOTE(review): each side is a single sample, so small differences may be
# run-to-run noise — consider repeating the measurement before drawing conclusions.
print("Running Post-Optimization benchmark...")
post_time = _timed_collect(query)

# 4. Final results for deliverables.
# Guard the percentage against a zero baseline so the cell cannot raise
# ZeroDivisionError when the first run is too fast for the clock to register.
improvement = ((pre_time - post_time) / pre_time) * 100 if pre_time > 0 else 0.0
print("\n--- Serverless Performance Results ---")
print(f"Initial Scan: {pre_time:.2f}s")
print(f"Optimized Scan: {post_time:.2f}s")
print(f"Efficiency Gain: {improvement:.2f}%")

Running Pre-Optimization benchmark...
Applying Z-Order optimization on state_name andlast_sale_date...
Running Post-Optimization benchmark...

--- Serverless Performance Results ---
Initial Scan: 0.63s
Optimized Scan: 0.59s
Efficiency Gain: 6.35%
