In [0]:
%run ../../config/project_config

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.functions import col

In [0]:
# Load the gold-layer fact table; CATALOG_NAME and SCHEMA_GOLD are expected to
# come from the project config pulled in by the %run cell at the top.
fact_table_name = f"{CATALOG_NAME}.{SCHEMA_GOLD}.fact_metrics"
df_fact = spark.table(fact_table_name)

In [0]:
# Identify "Stagnant Markets": regions with no sales inside the recent window.
#
# The window is anchored to the newest date present in the fact table rather
# than to the wall clock, so re-running on the same data gives the same report.
latest_date = df_fact.select(F.max("date")).collect()[0][0]

# add_months(latest, -2) keeps the latest month plus the two before it.
# NOTE(review): this is a three-month window only if `date` holds one value per
# month (e.g. month-start dates) — confirm the grain of fact_metrics.
stagnant_threshold = F.add_months(F.lit(latest_date), -2)

# Only rows inside the window may contribute to "recent" sales. Summing the
# whole history here would make the `== 0` test below match only regions that
# have never sold anything at all.
is_recent = F.col("date") >= stagnant_threshold
recent_sale_counts = F.when(is_recent, F.col("sale_counts"))

# A row only counts as a "sale" if it actually recorded one; otherwise a region
# that keeps reporting zero-sale rows would look like it sold yesterday.
sale_date = F.when(F.col("sale_counts") > 0, F.col("date"))

stagnant_counties = (
    df_fact.groupBy("region_name")
    .agg(
        # NULL when the region has never recorded a sale.
        F.max(sale_date).alias("last_sale_date"),
        # coalesce: a region with no rows in the window sums to NULL, and
        # `NULL == 0` would silently drop it from the filter below.
        F.coalesce(F.sum(recent_sale_counts), F.lit(0)).alias("total_recent_sales"),
    )
    .filter(
        (F.col("last_sale_date") < stagnant_threshold)
        | (F.col("total_recent_sales") == 0)
    )
)

In [0]:
# Enrich the stagnant regions with descriptive location attributes so the
# report is readable. The join is an inner join on region_name, so regions
# missing from dim_location are dropped from the report.
dim_location = spark.table(f"{CATALOG_NAME}.{SCHEMA_GOLD}.dim_location")

report_columns = [
    "region_name",
    "county_name",
    "state_name",
    "last_sale_date",
    "total_recent_sales",
]

stagnant_report_final = stagnant_counties.join(
    dim_location, on="region_name", how="inner"
).select(*report_columns)

In [0]:
# Publish last_sale_date as a string column rather than a date.
final_df = stagnant_report_final.withColumn(
    "last_sale_date",
    F.col("last_sale_date").cast("string"),
)

In [0]:
# Persist the report to the gold schema, replacing any previously saved table.
report_table_name = f"{CATALOG_NAME}.{SCHEMA_GOLD}.stagnant_inventory_report"
(
    final_df.write
    .mode("overwrite")
    .saveAsTable(report_table_name)
)