In [0]:
%run ../../config/project_config

In [0]:
from pyspark.sql import functions as F

In [0]:
df_fact = spark.table(f"{CATALOG_NAME}.{SCHEMA_GOLD}.fact_metrics")

In [0]:
# Identify "Stagnant Markets" (No sales in the last 3 months)

latest_date = df_fact.select(F.max("date")).collect()[0][0]
stagnant_threshold = F.add_months(F.lit(latest_date), -2)

stagnant_counties = df_fact.groupBy("region_name") \
    .agg(F.max("date").alias("last_sale_date"), 
         F.sum("sale_counts").alias("total_recent_sales")) \
    .filter((F.col("last_sale_date") < stagnant_threshold) | (F.col("total_recent_sales") == 0))

In [0]:
# Join with dimensions to make the report readable
stagnant_report_final = stagnant_counties.join(
    spark.table(f"{CATALOG_NAME}.{SCHEMA_GOLD}.dim_location"), 
    "region_name"
).select(
    "region_name", "county_name", "state_name", 
    "last_sale_date", "total_recent_sales"
)

In [0]:
display(stagnant_report_final)

region_name,county_name,state_name,last_sale_date,total_recent_sales
21003,Allen,Kentucky,2017-12-31,0.0
19033,Cerro Gordo,Iowa,2017-12-31,0.0
23013,Knox,Maine,2017-12-31,0.0
46135,Yankton,South Dakota,2017-12-31,0.0
28021,Claiborne,Mississippi,2017-12-31,0.0
1033,Colbert,Alabama,2017-12-31,0.0
47151,Scott,Tennessee,2017-12-31,0.0
55073,Marathon,Wisconsin,2017-12-31,0.0
1007,Bibb,Alabama,2017-12-31,0.0
54087,Roane,West Virginia,2017-12-31,0.0


In [0]:
stagnant_report_final.write.mode("overwrite").saveAsTable(f"{CATALOG_NAME}.{SCHEMA_GOLD}.stagnant_inventory_report")