In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window

# Source for the gold-layer features: the cleaned sales table in the silver schema.
silver_table_name = "walmart_cat.silver.sales_cleaned"
silver_df = spark.table(silver_table_name)

# Sanity check before feature engineering: column types first, then five sample rows.
silver_df.printSchema()
silver_df.show(n=5)


root
 |-- Store: integer (nullable = true)
 |-- Date: date (nullable = true)
 |-- IsHoliday: boolean (nullable = true)
 |-- Dept: integer (nullable = true)
 |-- Weekly_Sales: double (nullable = true)
 |-- ingestion_ts: timestamp (nullable = true)
 |-- Temperature: double (nullable = true)
 |-- Fuel_Price: double (nullable = true)
 |-- MarkDown1: string (nullable = true)
 |-- MarkDown2: string (nullable = true)
 |-- MarkDown3: string (nullable = true)
 |-- MarkDown4: string (nullable = true)
 |-- MarkDown5: string (nullable = true)
 |-- CPI: string (nullable = true)
 |-- Unemployment: string (nullable = true)
 |-- Type: string (nullable = true)
 |-- Size: integer (nullable = true)

+-----+----------+---------+----+------------+--------------------+-----------+----------+---------+---------+---------+---------+---------+-----------+------------+----+------+
|Store|      Date|IsHoliday|Dept|Weekly_Sales|        ingestion_ts|Temperature|Fuel_Price|MarkDown1|MarkDown2|MarkDown3|MarkDown4|Ma

In [0]:
# Calendar features derived from Date, plus IsHoliday re-expressed as an integer
# (the boolean is cast to int, giving 0 for False as seen in the preview output).
# NOTE(review): weekofyear follows ISO-8601 week numbering per the Spark docs, while
# year is the calendar year, so the two can disagree for dates around Jan 1 - confirm
# this is acceptable for the dates present in the data.
date_col = F.col("Date")

gold_df = (
    silver_df
    .withColumn("year", F.year(date_col))
    .withColumn("month", F.month(date_col))
    .withColumn("week_of_year", F.weekofyear(date_col))
    .withColumn("is_holiday", F.col("IsHoliday").cast("int"))
)


In [0]:
# One ordered series per (Store, Dept), sorted chronologically by Date.
window_spec = Window.partitionBy("Store", "Dept").orderBy("Date")

# Output column name -> how many rows back in the series to read Weekly_Sales from.
# F.lag counts rows, not calendar weeks: the "N_week" naming is only accurate if every
# (Store, Dept) series has exactly one row per consecutive week.
# NOTE(review): that contiguity is not checked anywhere in this notebook - confirm upstream.
lag_offsets = {
    "lag_1_week": 1,
    "lag_2_week": 2,
    "lag_4_week": 4,
}

for lag_col, rows_back in lag_offsets.items():
    gold_df = gold_df.withColumn(
        lag_col,
        F.lag("Weekly_Sales", rows_back).over(window_spec),
    )


In [0]:
# Frame covering the four rows immediately before the current row in each
# (Store, Dept) series. The current row is excluded (frame ends at -1), so the
# average never includes the same row's own Weekly_Sales.
rolling_window = window_spec.rowsBetween(-4, -1)

# Mean of Weekly_Sales over that trailing frame. Rows near the start of a series
# have fewer than four preceding rows, so their average is taken over what exists.
prior_rows_mean = F.avg(F.col("Weekly_Sales")).over(rolling_window)

gold_df = gold_df.withColumn("rolling_avg_4w", prior_rows_mean)


In [0]:
# Remove the warm-up rows at the start of each (Store, Dept) series, where the lag
# and rolling features are null because there is not enough history yet.
#
# The drop is restricted to the columns this notebook engineers (plus Weekly_Sales,
# which they are all derived from). A bare dropna() would also discard any row that
# has a null in an unrelated column - e.g. the MarkDown1-5 columns, which are blank
# in the preview output - and could silently remove most of the dataset if those
# columns ever arrive as real nulls.
required_feature_cols = [
    "Weekly_Sales",
    "lag_1_week",
    "lag_2_week",
    "lag_4_week",
    "rolling_avg_4w",
]

gold_df = gold_df.dropna(subset=required_feature_cols)


In [0]:
# Make sure the target schema exists before the table is written; the
# IF NOT EXISTS clause makes this safe to re-run.
create_gold_schema_sql = "CREATE SCHEMA IF NOT EXISTS walmart_cat.gold"
spark.sql(create_gold_schema_sql)


DataFrame[]

In [0]:
# Persist the engineered feature set as a Delta table. Overwrite mode replaces
# whatever the table held before, so re-running the notebook refreshes it.
gold_writer = gold_df.write.format("delta").mode("overwrite")
gold_writer.saveAsTable("walmart_cat.gold.sales_features")


In [0]:
%sql
-- Preview ten rows of the freshly written feature table (no ORDER BY, so which
-- rows come back is not guaranteed).
SELECT *
FROM walmart_cat.gold.sales_features
LIMIT 10;


Store,Date,IsHoliday,Dept,Weekly_Sales,ingestion_ts,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,Type,Size,year,month,week_of_year,is_holiday,lag_1_week,lag_2_week,lag_4_week,rolling_avg_4w
1,2010-03-05,False,1,21827.9,2026-01-28T13:10:42.054Z,46.5,2.625,,,,,,211.3501429,8.106,A,151315,2010,3,9,0,19403.54,41595.55,24924.5,32990.77
1,2010-03-12,False,1,21043.39,2026-01-28T13:10:42.054Z,57.79,2.667,,,,,,211.3806429,8.106,A,151315,2010,3,10,0,21827.9,19403.54,46039.49,32216.620000000003
1,2010-03-19,False,1,22136.64,2026-01-28T13:10:42.054Z,54.58,2.72,,,,,,211.215635,8.106,A,151315,2010,3,11,0,21043.39,21827.9,41595.55,25967.595
1,2010-03-26,False,1,26229.21,2026-01-28T13:10:42.054Z,51.45,2.732,,,,,,211.0180424,8.106,A,151315,2010,3,12,0,22136.64,21043.39,19403.54,21102.8675
1,2010-04-02,False,1,57258.43,2026-01-28T13:10:42.054Z,62.27,2.719,,,,,,210.8204499,7.808,A,151315,2010,4,13,0,26229.21,22136.64,21827.9,22809.285
1,2010-04-09,False,1,42960.91,2026-01-28T13:10:42.054Z,65.86,2.77,,,,,,210.6228574,7.808,A,151315,2010,4,14,0,57258.43,26229.21,21043.39,31666.9175
1,2010-04-16,False,1,17596.96,2026-01-28T13:10:42.054Z,66.32,2.808,,,,,,210.4887,7.808,A,151315,2010,4,15,0,42960.91,57258.43,22136.64,37146.2975
1,2010-04-23,False,1,16145.35,2026-01-28T13:10:42.054Z,64.84,2.795,,,,,,210.4391228,7.808,A,151315,2010,4,16,0,17596.96,42960.91,26229.21,36011.3775
1,2010-04-30,False,1,16555.11,2026-01-28T13:10:42.054Z,67.41,2.78,,,,,,210.3895456,7.808,A,151315,2010,4,17,0,16145.35,17596.96,57258.43,33490.4125
1,2010-05-07,False,1,17413.94,2026-01-28T13:10:42.054Z,72.55,2.835,,,,,,210.3399684,7.808,A,151315,2010,5,18,0,16555.11,16145.35,42960.91,23314.5825
