In [1]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from pyspark.storagelevel import StorageLevel
from pyspark.sql.types import *

In [2]:
# Load the sample CSV: the first row supplies the column names and Spark
# infers each column's type from the data.
df = (
    spark.read
    .option("inferSchema", True)
    .option("header", True)
    .csv('/FileStore/tables/timetravel1.csv')
)

In [3]:
df.printSchema()  # print column names and types to verify the schema Spark inferred from the CSV

In [4]:
# Replace the Sale column with a float-typed copy named Sale1, then persist the
# resulting frame (MEMORY_AND_DISK) so the computations that follow can reuse it.
sale_as_float = F.col("Sale").cast("float")
df = (
    df.withColumn("Sale1", sale_as_float)
    .drop("Sale")
    .persist(StorageLevel.MEMORY_AND_DISK)
)

# Six new columns are engineered to give Sales historical context and granularity: rolling sums over the last 4, 13 and 52 weeks. The first three columns (suffix "by-C-S") are aggregated per Type and Coupon; the last three (suffix "TOTAL") aggregate the sales of all rows combined.

In [6]:
SECONDS_PER_DAY = 86400
days = lambda i: i * SECONDS_PER_DAY  # seconds spanned by `i` whole days


def _trailing_weeks_window(weeks, *partition_cols):
    """Build a range window covering the trailing `weeks` weeks up to the current row.

    Rows are ordered by Date expressed as epoch seconds. The frame is inclusive of
    the current row, so N weeks reach back N*7 - 1 days (4 weeks -> 27 days,
    13 weeks -> 90 days, 52 weeks -> 363 days).

    Args:
        weeks: number of whole weeks the window should span.
        *partition_cols: column names to partition by; when none are given the
            window runs over all rows.

    Returns:
        A WindowSpec ordered by Date with a (lookback, 0) range frame in seconds.
    """
    # Going through timestamp first keeps the ordering key numeric epoch seconds
    # even when inferSchema read Date as a date or string instead of a timestamp.
    order_key = F.col("Date").cast("timestamp").cast("long")
    base = Window.partitionBy(*partition_cols) if partition_cols else Window
    return base.orderBy(order_key).rangeBetween(-days(weeks * 7 - 1), 0)


# Windows per (Type, Coupon) combination.
w1 = _trailing_weeks_window(4, "Type", "Coupon")
w2 = _trailing_weeks_window(13, "Type", "Coupon")
w3 = _trailing_weeks_window(52, "Type", "Coupon")
# Windows over all rows together. NOTE(review): with no partitionBy Spark has to
# evaluate these on a single partition -- acceptable for a sample file, confirm
# before running on large data.
w4 = _trailing_weeks_window(4)
w5 = _trailing_weeks_window(13)
w6 = _trailing_weeks_window(52)

# Output column name -> window it sums Sale1 over; added in this order.
rolling_sum_columns = [
    ("Sales-Last4weeks-by-C-S", w1),
    ("Sales-Last13weeks-by-C-S", w2),
    ("Sales-Last52weeks-by-C-S", w3),
    ("Sales-Last4weeks-TOTAL", w4),
    ("Sales-Last13weeks-TOTAL", w5),
    ("Sales-Last52weeks-TOTAL", w6),
]
for column_name, window_spec in rolling_sum_columns:
    df = df.withColumn(column_name, F.sum("Sale1").over(window_spec))

# Most recent dates first, then preview the first ten rows.
df = df.orderBy(F.col("Date").desc())
df.show(10)

In [7]:
# Register df as the temporary view "timetravel" (replacing any existing view of
# that name) so it can be queried with Spark SQL.
df.createOrReplaceTempView("timetravel")

In [8]:
# Display the rows of the "timetravel" view whose Date lies between
# 2019-06-01 and 2019-09-01.
date_range_query = "Select * from timetravel where Date Between '2019-06-01' AND '2019-09-01'"
date_range_rows = spark.sql(date_range_query)
date_range_rows.show()