In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType
from pyspark.sql.window import Window
from pyspark.sql.functions import lag, col, expr

# Obtain the notebook's Spark session (an existing session is reused if present)
spark = (
    SparkSession.builder
    .appName("Create and Insert Data into Table")
    .getOrCreate()
)

# Column layout for tableA; all three columns are declared nullable
schema = (
    StructType()
    .add("Tenant", StringType(), True)
    .add("Year", IntegerType(), True)
    .add("sales", FloatType(), True)
)

# Sample rows, one tuple per (Tenant, Year, sales)
data = [
    ("tenant 1", 2014, 2000.00),
    ("tenant 1", 2015, 5000.00),
    ("tenant 2", 2013, 1000.00),
    ("tenant 2", 2014, 1500.00),
    ("tenant 2", 2015, 800.00),
]

# Materialize the rows as a DataFrame that follows the schema above
df = spark.createDataFrame(data, schema)

# Expose the DataFrame to Spark SQL under the name "tableA"
df.createOrReplaceTempView("tableA")

# Render tableA's rows in the notebook
df.display()

Tenant,Year,sales
tenant 1,2014,2000.0
tenant 1,2015,5000.0
tenant 2,2013,1000.0
tenant 2,2014,1500.0
tenant 2,2015,800.0


In [0]:
# Window over each tenant's rows, ordered chronologically by Year.
window_spec = Window.partitionBy("Tenant").orderBy("Year")

# Attach the sales value from the tenant's preceding row as "last_year_sales".
# The first row of every tenant has no predecessor, so lag() yields NULL there.
# NOTE: lag() looks at the previous *row*, which is the previous calendar year
# only when a tenant has no missing years in the data.
df_with_lag = df.withColumn("last_year_sales", lag("sales", 1).over(window_spec))

# Year-over-year change as a percentage of the prior value.
# NULLIF turns a zero prior value into NULL so the division yields NULL instead
# of raising a divide-by-zero error when Spark runs in ANSI SQL mode; with the
# legacy (non-ANSI) behaviour the result is NULL either way.
df_with_yoy = df_with_lag.withColumn(
    "YoY",
    expr("((sales - last_year_sales) / NULLIF(last_year_sales, 0)) * 100"),
)

# Display the final DataFrame with YoY
df_with_yoy.display()

Tenant,Year,sales,last_year_sales,YoY
tenant 1,2014,2000.0,,
tenant 1,2015,5000.0,2000.0,150.0
tenant 2,2013,1000.0,,
tenant 2,2014,1500.0,1000.0,50.0
tenant 2,2015,800.0,1500.0,-46.66666666666666


In [0]:
%sql
-- Year-over-year sales change per tenant, computed with Spark SQL on tableA.
WITH CTE AS (
    SELECT
        Tenant,
        Year,
        Sales,
        -- Sales from the tenant's preceding row in Year order; NULL for the
        -- tenant's first row. This is the previous calendar year only when
        -- the tenant has no missing years.
        LAG(Sales, 1) OVER (PARTITION BY Tenant ORDER BY Year) AS last_year_sales
    FROM
        tableA
)
SELECT
    Tenant,
    Year,
    Sales,
    last_year_sales,
    -- Scale to a percentage first and round last, so the multiplication cannot
    -- reintroduce floating-point noise after rounding. NULLIF makes a zero
    -- prior value produce NULL rather than a divide-by-zero error in ANSI mode.
    ROUND((Sales - last_year_sales) / NULLIF(last_year_sales, 0) * 100, 2) AS YoY
FROM
    CTE;


Tenant,Year,Sales,last_year_sales,YoY
tenant 1,2014,2000.0,,
tenant 1,2015,5000.0,2000.0,150.0
tenant 2,2013,1000.0,,
tenant 2,2014,1500.0,1000.0,50.0
tenant 2,2015,800.0,1500.0,-46.67
