In [0]:
"""
    1. What is the % of sales each month based on the last 6 months sales
    2. What is the % of loss or gain based on previous month sale
"""

product_df = spark.createDataFrame(
    [
        (1,"iphone","01-01-2023",1500000),
        (2,"samsung","01-01-2023",1100000),
        (3,"oneplus","01-01-2023",1100000),
        (1,"iphone","01-02-2023",1300000),
        (2,"samsung","01-02-2023",1120000),
        (3,"oneplus","01-02-2023",1120000),
        (1,"iphone","01-03-2023",1600000),
        (2,"samsung","01-03-2023",1080000),
        (3,"oneplus","01-03-2023",1160000),
        (1,"iphone","01-04-2023",1700000),
        (2,"samsung","01-04-2023",1800000),
        (3,"oneplus","01-04-2023",1170000),
        (1,"iphone","01-05-2023",1200000),
        (2,"samsung","01-05-2023",980000),
        (3,"oneplus","01-05-2023",1175000),
        (1,"iphone","01-06-2023",1100000),
        (2,"samsung","01-06-2023",1100000),
        (3,"oneplus","01-06-2023",1200000)
    ], 
    ["product_id", "product_name", "sales_date", "sales"]
)

product_df.show()

+----------+------------+----------+-------+
|product_id|product_name|sales_date|  sales|
+----------+------------+----------+-------+
|         1|      iphone|01-01-2023|1500000|
|         2|     samsung|01-01-2023|1100000|
|         3|     oneplus|01-01-2023|1100000|
|         1|      iphone|01-02-2023|1300000|
|         2|     samsung|01-02-2023|1120000|
|         3|     oneplus|01-02-2023|1120000|
|         1|      iphone|01-03-2023|1600000|
|         2|     samsung|01-03-2023|1080000|
|         3|     oneplus|01-03-2023|1160000|
|         1|      iphone|01-04-2023|1700000|
|         2|     samsung|01-04-2023|1800000|
|         3|     oneplus|01-04-2023|1170000|
|         1|      iphone|01-05-2023|1200000|
|         2|     samsung|01-05-2023| 980000|
|         3|     oneplus|01-05-2023|1175000|
|         1|      iphone|01-06-2023|1100000|
|         2|     samsung|01-06-2023|1100000|
|         3|     oneplus|01-06-2023|1200000|
+----------+------------+----------+-------+



In [0]:
# 1. What is the % of sales each month based on the last 6 months sales
from pyspark.sql.window import Window
from pyspark.sql.functions import *

# Need to pass input format then Spark will convert it to yyyy-MM-dd, no need to pass expected format
formatted_sales_date = product_df.withColumn("formatted_sales_dt", to_date(product_df["sales_date"], "dd-MM-yyyy"))
formatted_sales_date.withColumn("mnth", month(col("formatted_sales_dt")))\
    .groupBy("mnth").agg(sum("sales").alias("monthly_total_sales"))\
    .withColumn("last_6_months_sales", lag(col("monthly_total_sales"), 5).over(Window.orderBy("mnth")))\
    .withColumn("perc_sales_off_6_months", round(100 * (col("monthly_total_sales")-col("last_6_months_sales")) / col("monthly_total_sales"), 2 ))\
    .show()

+----+-------------------+-------------------+-----------------------+
|mnth|monthly_total_sales|last_6_months_sales|perc_sales_off_6_months|
+----+-------------------+-------------------+-----------------------+
|   1|            3700000|               null|                   null|
|   2|            3540000|               null|                   null|
|   3|            3840000|               null|                   null|
|   4|            4670000|               null|                   null|
|   5|            3355000|               null|                   null|
|   6|            3400000|            3700000|                  -8.82|
+----+-------------------+-------------------+-----------------------+



In [0]:
# 2. What is the % of loss or gain based on previous month sale
from pyspark.sql.window import Window
from pyspark.sql.functions import *

product_df.withColumn("prev_month_sale", lag(col("sales"), 1).over(Window.partitionBy("product_name").orderBy("sales_date")))\
    .withColumn("perc_gain_loss", round(100 * (col("sales")-col("prev_month_sale"))/col("sales"), 2))\
    .show()

+----------+------------+----------+-------+---------------+--------------+
|product_id|product_name|sales_date|  sales|prev_month_sale|perc_gain_loss|
+----------+------------+----------+-------+---------------+--------------+
|         1|      iphone|01-01-2023|1500000|           null|          null|
|         1|      iphone|01-02-2023|1300000|        1500000|        -15.38|
|         1|      iphone|01-03-2023|1600000|        1300000|         18.75|
|         1|      iphone|01-04-2023|1700000|        1600000|          5.88|
|         1|      iphone|01-05-2023|1200000|        1700000|        -41.67|
|         1|      iphone|01-06-2023|1100000|        1200000|         -9.09|
|         3|     oneplus|01-01-2023|1100000|           null|          null|
|         3|     oneplus|01-02-2023|1120000|        1100000|          1.79|
|         3|     oneplus|01-03-2023|1160000|        1120000|          3.45|
|         3|     oneplus|01-04-2023|1170000|        1160000|          0.85|
|         3|