In [0]:
"""
Create a table of every three-topping pizza combination and its total cost, built from the toppings table below.

Input
+------------+----+
|topping_name|cost|
+------------+----+
|   Pepperoni| 0.5|
|     Sausage| 0.7|
|     Chicken|0.55|
|Extra Cheese| 0.4|
+------------+----+


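Output
One row per combination of three distinct toppings: the topping names joined
with commas in alphabetical order, plus the summed cost, sorted by cost descending.
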
"""

# Input table: one row per topping with its price. Column types are inferred
# from the Python values (names are str, costs are float).
pizza_toppings_df = spark.createDataFrame(
    data=[
        ("Pepperoni", 0.5),
        ("Sausage", 0.7),
        ("Chicken", 0.55),
        ("Extra Cheese", 0.4),
    ],
    schema=["topping_name", "cost"],
)

# Print the input so it can be compared with the table in the cell header.
pizza_toppings_df.show()

+------------+----+
|topping_name|cost|
+------------+----+
|   Pepperoni| 0.5|
|     Sausage| 0.7|
|     Chicken|0.55|
|Extra Cheese| 0.4|
+------------+----+



In [0]:
from pyspark.sql import functions as F
# Wildcard import kept because other cells may rely on its names.
# NOTE: it shadows Python built-ins such as sum/round/max with Spark functions.
from pyspark.sql.functions import *

# Build every 3-topping pizza by self-joining the toppings table twice.
#
# The strict "<" on topping_name keeps only alphabetically increasing triples
# (t1 < t2 < t3), so each combination is produced exactly once and its names
# are already in alphabetical order. No pre-sort of the input is needed (a join
# does not preserve input order) and no groupBy is needed to de-duplicate.
# Assumes topping_name is unique in pizza_toppings_df; a repeated name would
# show up as repeated pizza rows rather than being silently summed together.
#
# cost_ is plain double addition, so tiny binary floating-point error can show
# up in the displayed value (e.g. 1.4500000000000002).
(
    pizza_toppings_df.alias("t1")
    .join(
        pizza_toppings_df.alias("t2"),
        F.col("t1.topping_name") < F.col("t2.topping_name"),
        "inner",
    )
    .join(
        pizza_toppings_df.alias("t3"),
        F.col("t2.topping_name") < F.col("t3.topping_name"),
        "inner",
    )
    .select(
        F.concat_ws(
            ",",
            F.col("t1.topping_name"),
            F.col("t2.topping_name"),
            F.col("t3.topping_name"),
        ).alias("pizza"),
        (F.col("t1.cost") + F.col("t2.cost") + F.col("t3.cost")).alias("cost_"),
    )
    # Most expensive pizza first.
    .orderBy(F.col("cost_").desc())
    .show(truncate=False)
)

+------------------------------+------------------+
|pizza                         |cost_             |
+------------------------------+------------------+
|Chicken,Pepperoni,Sausage     |1.75              |
|Chicken,Extra Cheese,Sausage  |1.65              |
|Extra Cheese,Pepperoni,Sausage|1.6               |
|Chicken,Extra Cheese,Pepperoni|1.4500000000000002|
+------------------------------+------------------+



In [0]:
# Expose the toppings DataFrame to Spark SQL under the name "pizza_toppings".
pizza_toppings_df.createOrReplaceTempView("pizza_toppings")

# SQL version of the 3-topping combination query.
#
# The table is self-joined twice; the strict "<" on topping_name keeps only
# alphabetically increasing triples (t1 < t2 < t3), so each combination appears
# once with its names already in alphabetical order. Sorting happens only in
# the final ORDER BY -- an ORDER BY inside a CTE would not affect the result.
#
# String literals use single quotes: double-quoted text is read as an
# identifier when ANSI double-quoted identifiers are enabled.
spark.sql("""
    select
        concat_ws(',', t1.topping_name, t2.topping_name, t3.topping_name) as topping_name,
        t1.cost + t2.cost + t3.cost as cost_
    from pizza_toppings t1
    inner join pizza_toppings t2 on t1.topping_name < t2.topping_name
    inner join pizza_toppings t3 on t2.topping_name < t3.topping_name
    order by cost_ desc
""").show(truncate=False)

+------------------------------+------------------+
|topping_name                  |cost_             |
+------------------------------+------------------+
|Chicken,Pepperoni,Sausage     |1.75              |
|Chicken,Extra Cheese,Sausage  |1.65              |
|Extra Cheese,Pepperoni,Sausage|1.6               |
|Chicken,Extra Cheese,Pepperoni|1.4500000000000002|
+------------------------------+------------------+

