In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructType,StructField,StringType,IntegerType
from pyspark.sql.functions import col, sum
from pyspark.sql.window import Window

# Build (or reuse) the Spark session for this notebook, running locally with
# the "local[3]" master setting.
spark = (
    SparkSession.builder
    .appName("app")
    .master("local[3]")
    .getOrCreate()
)

In [0]:
# Column names for the boarding queue; no explicit types are given, so Spark
# infers them from the row values.
schema = ["person_id", "person_name", "weight", "turn"]

# One tuple per person, in the same column order as `schema`.
data = [
    (5, "Alice", 250, 1),
    (4, "Bob", 175, 5),
    (3, "Alex", 350, 2),
    (6, "John Cena", 400, 3),
    (1, "Winston", 500, 6),
    (2, "Marie", 200, 4),
]

queue = spark.createDataFrame(data, schema)
queue.show()

+---------+-----------+------+----+
|person_id|person_name|weight|turn|
+---------+-----------+------+----+
|        5|      Alice|   250|   1|
|        4|        Bob|   175|   5|
|        3|       Alex|   350|   2|
|        6|  John Cena|   400|   3|
|        1|    Winston|   500|   6|
|        2|      Marie|   200|   4|
+---------+-----------+------+----+



In [0]:
# There is a queue of people waiting to board a bus. However, the bus has a weight limit of 1000 kilograms, so there may be some people who cannot board.
# Write a solution to find the person_name of the last person that can fit on the bus without exceeding the weight limit. The test cases are generated such that the first person does not exceed the weight limit.

WEIGHT_LIMIT_KG = 1000  # bus capacity taken from the problem statement

# Running total of weight in boarding order. The frame is spelled out as ROWS so
# that each row contributes only itself to the total; the default frame of an
# ordered window is RANGE-based and would add rows sharing the same `turn`
# together in one step.
window_spec = (
    Window.orderBy("turn")
    .rowsBetween(Window.unboundedPreceding, Window.currentRow)
)

# NOTE: `sum` here is pyspark.sql.functions.sum (see the imports cell), not the
# Python builtin.
boarded = (
    queue
    .withColumn("total_weight", sum("weight").over(window_spec))
    .filter(col("total_weight") <= WEIGHT_LIMIT_KG)
)

# The last person who fits is the boarded row with the highest `turn`; sorting
# on `turn` states that directly instead of relying on the running total.
(
    boarded
    .orderBy(col("turn").desc())
    .select("person_name")
    .limit(1)
    .show()
)

+-----------+
|person_name|
+-----------+
|  John Cena|
+-----------+



In [0]:
# Same problem solved with Spark SQL: expose the DataFrame as a temp view,
# compute the running weight per boarding turn, keep the rows that still fit
# under the 1000 kg limit, and return the one with the highest turn.
queue.createOrReplaceTempView("queue")
spark.sql("""
          with cte as (
              -- No ORDER BY here: the outer query sorts the final result, and
              -- ordering inside a CTE is not guaranteed to carry through.
              select person_name,
                     turn,
                     sum(weight) over(
                         order by turn
                         rows between unbounded preceding and current row
                     ) as total_weight
              from queue
          )
          select person_name from cte where total_weight<=1000 order by turn desc limit 1
          """).show()

+-----------+
|person_name|
+-----------+
|  John Cena|
+-----------+



In [0]:
# Stop the Spark session created at the top of the notebook.
spark.stop()