In [0]:
#Exchange Seat of 2 Students in a class

from pyspark.sql.functions import lag,lead,when,col,coalesce

#create a dataframe with different student names
data=[(1,'Alice'),(2,'Bob'),(3,'Charlie'),(4,'David'),(5,'Eve')]
df=spark.createDataFrame(data,['id','student'])

df.display()

id,student
1,Alice
2,Bob
3,Charlie
4,David
5,Eve


In [0]:
#find next and prev seat for each student

from pyspark.sql.window import Window

exchanged_df = df.withColumn("next_student",lead("student",1).over(Window.orderBy("id"))) \
        .withColumn("prev_student",lag("student",1).over(Window.orderBy("id")))


In [0]:
#exchange seat based on odd/even logic:

#coalesce -> returns first non-NULL value

exchanged_df = exchanged_df.withColumn("Exchanged_Seating",
                    when(col("id")%2==0,coalesce("prev_student",col("student"))) \
                   .when(exchanged_df["id"]%2==1,coalesce(exchanged_df["next_student"],exchanged_df["student"])) \
                    .otherwise(exchanged_df["student"]))

exchanged_df.display()

id,student,next_student,prev_student,Exchanged_Seating
1,Alice,Bob,,Bob
2,Bob,Charlie,Alice,Alice
3,Charlie,David,Bob,David
4,David,Eve,Charlie,Charlie
5,Eve,,David,Eve


In [0]:
#renaming the column:

exchanged_df = exchanged_df.withColumnRenamed("student", "Original_Seating") \
                           .drop("prev_student", "next_student")

#or exchanged_df.withColumn("student", col("Original_Seating"))

exchanged_df.display()

id,Original_Seating,Exchanged_Seating
1,Alice,Bob
2,Bob,Alice
3,Charlie,David
4,David,Charlie
5,Eve,Eve


In [0]:
#drop unwanted columns and rename columns as per requirement

exchanged_df = exchanged_df.drop("Original_Seating").withColumnRenamed("Exchanged_Seating","student")

exchanged_df.display()

id,student
1,Bob
2,Alice
3,David
4,Charlie
5,Eve


##Exchange the seat only for first 2 students:

In [0]:
#direct exchange seat of first 2 students

from pyspark.sql.window import Window

new_df = df.withColumn("next_student",lead("student",1).over(Window.orderBy("id"))) \
        .withColumn("prev_student",lag("student",1).over(Window.orderBy("id")))


new_df = new_df.withColumn("Exchanged_Seating",
                    when(col("id")==1,new_df["prev_student"])
                   .when(new_df["id"]==2,new_df["next_student"])
                   .otherwise(col("student")))

new_df = new_df.withColumnRenamed("student","Original_Seating").drop("prev_student","next_student")

new_df.display()

id,Original_Seating,Exchanged_Seating
1,Alice,
2,Bob,Charlie
3,Charlie,Charlie
4,David,David
5,Eve,Eve


##using SPARK SQL

In [0]:
#convert dataframe to TempView

df.createOrReplaceTempView("students")

In [0]:
%sql
select * from students

id,student
1,Alice
2,Bob
3,Charlie
4,David
5,Eve


In [0]:
%sql

select id,student as orginal_seating,
case 
 when id%2==1 then coalesce(lead(student,1) over (order by id),student)
 when id%2==0 then coalesce(lag(student,1) over (order by id), student)
 else student
end as exchanged_seating
from students

id,orginal_seating,exchanged_seating
1,Alice,Bob
2,Bob,Alice
3,Charlie,David
4,David,Charlie
5,Eve,Eve


##Using Spark SQL
###Exchange the seat only for first 2 students:

In [0]:
%sql

select id,student as orginal_seating,
case 
 when id%2==1 then lead(student,1) over (order by id)
 when id%2==0 then lag(student,1) over (order by id)
 else student
end as exchanged_seating
from students

id,orginal_seating,exchanged_seating
1,Alice,Bob
2,Bob,Alice
3,Charlie,David
4,David,Charlie
5,Eve,


#without using Window Function

In [0]:
%sql

select s1.*, s2.student as next_student, s3.student as prev_student
from students s1
left join students s2 on s1.id=s2.id-1
left join students s3 on s1.id=s3.id+1

id,student,next_student,prev_student
1,Alice,Bob,
2,Bob,Charlie,Alice
3,Charlie,David,Bob
4,David,Eve,Charlie
5,Eve,,David
