In [0]:
# Wide-format sample data: one row per student, one score column per subject.
columns = ["id", "name", "math", "science", "english"]

data = [
    (1, "Alice", 10, 20, 30),
    (2, "Bob", 40, 50, 60),
    (3, "Cathy", 70, 80, 90),
]

# Only column names are supplied as the schema; no column types are declared here.
df = spark.createDataFrame(data, schema=columns)

# Render the wide frame as a table in the cell output.
df.display()

id,name,math,science,english
1,Alice,10,20,30
2,Bob,40,50,60
3,Cathy,70,80,90


In [0]:
# Unpivot (wide -> long) with the SQL stack() generator.
# stack(3, label1, col1, ...) turns each input row into three rows, pairing a
# literal subject label with the value of the matching score column.
stack_expr = "stack(3, 'math', math, 'science', science, 'english', english) as (subject, score)"

# id and name are carried through unchanged next to the generated pair.
unpivotDF = df.selectExpr("id", "name", stack_expr)

unpivotDF.display()


id,name,subject,score
1,Alice,math,10
1,Alice,science,20
1,Alice,english,30
2,Bob,math,40
2,Bob,science,50
2,Bob,english,60
3,Cathy,math,70
3,Cathy,science,80
3,Cathy,english,90


In [0]:
from pyspark.sql.functions import lit

def _subject_scores(subject):
    """Project one subject column of ``df`` into long format.

    Args:
        subject: Name of a score column in ``df`` (e.g. ``"math"``).

    Returns:
        A DataFrame with columns ``id``, ``name``, ``subject`` (the literal
        column name) and ``score`` (that column's value).
    """
    return df.select(
        "id",
        "name",
        lit(subject).alias("subject"),
        df[subject].alias("score"),
    )


# Unpivot by stacking one projection per subject. Every branch carries the same
# explicit aliases, so the result schema does not depend on which branch comes
# first, and unionByName matches columns by name rather than by position.
unpivotDF2 = (
    _subject_scores("math")
    .unionByName(_subject_scores("science"))
    .unionByName(_subject_scores("english"))
)

unpivotDF2.display()


id,name,subject,score
1,Alice,math,10
2,Bob,math,40
3,Cathy,math,70
1,Alice,science,20
2,Bob,science,50
3,Cathy,science,80
1,Alice,english,30
2,Bob,english,60
3,Cathy,english,90


### ✅ Best Practice

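Use `stack()` when you know the columns in advance (best performance): it unpivots every column in a single pass over the DataFrame, whereas the `union` approach selects from the source once per column.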

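When there are many columns, generate the `stack()` expression dynamically instead of typing it out, e.g. build the argument list with `", ".join(f"'{c}', {c}" for c in cols)` and pass `len(cols)` as the row count.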