Example: exploding (stacking) multiple columns into rows in Spark

In [2]:
import pandas as pd
# Build the sample data in pandas under its own name, so that `df` only ever
# refers to the Spark DataFrame used by the cells below (previously `df` was
# first a pandas frame and then rebound to a Spark frame).
sample_pdf = pd.DataFrame({
    'fruits': ['apple', 'apple', 'apple', 'apple'],
    'vegetables': ['carrot', 'carrot', 'carrot', 'carrot'],
    'fruit_rank': [1, 1, 1, 1],
    'vegetable_rank': [2, 2, 2, 2],
    'other': ['test1', 'test2', 'test3', 'test4'],
})

# `spark` is not defined in this notebook; presumably it is the SparkSession
# supplied by the notebook runtime -- TODO confirm.
df = spark.createDataFrame(sample_pdf)

In [3]:
# Render the source DataFrame before reshaping, for comparison with the exploded result.
# NOTE(review): `display` is not imported here; presumably the notebook runtime provides it.
display(df)

fruits,vegetables,fruit_rank,vegetable_rank,other
apple,carrot,1,2,test1
apple,carrot,1,2,test2
apple,carrot,1,2,test3
apple,carrot,1,2,test4


In [4]:
from pyspark.sql.functions import arrays_zip, col, explode, array, posexplode, expr

# Stack the paired columns ('fruits', 'fruit_rank') and ('vegetables', 'vegetable_rank')
# into one (food, rank) row per pair, carrying 'other' along unchanged.
df_explode = (
    df
    # Gather the values to stack into arrays; position i of 'food' pairs with
    # position i of 'rank'.
    .withColumn('food', array('fruits', 'vegetables'))
    .withColumn('rank', array('fruit_rank', 'vegetable_rank'))
    # arrays_zip merges the two arrays element-wise into structs, and explode
    # emits one output row per struct.
    .withColumn('explode', explode(arrays_zip('food', 'rank')))
    # The select projects only the wanted columns, so the helper array columns
    # do not need a separate drop beforehand.
    .select('explode.food', 'explode.rank', 'other')
)

display(df_explode)

food,rank,other
apple,1,test1
carrot,2,test1
apple,1,test2
carrot,2,test2
apple,1,test3
carrot,2,test3
apple,1,test4
carrot,2,test4
