In [0]:
import pyspark.sql.functions as F
import pyspark.sql.types as T

In [0]:
# Explicit schema for the sample rows: two scalar columns plus a "values" column
# holding an array of {val1, val2} integer structs. Every field is nullable.
schema = (
    T.StructType()
    .add("id", T.IntegerType(), True)
    .add("name", T.StringType(), True)
    .add(
        "values",
        T.ArrayType(
            T.StructType()
            .add("val1", T.IntegerType(), True)
            .add("val2", T.IntegerType(), True)
        ),
        True,
    )
)

In [0]:
# Sample rows shaped as (id, name, values); each entry of `values` is a dict
# keyed by the struct field names declared in `schema` (val1, val2).
data = [
    (1, "John", [{"val1": 1, "val2": 2}, {"val1": 3, "val2": 4}]),
    (2, "Jane", [{"val1": 5, "val2": 6}, {"val1": 7, "val2": 8}]),
    (3, "Bob", [{"val1": 9, "val2": 10}, {"val1": 11, "val2": 12}]),
]

# Build the DataFrame against the explicit schema rather than relying on inference.
df = spark.createDataFrame(data, schema=schema)

In [0]:
# Render the newly created DataFrame (one row per person, `values` still nested).
# NOTE(review): `.display()` is presumably supplied by the Databricks notebook
# runtime rather than open-source PySpark — confirm before running elsewhere.
df.display()

id,name,values
1,John,"List(List(1, 2), List(3, 4))"
2,Jane,"List(List(5, 6), List(7, 8))"
3,Bob,"List(List(9, 10), List(11, 12))"


In [0]:
# Produce one output row per element of the `values` array, carrying `id` and
# `name` along; the struct element lands in a column named `dictionary_data`.
# Note: `explode` emits no row for an input whose array is null or empty.
df_explode = df.select(
    "id",
    "name",
    F.explode("values").alias("dictionary_data"),
)

In [0]:
# Render the exploded DataFrame: one row per array element, struct still nested
# in the `dictionary_data` column.
df_explode.display()

id,name,dictionary_data
1,John,"List(1, 2)"
1,John,"List(3, 4)"
2,Jane,"List(5, 6)"
2,Jane,"List(7, 8)"
3,Bob,"List(9, 10)"
3,Bob,"List(11, 12)"


In [0]:
# Flatten the exploded struct: keep every existing column, surface its two
# fields as top-level `value_1` / `value_2` columns, then discard the struct.
df_parse = df_explode.select(
    "*",
    F.col("dictionary_data.val1").alias("value_1"),
    F.col("dictionary_data.val2").alias("value_2"),
).drop("dictionary_data")

In [0]:
# Render the flattened result: columns id, name, value_1, value_2.
df_parse.display()

id,name,value_1,value_2
1,John,1,2
1,John,3,4
2,Jane,5,6
2,Jane,7,8
3,Bob,9,10
3,Bob,11,12
