### Example

![Example](Images/75/75%20Example.jpg)

### Create Sample Dataframe

In [0]:
# Alternative kept for reference: build the same DataFrame from an RDD.
# df = spark.createDataFrame(sc.parallelize([['ABC', [1, 2, 3]], ['XYZ', [2, None, 4]], ['KLM', [8, 7]], ['IJK', [5]]]), ["key", "value"])
# df.display()

# Each row is [key, array of ints]. The arrays have different lengths
# (1 to 3 elements) and one of them contains a null element.
data = [
    ['ABC', [1, 2, 3]],
    ['XYZ', [2, None, 4]],
    ['KLM', [8, 7]],
    ['IJK', [5]],
]

# Column names are given explicitly; column types are inferred from the data.
df = spark.createDataFrame(data, schema=["key", "value"])
display(df)

### Split Array Values into Separate Columns

In [0]:
# df.select("key", df.value[0], df.value[1], df.value[2]).display()

from pyspark.sql.functions import col

from pyspark.sql.functions import expr

# Manual approach: one explicit expression per array position. `get` is used
# rather than plain indexing (see the commented variant above).
manual_cols = [
    expr("get(value, 0)").alias("value[0]"),
    expr("get(value, 1)").alias("value[1]"),
    expr("get(value, 2)").alias("value[2]"),
]
df.select("key", *manual_cols).display()

### How to Automate this Solution?

### Determine the Size of Each Array

In [0]:
from pyspark.sql.functions import size, col

# Keep the original columns and add the element count of each array.
dfSize = df.select(
    "key",
    "value",
    size(col("value")).alias("noOfArrayElements"),
)
display(dfSize)

### Get the Maximum Size of All Arrays

In [0]:
# A global aggregate (no groupBy) yields a single row whose only field is the
# maximum. It can be read by its generated column name, as in the commented
# variant, or by position, as done below.
# max_value = dfSize.agg({"noOfArrayElements": "max"}).collect()[0]['max(noOfArrayElements)']
max_row = dfSize.agg({"noOfArrayElements": "max"}).collect()[0]
max_value = max_row[0]
print(max_value)

### Helper Function to Convert Array Elements into Columns

In [0]:
def arraySplitIntoCols(df, maxElements, arrayCol="value", prefix="new_col_"):
    """Expand the leading elements of an array column into separate columns.

    This is a plain Python helper that builds Spark SQL expressions; it is not
    a registered Spark UDF, so no per-row Python code runs on the executors.

    Args:
        df: DataFrame that contains the array column.
        maxElements: Number of leading array positions (0-based indices
            ``0 .. maxElements - 1``) to turn into columns. ``None`` -- which
            is what a ``max`` aggregate returns for an empty DataFrame -- and
            non-positive values add no columns.
        arrayCol: Name of the array column to split. It is interpolated as-is
            into a SQL expression, so it must be a valid SQL column reference.
        prefix: Prefix of the generated column names; the array index is
            appended to it (``new_col_0``, ``new_col_1``, ...).

    Returns:
        ``df`` with one additional column per array position. Per the Spark SQL
        ``get`` function, positions beyond the end of a shorter array are null.
        An existing column with the same name as a generated one is replaced.
    """
    # Nothing to add: avoids ``range(None)`` raising TypeError when the size
    # aggregate ran over an empty DataFrame.
    if maxElements is None or maxElements <= 0:
        return df

    new_cols = {
        f"{prefix}{i}": expr(f"get({arrayCol}, {i})")
        for i in range(maxElements)
    }
    # Add all columns in a single projection instead of one ``withColumn`` call
    # per element, which would grow the query plan with every iteration.
    return df.withColumns(new_cols)

### Call the Helper Function

In [0]:
# Create one column per array position, up to the longest array found above.
dfout = arraySplitIntoCols(df=df, maxElements=max_value)
display(dfout)