# Explode

In [None]:
# What is explode in PySpark?
# `explode` flattens an array or map column into multiple rows.
# For an array column it produces one new row per element; for a map column it produces one new row per key-value pair.
# Rows whose array or map is null or empty produce no output rows (use `explode_outer` to keep them).
# It is very useful when dealing with nested data (such as parsed JSON or arrays produced by split()).

In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, collect_list, explode, split

# Start (or reuse) the Spark session used by every cell in this notebook.
spark = (
    SparkSession.builder
    .appName('exploade')
    .getOrCreate()
)

# Two single-column rows, each holding one comma-delimited string.
data = [("Alice,25,USA",), ("Bob,30,UK",)]
df = spark.createDataFrame(data, schema=["info"])


In [2]:
# Turn the comma-delimited "info" string into an array-of-strings column.
info_parts = split(df["info"], ",")
df_array = df.withColumn("info_array", info_parts)

# truncate=False so the full array contents are visible in the table.
df_array.show(truncate=False)


+------------+----------------+
|info        |info_array      |
+------------+----------------+
|Alice,25,USA|[Alice, 25, USA]|
|Bob,30,UK   |[Bob, 30, UK]   |
+------------+----------------+



In [3]:
# explode emits one output row per element of "info_array"; the remaining
# columns ("info", "info_array") are repeated on every generated row.
exploded_element = explode(df_array["info_array"])
df_exploded = df_array.withColumn("info_element", exploded_element)
df_exploded.show()


+------------+----------------+------------+
|        info|      info_array|info_element|
+------------+----------------+------------+
|Alice,25,USA|[Alice, 25, USA]|       Alice|
|Alice,25,USA|[Alice, 25, USA]|          25|
|Alice,25,USA|[Alice, 25, USA]|         USA|
|   Bob,30,UK|   [Bob, 30, UK]|         Bob|
|   Bob,30,UK|   [Bob, 30, UK]|          30|
|   Bob,30,UK|   [Bob, 30, UK]|          UK|
+------------+----------------+------------+



In [3]:
# Build the input for this example inside the cell. The `df` created earlier in
# the notebook only has an "info" column, so grouping it by "Type" fails on a
# fresh kernel (Restart & Run All).
# NOTE(review): these rows are reconstructed from the output displayed for this
# cell -- confirm they match the data originally used.
type_rows = [("A", 1), ("A", 2), ("A", 3), ("B", 1), ("B", 2), ("B", 1)]
df_types = spark.createDataFrame(type_rows, ["Type", "id"])

# Step 1: collapse the ids of each Type into a single array column.
# collect_list keeps duplicates; the element order inside each list is not
# guaranteed after the shuffle performed by groupBy.
df1 = df_types.groupBy('Type').agg(collect_list('id').alias("id_list"))
df1.show()

# Step 2: explode is the inverse operation -- one row per array element again.
df1.withColumn("id's", explode('id_list'))\
   .select("Type", "id's")\
   .show()

+----+---------+
|Type|  id_list|
+----+---------+
|   A|[1, 2, 3]|
|   B|[1, 2, 1]|
+----+---------+

+----+----+
|Type|id's|
+----+----+
|   A|   1|
|   A|   2|
|   A|   3|
|   B|   1|
|   B|   2|
|   B|   1|
+----+----+

