In [1]:
import pyspark
from pyspark.sql import SparkSession

# Obtain a Spark session named 'spark' (getOrCreate reuses an existing one).
spark = (
    SparkSession.builder
    .appName('spark')
    .getOrCreate()
)

# Sample rows: a person's name paired with a list of subject lists, which
# Spark infers as an array-of-arrays-of-strings column.
Data = [
    ("James", [["Java", "Scala", "C++"], ["Spark", "Java"]]),
    ("Michael", [["Spark", "Java", "C++"], ["Spark", "Java"]]),
    ("Robert", [["CSharp", "VB"], ["Spark", "Python"]]),
]

# Only column names are supplied; the column types are inferred from the data.
df = spark.createDataFrame(Data, schema=['name', 'subjects'])

# Show the inferred schema, then every row without truncating the nested arrays.
df.printSchema()
df.show(truncate=False)

root
 |-- name: string (nullable = true)
 |-- subjects: array (nullable = true)
 |    |-- element: array (containsNull = true)
 |    |    |-- element: string (containsNull = true)

+-------+-----------------------------------+
|name   |subjects                           |
+-------+-----------------------------------+
|James  |[[Java, Scala, C++], [Spark, Java]]|
|Michael|[[Spark, Java, C++], [Spark, Java]]|
|Robert |[[CSharp, VB], [Spark, Python]]    |
+-------+-----------------------------------+



In [2]:
from pyspark.sql.functions import explode

# explode() emits one output row per element of the outer `subjects` array,
# so each inner subject list lands on its own row next to the person's name.
# No alias is given, so the exploded column takes the default name `col`.
(
    df
    .select(df['name'], explode(df['subjects']))
    .show(truncate=False)
)

+-------+------------------+
|name   |col               |
+-------+------------------+
|James  |[Java, Scala, C++]|
|James  |[Spark, Java]     |
|Michael|[Spark, Java, C++]|
|Michael|[Spark, Java]     |
|Robert |[CSharp, VB]      |
|Robert |[Spark, Python]   |
+-------+------------------+



In [10]:
from pyspark.sql.functions import flatten

# flatten() merges the array of arrays into a single array per person,
# exposed here under the column name `sub`.
df1 = df.select(
    df['name'],
    flatten(df['subjects']).alias('sub'),
)

# Default show() shortens long cell values, so the flattened arrays appear cut off.
df1.show()

# Exploding the flattened array yields one row per individual subject.
(
    df1
    .select(df1['name'], explode(df1['sub']))
    .show(truncate=False)
)

+-------+--------------------+
|   name|                 sub|
+-------+--------------------+
|  James|[Java, Scala, C++...|
|Michael|[Spark, Java, C++...|
| Robert|[CSharp, VB, Spar...|
+-------+--------------------+

+-------+------+
|name   |col   |
+-------+------+
|James  |Java  |
|James  |Scala |
|James  |C++   |
|James  |Spark |
|James  |Java  |
|Michael|Spark |
|Michael|Java  |
|Michael|C++   |
|Michael|Spark |
|Michael|Java  |
|Robert |CSharp|
|Robert |VB    |
|Robert |Spark |
|Robert |Python|
+-------+------+

