##### Create dataframe with array column

In [0]:
# Sample rows of (person name, list of appliances).
# Two lists end with a None element and one row has no list at all (None),
# so null handling can be observed when the array is exploded.
array_appliance = [
    ('Raja', ['TV', 'Refrigerator', 'Oven', 'AC']),
    ('Raghav', ['AC', 'Washing machine', None]),
    ('Ram', ['Grinder', 'TV']),
    ('Ramesh', ['Refrigerator', 'TV', None]),
    ('Rajesh', None),
]

# Only column names are supplied; the column types are inferred from the data.
df_app = spark.createDataFrame(array_appliance, ['name', 'Appliances'])

# Show the inferred schema, then the rows themselves.
df_app.printSchema()
display(df_app)

root
 |-- name: string (nullable = true)
 |-- Appliances: array (nullable = true)
 |    |-- element: string (containsNull = true)



name,Appliances
Raja,"List(TV, Refrigerator, Oven, AC)"
Raghav,"List(AC, Washing machine, null)"
Ram,"List(Grinder, TV)"
Ramesh,"List(Refrigerator, TV, null)"
Rajesh,


##### Create a dataframe with map column

In [0]:
# Sample rows of (person name, {appliance: brand}).
# One brand value is an empty string and one row has no map at all (None).
map_brand = [
    ('Raja', {'TV': 'LG', 'Refrigerator': 'Samsung', 'Oven': 'Philipps', 'AC': 'Voltas'}),
    ('Raghav', {'AC': 'Samsung', 'Washing machine': 'LG'}),
    ('Ram', {'Grinder': 'Preethi', 'TV': ''}),
    ('Ramesh', {'Refrigerator': 'LG', 'TV': 'Croma'}),
    ('Rajesh', None),
]

# Only column names are supplied; the Python dicts are inferred as a map column.
df_brand = spark.createDataFrame(map_brand, ['name', 'Brand'])

# Show the inferred schema, then the rows themselves.
df_brand.printSchema()
display(df_brand)

root
 |-- name: string (nullable = true)
 |-- Brand: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



name,Brand
Raja,"Map(Refrigerator -> Samsung, AC -> Voltas, TV -> LG, Oven -> Philipps)"
Raghav,"Map(AC -> Samsung, Washing machine -> LG)"
Ram,"Map(TV -> , Grinder -> Preethi)"
Ramesh,"Map(Refrigerator -> LG, TV -> Croma)"
Rajesh,


##### Explode array field

In [0]:
from pyspark.sql.functions import explode

# Turn each array element into its own row, keeping the person's name.
# The exploded values land in a column with the default name `col`.
df_explode_array = df_app.select("name", explode("Appliances"))

# Schemas before and after the explode.
df_app.printSchema()
df_explode_array.printSchema()

# Rows before and after the explode.
display(df_app)
display(df_explode_array)

root
 |-- name: string (nullable = true)
 |-- Appliances: array (nullable = true)
 |    |-- element: string (containsNull = true)

root
 |-- name: string (nullable = true)
 |-- col: string (nullable = true)



name,Appliances
Raja,"List(TV, Refrigerator, Oven, AC)"
Raghav,"List(AC, Washing machine, null)"
Ram,"List(Grinder, TV)"
Ramesh,"List(Refrigerator, TV, null)"
Rajesh,


name,col
Raja,TV
Raja,Refrigerator
Raja,Oven
Raja,AC
Raghav,AC
Raghav,Washing machine
Raghav,
Ram,Grinder
Ram,TV
Ramesh,Refrigerator


##### Explode map field

In [0]:
from pyspark.sql.functions import explode

# Turn each map entry into its own row, keeping the person's name.
# A map explodes into two columns with the default names `key` and `value`.
df_explode_map = df_brand.select("name", explode("Brand"))

# Schemas before and after the explode.
df_brand.printSchema()
df_explode_map.printSchema()

# Rows before and after the explode.
display(df_brand)
display(df_explode_map)


root
 |-- name: string (nullable = true)
 |-- Brand: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

root
 |-- name: string (nullable = true)
 |-- key: string (nullable = false)
 |-- value: string (nullable = true)



name,Brand
Raja,"Map(Refrigerator -> Samsung, AC -> Voltas, TV -> LG, Oven -> Philipps)"
Raghav,"Map(AC -> Samsung, Washing machine -> LG)"
Ram,"Map(TV -> , Grinder -> Preethi)"
Ramesh,"Map(Refrigerator -> LG, TV -> Croma)"
Rajesh,


name,key,value
Raja,Refrigerator,Samsung
Raja,AC,Voltas
Raja,TV,LG
Raja,Oven,Philipps
Raghav,AC,Samsung
Raghav,Washing machine,LG
Ram,TV,
Ram,Grinder,Preethi
Ramesh,Refrigerator,LG
Ramesh,TV,Croma


##### Explode_outer array field to include the null values

In [0]:
from pyspark.sql.functions import explode_outer

# Same select as the plain explode of the array column, but using explode_outer.
# Per the PySpark docs, explode_outer emits a null row when the array itself is
# null or empty instead of dropping that input row.
df_expouter_array = df_app.select("name", explode_outer("Appliances"))

# Schemas before and after the explode.
df_app.printSchema()
df_expouter_array.printSchema()

# Rows before and after the explode.
display(df_app)
display(df_expouter_array)

root
 |-- name: string (nullable = true)
 |-- Appliances: array (nullable = true)
 |    |-- element: string (containsNull = true)

root
 |-- name: string (nullable = true)
 |-- col: string (nullable = true)



name,Appliances
Raja,"List(TV, Refrigerator, Oven, AC)"
Raghav,"List(AC, Washing machine, null)"
Ram,"List(Grinder, TV)"
Ramesh,"List(Refrigerator, TV, null)"
Rajesh,


name,col
Raja,TV
Raja,Refrigerator
Raja,Oven
Raja,AC
Raghav,AC
Raghav,Washing machine
Raghav,
Ram,Grinder
Ram,TV
Ramesh,Refrigerator


##### Explode_outer map field to consider the null values

In [0]:
from pyspark.sql.functions import explode_outer

# Same select as the plain explode of the map column, but using explode_outer.
# Per the PySpark docs, explode_outer emits a row of nulls when the map itself
# is null or empty instead of dropping that input row.
df_exp_outer_map = df_brand.select("name", explode_outer("Brand"))

# Schemas before and after the explode.
df_brand.printSchema()
df_exp_outer_map.printSchema()

# Rows before and after the explode.
display(df_brand)
display(df_exp_outer_map)


root
 |-- name: string (nullable = true)
 |-- Brand: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

root
 |-- name: string (nullable = true)
 |-- key: string (nullable = true)
 |-- value: string (nullable = true)



name,Brand
Raja,"Map(Refrigerator -> Samsung, AC -> Voltas, TV -> LG, Oven -> Philipps)"
Raghav,"Map(AC -> Samsung, Washing machine -> LG)"
Ram,"Map(TV -> , Grinder -> Preethi)"
Ramesh,"Map(Refrigerator -> LG, TV -> Croma)"
Rajesh,


name,key,value
Raja,Refrigerator,Samsung
Raja,AC,Voltas
Raja,TV,LG
Raja,Oven,Philipps
Raghav,AC,Samsung
Raghav,Washing machine,LG
Ram,TV,
Ram,Grinder,Preethi
Ramesh,Refrigerator,LG
Ramesh,TV,Croma


##### Positional Explode

In [0]:
from pyspark.sql.functions import posexplode

# Array column: every element becomes a row with its index (`pos`) and value (`col`).
display(
    df_app.select("name", posexplode("Appliances"))
)

# Map column: every entry becomes a row with its position (`pos`), `key` and `value`.
display(
    df_brand.select("name", posexplode("Brand"))
)

name,pos,col
Raja,0,TV
Raja,1,Refrigerator
Raja,2,Oven
Raja,3,AC
Raghav,0,AC
Raghav,1,Washing machine
Raghav,2,
Ram,0,Grinder
Ram,1,TV
Ramesh,0,Refrigerator


name,pos,key,value
Raja,0,Refrigerator,Samsung
Raja,1,AC,Voltas
Raja,2,TV,LG
Raja,3,Oven,Philipps
Raghav,0,AC,Samsung
Raghav,1,Washing machine,LG
Ram,0,TV,
Ram,1,Grinder,Preethi
Ramesh,0,Refrigerator,LG
Ramesh,1,TV,Croma


##### Positional explode outer to include the null values

In [0]:
from pyspark.sql.functions import posexplode_outer

# Array column, outer variant: yields `pos` and `col`; per the PySpark docs a
# null or empty array produces a (null, null) row rather than being dropped.
display(
    df_app.select("name", posexplode_outer("Appliances"))
)

# Map column, outer variant: yields `pos`, `key` and `value`, with the same
# documented handling of a null or empty map.
display(
    df_brand.select("name", posexplode_outer("Brand"))
)

name,pos,col
Raja,0.0,TV
Raja,1.0,Refrigerator
Raja,2.0,Oven
Raja,3.0,AC
Raghav,0.0,AC
Raghav,1.0,Washing machine
Raghav,2.0,
Ram,0.0,Grinder
Ram,1.0,TV
Ramesh,0.0,Refrigerator


name,pos,key,value
Raja,0.0,Refrigerator,Samsung
Raja,1.0,AC,Voltas
Raja,2.0,TV,LG
Raja,3.0,Oven,Philipps
Raghav,0.0,AC,Samsung
Raghav,1.0,Washing machine,LG
Ram,0.0,TV,
Ram,1.0,Grinder,Preethi
Ramesh,0.0,Refrigerator,LG
Ramesh,1.0,TV,Croma
