In [17]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType,StructField, StringType, IntegerType

# Build the SparkSession used by every cell below; getOrCreate() hands back
# an already-active session instead of starting a second one.
spark = (
    SparkSession.builder
    .appName('mapType')
    .getOrCreate()
)


## Create MapType From StructType
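Create a MapType column by using PySpark `StructType` and `StructField`. The `StructType()` constructor takes a list of `StructField` objects, and each `StructField` takes a field name, the type of the value, and a nullable flag. Below, the `properties` field is declared as `MapType(StringType(), StringType())`, i.e. a map with string keys and string values.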

In [18]:
from pyspark.sql.types import MapType, StringType, StructField, StructType

# Schema: a nullable string column `name` plus a nullable map column
# `properties` whose keys and values are both strings.
schema = StructType(
    [
        StructField('name', StringType(), True),
        StructField('properties', MapType(StringType(), StringType()), True),
    ]
)

# One (name, properties) tuple per row. Note the two edge cases in `eye`:
# None for Michael and an empty string for Jefferson.
dataDictionary = [
    ('James', {'hair': 'black', 'eye': 'brown'}),
    ('Michael', {'hair': 'brown', 'eye': None}),
    ('Robert', {'hair': 'red', 'eye': 'black'}),
    ('Washington', {'hair': 'grey', 'eye': 'grey'}),
    ('Jefferson', {'hair': 'brown', 'eye': ''}),
]

# Materialise the rows against the explicit schema, then show both the
# schema and the untruncated contents.
df1 = spark.createDataFrame(data=dataDictionary, schema=schema)
df1.printSchema()
df1.show(truncate=False)

root
 |-- name: string (nullable = true)
 |-- properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

+----------+-----------------------------+
|name      |properties                   |
+----------+-----------------------------+
|James     |{eye -> brown, hair -> black}|
|Michael   |{eye -> NULL, hair -> brown} |
|Robert    |{eye -> black, hair -> red}  |
|Washington|{eye -> grey, hair -> grey}  |
|Jefferson |{eye -> , hair -> brown}     |
+----------+-----------------------------+



## Access PySpark MapType Elements
To extract the keys and values from the dictionary (MapType) column of a PySpark DataFrame, we use the RDD `map()` transformation to read the values of `properties` by key.

In [19]:
# Flatten the `properties` map into plain `hair` / `eye` columns by mapping
# over the underlying RDD. Each Row exposes the map column as a Python dict.
#
# dict.get() is used instead of [] so a row whose map lacks a key yields None
# rather than raising KeyError inside the worker, and `or {}` covers rows
# where `properties` itself is null (the schema declares it nullable).
df2 = (
    df1.rdd
    .map(
        lambda row: (
            row.name,
            (row.properties or {}).get("hair"),
            (row.properties or {}).get("eye"),
        )
    )
    .toDF(["name", "hair", "eye"])
)

df2.printSchema()
df2.show()

root
 |-- name: string (nullable = true)
 |-- hair: string (nullable = true)
 |-- eye: string (nullable = true)

+----------+-----+-----+
|      name| hair|  eye|
+----------+-----+-----+
|     James|black|brown|
|   Michael|brown| NULL|
|    Robert|  red|black|
|Washington| grey| grey|
| Jefferson|brown|     |
+----------+-----+-----+



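The same result as above, this time obtained with `Column.getItem()` on the `properties` column instead of an RDD `map()`.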

In [20]:
# Show the source frame first so the flattened result can be compared to it.
df1.show(truncate=False)

# Column.getItem(key) pulls a single entry out of the map column; once both
# entries have their own column, the original map column is dropped.
(
    df1
    .withColumn("hair", df1.properties.getItem("hair"))
    .withColumn("eye", df1.properties.getItem("eye"))
    .drop("properties")
    .show()
)

+----------+-----------------------------+
|name      |properties                   |
+----------+-----------------------------+
|James     |{eye -> brown, hair -> black}|
|Michael   |{eye -> NULL, hair -> brown} |
|Robert    |{eye -> black, hair -> red}  |
|Washington|{eye -> grey, hair -> grey}  |
|Jefferson |{eye -> , hair -> brown}     |
+----------+-----------------------------+

+----------+-----+-----+
|      name| hair|  eye|
+----------+-----+-----+
|     James|black|brown|
|   Michael|brown| NULL|
|    Robert|  red|black|
|Washington| grey| grey|
| Jefferson|brown|     |
+----------+-----+-----+



In [21]:
# Same flattening as with getItem(), written with bracket lookup on the
# map column instead.
(
    df1
    .withColumn("hair", df1.properties["hair"])
    .withColumn("eye", df1.properties["eye"])
    .drop("properties")
    .show()
)

+----------+-----+-----+
|      name| hair|  eye|
+----------+-----+-----+
|     James|black|brown|
|   Michael|brown| NULL|
|    Robert|  red|black|
|Washington| grey| grey|
| Jefferson|brown|     |
+----------+-----+-----+



### Explode

In [22]:
from pyspark.sql.functions import explode

# explode() on a map column emits one output row per map entry, with the
# entry split into `key` and `value` columns.
df1.select(
    df1["name"],
    explode(df1["properties"]),
).show()

+----------+----+-----+
|      name| key|value|
+----------+----+-----+
|     James| eye|brown|
|     James|hair|black|
|   Michael| eye| NULL|
|   Michael|hair|brown|
|    Robert| eye|black|
|    Robert|hair|  red|
|Washington| eye| grey|
|Washington|hair| grey|
| Jefferson| eye|     |
| Jefferson|hair|brown|
+----------+----+-----+



### map_keys() – Get All Map Keys

In [23]:
from pyspark.sql.functions import map_keys

# map_keys() collects the keys of each row's map into an array column.
df1.select(
    df1["name"],
    map_keys(df1["properties"]),
).show()

+----------+--------------------+
|      name|map_keys(properties)|
+----------+--------------------+
|     James|         [eye, hair]|
|   Michael|         [eye, hair]|
|    Robert|         [eye, hair]|
|Washington|         [eye, hair]|
| Jefferson|         [eye, hair]|
+----------+--------------------+



### map_values() – Get All Map Values

In [24]:
from pyspark.sql.functions import map_values

# map_values() collects the values of each row's map into an array column;
# null and empty-string values are kept as array elements.
df1.select(
    df1["name"],
    map_values(df1["properties"]),
).show()

+----------+----------------------+
|      name|map_values(properties)|
+----------+----------------------+
|     James|        [brown, black]|
|   Michael|         [NULL, brown]|
|    Robert|          [black, red]|
|Washington|          [grey, grey]|
| Jefferson|             [, brown]|
+----------+----------------------+

