In [0]:
from pyspark.sql import Row

# Sample records. ``telephone`` is a plain dict, which Spark infers as a map column.
# The last two records hold None numbers, and the final one has no "office" key at all.
people = [
    dict(
        name="John Doe",
        age=35,
        telephone=dict(home="555-1234", office="555-5678"),
    ),
    dict(
        name="Jane Smith",
        age=28,
        telephone=dict(home="555-9012", office="555-3456"),
    ),
    dict(
        name="Bob Johnson",
        age=42,
        telephone=dict(home="555-7890", office="555-2109"),
    ),
    dict(name="David Brenson", age=23, telephone=dict(home=None, office=None)),
    dict(name="George Sacks", age=36, telephone=dict(home=None)),
]

# Convert every record to a Row so its keys become the DataFrame column names.
people_as_rows = [Row(**fields) for fields in people]

df = spark.createDataFrame(people_as_rows)

# truncate=False prints the full map contents instead of cutting long cells short.
df.show(truncate=False)

+-------------+---+--------------------------------------+
|name         |age|telephone                             |
+-------------+---+--------------------------------------+
|John Doe     |35 |{office -> 555-5678, home -> 555-1234}|
|Jane Smith   |28 |{office -> 555-3456, home -> 555-9012}|
|Bob Johnson  |42 |{office -> 555-2109, home -> 555-7890}|
|David Brenson|23 |{office -> NULL, home -> NULL}        |
|George Sacks |36 |{home -> NULL}                        |
+-------------+---+--------------------------------------+



### Type map

In [0]:
# Print the inferred schema as a tree; the dict-valued ``telephone`` field
# appears as a map column with string keys and nullable string values.
df.printSchema()

root
 |-- name: string (nullable = true)
 |-- age: long (nullable = true)
 |-- telephone: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



### Getting values from a map

In [0]:
from pyspark.sql.functions import col

# Pull individual keys out of the map column, one output column per key.
# A key that is absent from a row's map (e.g. "office" for George Sacks)
# comes back as NULL here, the same as a key whose stored value is None.
(
    df
    .select(
        "name",
        col("telephone").getItem("home").alias("home_phone"),
        col("telephone").getItem("office").alias("office_phone"),
    )
    .show()
)

+-------------+----------+------------+
|         name|home_phone|office_phone|
+-------------+----------+------------+
|     John Doe|  555-1234|    555-5678|
|   Jane Smith|  555-9012|    555-3456|
|  Bob Johnson|  555-7890|    555-2109|
|David Brenson|      NULL|        NULL|
| George Sacks|      NULL|        NULL|
+-------------+----------+------------+



### explode

In [0]:
from pyspark.sql.functions import explode, explode_outer

# explode emits one output row per map entry, placing the entry in two
# generated columns that are named "key" and "value" by default.
df.select("name", explode(df["telephone"])).show()

+-------------+------+--------+
|         name|   key|   value|
+-------------+------+--------+
|     John Doe|office|555-5678|
|     John Doe|  home|555-1234|
|   Jane Smith|office|555-3456|
|   Jane Smith|  home|555-9012|
|  Bob Johnson|office|555-2109|
|  Bob Johnson|  home|555-7890|
|David Brenson|office|    NULL|
|David Brenson|  home|    NULL|
| George Sacks|  home|    NULL|
+-------------+------+--------+



### explode_outer

In [0]:
# explode_outer differs from explode only for rows whose map is null or empty:
# it keeps such a row with NULL key/value instead of dropping it. Every row in
# this data has at least one map entry, so the result matches the explode output.
df.select("name", explode_outer(df["telephone"])).show()

+-------------+------+--------+
|         name|   key|   value|
+-------------+------+--------+
|     John Doe|office|555-5678|
|     John Doe|  home|555-1234|
|   Jane Smith|office|555-3456|
|   Jane Smith|  home|555-9012|
|  Bob Johnson|office|555-2109|
|  Bob Johnson|  home|555-7890|
|David Brenson|office|    NULL|
|David Brenson|  home|    NULL|
| George Sacks|  home|    NULL|
+-------------+------+--------+



### Rename the exploded key and value columns

In [0]:
# Name the two generated columns directly on the explode expression. ``alias``
# accepts several names for expressions that produce more than one column, so
# no separate rename step keyed on the default "key"/"value" names is needed.
df.select(
    "name",
    explode("telephone").alias("phone_type", "number"),
).show()

+-------------+----------+--------+
|         name|phone_type|  number|
+-------------+----------+--------+
|     John Doe|    office|555-5678|
|     John Doe|      home|555-1234|
|   Jane Smith|    office|555-3456|
|   Jane Smith|      home|555-9012|
|  Bob Johnson|    office|555-2109|
|  Bob Johnson|      home|555-7890|
|David Brenson|    office|    NULL|
|David Brenson|      home|    NULL|
| George Sacks|      home|    NULL|
+-------------+----------+--------+

