In [0]:
"""
https://github.com/manisnitt/resturant_json_data/blob/main/resturant_json_data.json

Use explode() to flatten array-type columns, and dot notation (parent.child) to extract fields from struct-type columns
""" 

from pyspark.sql.functions import *

# Read the restaurant JSON file in multiline mode into a DataFrame.
# NOTE(review): "inferschema" looks like a CSV-reader option; the JSON source
# presumably infers the schema regardless — confirm before removing it.
restaurant_data_df = (
    spark.read
    .option("multiline", "true")
    .option("inferschema", "true")
    .load("/FileStore/tables/resturant_json_data.json", format="json")
)

# Preview the top-level columns of the loaded data.
restaurant_data_df.show()

+----+-------+--------------------+-------------+-------------+-------------+------+
|code|message|         restaurants|results_found|results_shown|results_start|status|
+----+-------+--------------------+-------------+-------------+-------------+------+
|null|   null|                  []|            0|            0|            1|  null|
|null|   null|[{{{17066603}, b9...|         6835|           20|            1|  null|
|null|   null|                  []|            0|            0|            1|  null|
|null|   null|                  []|            0|            0|            1|  null|
|null|   null|[{{{17093124}, b9...|         8680|           20|            1|  null|
|null|   null|                  []|            0|            0|            1|  null|
|null|   null|                  []|            0|            0|            1|  null|
|null|   null|[{{{17580142}, b9...|          943|           20|            1|  null|
|null|   null|                  []|            0|            0|  

In [0]:
# Print the schema of the loaded DataFrame to see which columns are arrays
# and which are structs before flattening them.
restaurant_data_df.printSchema()

root
 |-- code: long (nullable = true)
 |-- message: string (nullable = true)
 |-- restaurants: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- restaurant: struct (nullable = true)
 |    |    |    |-- R: struct (nullable = true)
 |    |    |    |    |-- res_id: long (nullable = true)
 |    |    |    |-- apikey: string (nullable = true)
 |    |    |    |-- average_cost_for_two: long (nullable = true)
 |    |    |    |-- cuisines: string (nullable = true)
 |    |    |    |-- currency: string (nullable = true)
 |    |    |    |-- deeplink: string (nullable = true)
 |    |    |    |-- establishment_types: array (nullable = true)
 |    |    |    |    |-- element: string (containsNull = true)
 |    |    |    |-- events_url: string (nullable = true)
 |    |    |    |-- featured_image: string (nullable = true)
 |    |    |    |-- has_online_delivery: long (nullable = true)
 |    |    |    |-- has_table_booking: long (nullable = true)
 |    |    |    |-- i

In [0]:
from pyspark.sql.functions import *

# Flatten the `restaurants` array into a `new_restaurants` struct column
# (appended after the existing columns), drop the original array column,
# and print the resulting schema.
(
    restaurant_data_df
    .withColumn("new_restaurants", explode("restaurants"))
    .drop("restaurants")
    .printSchema()
)

root
 |-- code: long (nullable = true)
 |-- message: string (nullable = true)
 |-- results_found: long (nullable = true)
 |-- results_shown: long (nullable = true)
 |-- results_start: string (nullable = true)
 |-- status: string (nullable = true)
 |-- new_restaurants: struct (nullable = true)
 |    |-- restaurant: struct (nullable = true)
 |    |    |-- R: struct (nullable = true)
 |    |    |    |-- res_id: long (nullable = true)
 |    |    |-- apikey: string (nullable = true)
 |    |    |-- average_cost_for_two: long (nullable = true)
 |    |    |-- cuisines: string (nullable = true)
 |    |    |-- currency: string (nullable = true)
 |    |    |-- deeplink: string (nullable = true)
 |    |    |-- establishment_types: array (nullable = true)
 |    |    |    |-- element: string (containsNull = true)
 |    |    |-- events_url: string (nullable = true)
 |    |    |-- featured_image: string (nullable = true)
 |    |    |-- has_online_delivery: long (nullable = true)
 |    |    |-- has_tab

In [0]:
# Step 1: explode the `restaurants` array into `new_restaurants` structs.
# Step 2: pull `res_id` out of the nested struct with dot notation and explode
#         the nested `establishment_types` array into the `new_` column.
# Only the schema is printed here; no rows are displayed.
(
    restaurant_data_df
    .select("*", explode("restaurants").alias("new_restaurants"))
    .drop("restaurants")
    .select(
        "*",
        "new_restaurants.restaurant.R.res_id",
        explode("new_restaurants.restaurant.establishment_types").alias("new_"),
    )
    .printSchema()
)

root
 |-- code: long (nullable = true)
 |-- message: string (nullable = true)
 |-- results_found: long (nullable = true)
 |-- results_shown: long (nullable = true)
 |-- results_start: string (nullable = true)
 |-- status: string (nullable = true)
 |-- new_restaurants: struct (nullable = true)
 |    |-- restaurant: struct (nullable = true)
 |    |    |-- R: struct (nullable = true)
 |    |    |    |-- res_id: long (nullable = true)
 |    |    |-- apikey: string (nullable = true)
 |    |    |-- average_cost_for_two: long (nullable = true)
 |    |    |-- cuisines: string (nullable = true)
 |    |    |-- currency: string (nullable = true)
 |    |    |-- deeplink: string (nullable = true)
 |    |    |-- establishment_types: array (nullable = true)
 |    |    |    |-- element: string (containsNull = true)
 |    |    |-- events_url: string (nullable = true)
 |    |    |-- featured_image: string (nullable = true)
 |    |    |-- has_online_delivery: long (nullable = true)
 |    |    |-- has_tab

In [0]:
"""
    explode: rows whose array/map column is null or empty are dropped from the output
    explode_outer: keeps those rows and emits null for the exploded column
"""

# Explode `restaurants` into one struct per row, then project only the three
# columns of interest: the nested res_id, each establishment type (via
# explode_outer), and the restaurant name. Projecting them directly yields
# the same columns, in the same order, as selecting "*" and dropping the rest.
(
    restaurant_data_df
    .select(explode("restaurants").alias("new_restaurants"))
    .select(
        "new_restaurants.restaurant.R.res_id",
        explode_outer("new_restaurants.restaurant.establishment_types").alias("new_establishment_types"),
        "new_restaurants.restaurant.name",
    )
    .show(truncate=False)
)

+--------+-----------------------+------------------------------------+
|res_id  |new_establishment_types|name                                |
+--------+-----------------------+------------------------------------+
|17066603|null                   |The Coop                            |
|17059541|null                   |Maggiano's Little Italy             |
|17064405|null                   |Tako Cheena by Pom Pom              |
|17057797|null                   |Bosphorous Turkish Cuisine          |
|17057591|null                   |Bahama Breeze Island Grille         |
|17064266|null                   |Hawkers Asian Street Fare           |
|17060516|null                   |Seasons 52 Fresh Grill              |
|17060320|null                   |Raglan Road Irish Pub and Restaurant|
|17059060|null                   |Hillstone                           |
|17059012|null                   |Hollerbach's Willow Tree Café       |
|17060869|null                   |Texas de Brazil               