### Syntax

![Read JSON syntax](Images/15%20Read%20Json%20Syntax.jpg)

### **Read JSON File**

In [0]:
# Load the sample file with the reader's "multiline" option enabled, then
# render the resulting DataFrame in the notebook.
df = (
    spark.read
    .option("multiline", "true")
    .json("/Volumes/workspace/default/mandy/SampleData.json")
)
display(df)

### **Define Python Function to Flatten Deeply Nested JSON File**

In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import *

def _complex_fields(df):
    """Return ``{column name: data type}`` for the top-level array/struct columns of ``df``."""
    return {
        field.name: field.dataType
        for field in df.schema.fields
        if isinstance(field.dataType, (ArrayType, StructType))
    }


def _quote(name):
    """Backtick-quote ``name`` so dots/special characters in it are not parsed as a field path."""
    return "`" + name.replace("`", "``") + "`"


def flatten(df):
    """Flatten every struct and array column of ``df`` into plain top-level columns.

    The schema is scanned repeatedly; on each pass the first remaining
    complex column is processed:

    * struct column: each sub-field becomes a new column named
      ``<column>_<subfield>`` and the original struct column is dropped;
    * array column: the column is replaced by ``explode_outer`` of itself,
      i.e. array elements become rows.

    The loop ends once no top-level column is an array or a struct.

    Args:
        df: Spark DataFrame to flatten.

    Returns:
        A DataFrame whose top-level columns are neither arrays nor structs.

    Notes:
        * A ``Processing :...`` line is printed for every column handled.
        * Generated names are not checked against existing columns, so a
          struct field ``a.b`` next to an existing column ``a_b`` yields
          duplicate column names.
        * Map columns are not touched.
    """
    complex_fields = _complex_fields(df)
    while complex_fields:
        # Always take the first remaining complex column; the mapping is
        # rebuilt from the new schema at the end of every iteration.
        col_name, data_type = next(iter(complex_fields.items()))
        print("Processing :" + col_name + " Type : " + str(type(data_type)))

        if isinstance(data_type, StructType):
            # Promote each sub-field to its own column. Both identifiers are
            # quoted so that names containing dots are resolved as names
            # rather than as nested-field paths.
            expanded = [
                col(_quote(col_name) + "." + _quote(sub_field.name))
                .alias(col_name + "_" + sub_field.name)
                for sub_field in data_type.fields
            ]
            df = df.select("*", *expanded).drop(col_name)

        elif isinstance(data_type, ArrayType):
            # Replace the array column by its exploded elements (one row per
            # element), keeping the same column name.
            df = df.withColumn(col_name, explode_outer(col(_quote(col_name))))

        # Exploding or expanding may have exposed new nested columns.
        complex_fields = _complex_fields(df)
    return df

### **Apply Flattening Function and Display Flattened Data**

In [0]:
# Flatten the DataFrame loaded from the JSON file and render the result.
# The flattened frame is kept under its own name so `df` still holds the
# original nested data.
df_flatten = flatten(df)
display(df_flatten)