In [2]:
from pyspark.sql import SparkSession,Row
# Build the Spark session (or reuse one that is already running) for the cells below.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

In [4]:
# Sample record: one JSON document stored as plain text.
jsonString = """{"Zipcode":704,"ZipCodeType":"STANDARD","City":"PARC PARQUE","State":"PR"}"""

# Single-row DataFrame: a numeric id plus the raw JSON text in "value".
df = spark.createDataFrame(
    data=[(1, jsonString)],
    schema=["id", "value"],
)
# truncate=False so the whole JSON string is printed rather than cut off.
df.show(truncate=False)

+---+--------------------------------------------------------------------------+
|id |value                                                                     |
+---+--------------------------------------------------------------------------+
|1  |{"Zipcode":704,"ZipCodeType":"STANDARD","City":"PARC PARQUE","State":"PR"}|
+---+--------------------------------------------------------------------------+



In [5]:
#Convert JSON string column to Map type
from pyspark.sql.types import MapType,StringType
from pyspark.sql.functions import from_json
# Replace the JSON text in "value" with a map parsed from it; both keys and
# values are declared as strings.
df2 = df.withColumn(
    "value",
    from_json(
        df["value"],
        MapType(keyType=StringType(), valueType=StringType()),
    ),
)
df2.printSchema()
df2.show(truncate=False)

root
 |-- id: long (nullable = true)
 |-- value: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

+---+---------------------------------------------------------------------------+
|id |value                                                                      |
+---+---------------------------------------------------------------------------+
|1  |{Zipcode -> 704, ZipCodeType -> STANDARD, City -> PARC PARQUE, State -> PR}|
+---+---------------------------------------------------------------------------+



In [6]:
#Create schema for JSON
from pyspark.sql.types import StructType,StructField, StringType
# Struct schema for the JSON document: four nullable string fields, declared
# in the order they should appear in the parsed struct.
schema = StructType([
    StructField(field_name, StringType(), True)
    for field_name in ("Zipcode", "ZipCodeType", "City", "State")
])

In [7]:
#Convert JSON string column to struct type
from pyspark.sql.functions import col,from_json
# Parse the JSON text in "value" against `schema`, replacing the column with
# the resulting struct. The source column is referenced by name.
df3 = df.withColumn(
    "value",
    from_json("value", schema),
)
df3.printSchema()
df3.show(truncate=False)

root
 |-- id: long (nullable = true)
 |-- value: struct (nullable = true)
 |    |-- Zipcode: string (nullable = true)
 |    |-- ZipCodeType: string (nullable = true)
 |    |-- City: string (nullable = true)
 |    |-- State: string (nullable = true)

+---+--------------------------------+
|id |value                           |
+---+--------------------------------+
|1  |{704, STANDARD, PARC PARQUE, PR}|
+---+--------------------------------+



In [8]:
#Convert to multiple columns
# "value.*" expands every field of the struct into its own top-level column,
# next to the untouched "id" column.
df4 = df3.select(["id", "value.*"])
df4.printSchema()
df4.show()

root
 |-- id: long (nullable = true)
 |-- Zipcode: string (nullable = true)
 |-- ZipCodeType: string (nullable = true)
 |-- City: string (nullable = true)
 |-- State: string (nullable = true)

+---+-------+-----------+-----------+-----+
| id|Zipcode|ZipCodeType|       City|State|
+---+-------+-----------+-----------+-----+
|  1|    704|   STANDARD|PARC PARQUE|   PR|
+---+-------+-----------+-----------+-----+

