# Ex-2240 - JSON with Spark


In [1]:
# Write the sample JSON document to disk so Spark can read it from a file.
#
# The document is a single object with one key, "colors", holding an array of
# six color records. Each record has "color", "category", and a nested "code"
# object ("rgba" array + "hex" string); the "white" record has no "type" key.
#
# NOTE(review): the sample data is kept exactly as given, but it looks
# internally inconsistent -- "black" carries rgba [255,255,255,1] while
# "white" carries [0,0,0,1] (apparently swapped), and "red" has hex "#FF0",
# the same value as "yellow". Confirm whether this is intentional before
# relying on the values.
json_content = """{
  "colors": [
    {
      "color": "black",
      "category": "hue",
      "type": "primary",
      "code": {
        "rgba": [255,255,255,1],
        "hex": "#000"
      }
    },
    {
      "color": "white",
      "category": "value",
      "code": {
        "rgba": [0,0,0,1],
        "hex": "#FFF"
      }
    },
    {
      "color": "red",
      "category": "hue",
      "type": "primary",
      "code": {
        "rgba": [255,0,0,1],
        "hex": "#FF0"
      }
    },
    {
      "color": "blue",
      "category": "hue",
      "type": "primary",
      "code": {
        "rgba": [0,0,255,1],
        "hex": "#00F"
      }
    },
    {
      "color": "yellow",
      "category": "hue",
      "type": "primary",
      "code": {
        "rgba": [255,255,0,1],
        "hex": "#FF0"
      }
    },
    {
      "color": "green",
      "category": "hue",
      "type": "secondary",
      "code": {
        "rgba": [0,255,0,1],
        "hex": "#0F0"
      }
    }
  ]
}"""

# Explicit encoding keeps the bytes on disk independent of the platform locale.
with open("colors.json", "w", encoding="utf-8") as f:
    f.write(json_content)

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *

# Get (or start) the Spark session used by the rest of the notebook.
spark = SparkSession.builder.appName("ColorsDataframe").getOrCreate()

# colors.json is one pretty-printed JSON document spread over many lines, so
# the "multiline" option is needed; without it Spark's JSON reader expects
# one complete JSON record per line.
df = spark.read.option("multiline", True).json("colors.json")

# printSchema() and show() write to stdout themselves and return None, so
# they are called directly -- wrapping them in print() only adds a stray
# "None" line after each output.
df.printSchema()
df.show()

root
 |-- colors: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- category: string (nullable = true)
 |    |    |-- code: struct (nullable = true)
 |    |    |    |-- hex: string (nullable = true)
 |    |    |    |-- rgba: array (nullable = true)
 |    |    |    |    |-- element: long (containsNull = true)
 |    |    |-- color: string (nullable = true)
 |    |    |-- type: string (nullable = true)

None
+--------------------+
|              colors|
+--------------------+
|[{hue, {#000, [25...|
+--------------------+

None


In [3]:
# Turn the single `colors` array into one row per color record, then pull the
# top-level fields out of each record while leaving `code` as a nested struct.
(
    df.selectExpr("explode(colors) as color_struct")
    .selectExpr(
        "color_struct.color as color",
        "color_struct.type as type",
        "color_struct.category as category",
        "color_struct.code as code",
    )
    .show(truncate=False)
)

+------+---------+--------+--------------------------+
|color |type     |category|code                      |
+------+---------+--------+--------------------------+
|black |primary  |hue     |{#000, [255, 255, 255, 1]}|
|white |NULL     |value   |{#FFF, [0, 0, 0, 1]}      |
|red   |primary  |hue     |{#FF0, [255, 0, 0, 1]}    |
|blue  |primary  |hue     |{#00F, [0, 0, 255, 1]}    |
|yellow|primary  |hue     |{#FF0, [255, 255, 0, 1]}  |
|green |secondary|hue     |{#0F0, [0, 255, 0, 1]}    |
+------+---------+--------+--------------------------+



In [4]:
# Same explode as before, but the nested `code` struct is split into two
# separate columns: its hex string and its rgba array.
(
    df.selectExpr("explode(colors) as color_struct")
    .selectExpr(
        "color_struct.color as color",
        "color_struct.type as type",
        "color_struct.category as category",
        "color_struct.code.hex as hex_value",
        "color_struct.code.rgba as rgba_values",
    )
    .show(truncate=False)
)

+------+---------+--------+---------+------------------+
|color |type     |category|hex_value|rgba_values       |
+------+---------+--------+---------+------------------+
|black |primary  |hue     |#000     |[255, 255, 255, 1]|
|white |NULL     |value   |#FFF     |[0, 0, 0, 1]      |
|red   |primary  |hue     |#FF0     |[255, 0, 0, 1]    |
|blue  |primary  |hue     |#00F     |[0, 0, 255, 1]    |
|yellow|primary  |hue     |#FF0     |[255, 255, 0, 1]  |
|green |secondary|hue     |#0F0     |[0, 255, 0, 1]    |
+------+---------+--------+---------+------------------+



In [5]:
# Fully flattened view: one row per color record, with the rgba array broken
# out by position (0-3) into individual red / green / blue / alpha columns.
(
    df.selectExpr("explode(colors) as color_struct")
    .selectExpr(
        "color_struct.color as color",
        "color_struct.type as type",
        "color_struct.category as category",
        "color_struct.code.hex as hex_value",
        "color_struct.code.rgba[0] as red",
        "color_struct.code.rgba[1] as green",
        "color_struct.code.rgba[2] as blue",
        "color_struct.code.rgba[3] as alpha",
    )
    .show(truncate=False)
)

+------+---------+--------+---------+---+-----+----+-----+
|color |type     |category|hex_value|red|green|blue|alpha|
+------+---------+--------+---------+---+-----+----+-----+
|black |primary  |hue     |#000     |255|255  |255 |1    |
|white |NULL     |value   |#FFF     |0  |0    |0   |1    |
|red   |primary  |hue     |#FF0     |255|0    |0   |1    |
|blue  |primary  |hue     |#00F     |0  |0    |255 |1    |
|yellow|primary  |hue     |#FF0     |255|255  |0   |1    |
|green |secondary|hue     |#0F0     |0  |255  |0   |1    |
+------+---------+--------+---------+---+-----+----+-----+

