# Transform Orders Data - Explode Arrays
1. Access elements from the JSON object
2. Deduplicate Array elements
3. Explode Arrays
4. Write the Transformed Data to Silver Schema

In [0]:
# Read the `py_orders_json` table from the silver schema and preview it.
source_table = 'gizmobox.silver.py_orders_json'
orders_df = spark.table(source_table)
display(orders_df)

### 1. Access elements from the JSON object
Use dot notation (`column_name.field_name`) to select an individual field from a struct column.

In [0]:
from pyspark.sql.functions import *

# Fields to pull out of the `json_value` struct, in output column order.
json_fields = [
    "order_id",
    "order_status",
    "payment_method",
    "total_amount",
    "transaction_timestamp",
    "customer_id",
    "items",
]

# Dot notation ("json_value.<field>") selects a nested field; the resulting
# column is named after the field itself.
json_paths = [f"json_value.{field}" for field in json_fields]
orders_normalized_df = orders_df.select(*json_paths)
display(orders_normalized_df)

### 2. Deduplicate Array elements
Use the `array_distinct` function to remove duplicate elements from the `items` array.

In [0]:
# Scalar fields taken from the `json_value` struct unchanged.
scalar_fields = (
    "order_id",
    "order_status",
    "payment_method",
    "total_amount",
    "transaction_timestamp",
    "customer_id",
)

# Remove repeated elements from the items array, keeping the column name
# `items` for the downstream explode step.
distinct_items = array_distinct("json_value.items").alias("items")

orders_normalized_df = orders_df.select(
    *(f"json_value.{field}" for field in scalar_fields),
    distinct_items,
)
display(orders_normalized_df)

### 3. Explode Arrays
Use the `explode` function to produce one output row for each element of the `items` array.

In [0]:
# Order-level columns repeated on every exploded row.
order_level_cols = [
    'order_id',
    'order_status',
    'payment_method',
    'total_amount',
    'transaction_timestamp',
    'customer_id',
]

# One row per element of `items`, exposed as the column `item`.
# NOTE(review): per the Spark docs, `explode` omits rows whose array is null
# or empty - confirm that dropping item-less orders is intended here.
exploded_item = explode('items').alias("item")

orders_exploded_df = orders_normalized_df.select(*order_level_cols, exploded_item)
display(orders_exploded_df)

In [0]:
# Flatten the exploded `item` struct into top-level columns.
# Columns carried over from the order as-is.
carried_cols = [
    'order_id',
    'order_status',
    'payment_method',
    'total_amount',
    'transaction_timestamp',
    'customer_id',
]
# Fields read directly from the `item` struct.
item_fields = ['item_id', 'name', 'price', 'quantity', 'category']
# Fields read from the nested `item.details` struct.
detail_fields = ['brand', 'color']

flattened_cols = (
    carried_cols
    + [f'item.{field}' for field in item_fields]
    + [f'item.details.{field}' for field in detail_fields]
)

order_items_df = orders_exploded_df.select(*flattened_cols)
display(order_items_df)

### 4. Write the Transformed Data to Silver Schema

In [0]:
# Create the silver table, or replace it if it already exists, with the
# flattened order-item rows.
target_table = 'gizmobox.silver.py_orders'
silver_writer = order_items_df.writeTo(target_table)
silver_writer.createOrReplace()


In [0]:
%sql
-- Read back every row of the table written in the previous cell.
SELECT *
FROM gizmobox.silver.py_orders;