# ================ SEMI-STRUCTURED DATA HANDLING ================

In [1]:
import pandas as pd
import numpy as np

- Unlike tables (rows & columns), semi-structured data (like JSON, XML, YAML) doesn’t have a fixed schema.

- It often contains nested objects and arrays

In [2]:
data = [
    {
        "id": 1,
        "name": "Alice",
        "address": {"city": "Delhi", "zip": 110001},
        "orders": [{"id": 101, "amount": 250}, {"id": 102, "amount": 450}]
    },
    {
        "id": 2,
        "name": "Bob",
        "address": {"city": "Mumbai", "zip": 400001},
        "orders": [{"id": 103, "amount": 300}]
    }
]
data

[{'id': 1,
  'name': 'Alice',
  'address': {'city': 'Delhi', 'zip': 110001},
  'orders': [{'id': 101, 'amount': 250}, {'id': 102, 'amount': 450}]},
 {'id': 2,
  'name': 'Bob',
  'address': {'city': 'Mumbai', 'zip': 400001},
  'orders': [{'id': 103, 'amount': 300}]}]

In [3]:
df = pd.DataFrame(data)
df

Unnamed: 0,id,name,address,orders
0,1,Alice,"{'city': 'Delhi', 'zip': 110001}","[{'id': 101, 'amount': 250}, {'id': 102, 'amou..."
1,2,Bob,"{'city': 'Mumbai', 'zip': 400001}","[{'id': 103, 'amount': 300}]"


##### Normalizing Nested JSON (for address)
- pandas.json_normalize() flattens nested JSON into columns.

In [5]:
df_normalized = pd.json_normalize(data)
df_normalized

Unnamed: 0,id,name,orders,address.city,address.zip
0,1,Alice,"[{'id': 101, 'amount': 250}, {'id': 102, 'amou...",Delhi,110001
1,2,Bob,"[{'id': 103, 'amount': 300}]",Mumbai,400001


##### Expanding Arrays (for orders)
- For list-type fields (orders), use json_normalize with record_path.

In [7]:
df_orders = pd.json_normalize(data, record_path="orders", meta=["id", "name"])
df_orders

ValueError: Conflicting metadata name id, need distinguishing prefix 

In [8]:
df2 = df.explode("orders", ignore_index=True)
df2 = pd.concat([df2.drop("orders", axis=1), df2["orders"].apply(pd.Series)], axis=1)
df2

Unnamed: 0,id,name,address,id.1,amount
0,1,Alice,"{'city': 'Delhi', 'zip': 110001}",101,250
1,1,Alice,"{'city': 'Delhi', 'zip': 110001}",102,450
2,2,Bob,"{'city': 'Mumbai', 'zip': 400001}",103,300
