## **Inflate JSON**
###  Expand a Flat DataFrame into a Tiered JSON
####           .xml -> json -> flat df -> json
#### Author: Seth Close
#### 05 JAN 2024

In [1]:
import pandas as pd
import json
import xml.etree.ElementTree as ET

In [34]:
# Path of the sample XML document.
filename = './xml/users.xml'
# Read the raw XML as text so it can be eyeballed; the cells shown in this
# notebook parse the file separately and do not consume xml_text.
with open(filename,'r') as f:
    xml_text = f.read()
# Uncomment to preview the first 480 characters of the file.
#print(xml_text[:480])

In [3]:
# Parse the XML file into an ElementTree; `root` is the document's top-level
# element, whose direct children are iterated by xml_to_json below.
tree = ET.parse('./xml/users.xml')
root = tree.getroot()

In [4]:
def xml_branch(branch, json_list, appended):
    """Convert one XML element into a nested dict of its children.

    A child without sub-elements is stored as ``{tag: text}``; a child with
    sub-elements is converted recursively and stored as ``{tag: {...}}``.
    Only ``tag`` and ``text`` are read, so XML attributes are ignored, and
    siblings that share a tag overwrite one another (last one wins).

    Args:
        branch: The ``xml.etree.ElementTree.Element`` to convert.
        json_list: List that collects top-level records. When ``appended``
            is false, the dict built for ``branch`` is appended to it once.
        appended: Pass ``False`` for a top-level record that still has to be
            added to ``json_list``; recursive calls receive ``True`` so that
            nested dicts are never appended.

    Returns:
        The dict built for ``branch`` (the same object that was appended).
    """
    json_dict = {}
    for child in branch:
        if len(child) == 0:
            # Leaf element: keep its text (None when the element is empty).
            json_dict[child.tag] = child.text
        else:
            if not appended:
                # Register the record before descending; it is filled in
                # place, so the list sees the finished dict.
                json_list.append(json_dict)
                appended = True
            json_dict[child.tag] = xml_branch(child, json_list, appended)
    # A record made only of leaf fields never reaches the branch above, so it
    # has to be registered here or it would be lost. Elements that produced
    # no fields at all are still skipped.
    if not appended and json_dict:
        json_list.append(json_dict)
    return json_dict

def xml_to_json(root):
    """Collect the records found under ``root`` into a list of dicts.

    Every direct child of ``root`` is handed to ``xml_branch`` together with
    the shared result list. This function never appends to the list itself;
    the list is filled by ``xml_branch`` as a side effect.

    Args:
        root: Iterable of XML elements, typically the document root.

    Returns:
        The list populated by the ``xml_branch`` calls.
    """
    records = []
    for record_element in root:
        # False = this record has not been added to `records` yet.
        xml_branch(record_element, records, False)
    return records

# Run the XML -> list-of-dicts conversion on the parsed tree and show the
# result; json_list is reused by the cells that follow.
json_list = xml_to_json(root)
print(json_list)

[{'Name': 'Roope Wirta', 'Email': 'roope.wirta@example.com', 'City': 'Pargas', 'File': {'One': 'RrP0', 'Two': 'RrP1', 'Three': 'RrP2', 'Notes': {'Note1': 'Hello', 'Note2': 'Goodbye', 'PS': {'alpha': 'Hi', 'beta': 'Bye'}}}}, {'Name': 'Nathaniel Phillips', 'Email': 'nathaniel.phillips@example.com', 'City': 'Rockhampton', 'File': {'One': 'NnR0', 'Two': 'NnR1', 'Three': 'NnR2', 'Notes': {'Note1': 'Hello', 'Note2': 'Goodbye', 'PS': {'alpha': 'Hi', 'beta': 'Bye'}}}}]


In [5]:
# Make a JSON file
filename = "./json/users.json"
# Serialise before opening: mode 'w' truncates the target as soon as it is
# opened, so a json.dumps failure inside the `with` block would leave an
# empty file behind.
json_data = json.dumps(json_list)
with open(filename, 'w') as file:
    file.write(json_data)

In [6]:
# Create a Flat DataFrame
# json_normalize turns each nested dict into one row; nested keys become
# dot-separated column names (e.g. File.Notes.PS.alpha in the output below).
user_flat = pd.json_normalize(json_list)
# Bare expression so the notebook renders the DataFrame.
user_flat

Unnamed: 0,Name,Email,City,File.One,File.Two,File.Three,File.Notes.Note1,File.Notes.Note2,File.Notes.PS.alpha,File.Notes.PS.beta
0,Roope Wirta,roope.wirta@example.com,Pargas,RrP0,RrP1,RrP2,Hello,Goodbye,Hi,Bye
1,Nathaniel Phillips,nathaniel.phillips@example.com,Rockhampton,NnR0,NnR1,NnR2,Hello,Goodbye,Hi,Bye


In [31]:
# Convert a Flattened Dataframe into a Tiered JSON
def json_level(key, value, current_dict):
    """Place one dotted-key value into ``current_dict``, one level at a time.

    The behaviour depends on how many dots ``key`` contains:

    * no dot: nothing is stored; ``{key: value}`` is returned for the caller
      to merge.
    * one dot (``parent.child``): ``current_dict[parent]`` is created as an
      empty dict if missing, but the value is NOT stored;
      ``{parent: {child: value}}`` is returned for the caller to merge.
    * two or more dots: the value is stored in place at its full path inside
      ``current_dict`` (intermediate dicts are created as needed). The
      return value is the innermost ``{parent: {leaf: value}}`` fragment; it
      has already been merged and callers should not merge it again.

    Args:
        key: Column name whose ``.``-separated segments name nesting levels.
        value: The value to place at the leaf.
        current_dict: Dict for the current nesting level; mutated in place.

    Returns:
        A dict fragment as described above.
    """
    parent_key, separator, child_key = key.partition('.')
    # Bottom of the hierarchy: hand the literal back to the caller.
    if not separator:
        return {key: value}

    if parent_key not in current_dict:
        current_dict[parent_key] = {}

    # Exactly one level below this one: the caller merges the fragment.
    if '.' not in child_key:
        return {parent_key: {child_key: value}}

    returned_dict = json_level(child_key, value, current_dict[parent_key])
    next_key, _, remainder = child_key.partition('.')
    # Merge only when the recursive call handled the bottom level, i.e. it
    # returned {next_key: {leaf: value}} without storing it. For deeper keys
    # the value is already stored further down, and the fragment bubbling up
    # belongs to a deeper level; merging it here just because a segment name
    # repeats (e.g. "A.B.C.B.x") would write the leaf at the wrong depth.
    if '.' not in remainder:
        current_dict[parent_key][next_key].update(returned_dict[next_key])
    return returned_dict
    
def json_inflate(df):
    """Rebuild nested dicts from a DataFrame with dot-separated column names.

    Each row becomes one dict. A column named ``"File.Notes.PS.alpha"`` is
    stored at ``record["File"]["Notes"]["PS"]["alpha"]``; a column without a
    dot is stored directly under its own name. Every segment is resolved
    relative to its own parent, so a name that repeats at different depths
    (``"A.B.A.x"``) is placed only at its full path.

    Values are stored exactly as ``df.iloc[i].items()`` yields them (no type
    conversion), and ``None`` is kept at every depth. A column that is both a
    leaf and a prefix of another column (``"File"`` and ``"File.One"``) is
    not supported.

    Args:
        df: DataFrame whose column labels are strings.

    Returns:
        A list with one nested dict per row, in positional row order.
    """
    json_list = []
    for position in range(len(df)):
        json_dict = {}
        for key, value in df.iloc[position].items():
            *parent_keys, leaf_key = key.split('.')
            # Walk down to the dict that owns the leaf, creating levels as
            # they are first seen.
            node = json_dict
            for parent_key in parent_keys:
                node = node.setdefault(parent_key, {})
            node[leaf_key] = value
        json_list.append(json_dict)
    return json_list

In [32]:
# Rebuild the nested records from the flat DataFrame (this rebinds json_list)
# and print them for comparison with the list printed after the XML
# conversion.
json_list = json_inflate(user_flat)
print(json_list)

[{'Name': 'Roope Wirta', 'Email': 'roope.wirta@example.com', 'City': 'Pargas', 'File': {'One': 'RrP0', 'Two': 'RrP1', 'Three': 'RrP2', 'Notes': {'Note1': 'Hello', 'Note2': 'Goodbye', 'PS': {'alpha': 'Hi', 'beta': 'Bye'}}}}, {'Name': 'Nathaniel Phillips', 'Email': 'nathaniel.phillips@example.com', 'City': 'Rockhampton', 'File': {'One': 'NnR0', 'Two': 'NnR1', 'Three': 'NnR2', 'Notes': {'Note1': 'Hello', 'Note2': 'Goodbye', 'PS': {'alpha': 'Hi', 'beta': 'Bye'}}}}]


In [33]:
# Create a JSON File
filename = "./json/users_complex.json"
# Serialise before opening: mode "w" truncates the target as soon as it is
# opened, so a json.dumps failure (e.g. a value json cannot encode) inside
# the `with` block would leave an empty file behind.
json_data = json.dumps(json_list)
with open(filename, "w") as file:
    file.write(json_data)