In [15]:
import json
import pandas as pd
from pathlib import Path
from collections.abc import MutableMapping

def flatten_json(nested_json, parent_key='', sep='_'):
    """Recursively flatten nested JSON-like data into a single-level dict.

    Mappings contribute their keys and lists contribute their positional
    indices; path components are joined with ``sep``.

    Args:
        nested_json: A mapping, list or scalar (typically ``json.loads`` output).
        parent_key: Prefix prepended to every generated key. Empty means
            "no prefix".
        sep: Separator placed between path components.

    Returns:
        dict: Flattened ``{path: scalar}`` pairs. A scalar input yields
        ``{parent_key: value}``. Empty mappings and empty lists contribute no
        keys at all. When two paths collide, the last value wins.
    """
    flat = {}

    def _walk(node, prefix):
        if isinstance(node, MutableMapping):
            for key, value in node.items():
                _walk(value, f"{prefix}{sep}{key}" if prefix else key)
        elif isinstance(node, list):
            for idx, value in enumerate(node):
                # Same guard as the mapping branch: without it a top-level
                # list produced keys with a leading separator ("_0", "_1").
                _walk(value, f"{prefix}{sep}{idx}" if prefix else str(idx))
        else:
            flat[prefix] = node

    # One shared accumulator avoids rebuilding a list and a dict per level.
    _walk(nested_json, parent_key)
    return flat

def normalize_dlt_data(jsonl_file_path):
    """Load a JSON Lines file and return one flattened row per record.

    Args:
        jsonl_file_path: Path to a UTF-8 encoded file holding one JSON
            document per line.

    Returns:
        pandas.DataFrame: One row per record, with columns named after the
        flattened key paths produced by ``flatten_json``. Empty when the file
        holds no records.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    normalized_records = []

    with open(jsonl_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (e.g. a trailing newline at end of file) are not
            # records; json.loads would raise on them.
            if not line.strip():
                continue
            normalized_records.append(flatten_json(json.loads(line)))

    # Build the frame once from the list of dicts rather than row by row.
    return pd.DataFrame(normalized_records)

# NOTE(review): absolute, machine-specific input path; point it at your own
# JSONL export before running this cell.
jsonl_file_path = Path(r"C:\Users\munna\Downloads\flat_t_os__items_1741258336.7634916_4b1a615482.jsonl")


# Flatten every record of the file into one DataFrame row.
df = normalize_dlt_data(jsonl_file_path)

# View the first few rows
print(df.head())

# Save as CSV in the current working directory; index=False omits the row index.
df.to_csv('normalized_output.csv', index=False)


  articles_0_ArticleId articles_0_Position  articles_0_Quantity  \
0           9839261880                   2               3240.0   
1           9824896480                   2                 48.0   
2           9845215780                   1                240.0   
3           9818362280                   2              24000.0   
4           9859518980                   4                 12.0   

   articles_0_Weight articles_1_ArticleId articles_1_Position  \
0              700.0           9839257180                   1   
1             7100.0           9824896480                   1   
2            32500.0                  NaN                 NaN   
3                7.0           9818361480                   1   
4                0.0           9859518980                   5   

   articles_1_Quantity  articles_1_Weight basics_CommunicatedDate  \
0               3240.0              654.0    2025-03-06T10:11:25Z   
1                 48.0             7100.0    2025-03-06T10:11:25Z   

In [17]:
import json
import os
from collections.abc import MutableMapping

# Function to recursively flatten nested JSON
def flatten_json(nested_json, parent_key='', sep='_'):
    """Recursively flattens nested JSON object into a flat dictionary.

    Args:
        nested_json: A mapping, list or scalar (typically ``json.loads`` output).
        parent_key: Prefix for every generated key; empty means no prefix.
        sep: Separator inserted between path components.

    Returns:
        dict: ``{path: scalar}`` pairs. Mapping keys and list indices become
        path components. A scalar input yields ``{parent_key: value}``.
        Empty mappings and lists contribute no keys. On key collisions the
        last value wins.
    """
    if isinstance(nested_json, MutableMapping):
        children = nested_json.items()
    elif isinstance(nested_json, list):
        # List positions act as keys so both containers share one code path.
        children = ((str(idx), item) for idx, item in enumerate(nested_json))
    else:
        return {parent_key: nested_json}

    flat = {}
    for key, value in children:
        # Guarding on parent_key for lists as well as mappings prevents a
        # top-level list from producing keys with a leading separator ("_0").
        child_key = f"{parent_key}{sep}{key}" if parent_key else key
        flat.update(flatten_json(value, child_key, sep=sep))
    return flat

# Function to split nested data into multiple flat JSON files
def process_and_split_jsonl(input_jsonl_path, output_folder):
    """Split each JSONL record into one flat "main" file plus one file per list field.

    For record number ``n`` (1-based, counting non-blank lines only):

    * ``record_<n>_main.json`` holds the scalar fields, with nested dicts
      flattened into ``<key>_<subkey>`` entries.
    * ``record_<n>_<key>.json`` holds, for every top-level list field ``key``,
      a list of its items, each flattened with ``flatten_json``.

    Args:
        input_jsonl_path: Path to a UTF-8 JSON Lines file (one object per line).
        output_folder: Directory to write into; created if it does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    os.makedirs(output_folder, exist_ok=True)

    count = 0
    with open(input_jsonl_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (e.g. a trailing newline) are not records and would
            # make json.loads raise.
            if not line.strip():
                continue
            count += 1
            record = json.loads(line)

            main_record = {}
            nested_records = {}

            for key, value in record.items():
                if isinstance(value, list):
                    # Lists (like articles or handling_units) get their own file.
                    nested_records[key] = value
                elif isinstance(value, dict):
                    # Nested dicts are flattened straight into the main record.
                    main_record.update(flatten_json(value, key))
                else:
                    main_record[key] = value

            main_file = os.path.join(output_folder, f'record_{count}_main.json')
            with open(main_file, 'w', encoding='utf-8') as f:
                json.dump(main_record, f, indent=4)

            for nested_key, nested_list in nested_records.items():
                # Flatten before opening the file so a failure cannot leave
                # an empty, half-written output behind.
                flattened_list = [flatten_json(item) for item in nested_list]
                nested_file = os.path.join(output_folder, f'record_{count}_{nested_key}.json')
                with open(nested_file, 'w', encoding='utf-8') as f:
                    json.dump(flattened_list, f, indent=4)

            print(f"Processed and saved record {count}")

# Example Usage
# Entry point: split the JSONL export into per-record flat JSON files.
if __name__ == "__main__":
    # NOTE(review): both paths are absolute and machine-specific; adjust them
    # before running on another machine.
    input_jsonl_path = r"C:\Users\munna\Downloads\flat_t_os__items_1741258336.7634916_4b1a615482.jsonl"  # Update with your actual path
    output_folder = r"C:\Users\munna\Downloads\flattened_output"

    # Writes record_<n>_main.json plus one record_<n>_<key>.json per list field.
    process_and_split_jsonl(input_jsonl_path, output_folder)

    print(f"All flattened data saved to '{output_folder}' folder.")


Processed and saved record 1
Processed and saved record 2
Processed and saved record 3
Processed and saved record 4
Processed and saved record 5
Processed and saved record 6
Processed and saved record 7
Processed and saved record 8
Processed and saved record 9
Processed and saved record 10
Processed and saved record 11
Processed and saved record 12
Processed and saved record 13
Processed and saved record 14
Processed and saved record 15
Processed and saved record 16
Processed and saved record 17
Processed and saved record 18
Processed and saved record 19
Processed and saved record 20
Processed and saved record 21
Processed and saved record 22
Processed and saved record 23
Processed and saved record 24
Processed and saved record 25
Processed and saved record 26
Processed and saved record 27
Processed and saved record 28
Processed and saved record 29
Processed and saved record 30
Processed and saved record 31
Processed and saved record 32
Processed and saved record 33
Processed and saved

In [18]:
import json
import os

def process_jsonl_to_parent_child(input_jsonl_path, output_folder):
    """Write a parent file and a child file for every record of a JSONL file.

    For record number ``n`` (1-based, counting non-blank lines only):

    * ``record_<n>_parent.json`` holds every top-level field whose value is
      not a list (nested dicts are kept as-is, not flattened).
    * ``record_<n>_child.json`` holds every top-level field whose value is a
      list, keyed by field name.

    Args:
        input_jsonl_path: Path to a UTF-8 JSON Lines file (one object per line).
        output_folder: Directory to write into; created if it does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    os.makedirs(output_folder, exist_ok=True)

    with open(input_jsonl_path, 'r', encoding='utf-8') as file:
        # Blank lines (e.g. a trailing newline) are skipped up front; they are
        # not records and would make json.loads raise.
        records = (json.loads(line) for line in file if line.strip())

        for count, record in enumerate(records, start=1):
            # Partition the fields: lists are children, everything else is parent.
            parent_data = {}
            child_data = {}
            for key, value in record.items():
                target = child_data if isinstance(value, list) else parent_data
                target[key] = value

            parent_file = os.path.join(output_folder, f'record_{count}_parent.json')
            with open(parent_file, 'w', encoding='utf-8') as parent_out:
                json.dump(parent_data, parent_out, indent=4)

            child_file = os.path.join(output_folder, f'record_{count}_child.json')
            with open(child_file, 'w', encoding='utf-8') as child_out:
                json.dump(child_data, child_out, indent=4)

            print(f"Saved: {parent_file}, {child_file}")

# Example Usage
# Entry point: split the JSONL export into parent/child JSON files per record.
if __name__ == "__main__":
    # NOTE(review): both paths are absolute and machine-specific; adjust them
    # before running on another machine.
    input_jsonl_path = r"C:\Users\munna\Downloads\flat_t_os__items_1741258336.7634916_4b1a615482.jsonl"  # Adjust to your file path
    output_folder = r"C:\Users\munna\Downloads\parent_child_output"

    # Writes record_<n>_parent.json and record_<n>_child.json for each record.
    process_jsonl_to_parent_child(input_jsonl_path, output_folder)

    print(f"Processing complete. Files saved in '{output_folder}'.")


Saved: C:\Users\munna\Downloads\parent_child_output\record_1_parent.json, C:\Users\munna\Downloads\parent_child_output\record_1_child.json
Saved: C:\Users\munna\Downloads\parent_child_output\record_2_parent.json, C:\Users\munna\Downloads\parent_child_output\record_2_child.json
Saved: C:\Users\munna\Downloads\parent_child_output\record_3_parent.json, C:\Users\munna\Downloads\parent_child_output\record_3_child.json
Saved: C:\Users\munna\Downloads\parent_child_output\record_4_parent.json, C:\Users\munna\Downloads\parent_child_output\record_4_child.json
Saved: C:\Users\munna\Downloads\parent_child_output\record_5_parent.json, C:\Users\munna\Downloads\parent_child_output\record_5_child.json
Saved: C:\Users\munna\Downloads\parent_child_output\record_6_parent.json, C:\Users\munna\Downloads\parent_child_output\record_6_child.json
Saved: C:\Users\munna\Downloads\parent_child_output\record_7_parent.json, C:\Users\munna\Downloads\parent_child_output\record_7_child.json
Saved: C:\Users\munna\Downl

In [19]:
import json
import os

def process_jsonl_to_parent_child_with_links(input_jsonl_path, output_folder):
    """Split each record into a parent file that links to per-list child files.

    For record number ``n`` (1-based, counting non-blank lines only):

    * every top-level list field ``key`` is written unchanged to
      ``record_<n>_<key>.json``;
    * ``record_<n>_parent.json`` holds the remaining fields plus a
      ``child_files`` entry mapping each list field name to the file name of
      its child file (file name only, relative to ``output_folder``).

    Note that a record's own top-level ``child_files`` field, if it has one
    that is not a list, is overwritten by the generated mapping.

    Args:
        input_jsonl_path: Path to a UTF-8 JSON Lines file (one object per line).
        output_folder: Directory to write into; created if it does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    os.makedirs(output_folder, exist_ok=True)

    with open(input_jsonl_path, 'r', encoding='utf-8') as file:
        # Blank lines (e.g. a trailing newline) are skipped; they are not
        # records and would make json.loads raise.
        records = (json.loads(line) for line in file if line.strip())

        for count, record in enumerate(records, start=1):
            parent_data = {}
            child_files = {}

            for key, value in record.items():
                if not isinstance(value, list):
                    # Simple field (scalar or dict): stays in the parent record.
                    parent_data[key] = value
                    continue

                # Nested array: goes to its own child file.
                child_file_name = f"record_{count}_{key}.json"
                child_file_path = os.path.join(output_folder, child_file_name)
                with open(child_file_path, 'w', encoding='utf-8') as child_file:
                    json.dump(value, child_file, indent=4)

                # Remember the file name so the parent can reference it.
                child_files[key] = child_file_name

            parent_data['child_files'] = child_files

            parent_file_name = f"record_{count}_parent.json"
            parent_file_path = os.path.join(output_folder, parent_file_name)
            with open(parent_file_path, 'w', encoding='utf-8') as parent_file:
                json.dump(parent_data, parent_file, indent=4)

            print(f"Saved: {parent_file_path} with child files: {list(child_files.values())}")

# Example Usage
# Entry point: write per-record parent files that reference their child files.
if __name__ == "__main__":
    # NOTE(review): both paths are absolute and machine-specific; adjust them
    # before running on another machine.
    input_jsonl_path = r"C:\Users\munna\Downloads\flat_t_os__items_1741258336.7634916_4b1a615482.jsonl"  # Replace with your file path
    output_folder = r"C:\Users\munna\Downloads\parent_child_with_links_output"

    # Writes record_<n>_parent.json plus one record_<n>_<key>.json per list field.
    process_jsonl_to_parent_child_with_links(input_jsonl_path, output_folder)

    print(f"Processing complete. Files saved in '{output_folder}'.")


Saved: C:\Users\munna\Downloads\parent_child_with_links_output\record_1_parent.json with child files: ['record_1_articles.json', 'record_1_handling_units.json']
Saved: C:\Users\munna\Downloads\parent_child_with_links_output\record_2_parent.json with child files: ['record_2_articles.json', 'record_2_handling_units.json']
Saved: C:\Users\munna\Downloads\parent_child_with_links_output\record_3_parent.json with child files: ['record_3_articles.json', 'record_3_handling_units.json']
Saved: C:\Users\munna\Downloads\parent_child_with_links_output\record_4_parent.json with child files: ['record_4_articles.json', 'record_4_handling_units.json']
Saved: C:\Users\munna\Downloads\parent_child_with_links_output\record_5_parent.json with child files: ['record_5_articles.json', 'record_5_handling_units.json']
Saved: C:\Users\munna\Downloads\parent_child_with_links_output\record_6_parent.json with child files: ['record_6_articles.json', 'record_6_handling_units.json']
Saved: C:\Users\munna\Downloads\pa

In [21]:
import json
import os

def extract_and_split_jsonl(input_jsonl_path, output_folder):
    """Write the ``basics``, ``articles`` and ``handling_units`` parts of each record.

    For record number ``n`` (1-based, counting non-blank lines only) three
    files are always written, even when a section is absent from the record:

    * ``record_<n>_basics.json``          (``{}`` when missing)
    * ``record_<n>_articles.json``        (``[]`` when missing)
    * ``record_<n>_handling_units.json``  (``[]`` when missing)

    Any other top-level field of the record is ignored.

    Args:
        input_jsonl_path: Path to a UTF-8 JSON Lines file (one object per line).
        output_folder: Directory to write into; created if it does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Section name -> value written when the record lacks that section.
    section_defaults = (('basics', {}), ('articles', []), ('handling_units', []))

    with open(input_jsonl_path, 'r', encoding='utf-8') as file:
        # Blank lines (e.g. a trailing newline) are skipped; they are not
        # records and would make json.loads raise.
        records = (json.loads(line) for line in file if line.strip())

        for count, record in enumerate(records, start=1):
            written = {}
            for section, default in section_defaults:
                payload = record[section] if section in record else default
                path = os.path.join(output_folder, f'record_{count}_{section}.json')
                with open(path, 'w', encoding='utf-8') as section_out:
                    json.dump(payload, section_out, indent=4)
                written[section] = path

            print(f"Processed Record {count}:")
            print(f"  - Basics saved to {written['basics']}")
            print(f"  - Articles saved to {written['articles']}")
            print(f"  - Handling Units saved to {written['handling_units']}")

# Example Usage
# Entry point: write basics/articles/handling_units files for every record.
if __name__ == "__main__":
    # NOTE(review): both paths are absolute and machine-specific; adjust them
    # before running on another machine.
    input_jsonl_path = r"C:\Users\munna\Downloads\flat_t_os__items_1741258336.7634916_4b1a615482.jsonl"  # Replace with your correct path
    output_folder = r"C:\Users\munna\Downloads\parent_child_three_files_output"

    # Writes three files per record: basics, articles and handling_units.
    extract_and_split_jsonl(input_jsonl_path, output_folder)

    print(f"\n✅ Processing complete! All files are saved in '{output_folder}'.")


Processed Record 1:
  - Basics saved to C:\Users\munna\Downloads\parent_child_three_files_output\record_1_basics.json
  - Articles saved to C:\Users\munna\Downloads\parent_child_three_files_output\record_1_articles.json
  - Handling Units saved to C:\Users\munna\Downloads\parent_child_three_files_output\record_1_handling_units.json
Processed Record 2:
  - Basics saved to C:\Users\munna\Downloads\parent_child_three_files_output\record_2_basics.json
  - Articles saved to C:\Users\munna\Downloads\parent_child_three_files_output\record_2_articles.json
  - Handling Units saved to C:\Users\munna\Downloads\parent_child_three_files_output\record_2_handling_units.json
Processed Record 3:
  - Basics saved to C:\Users\munna\Downloads\parent_child_three_files_output\record_3_basics.json
  - Articles saved to C:\Users\munna\Downloads\parent_child_three_files_output\record_3_articles.json
  - Handling Units saved to C:\Users\munna\Downloads\parent_child_three_files_output\record_3_handling_units.jso

In [22]:
import json
import os
from collections.abc import MutableMapping

# Function to recursively flatten nested JSON
def flatten_json(nested_json, parent_key='', sep='_'):
    """Recursively flattens nested JSON object into a flat dictionary.

    Args:
        nested_json: A mapping, list or scalar (typically ``json.loads`` output).
        parent_key: Prefix for every generated key; empty means no prefix.
        sep: Separator inserted between path components.

    Returns:
        dict: ``{path: scalar}`` pairs where mapping keys and list indices
        form the path. A scalar input yields ``{parent_key: value}``. Empty
        mappings and lists contribute no keys. On key collisions the last
        value wins.
    """
    def _pairs(node, prefix):
        # Yields (flattened_key, scalar) pairs depth-first, in input order.
        if isinstance(node, MutableMapping):
            for key, value in node.items():
                yield from _pairs(value, f"{prefix}{sep}{key}" if prefix else key)
        elif isinstance(node, list):
            for idx, value in enumerate(node):
                # Guard on the prefix like the mapping branch does, so a
                # top-level list yields "0", "1", ... instead of "_0", "_1".
                yield from _pairs(value, f"{prefix}{sep}{idx}" if prefix else str(idx))
        else:
            yield prefix, node

    return dict(_pairs(nested_json, parent_key))

# Process and split all data into 3 combined files (main, articles, handling_units)
def process_and_combine_jsonl(input_jsonl_path, output_folder):
    """Combine all records of a JSONL file into three flat JSON files.

    Output files, each holding a JSON array:

    * ``main.json``           - one entry per record with every field except
      ``articles`` / ``handling_units``; nested dicts are flattened into
      ``<key>_<subkey>`` entries.
    * ``articles.json``       - every item of every record's ``articles``
      list, flattened.
    * ``handling_units.json`` - every item of every record's
      ``handling_units`` list, flattened.

    The articles file used to be written as ``rarticles.json``. That was a
    typo and the name now matches the field it holds.

    Args:
        input_jsonl_path: Path to a UTF-8 JSON Lines file (one object per line).
        output_folder: Directory to write into; created if it does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    os.makedirs(output_folder, exist_ok=True)

    child_keys = ("articles", "handling_units")

    all_main_records = []
    all_articles = []
    all_handling_units = []

    with open(input_jsonl_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (e.g. a trailing newline) are not records and would
            # make json.loads raise.
            if not line.strip():
                continue
            record = json.loads(line)

            # Everything that is not a child list belongs to the main record.
            main_record = {}
            for key, value in record.items():
                if key in child_keys:
                    continue
                if isinstance(value, dict):
                    main_record.update(flatten_json(value, key))
                else:
                    main_record[key] = value
            all_main_records.append(main_record)

            # "or []" also covers an explicit JSON null, which record.get's
            # default would not replace and which cannot be iterated.
            all_articles.extend(
                flatten_json(article) for article in (record.get("articles") or []))
            all_handling_units.extend(
                flatten_json(unit) for unit in (record.get("handling_units") or []))

    outputs = (
        ('main.json', all_main_records),
        ('articles.json', all_articles),
        ('handling_units.json', all_handling_units),
    )
    for file_name, payload in outputs:
        with open(os.path.join(output_folder, file_name), 'w', encoding='utf-8') as out_file:
            json.dump(payload, out_file, indent=4)

    print(f"✅ Process complete! All data saved into 3 files in '{output_folder}' folder.")

# Example Usage
# Entry point: combine the whole JSONL export into three flat JSON files.
if __name__ == "__main__":
    # NOTE(review): both paths are absolute and machine-specific; adjust them
    # before running on another machine.
    input_jsonl_path = r"C:\Users\munna\Downloads\flat_t_os__items_1741258336.7634916_4b1a615482.jsonl"  # Update your path
    output_folder = r"C:\Users\munna\Downloads\flattened_outputs"

    # The function prints its own completion message as well.
    process_and_combine_jsonl(input_jsonl_path, output_folder)

    print(f"✅ All flattened data saved to '{output_folder}' folder.")


✅ Process complete! All data saved into 3 files in 'C:\Users\munna\Downloads\flattened_outputs' folder.
✅ All flattened data saved to 'C:\Users\munna\Downloads\flattened_outputs' folder.


In [23]:
import json
import os
from collections.abc import MutableMapping

# Function to recursively flatten nested JSON
def flatten_json(nested_json, parent_key='', sep='_'):
    """Flatten nested mappings into one dict, leaving lists untouched.

    Only mappings are descended into. Any other value, lists included, is
    stored as-is under the key path accumulated so far, because lists are
    handled separately as child collections.

    Args:
        nested_json: A mapping, list or scalar.
        parent_key: Prefix for every generated key; empty means no prefix.
        sep: Separator inserted between path components.

    Returns:
        dict: ``{path: value}`` pairs. A non-mapping input yields
        ``{parent_key: value}``. Empty mappings contribute no keys.
    """
    # Lists and scalars are leaves: keep them whole under the current key.
    if not isinstance(nested_json, MutableMapping):
        return {parent_key: nested_json}

    flattened = {}
    for name, child in nested_json.items():
        child_key = f"{parent_key}{sep}{name}" if parent_key else name
        flattened.update(flatten_json(child, child_key, sep=sep))
    return flattened

# Process JSONL file into parent and dynamic children
def process_jsonl_dynamically(input_jsonl_path, output_folder):
    """Split a JSONL file into one parent file and one file per list field.

    Output files, each holding a JSON array:

    * ``main.json``  - one entry per record with its scalar fields; nested
      dicts are flattened into ``<key>_<subkey>`` entries.
    * ``<key>.json`` - for every top-level field ``key`` that holds a list in
      at least one record, the flattened items of that list across all
      records, in input order.

    Child items carry no reference back to their parent record.

    Args:
        input_jsonl_path: Path to a UTF-8 JSON Lines file (one object per line).
        output_folder: Directory to write into; created if it does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    os.makedirs(output_folder, exist_ok=True)

    all_main_records = []
    all_children = {}

    with open(input_jsonl_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (e.g. a trailing newline) are not records and would
            # make json.loads raise.
            if not line.strip():
                continue
            record = json.loads(line)

            main_record = {}
            for key, value in record.items():
                if isinstance(value, list):
                    # Child array: registered even when empty, so the child
                    # file is still written.
                    all_children.setdefault(key, []).extend(
                        flatten_json(child_item) for child_item in value)
                elif isinstance(value, dict):
                    # Nested dict: flattened into the parent record.
                    main_record.update(flatten_json(value, key))
                else:
                    # Scalar values stay directly in the parent.
                    main_record[key] = value

            all_main_records.append(main_record)

    main_file_path = os.path.join(output_folder, 'main.json')
    with open(main_file_path, 'w', encoding='utf-8') as main_file:
        json.dump(all_main_records, main_file, indent=4)

    for child_name, child_list in all_children.items():
        child_file_path = os.path.join(output_folder, f'{child_name}.json')
        with open(child_file_path, 'w', encoding='utf-8') as child_file:
            json.dump(child_list, child_file, indent=4)

    print(f"✅ Processing complete! Data saved to '{output_folder}'")

# Example Usage
# Entry point: split the JSONL export into main.json plus one file per list field.
if __name__ == "__main__":
    # NOTE(review): both paths are absolute and machine-specific; adjust them
    # before running on another machine.
    input_jsonl_path = r"C:\Users\munna\Downloads\flat_t_os__items_1741258336.7634916_4b1a615482.jsonl"  # Change this path as needed
    output_folder = r"C:\Users\munna\Downloads\dynamic_flattened_output"

    # The function prints its own completion message as well.
    process_jsonl_dynamically(input_jsonl_path, output_folder)

    print(f"✅ All data saved in '{output_folder}'.")


✅ Processing complete! Data saved to 'C:\Users\munna\Downloads\dynamic_flattened_output'
✅ All data saved in 'C:\Users\munna\Downloads\dynamic_flattened_output'.


In [25]:
import json
import os
from collections.abc import MutableMapping

# Function to recursively flatten nested JSON
def flatten_json(nested_json, parent_key='', sep='_'):
    """Collapse nested mappings into a single-level dict; lists are kept whole.

    Mappings are walked recursively and their keys joined with ``sep``.
    Lists and scalars are treated as leaves and stored unchanged, since lists
    are handled elsewhere as child collections.

    Args:
        nested_json: A mapping, list or scalar.
        parent_key: Prefix for every generated key; empty means no prefix.
        sep: Separator inserted between path components.

    Returns:
        dict: ``{path: value}`` pairs. A non-mapping input yields
        ``{parent_key: value}``. Empty mappings contribute no keys.
    """
    result = {}

    def _collect(node, prefix):
        if isinstance(node, MutableMapping):
            for name, child in node.items():
                _collect(child, f"{prefix}{sep}{name}" if prefix else name)
            return
        # Leaf (scalar or list): record it under the path built so far.
        result[prefix] = node

    _collect(nested_json, parent_key)
    return result

# Function to detect primary object name (first non-list top-level key)
def detect_main_object_name(record):
    """Return the first top-level key of ``record`` whose value is not a list.

    Args:
        record: Mapping of top-level field names to values.

    Returns:
        The first key, in iteration order, whose value is not a list, or
        ``"main"`` when every value is a list or the record is empty.
    """
    non_list_keys = (name for name, value in record.items() if not isinstance(value, list))
    return next(non_list_keys, "main")

# Process JSONL into parent and children, dynamically naming parent file
def process_jsonl_dynamically(input_jsonl_path, output_folder):
    """Split a JSONL file into a dynamically named parent file plus child files.

    Output files, each holding a JSON array:

    * ``<main>.json`` - one entry per record with its scalar fields; nested
      dicts are flattened into ``<key>_<subkey>`` entries. ``<main>`` is what
      ``detect_main_object_name`` returns for the first record, or ``main``
      when the file holds no records.
    * ``<key>.json``  - for every top-level field ``key`` that holds a list in
      at least one record, the flattened items of that list across all
      records, in input order.

    The file is read in a single pass.

    Args:
        input_jsonl_path: Path to a UTF-8 JSON Lines file (one object per line).
        output_folder: Directory to write into; created if it does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    os.makedirs(output_folder, exist_ok=True)

    all_main_records = []
    all_children = {}
    main_object_name = None  # decided by the first real record

    with open(input_jsonl_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (including a blank first line or an empty file) are
            # not records and would make json.loads raise.
            if not line.strip():
                continue
            record = json.loads(line)

            if main_object_name is None:
                main_object_name = detect_main_object_name(record)

            main_record = {}
            for key, value in record.items():
                if isinstance(value, list):
                    # Child array: registered even when empty, so the child
                    # file is still written.
                    all_children.setdefault(key, []).extend(
                        flatten_json(child_item) for child_item in value)
                elif isinstance(value, dict):
                    main_record.update(flatten_json(value, key))
                else:
                    main_record[key] = value

            all_main_records.append(main_record)

    if main_object_name is None:
        # No records at all: fall back to the same default name that
        # detect_main_object_name uses.
        main_object_name = "main"

    main_file_name = f"{main_object_name}.json"
    main_file_path = os.path.join(output_folder, main_file_name)
    with open(main_file_path, 'w', encoding='utf-8') as main_file:
        json.dump(all_main_records, main_file, indent=4)

    for child_name, child_list in all_children.items():
        child_file_path = os.path.join(output_folder, f'{child_name}.json')
        with open(child_file_path, 'w', encoding='utf-8') as child_file:
            json.dump(child_list, child_file, indent=4)

    print(f"✅ Processing complete! Parent data saved as '{main_file_name}' in '{output_folder}'")

# Example Usage
# Entry point: split the JSONL export into a parent file plus one file per list field.
if __name__ == "__main__":
    # NOTE(review): both paths are absolute and machine-specific; adjust them
    # before running on another machine.
    input_jsonl_path = r"C:\Users\munna\Downloads\flat_t_os__items_1741258336.7634916_4b1a615482.jsonl"  # Update to actual path
    output_folder = r"C:\Users\munna\Downloads\dynamic_flattened_output"

    # The function prints its own completion message as well.
    process_jsonl_dynamically(input_jsonl_path, output_folder)

    print(f"✅ All data saved to '{output_folder}'")


✅ Processing complete! Parent data saved as 'basics.json' in 'C:\Users\munna\Downloads\dynamic_flattened_output'
✅ All data saved to 'C:\Users\munna\Downloads\dynamic_flattened_output'
