# JSON Explorer Notebook

In [None]:
import json
import pandas as pd
from pathlib import Path

In [None]:
def load_json_file(file_path):
    """Load a JSON file and return its parsed contents.

    The file is read in binary mode so that ``json`` detects the encoding
    itself (UTF-8, UTF-8 with BOM, UTF-16 or UTF-32) instead of relying on
    the platform's locale-dependent default text encoding.

    Args:
        file_path: Path to the JSON file (``str`` or path-like object).

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_path, "rb") as f:
        return json.load(f)

def explore_json(data, path="", max_depth=3, current_depth=0, max_keys=5):
    """Recursively print a summary of a decoded JSON structure.

    One line is printed per visited node:

    * dicts report their key count, then recurse into the first
      ``max_keys`` entries and note how many keys were skipped;
    * lists report their length, then recurse into the first item only and
      note how many items were skipped;
    * any other value is printed with its type name and its ``str()`` form,
      truncated to 50 characters.

    Args:
        data: The decoded JSON value to describe.
        path: Label for ``data`` built up during recursion (``a.b[0].c``).
            The root call normally leaves this empty, so root-level lines
            start with a space.
        max_depth: Nodes at this depth or deeper are not expanded; a
            "max depth reached" line is printed instead (this also applies
            to primitive values).
        current_depth: Depth of ``data`` relative to the root; callers
            normally leave this at 0.
        max_keys: Maximum number of dict entries expanded per dict.

    Raises:
        ValueError: If ``max_keys`` is negative.
    """
    if max_keys < 0:
        raise ValueError("max_keys must be non-negative")

    if current_depth >= max_depth:
        print(f"{path} ... (max depth reached)")
        return

    if isinstance(data, dict):
        print(f"{path} (dict with {len(data)} keys)")
        # Stop iterating as soon as the preview limit is hit rather than
        # copying every item of a potentially large dict into a list.
        for index, (key, value) in enumerate(data.items()):
            if index >= max_keys:
                break
            child_path = f"{path}.{key}" if path else key
            explore_json(value, child_path, max_depth, current_depth + 1, max_keys)
        if len(data) > max_keys:
            print(f"{path} ... ({len(data) - max_keys} more keys)")
    elif isinstance(data, list):
        print(f"{path} (list with {len(data)} items)")
        if data:
            # Only the first element is expanded, as a sample of the
            # list's structure.
            explore_json(data[0], f"{path}[0]", max_depth, current_depth + 1, max_keys)
            if len(data) > 1:
                print(f"{path} ... ({len(data) - 1} more items)")
    else:
        # Primitive value: show its type and a preview capped at 50 chars.
        value_str = str(data)
        if len(value_str) > 50:
            value_str = value_str[:47] + "..."
        print(f"{path} = {type(data).__name__}: {value_str}")

In [None]:
# Path of the JSON file to explore. The value below is only a placeholder;
# replace it with the path to a real file before running the lines below.
json_file_path = "path/to/your/file.json"

# Uncomment to load the file and print a summary of its structure.
# With max_depth=3, nodes at depth 3 or deeper are not expanded.
# data = load_json_file(json_file_path)
# explore_json(data, max_depth=3)

In [None]:
# Analyze df_check.csv files
# NOTE: this is an absolute path on one specific machine; point it at your
# own "du_cases" directory before running the analysis below.
cases_dir = Path("/Users/tod/Desktop/LayoutLM_annotation_ms/du_cases")

# Uncomment to analyze CSV files. The template below:
#   1. collects every sub-directory of cases_dir (one per case),
#   2. keeps those containing processing/form-recogniser/df_check.csv,
#   3. loads the first such CSV with pandas and prints its first rows.
# case_dirs = [d for d in cases_dir.iterdir() if d.is_dir()]
# 
# csv_files = []
# for case_path in case_dirs:
#     csv_path = case_path / "processing" / "form-recogniser" / "df_check.csv"
#     if csv_path.exists():
#         csv_files.append((case_path.name, csv_path))
# 
# if csv_files:
#     case_id, csv_path = csv_files[0]
#     df = pd.read_csv(csv_path)
#     print(df.head())