In [6]:
import os
import json

def _append_json_lines(value, full_key: str, lines: list) -> None:
    """Append the flattened text lines for one JSON value to *lines*."""
    if isinstance(value, dict):
        if not value:
            # Keep the key visible instead of emitting a blank line.
            lines.append(f"{full_key}: {{}}")
        for sub_key, sub_value in value.items():
            _append_json_lines(sub_value, f"{full_key}.{sub_key}", lines)
    elif isinstance(value, list):
        lines.append(f"{full_key}:")
        for index, item in enumerate(value):
            if isinstance(item, (dict, list)):
                # Flatten nested containers under an indexed key rather than
                # dumping their Python repr into the text.
                _append_json_lines(item, f"{full_key}[{index}]", lines)
            else:
                lines.append(f"- {item}")
    else:
        lines.append(f"{full_key}: {value}")


def json_to_embedding_text(data: dict, prefix="") -> str:
    """Flatten a decoded JSON object into newline-separated ``key: value`` text.

    Nested dicts contribute dotted keys (``parent.child: value``). A list
    produces a ``key:`` header line followed by one ``- item`` line per scalar
    item; dict or list items are flattened recursively under ``key[index]``.
    An empty nested dict is rendered as ``key: {}``.

    Args:
        data: Mapping to flatten (typically the result of ``json.load``).
        prefix: Dotted key path prepended to every key; ``""`` means none.

    Returns:
        The flattened lines joined with ``"\\n"``; ``""`` when *data* is empty.
    """
    lines = []
    for key, value in data.items():
        full_key = f"{key}" if prefix == "" else f"{prefix}.{key}"
        _append_json_lines(value, full_key, lines)
    return "\n".join(lines)

def process_json_folder(folder_path: str, output_file: str) -> None:
    """Flatten every ``.json`` file in a folder into one combined text file.

    Each file that parses to a JSON object is converted with
    ``json_to_embedding_text`` and written under a ``### File: <name>``
    header. Files that cannot be decoded or parsed, or whose top-level value
    is not an object, are reported on stdout and skipped.

    Args:
        folder_path: Directory to scan (not recursive).
        output_file: Path of the text file to write; it is overwritten.
    """
    all_texts = []

    # os.listdir returns entries in arbitrary order; sort so the combined
    # output is reproducible from run to run.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".json"):
            continue
        file_path = os.path.join(folder_path, filename)
        try:
            # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM,
            # which json.load would otherwise reject.
            with open(file_path, "r", encoding="utf-8-sig") as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"Skipping {filename}: invalid JSON - {e}")
            continue
        if not isinstance(data, dict):
            # The flattener iterates data.items(); a top-level list or scalar
            # would raise and abort the whole run.
            print(f"Skipping {filename}: top-level JSON value is not an object")
            continue
        text = json_to_embedding_text(data)
        all_texts.append(f"### File: {filename}\n{text}\n")

    with open(output_file, "w", encoding="utf-8") as f_out:
        f_out.write("\n\n".join(all_texts))
    print(f"Processed {len(all_texts)} files. Output saved to {output_file}")

# Example usage: flatten every JSON file in one local folder into a single
# text file; the relative output path resolves against the working directory.
output_file = "embedding_ready3.txt"
folder_path = r"C:\Users\yashr\OneDrive\Documents\json fieles\601 - 800"
process_json_folder(folder_path=folder_path, output_file=output_file)


Processed 193 files. Output saved to embedding_ready3.txt
