<a href="https://colab.research.google.com/github/sahandtebyani/Instagram-Influencer-/blob/main/Json_commetn_tool.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import json

def extract_text_fields(data):
    """
    Walk a parsed JSON value and gather every string stored under a "text" key.

    Dictionaries and lists are descended into at any depth; every other value
    (strings, numbers, booleans, None) contributes nothing. A "text" entry
    whose value is not a string is not collected, but it is still searched if
    it is itself a dict or list.

    Args:
        data (dict or list): The JSON structure (can be a dict or list).

    Returns:
        set: The distinct string values found under "text" keys.
    """
    # Lists carry no keys of their own: just merge what each element yields.
    if isinstance(data, list):
        return set().union(*(extract_text_fields(element) for element in data))

    # Scalars (and anything else that is not a container) hold no fields.
    if not isinstance(data, dict):
        return set()

    collected = set()
    for name, child in data.items():
        if name == "text" and isinstance(child, str):
            collected.add(child)
        elif isinstance(child, (dict, list)):
            collected |= extract_text_fields(child)
    return collected


def mix_and_extract_texts(input_folder, output_file):
    """
    Collect the unique "text" values from every JSON file in a folder and
    save them to a single JSON file.

    Each direct entry of ``input_folder`` whose name ends in ".json" is parsed
    and passed to ``extract_text_fields``; the results are merged into one set
    so duplicates across files are dropped. A file that cannot be read or
    parsed is reported on stdout and skipped. The merged values are written to
    ``output_file`` as a sorted JSON array, so repeated runs over the same
    input produce identical output.

    Args:
        input_folder (str): Path to the folder containing JSON files.
        output_file (str): Path to the output JSON file.
    """
    all_texts = set()

    # Sorted so that files are visited (and errors reported) in a stable order.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith(".json"):  # Only process JSON files
            continue
        file_path = os.path.join(input_folder, filename)
        try:
            # Keep the file open only for as long as parsing needs it.
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            all_texts.update(extract_text_fields(data))
        except Exception as e:
            # Deliberately broad: one unreadable or malformed file must not
            # abort the whole batch, so report it and move on.
            print(f"Error processing file {file_path}: {e}")

    # Set iteration order varies between interpreter runs (string hashes are
    # randomized), so sort before writing to keep the output reproducible.
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(sorted(all_texts), f, indent=4, ensure_ascii=False)

    print(f"Extracted {len(all_texts)} unique 'text' fields from all files in {input_folder}")
    print(f"Results saved to {output_file}")


# Demo invocation: adjust both paths below to match your environment.
input_folder = "/content/json"  # Directory holding the source JSON files
output_file = "unique_texts.json"  # Destination file for the de-duplicated texts
mix_and_extract_texts(input_folder=input_folder, output_file=output_file)


Extracted 171 unique 'text' fields from all files in /content/json
Results saved to unique_texts.json
