In [2]:
import json

def load_entities(json_path):
    """
    Read a UTF-8 encoded JSON file and return its decoded content.

    Parameters:
      - json_path: path of the JSON file to read.

    Returns:
      Whatever Python object the JSON document decodes to.
    """
    with open(json_path, encoding="utf-8") as source:
        raw_json = source.read()
    return json.loads(raw_json)

def anonymize_text(text, entities, mode="mask"):
    """
    Replace sensitive entities in the text.

    Overlapping (or duplicated) entity spans are merged into a single span
    before replacement, so every character covered by at least one entity is
    hidden exactly once. Spans whose "end" is not greater than "start" cover
    no characters and are ignored.

    Parameters:
      - text: the original text.
      - entities: a list of dictionaries with keys "start" and "end"
                  (character offsets into text, end exclusive) and, when
                  mode is "type", "type". The list is not modified.
      - mode: if "mask", replace with a string of '#' characters (matching the span length);
              if "type", replace with the entity type in brackets (e.g. [PER]);
              a merged span uses the type of its earliest-starting entity
              (the longest one when several start at the same offset);
              any other value replaces each span with "###".

    Returns:
      The anonymized text.
    """
    # Ascending by start; on equal starts the longest span comes first so it
    # becomes the representative of the merged span.
    ordered = sorted(entities, key=lambda ent: (ent["start"], -ent["end"]))

    # Merge overlapping spans. Replacing overlapping spans one by one is only
    # safe when the replacement keeps the length; "[TYPE]" and "###" do not,
    # which would shift the offsets of the spans still to be processed.
    merged = []  # items: [start, end, representative entity]
    for ent in ordered:
        start, end = ent["start"], ent["end"]
        if end <= start:
            continue  # empty or inverted span: nothing to hide
        if merged and start < merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end, ent])

    # Single left-to-right pass: copy the untouched text between spans and
    # emit one replacement per merged span.
    pieces = []
    cursor = 0
    for start, end, ent in merged:
        pieces.append(text[cursor:start])
        if mode == "mask":
            pieces.append("#" * (end - start))  # one '#' per hidden character
        elif mode == "type":
            pieces.append(f"[{ent['type']}]")
        else:
            pieces.append("###")  # default
        cursor = end
    pieces.append(text[cursor:])

    return "".join(pieces)

# Input locations: the merged spaCy/Flair entity list and the cleaned contract text.
ENTITIES_JSON_PATH = r"C:\Users\hp\Downloads\clever contact\models_contract_1\withnewdataset_merged_spacy_flair_entities.json"
CONTRACT_TXT_PATH = r"C:\Users\hp\Downloads\clever contact\cleaned_contracts\cleaned_contract_fixed_final.txt"

# Where the anonymized contract is written (relative to the working directory).
output_path = "anonymized_contract1.txt"

entities = load_entities(ENTITIES_JSON_PATH)

with open(CONTRACT_TXT_PATH, "r", encoding="utf-8") as contract_file:
    contract_text = contract_file.read()

# mode="mask" hides each entity behind '#' characters;
# mode="type" would substitute the bracketed entity type instead.
anonymized = anonymize_text(contract_text, entities, mode="mask")

with open(output_path, "w", encoding="utf-8") as result_file:
    result_file.write(anonymized)

print(f" Anonymized contract saved to {output_path}")


 Anonymized contract saved to anonymized_contract1.txt
