In [None]:
from transformers import pipeline

# Load the NER model: a token-classification pipeline for the
# "loolootech/no-name-ner-th" checkpoint, pinned to the CPU.
ner_model = pipeline("token-classification", model="loolootech/no-name-ner-th", device="cpu")

# Sample Thai sentence mentioning a patient's name, a doctor's name and a
# phone number (081-123-4567).
input_text = "คนไข้ชื่อสมชาย ได้รับยาจากหมอเหน่ง ให้โทรหาที่เบอร์ 081-123-4567"

# Get the raw output from the model.
# NOTE(review): no aggregation strategy is passed, so this is presumably one
# result per token rather than one per merged entity — confirm against the
# pipeline documentation.
ner_results = ner_model(input_text)

In [None]:
# Let's say we want to anonymize just a few types of entities for demonstration purposes.
# You can expand this dictionary based on your requirements.
# Keys are entity types with any BIO prefix removed (e.g. 'B-PERSON' -> 'PERSON');
# values are the placeholder written into the text in place of the entity.
ENTITY_TO_ANONYMIZED_TOKEN_MAP = {
    "PERSON": "[PERSON]",
    "PHONE": "[PHONE]",
    # "EMAIL": "[EMAIL]",
    # "ADDRESS": "[LOCATION]",
    # "DATE": "[DATE]",
    # "NATIONAL_ID": "[NATIONAL_ID]",
    # "HOSPITAL_IDS": "[HOSPITAL_IDS]",
}


def anonymize_text(text: str, ner_results: list) -> str:
    """
    Anonymizes text based on NER results by replacing entities with tokens.

    Token-level results that belong to the same entity are collapsed into a
    single placeholder, so a name split into several tokens becomes one
    "[PERSON]" rather than "[PERSON][PERSON]...".

    Args:
        text: The original text the NER results refer to.
        ner_results: Dicts with character offsets under 'start' and 'end' and
            a label under 'entity' (e.g. 'B-PERSON') or 'entity_group'
            (e.g. 'PERSON'). The list may be in any order and is not modified.

    Returns:
        A copy of ``text`` in which every span whose entity type is a key of
        ENTITY_TO_ANONYMIZED_TOKEN_MAP is replaced by the mapped placeholder.
        Entities of other types are left untouched.

    Raises:
        KeyError: If a result lacks 'start', 'end', or both label keys.
    """
    # Pass 1: walk the results in text order and merge token-level pieces of
    # the same entity into [start, end, entity_type] spans.
    spans = []
    for entity in sorted(ner_results, key=lambda item: (item["start"], item["end"])):
        label = entity.get("entity_group") or entity["entity"]
        # Extract entity type (e.g., 'B-PERSON' -> 'PERSON')
        entity_type = label.split("-")[-1]
        if entity_type not in ENTITY_TO_ANONYMIZED_TOKEN_MAP:
            continue

        start, end = entity["start"], entity["end"]
        previous = spans[-1] if spans else None
        if previous is not None and previous[2] == entity_type:
            overlaps = start < previous[1]
            # An 'I-' tag continues the previous entity as long as nothing but
            # whitespace separates the two pieces. A 'B-' tag (or a label with
            # no prefix) always starts a new entity, so two adjacent people
            # stay two placeholders.
            continues = label.startswith("I-") and not text[previous[1]:start].strip()
            if overlaps or continues:
                previous[1] = max(previous[1], end)
                continue
        spans.append([start, end, entity_type])

    # Pass 2: rebuild the text left to right. Working from the original
    # offsets means no replacement can shift the indices of a later one.
    pieces = []
    cursor = 0
    for start, end, entity_type in spans:
        if start > cursor:
            pieces.append(text[cursor:start])
        pieces.append(ENTITY_TO_ANONYMIZED_TOKEN_MAP[entity_type])
        # max() guards against a span that overlaps one of a different type.
        cursor = max(cursor, end)
    pieces.append(text[cursor:])

    return "".join(pieces)

# Example usage: anonymize the sample sentence with the NER results computed
# earlier and print the redacted text.
anonymized_text = anonymize_text(input_text, ner_results)
print(anonymized_text)