# Test the Fine-tuned Model

Use the fine-tuned checkpoint to run inference and evaluate its performance on test data.


In [None]:
# import libraries
try:
    import torch
    from dotenv import dotenv_values
    from libs.dataset import anonymize_text
    from libs.utility import detect_accelerator

    # import HF transformers
    from transformers import (
        AutoModelForSeq2SeqLM, AutoTokenizer
    )
except ImportError as e:
    # Report the missing dependency, then stop: the code below and every later
    # cell use these names, so continuing would only fail with a less helpful
    # NameError.
    print(f"Exception during library import {e}")
    raise

# load dotenv: parse the "localenv" file into a mapping of settings
config_env: dict = dotenv_values("localenv")

# load configuration parameters
# `or` is used instead of a .get() default so the fallback also applies when
# the key is present in the file but its value is empty or None.
CONFIG_FILE: str = config_env.get("PARAMETER_FILE") or "parameters.yaml"
OUTPUT_DIR: str = config_env.get("OUTPUT_DIR") or "flan-finetuned-ita"

## 1. Load the fine-tuned model from disk

In [None]:
# load model from disk using transformers
# Both the seq2seq model and its tokenizer are read from the OUTPUT_DIR path.
finetuned_model = AutoModelForSeq2SeqLM.from_pretrained(OUTPUT_DIR)
finetuned_tokenizer = AutoTokenizer.from_pretrained(OUTPUT_DIR)

# move model to device
# detect_accelerator() returns a (device, dtype) pair; only `device` is used
# in this cell, `dtype` is not applied to the model here.
# NOTE(review): presumably anonymize_text() places the tokenized inputs on the
# model's device — confirm in libs.dataset.
device, dtype = detect_accelerator()
finetuned_model.to(device)

## 2. Use the loaded fine-tuned model to perform inference on new data

In [None]:
# Synthetic test data: Italian sentences, each embedding a different mix of
# PII for the model to anonymize.
test_sentences = [
    # person name + street address
    "Il signor Alessandro Bianchi abita in Via Nazionale 45, Milano.",
    # person name + mobile number + e-mail address
    (
        "Per contattare Giulia Rossi chiamare il 339-8765432 "
        "o scrivere a giulia.rossi@email.it"
    ),
    # person name + birth date + codice fiscale
    "Il paziente Marco Esposito, nato il 25/08/1982, codice fiscale SPSMRC82M25H501Z.",
    # payment card number + card holder name
    "Pagamento con carta 5123-4567-8901-2345 intestata a Francesca Lombardi.",
    # person name + landline number + office address
    (
        "Contattare la dottoressa Elena Ricci al numero 02-12345678, "
        "ufficio in Corso Italia 88, Roma."
    ),
]

In [None]:
# Run every synthetic sentence through the fine-tuned model and print the
# original next to its anonymized version. Uses test_sentences,
# finetuned_model and finetuned_tokenizer defined in the earlier cells.
print("Testing the fine-tuned model on Italian PII anonymization:\n")
separator = "-" * 100  # visual divider between examples
for sentence in test_sentences:
    anonymized = anonymize_text(sentence, finetuned_model, finetuned_tokenizer)
    # One print call, newline-separated: same output as three separate prints.
    print(
        f"Originale:    {sentence}",
        f"Anonimizzato: {anonymized}",
        separator,
        sep="\n",
    )