In [2]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 16385 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = False # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 15 trillion tokens model 2x faster!
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # We also uploaded 4bit for 405b!
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", # New Mistral 12b 2x faster!
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",        # Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2x faster!
]  # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"cuda:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.3.4: Fast Llama patching. Transformers: 4.49.0. vLLM: 0.7.3.
   \\   /|    NVIDIA GeForce RTX 3090. Num GPUs = 1. Max memory: 24.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.4.0+cu121. CUDA: 8.6. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.27.post2. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [59]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth: Already have LoRA adapters! We shall skip this step.


In [42]:
import json

def read_jsonl_file(file_path):
    json_objects = []
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                # Strip any leading/trailing whitespace and parse the JSON
                json_object = json.loads(line.strip())
                
                # Replace null "content" with an empty string
                if "messages" in json_object:
                    for message in json_object["messages"]:
                        if "content" in message and message["content"] is None:
                            message["content"] = ""
                
                # Add "description" to root-level ['function'] if it exists
                # if "tools" in json_object:
                #     json_object['tools'][0]["function"]["description"] = "To Extract the Personally Identifiable Information data from the given text by user."
                
                json_objects.append(json_object)
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found.")
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    
    return json_objects

# Example usage
file_path = 'training_dataset.jsonl'
json_list = read_jsonl_file(file_path)

# Print the first JSON object to verify changes
print(json.dumps(json_list[0], indent=4))


{
    "messages": [
        {
            "role": "system",
            "content": "\nYou are an expert model trained to redact potentially sensitive information from documents. You have been given a document to redact. Your goal is to accurately redact the sensitive information from the document. Sensitive information can be in one of the following categories:\n\n- ACCOUNTNAME: name of an account\n- ACCOUNTNUMBER: number of an account\n- AGE: a person's age\n- AMOUNT: information indicating a certain monetary amount\n- BIC: a business identifier code\n- BITCOINADDRESS: bitcoint address, generally stored in a cryptocurrency wallet\n- BUILDINGNUMBER: number of a building in a physical address\n- CITY: name of a city indicating location or address\n- COMPANYNAME: name of a company\n- COUNTRY: name of a country indicating location or address\n- CREDITCARDCVV: credit card CVV\n- CREDITCARDISSUER: credit card issuer\n- CREDITCARDNUMBER: credit card number\n- CURRENCY: currency of a balance 

In [24]:
json_list[0]['messages']

[{'role': 'system',
  'content': "\nYou are an expert model trained to redact potentially sensitive information from documents. You have been given a document to redact. Your goal is to accurately redact the sensitive information from the document. Sensitive information can be in one of the following categories:\n\n- ACCOUNTNAME: name of an account\n- ACCOUNTNUMBER: number of an account\n- AGE: a person's age\n- AMOUNT: information indicating a certain monetary amount\n- BIC: a business identifier code\n- BITCOINADDRESS: bitcoint address, generally stored in a cryptocurrency wallet\n- BUILDINGNUMBER: number of a building in a physical address\n- CITY: name of a city indicating location or address\n- COMPANYNAME: name of a company\n- COUNTRY: name of a country indicating location or address\n- CREDITCARDCVV: credit card CVV\n- CREDITCARDISSUER: credit card issuer\n- CREDITCARDNUMBER: credit card number\n- CURRENCY: currency of a balance or transaction\n- CURRENCYCODE: the code a currenc

In [43]:
tools_json = json_list[0]['tools']

In [44]:
from datasets import Dataset

# Load the json_list as a Hugging Face dataset
dataset = Dataset.from_list(json_list)

# Print the first example to verify
print(dataset[1])

{'messages': [{'content': "\nYou are an expert model trained to redact potentially sensitive information from documents. You have been given a document to redact. Your goal is to accurately redact the sensitive information from the document. Sensitive information can be in one of the following categories:\n\n- ACCOUNTNAME: name of an account\n- ACCOUNTNUMBER: number of an account\n- AGE: a person's age\n- AMOUNT: information indicating a certain monetary amount\n- BIC: a business identifier code\n- BITCOINADDRESS: bitcoint address, generally stored in a cryptocurrency wallet\n- BUILDINGNUMBER: number of a building in a physical address\n- CITY: name of a city indicating location or address\n- COMPANYNAME: name of a company\n- COUNTRY: name of a country indicating location or address\n- CREDITCARDCVV: credit card CVV\n- CREDITCARDISSUER: credit card issuer\n- CREDITCARDNUMBER: credit card number\n- CURRENCY: currency of a balance or transaction\n- CURRENCYCODE: the code a currency (e.g.

In [None]:

from utils import get_tools_prefix_messages
from litserve.specs.openai import Tool, ChatMessage
import json
from jinja2 import Template
from threading import Thread

def transform_to_llama_chat_format(messages_json_full, add_generation_prompt=False):
    
    tools_json = messages_json_full['tools']
    tools = [Tool.model_validate(tool) for tool in tools_json]
    chat_template="""
{%- for message in messages %}
    {%- set prefix = '<|begin_of_text|>' if loop.index0==0 else '' %}
    {{- prefix + '<|start_header_id|>'+message['role']+'<|end_header_id|>\n\n' -}}
    {%- if message['role'] == 'assistant' and 'tool_calls' in message %}
        {{- '(TOOL)' }}
        {%- for tool in message['tool_calls'] %}
            {%- set tool_json = {'id': tool['id'], 'name': tool['function']['name'], 'arguments': tool['function']['arguments']} %}
            {{- tool_json }}
        {%- endfor %}
        {{- '(TOOL)' + '<|eot_id|>\n' }}
    {%- else %}
        {{- message['content'] + '<|eot_id|>\n' }}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
{%- endif %}
"""

    message_json = messages_json_full['messages']
    messages = [ChatMessage.model_validate(message) for message in message_json]

    messages = get_tools_prefix_messages(messages, tools)

    # convert Pydantic models to JSON
    messages_json = [message.model_dump(exclude_none=True) for message in messages]

    jinja_template = Template(chat_template.strip())
    prompt = jinja_template.render(messages=messages_json, add_generation_prompt=add_generation_prompt)
    return {"text":prompt}



In [38]:
transform_to_llama_chat_format(dataset)

{'text': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n\nToday Date: 06 March 2025\n\n\nYou have access to the following functions:\n\nUse the function \'redact\' to \'To Extract and Redact the Personally Identifiable Information data from the given text by user.\'\n{"type":"function","function":{"name":"redact","description":"To Extract and Redact the Personally Identifiable Information data from the given text by user.","parameters":{"type":"object","properties":{"fields_to_redact":{"type":"array","items":{"type":"object","required":["string","pii_type"],"properties":{"string":{"type":"string","description":"The exact matching string to redact. Include any whitespace or punctuation. Must be an exact string match! can be empty string if there is no string to redact."},"pii_type":{"enum":["ACCOUNTNAME","ACCOUNTNUMBER","AGE","AMOUNT","BIC","BITCOINADDRESS","BUILDINGNUMBER","CITY","COMPANYNAME","COUNTY","CREDITCARDCVV","CREDITCARDISSUER","CREDITCARDNUMBER","CURRENCY","C

In [39]:
# Split the dataset into train and test sets
train_test_split = dataset.train_test_split(test_size=0.3, seed=42)

# Extract train and test datasets
train_dataset = train_test_split['train']
test_dataset = train_test_split['test']

# Print the number of examples in each set to verify
print(f"Number of training examples: {len(train_dataset)}")
print(f"Number of testing examples: {len(test_dataset)}")

Number of training examples: 70000
Number of testing examples: 30000


In [52]:
train_dataset = dataset.select(range(50000))

In [53]:
test_dataset = dataset.select(range(50000, 60000))

In [55]:
dataset_transformed_format = train_dataset.map(
    transform_to_llama_chat_format
)

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [56]:
dataset_transformed_format[0]['text']

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n\nToday Date: 06 March 2025\n\n\nYou have access to the following functions:\n\nUse the function \'redact\' to \'To Extract and Redact the Personally Identifiable Information data from the given text by user.\'\n{"type":"function","function":{"name":"redact","description":"To Extract and Redact the Personally Identifiable Information data from the given text by user.","parameters":{"properties":{"fields_to_redact":{"items":{"properties":{"pii_type":{"enum":["ACCOUNTNAME","ACCOUNTNUMBER","AGE","AMOUNT","BIC","BITCOINADDRESS","BUILDINGNUMBER","CITY","COMPANYNAME","COUNTY","CREDITCARDCVV","CREDITCARDISSUER","CREDITCARDNUMBER","CURRENCY","CURRENCYCODE","CURRENCYNAME","CURRENCYSYMBOL","DATE","DOB","EMAIL","ETHEREUMADDRESS","EYECOLOR","FIRSTNAME","GENDER","HEIGHT","IBAN","IP","IPV4","IPV6","JOBAREA","JOBTITLE","JOBTYPE","LASTNAME","LITECOINADDRESS","MAC","MASKEDNUMBER","MIDDLENAME","NEARBYGPSCOORDINATE","ORDINALDIRECTION","PASSW

In [57]:
dataset_transformed_format.save_to_disk('dataset_transformed_format_newversion')

Saving the dataset (0/1 shards):   0%|          | 0/50000 [00:00<?, ? examples/s]

In [None]:
from datasets import load_from_disk
dataset_transformed_format = load_from_disk("dataset_transformed_format_newversion")

In [58]:
dataset_transformed_format

Dataset({
    features: ['messages', 'tools', 'tool_choice', 'text'],
    num_rows: 50000
})

In [60]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_transformed_format,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 4,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 300,
        learning_rate =  1e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 1037,
        output_dir = "outputs_new",
        report_to = "none", # Use this for WandB etc
    ),
)

Unsloth: We found double BOS tokens - we shall remove one automatically.


Tokenizing to ["text"] (num_proc=4):   0%|          | 0/50000 [00:00<?, ? examples/s]

In [61]:
# @title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA GeForce RTX 3090. Max memory = 24.0 GB.
6.139 GB of memory reserved.


In [62]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 50,000 | Num Epochs = 1 | Total steps = 300
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 24,313,856/3,237,063,680 (0.75% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,1.8468
2,1.8743
3,1.8447
4,1.8609
5,1.8597
6,1.7918
7,1.7126
8,1.6699
9,1.56
10,1.514


In [63]:
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

1380.027 seconds used for training.
23.0 minutes used for training.
Peak reserved memory = 7.256 GB.
Peak reserved memory for training = 1.117 GB.
Peak reserved memory % of max memory = 30.233 %.
Peak reserved memory for training % of max memory = 4.654 %.


In [None]:
#trainer_stats = trainer.train(resume_from_checkpoint=True) ##To resume training from the last checkpoint if needed 

In [None]:
model.save_pretrained("lora_model_llama323b_4bit_pii_100steps")
tokenizer.save_pretrained("lora_model_llama323b_4bit_pii_100steps")
model.save_pretrained_merged("vllm_model_llama323b_4bit_pii_300steps", tokenizer, save_method = "merged_16bit") #if you want to save the model in fp16 vllm compatible format

In [None]:
##FINETUNED MODEL - Loading again if not go ahead further skip this cell
from unsloth import FastLanguageModel
import torch
max_seq_length = 16385 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = False # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "lora_model_llama323b_4bit_pii_100steps",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2025.3.4: Fast Llama patching. Transformers: 4.49.0. vLLM: 0.7.3.
   \\   /|    NVIDIA GeForce RTX 3090. Num GPUs = 1. Max memory: 24.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.4.0+cu121. CUDA: 8.6. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.27.post2. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Some parameters are on the meta device because they were offloaded to the cpu.


In [None]:
## STOCK MODE:
from unsloth import FastLanguageModel
import torch
max_seq_length = 16385 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = False # Use 4bit quantization to reduce memory usage. Can be False.

model_org, tokenizer_org = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

In [125]:
import re
def extract_content(text):
    # Define the regex pattern to extract the content
    pattern = r"<\|start_header_id\|>assistant<\|end_header_id\|>(.*?)<\|eot_id\|>"
    match = re.search(pattern, text, re.DOTALL)
    if match:
        return match.group(1).strip()
    return None  

In [143]:
import ast


def get_response_from_finetuned_model(model,tokenizer_ft,user_message):

    test_message = {
    "messages": [{
            "role": "system",
           "content": "You are an expert model trained to redact potentially sensitive information from documents. You have been given a document to redact. Your goal is to accurately redact the sensitive information from the document. Sensitive information can be in one of the following categories:\n\n- ACCOUNTNAME: name of an account\n- ACCOUNTNUMBER: number of an account\n- AGE: a person's age\n- AMOUNT: information indicating a certain monetary amount\n- BIC: a business identifier code\n- BITCOINADDRESS: bitcoint address, generally stored in a cryptocurrency wallet\n- BUILDINGNUMBER: number of a building in a physical address\n- CITY: name of a city indicating location or address\n- COMPANYNAME: name of a company\n- COUNTRY: name of a country indicating location or address\n- CREDITCARDCVV: credit card CVV\n- CREDITCARDISSUER: credit card issuer\n- CREDITCARDNUMBER: credit card number\n- CURRENCY: currency of a balance or transaction\n- CURRENCYCODE: the code a currency (e.g. USD)\n- CURRENCYNAME: name of a currency (e.g. US dollar)\n- CURRENCYSYMBOL: symbol of a currency (e.g. $)\n- DATE: a specific calendar date\n- DOB: a specific calendar date representing birth\n- EMAIL: an email ID\n- ETHEREUMADDRESS: ethereum address, generally stored in a cryptocurrency wallet\n- EYECOLOR: eye color, used to identify a person\n- FIRSTNAME: first name of a person\n- GENDER: a gender identifier\n- HEIGHT: height of a person\n- IBAN: international banking account number\n- IP: IP address\n- IPV4: IP v4 address\n- IPV6: IP v6 address\n- JOBAREA: job area, specialization or category\n- JOBTITLE: job title\n- LASTNAME: last name of a person\n- LITECOINADDRESS: litecoin address, generally stored in a cryptocurrency wallet\n- MAC: MAC address\n- MASKEDNUMBER: masked number\n- MIDDLENAME: middle name of a person\n- NEARBYGPSCOORDINATE: nearby GPS coordinates\n- ORDINALDIRECTION: ordinal direction (north, south, northeast, etc.)\n- PASSWORD: a secure string used for authentication\n- PHONEIMEI: the IMEI of a phone\n- PHONENUMBER: a telephone number\n- PIN: a personal identificaiton number (PIN)\n- PREFIX: prefix used to identify a person (Mr., Mrs., Dr. etc.)\n- SECONDARY ADDRESS: a secondary physical address address\n- SEX: a sex identifier (male/female)\n- SSN: a social security number\n- STATE: name of a state indicating location or address\n- STREET: name of a street indicating location or address\n- TIME: time of the day\n- URL: URL of a website\n- USERAGENT: user agent to identify the application, operating system, vendor etc.\n- USERNAME: user name to identify user\n- VERHICLEVIN: vehicle identification number or license number\n- VEHICLEVRM: vehicle registration mark\n- ZIPCODE: zipcode indicating location or address\n-SECURITYANSWER: Any Answer to a Security Question mentioned throughout the text \n\nYou should return the specific string that needs to be redacted, along with the category of sensitive information that it belongs to. If there is no sensitive information in the document, return no strings.\n",            "role": "user",
            "content": user_message
        }, 
    ],
    "tools": [{
            "function": {
                "name": "redact",
				 "description": "To Extract the Personally Identifiable Information data from the given text by user.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "fields_to_redact": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "required": ["string", "pii_type"],
                                "properties": {
                                    "string": {
                                        "type": "string",
                                        "description": "The exact matching string to redact. Include any whitespace or punctuation. Must be an exact string match!"
                                    },
                                    "pii_type": {
                                        "enum": ["ACCOUNTNAME", "ACCOUNTNUMBER", "AGE", "AMOUNT", "BIC", "BITCOINADDRESS", "BUILDINGNUMBER", "CITY", "COMPANYNAME", "COUNTY", "CREDITCARDCVV", "CREDITCARDISSUER", "CREDITCARDNUMBER", "CURRENCY", "CURRENCYCODE", "CURRENCYNAME", "CURRENCYSYMBOL", "DATE", "DOB", "EMAIL", "ETHEREUMADDRESS", "EYECOLOR", "FIRSTNAME", "GENDER", "HEIGHT", "IBAN", "IP", "IPV4", "IPV6", "JOBAREA", "JOBTITLE", "JOBTYPE", "LASTNAME", "LITECOINADDRESS", "MAC", "MASKEDNUMBER", "MIDDLENAME", "NEARBYGPSCOORDINATE", "ORDINALDIRECTION", "PASSWORD", "PHONEIMEI", "PHONENUMBER", "PIN", "PREFIX", "SECONDARYADDRESS", "SEX", "SSN", "STATE", "STREET", "TIME", "URL", "USERAGENT", "USERNAME", "VEHICLEVIN", "VEHICLEVRM", "ZIPCODE","SECURITYANSWER"],
                                        "type": "string"
                                    }
                                }
                            }
                        }
                    }
                }
            },
            "type": "function"
        }
    ],
    "tool_choice": {
        "type": "function",
        "function": {
            "name": "redact"
        }
    }
}
    tools_json = test_message['tools']
    tools = [Tool.model_validate(tool) for tool in tools_json]
    messages = [ChatMessage.model_validate(message) for message in test_message['messages']]

    messages = get_tools_prefix_messages(messages, tools)

    # convert Pydantic models to JSON
    messages_json = [message.model_dump(exclude_none=True) for message in messages]

    model_inputs_new = tokenizer_ft.apply_chat_template(
    messages_json,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt"
    ).to(model.device)

    outputs = model.generate(input_ids = model_inputs_new, max_new_tokens = 4096, use_cache = True)
    response = tokenizer.batch_decode(outputs)[0]
    print(response)
    json_data = ast.literal_eval(extract_content(response).strip("(TOOL)"))
    json_data['arguments'] = json.loads(json_data['arguments'])
    return json_data
    
def get_response_from_base_model(model,tokenizer_og,user_message):

    test_message = {
    "messages": [{
            "role": "system",
"content": "You are an expert model trained to redact potentially sensitive information from documents. You have been given a document to redact. Your goal is to accurately redact the sensitive information from the document. Sensitive information can be in one of the following categories:\n\n- ACCOUNTNAME: name of an account\n- ACCOUNTNUMBER: number of an account\n- AGE: a person's age\n- AMOUNT: information indicating a certain monetary amount\n- BIC: a business identifier code\n- BITCOINADDRESS: bitcoint address, generally stored in a cryptocurrency wallet\n- BUILDINGNUMBER: number of a building in a physical address\n- CITY: name of a city indicating location or address\n- COMPANYNAME: name of a company\n- COUNTRY: name of a country indicating location or address\n- CREDITCARDCVV: credit card CVV\n- CREDITCARDISSUER: credit card issuer\n- CREDITCARDNUMBER: credit card number\n- CURRENCY: currency of a balance or transaction\n- CURRENCYCODE: the code a currency (e.g. USD)\n- CURRENCYNAME: name of a currency (e.g. US dollar)\n- CURRENCYSYMBOL: symbol of a currency (e.g. $)\n- DATE: a specific calendar date\n- DOB: a specific calendar date representing birth\n- EMAIL: an email ID\n- ETHEREUMADDRESS: ethereum address, generally stored in a cryptocurrency wallet\n- EYECOLOR: eye color, used to identify a person\n- FIRSTNAME: first name of a person\n- GENDER: a gender identifier\n- HEIGHT: height of a person\n- IBAN: international banking account number\n- IP: IP address\n- IPV4: IP v4 address\n- IPV6: IP v6 address\n- JOBAREA: job area, specialization or category\n- JOBTITLE: job title\n- LASTNAME: last name of a person\n- LITECOINADDRESS: litecoin address, generally stored in a cryptocurrency wallet\n- MAC: MAC address\n- MASKEDNUMBER: masked number\n- MIDDLENAME: middle name of a person\n- NEARBYGPSCOORDINATE: nearby GPS coordinates\n- ORDINALDIRECTION: ordinal direction (north, south, northeast, etc.)\n- PASSWORD: a secure string used for authentication\n- PHONEIMEI: the IMEI of a phone\n- PHONENUMBER: a telephone number\n- PIN: a personal identificaiton number (PIN)\n- PREFIX: prefix used to identify a person (Mr., Mrs., Dr. etc.)\n- SECONDARY ADDRESS: a secondary physical address address\n- SEX: a sex identifier (male/female)\n- SSN: a social security number\n- STATE: name of a state indicating location or address\n- STREET: name of a street indicating location or address\n- TIME: time of the day\n- URL: URL of a website\n- USERAGENT: user agent to identify the application, operating system, vendor etc.\n- USERNAME: user name to identify user\n- VERHICLEVIN: vehicle identification number or license number\n- VEHICLEVRM: vehicle registration mark\n- ZIPCODE: zipcode indicating location or address\n-SECURITYANSWER: Any Answer to a Security Question mentioned throughout the text \n\nYou should return the specific string that needs to be redacted, along with the category of sensitive information that it belongs to. If there is no sensitive information in the document, return no strings.\n",     }, {
            "role": "user",
            "content": user_message
        }, 
    ],
    "tools": [{
            "function": {
                "name": "redact",
				 "description": "To Extract the Personally Identifiable Information data from the given text by user.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "fields_to_redact": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "required": ["string", "pii_type"],
                                "properties": {
                                    "string": {
                                        "type": "string",
                                        "description": "The exact matching string to redact. Include any whitespace or punctuation. Must be an exact string match!"
                                    },
                                    "pii_type": {
                                        "enum": ["ACCOUNTNAME", "ACCOUNTNUMBER", "AGE", "AMOUNT", "BIC", "BITCOINADDRESS", "BUILDINGNUMBER", "CITY", "COMPANYNAME", "COUNTY", "CREDITCARDCVV", "CREDITCARDISSUER", "CREDITCARDNUMBER", "CURRENCY", "CURRENCYCODE", "CURRENCYNAME", "CURRENCYSYMBOL", "DATE", "DOB", "EMAIL", "ETHEREUMADDRESS", "EYECOLOR", "FIRSTNAME", "GENDER", "HEIGHT", "IBAN", "IP", "IPV4", "IPV6", "JOBAREA", "JOBTITLE", "JOBTYPE", "LASTNAME", "LITECOINADDRESS", "MAC", "MASKEDNUMBER", "MIDDLENAME", "NEARBYGPSCOORDINATE", "ORDINALDIRECTION", "PASSWORD", "PHONEIMEI", "PHONENUMBER", "PIN", "PREFIX", "SECONDARYADDRESS", "SEX", "SSN", "STATE", "STREET", "TIME", "URL", "USERAGENT", "USERNAME", "VEHICLEVIN", "VEHICLEVRM", "ZIPCODE","SECURITYANSWER"],
                                        "type": "string"
                                    }
                                }
                            }
                        }
                    }
                }
            },
            "type": "function"
        }
    ],
    "tool_choice": {
        "type": "function",
        "function": {
            "name": "redact"
        }
    }
}
    
    model_org_inputs_new = tokenizer_og.apply_chat_template(
        test_message['messages'],
        custom_tools = test_message['tools'],
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)
    outputs = model.generate(input_ids = model_org_inputs_new, max_new_tokens = 4096, use_cache = True)
    response = tokenizer_og.batch_decode(outputs)[0]
    print(response)
    json_data = ast.literal_eval(extract_content(response))
    return json_data
    


In [156]:
user_msg = """User: Hello, thank you for calling TechSupport Solutions.",
            "Agent: Hi there! How can I assist you today?",
            "User: I'm having trouble logging into my account. It keeps saying my account is locked.",
            "Agent: I'm sorry to hear that. Let me help you with that. To assist you, I need to verify your identity first. Could you please answer security questions for me?",
            "Agent: Whats your backup email address??",
            "User: I think its Its Randy656@outlook.com.",
            "Agent: Perfect, thank you for verifying. Now, let me assist with unlocking your account. Since there were multiple failed attempts, the system has locked it as a security measure. We'll reset your password to unlock it. I can create a temporary password for you right now.",
            "User: Okay, that sounds good. What's the temporary password?",
            "Agent: The temporary password is Tech@1234567890! Please note that this will expire in 24 hours and you'll need to change it upon your first login.",
            "User: Got it. How do I create a strong password then?",
            "Agent: Great question! A strong password should be at least 12 characters long, include a mix of uppercase and lowercase letters, numbers, and special symbols. Avoid using easily guessable information like birthdays or common words. For example, something like Tech$upport@2023 would be strong.",
            "User: Okay, I'll make sure to create a password like that. Thank you for your help!",
            "Agent: You're welcome! If you encounter any more issues, don't hesitate to reach out. Have a great day ahead!"""

#user_msg = "i think i am able to pass this since i was 12/12/1990 birth and also paid the amount with 2706814429396205 which had 20 bucks balance"
#user_msg = "STEM model \u00e0 notre adresse 63298. Pay\u00e9 avec la carte 5588589585954874. D\u00e9bit de Seychelles Rupee pour cette transaction."
#user_msg = "Dr. Walker, nous avons remarqu\u00e9 des progr\u00e8s dans la compr\u00e9hension des aspects existentiels de votre Optimization. Continuez le bon travail !"
#user_msg = "Please contact Dr. Amanda Rodriguez at amanda.rodriguez@example.com or call her at (555) 123-4567 regarding account #AC-78593201."

ft_resposne = get_response_from_finetuned_model(model,tokenizer,user_msg)
og_response = get_response_from_base_model(model_org,tokenizer_org,user_msg)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>


Today Date: 07 March 2025


You have access to the following functions:

Use the function'redact' to 'To Extract the Personally Identifiable Information data from the given text by user.'
{"type":"function","function":{"name":"redact","description":"To Extract the Personally Identifiable Information data from the given text by user.","parameters":{"type":"object","properties":{"fields_to_redact":{"type":"array","items":{"type":"object","required":["string","pii_type"],"properties":{"string":{"type":"string","description":"The exact matching string to redact. Include any whitespace or punctuation. Must be an exact string match!"},"pii_type":{"enum":["ACCOUNTNAME","ACCOUNTNUMBER","AGE","AMOUNT","BIC","BITCOINADDRESS","BUILDINGNUMBER","CITY","COMPANYNAME","COUNTY","CREDITCARDCVV","CREDITCARDISSUER","CREDITCARDNUMBER","CURRENCY","CURRENCYCODE","CURRENCYNAME","CURRENCYSYMBOL","DATE","DOB","EMAIL","ETHEREUMADDRESS","EYECOLOR","FIR

In [157]:
ft_resposne

{'id': '',
 'name': 'redact',
 'arguments': {'fields_to_redact': [{'string': 'TechSupport Solutions.',
    'pii_type': 'COMPANYNAME'},
   {'string': 'Randy656@outlook.com', 'pii_type': 'EMAIL'},
   {'string': 'Tech@1234567890!', 'pii_type': 'PASSWORD'}]}}

In [158]:
ft_resposne['arguments']

{'fields_to_redact': [{'string': 'TechSupport Solutions.',
   'pii_type': 'COMPANYNAME'},
  {'string': 'Randy656@outlook.com', 'pii_type': 'EMAIL'},
  {'string': 'Tech@1234567890!', 'pii_type': 'PASSWORD'}]}

In [159]:
og_response['parameters']

{'fields_to_redact': '[{"string": "Whats your backup email address??", "pii_type": "SECURITYANSWER"}]'}

==((====))==  Unsloth 2025.3.4: Fast Llama patching. Transformers: 4.49.0. vLLM: 0.7.3.
   \\   /|    NVIDIA GeForce RTX 3090. Num GPUs = 1. Max memory: 24.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.4.0+cu121. CUDA: 8.6. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.27.post2. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [160]:
tokenizer_org.chat_template

'{{- bos_token }}\n{%- if custom_tools is defined %}\n    {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- if strftime_now is defined %}\n        {%- set date_string = strftime_now("%d %b %Y") %}\n    {%- else %}\n        {%- set date_string = "26 Jul 2024" %}\n    {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0][\'role\'] == \'system\' %}\n    {%- set system_message = messages[0][\'content\']|trim %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = "" %}\n{%- endif %}\n\n{#- System message #}\n{{- "<|start_header_id|>system<|end_header_id|>\\n\\n" }}\n{%- if tools is not none %}\n    {{- "Environment: ipython\\n" }}\n{%- endif %}\n{{- "Cutting

In [160]:
import os 
os.environ['HF_TOKEN'] = ''

model.push_to_hub_merged("droidriz/FineLlama-3.2-3B-PII-Tool", tokenizer, save_method="merged_16bit")

Unsloth: You are pushing to hub, but you passed your HF username = droidriz.
We shall truncate droidriz/FineLlama-3.2-3B-PII-Tool to FineLlama-3.2-3B-PII-Tool


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 7.81 out of 31.11 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


  0%|          | 0/28 [00:00<?, ?it/s]
We will save to Disk and not RAM now.
100%|██████████| 28/28 [00:14<00:00,  1.87it/s]


Unsloth: Saving tokenizer...

  0%|          | 0/1 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

 Done.


README.md:   0%|          | 0.00/601 [00:00<?, ?B/s]

  0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Done.
Saved merged model to https://huggingface.co/droidriz/FineLlama-3.2-3B-PII-Tool
