##### Protect AI's **LLM Guard** Framework

In [29]:
from openai import OpenAI
from dotenv import load_dotenv
import os 

from llm_guard import scan_output, scan_prompt
from llm_guard.input_scanners import Anonymize, PromptInjection, TokenLimit, Toxicity
from llm_guard.output_scanners import Deanonymize, NoRefusal, Relevance, Sensitive
from llm_guard.vault import Vault

import warnings
warnings.filterwarnings("ignore")

# Pull settings from a local .env file into the process environment (python-dotenv).
load_dotenv()

# Chat model used for the completion request later in the notebook.
GPT_MODEL = 'gpt-3.5-turbo'

# API configuration.
# The client is constructed with an explicit `api_key=os.environ['OPENAI_API_KEY']`,
# so all this cell has to do is make sure the key is actually present. Checking here
# gives a readable error instead of a TypeError/KeyError further down.
if not os.getenv('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in your shell or add it to a .env file "
        "before running this notebook."
    )

In [63]:
# --- Initialize components -------------------------------------------------
# OpenAI client, authenticated with the key from the environment.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

# One Vault instance is shared by Anonymize and Deanonymize below.
vault = Vault()

# Only the anonymization round-trip is enabled in this demo.
# A fuller pipeline could also enable the other imported scanners:
#   input : Toxicity(), TokenLimit(), PromptInjection()
#   output: NoRefusal(), Relevance(), Sensitive()
input_scanners = [
    Anonymize(vault),
]
output_scanners = [
    Deanonymize(vault),
]

2024-11-20 01:25:48 [debug    ] No entity types provided, using default default_entities=['CREDIT_CARD', 'CRYPTO', 'EMAIL_ADDRESS', 'IBAN_CODE', 'IP_ADDRESS', 'PERSON', 'PHONE_NUMBER', 'US_SSN', 'US_BANK_NUMBER', 'CREDIT_CARD_RE', 'UUID', 'EMAIL_ADDRESS_RE', 'US_SSN_RE']


2024-11-20 01:25:48 [debug    ] Initialized NER model          device=device(type='cpu') model=Model(path='Isotonic/deberta-v3-base_finetuned_ai4privacy_v2', subfolder='', revision='9ea992753ab2686be4a8f64605ccc7be197ad794', onnx_path='Isotonic/deberta-v3-base_finetuned_ai4privacy_v2', onnx_revision='9ea992753ab2686be4a8f64605ccc7be197ad794', onnx_subfolder='onnx', onnx_filename='model.onnx', kwargs={}, pipeline_kwargs={'batch_size': 1, 'device': device(type='cpu'), 'aggregation_strategy': 'simple', 'ignore_labels': ['O', 'CARDINAL']}, tokenizer_kwargs={'model_input_names': ['input_ids', 'attention_mask']})
2024-11-20 01:25:48 [debug    ] Loaded regex pattern           group_name=CREDIT_CARD_RE
2024-11-20 01:25:48 [debug    ] Loaded regex pattern           group_name=UUID
2024-11-20 01:25:48 [debug    ] Loaded regex pattern           group_name=EMAIL_ADDRESS_RE
2024-11-20 01:25:48 [debug    ] Loaded regex pattern           group_name=US_SSN_RE
2024-11-20 01:25:48 [debug    ] Loaded reg

In [64]:
# Alternative test prompt (SQL-insert scenario), kept for reference:
# prompt = (
#     "Make an SQL insert statement to add a new user to our database. Name is Phu Dang. Email is pndang@ucsd.edu "
#     "but also possible to contact him with dangnphu31@gmail.com email. Phone number is 669-274-8956 and "
#     "the IP address is 123.555.22.31. And credit card number is 5555-6666-8888-9999. "
#     "He works at RTI Properties."
# )

# Active prompt: a translation request whose quoted text embeds a street
# address, an SSN, and an email address.
prompt = (
    'Please translate the following to Vietnamese: '
    '"My address is 31 Southview Ln, Anytown, CA 55555, '
    'my SSN is 123-45-6789, '
    'and my email address is pndang@ucsd.edu. '
    'Per our recent call, please forward me the escrow docs. Thanks!"'
)

In [65]:
# Run every input scanner over the raw prompt. `results_valid` and `results_score`
# are dicts keyed by scanner name (e.g. {'Anonymize': ...}).
sanitized_prompt, results_valid, results_score = scan_prompt(input_scanners, prompt)

# A prompt is flagged as soon as ANY scanner marks it invalid, not only when all do.
flagged_input_scanners = [name for name, is_valid in results_valid.items() if not is_valid]
if flagged_input_scanners:
    print(f"Prompt {prompt} is not valid, scores: {results_score}")

# In the recorded run Anonymize reported is_valid=False simply because it redacted
# PII, which is the purpose of this demo, so a flag from it alone does not block.
# NOTE(review): confirm against the llm_guard docs that this is Anonymize's contract.
blocking_input_scanners = [name for name in flagged_input_scanners if name != "Anonymize"]
if blocking_input_scanners:
    # Raise instead of exit(): an exception stops the notebook run at this cell.
    raise ValueError(
        f"Prompt rejected by input scanner(s): {', '.join(blocking_input_scanners)}"
    )

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


2024-11-20 01:25:51 [debug    ] removing element type: EMAIL_ADDRESS_RE, start: 144, end: 159, score: 0.75 from results list due to conflict
2024-11-20 01:25:51 [debug    ] Scanner completed              elapsed_time_seconds=0.392353 is_valid=False scanner=Anonymize
2024-11-20 01:25:51 [info     ] Scanned prompt                 elapsed_time_seconds=0.403879 scores={'Anonymize': 1.0}
Prompt Please translate the following to Vietnamese: "My address is 31 Southview Ln, Anytown, CA 55555, my SSN is 123-45-6789, and my email address is pndang@ucsd.edu. Per our recent call, please forward me the escrow docs. Thanks!" is not valid, scores: {'Anonymize': 1.0}


In [66]:
# Show the prompt text returned by scan_prompt (first element of its result tuple).
print(f"Prompt: {sanitized_prompt}")

Prompt: Please translate the following to Vietnamese: "My address is 31 Southview Ln, Anytown, CA 55555, my SSN is [REDACTED_US_SSN_RE_1], and my email address is [REDACTED_EMAIL_ADDRESS_1]. Per our recent call, please forward me the escrow docs. Thanks!"


In [67]:
# Conversation sent to the model: the user turn carries the sanitized prompt,
# not the raw one.
chat_messages = [
    {'role': 'system', 'content': 'You are a helpful assistant.'},
    {'role': 'user', 'content': sanitized_prompt},
]

res = client.chat.completions.create(
    model=GPT_MODEL,
    messages=chat_messages,
    temperature=0,
    max_tokens=512,
)

# Text of the first completion choice.
res_text = res.choices[0].message.content
# Run the output scanners over the model reply. The sanitized prompt is passed
# alongside the reply, and the scan results rebind `results_valid` / `results_score`.
# (The variable name is spelled "deanomymized"; a later cell reads it under that name.)
deanomymized_res_text, results_valid, results_score = scan_output(
    output_scanners, sanitized_prompt, res_text
)

# Reject the reply as soon as ANY output scanner marks it invalid, not only when all do.
failed_output_scanners = [name for name, is_valid in results_valid.items() if not is_valid]
if failed_output_scanners:
    print(f"Output {res_text} is not valid, scores: {results_score}")
    # Raise instead of exit(): an exception stops the notebook run at this cell.
    raise ValueError(
        f"Output rejected by output scanner(s): {', '.join(failed_output_scanners)}"
    )

2024-11-20 01:25:53 [debug    ] Replaced placeholder with real value placeholder=[REDACTED_EMAIL_ADDRESS_1]
2024-11-20 01:25:53 [debug    ] Replaced placeholder with real value placeholder=[REDACTED_US_SSN_RE_1]
2024-11-20 01:25:53 [debug    ] Scanner completed              elapsed_time_seconds=0.018999 is_valid=True scanner=Deanonymize
2024-11-20 01:25:53 [info     ] Scanned output                 elapsed_time_seconds=0.035246 scores={'Deanonymize': 0.0}


In [68]:
# Original, unsanitized prompt, shown for comparison with the cells that follow.
print(prompt)

Please translate the following to Vietnamese: "My address is 31 Southview Ln, Anytown, CA 55555, my SSN is 123-45-6789, and my email address is pndang@ucsd.edu. Per our recent call, please forward me the escrow docs. Thanks!"


In [69]:
# Prompt as returned by the input scanners; this is the text sent to the model.
print(sanitized_prompt)

Please translate the following to Vietnamese: "My address is 31 Southview Ln, Anytown, CA 55555, my SSN is [REDACTED_US_SSN_RE_1], and my email address is [REDACTED_EMAIL_ADDRESS_1]. Per our recent call, please forward me the escrow docs. Thanks!"


In [70]:
# Model reply exactly as received, before the output scanners process it.
print(res_text)

Địa chỉ của tôi là 31 Southview Ln, Anytown, CA 55555, số SSN của tôi là [REDACTED_US_SSN_RE_1], và địa chỉ email của tôi là [REDACTED_EMAIL_ADDRESS_1]. Theo cuộc gọi gần đây của chúng ta, vui lòng gửi cho tôi các tài liệu ký quỹ. Cảm ơn!


In [71]:
# Model reply as returned by scan_output (the name is spelled "deanomymized" to match its definition).
print(f"{deanomymized_res_text}\n")

Địa chỉ của tôi là 31 Southview Ln, Anytown, CA 55555, số SSN của tôi là 123-45-6789, và địa chỉ email của tôi là pndang@ucsd.edu. Theo cuộc gọi gần đây của chúng ta, vui lòng gửi cho tôi các tài liệu ký quỹ. Cảm ơn!



In [72]:
# Per-scanner validity flags from the most recent scan; when cells run in order
# that is the output scan, which rebinds this name after the prompt scan.
results_valid

{'Deanonymize': True}