##### Protect AI's **LLM Guard** Framework

In [1]:
from openai import OpenAI
from dotenv import load_dotenv
import os 

from llm_guard import scan_output, scan_prompt
from llm_guard.input_scanners import Anonymize, PromptInjection, TokenLimit, Toxicity
from llm_guard.output_scanners import Deanonymize, NoRefusal, Relevance, Sensitive
from llm_guard.vault import Vault

import warnings
warnings.filterwarnings("ignore")

load_dotenv()

GPT_MODEL = 'gpt-4o' 

# API Configuration
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
OpenAI.api_key = os.getenv('OPENAI_API_KEY') 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# initialize components

client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
vault = Vault()
input_scanners = [Anonymize(vault), Toxicity(), TokenLimit(), PromptInjection()]
output_scanners = [Deanonymize(vault), NoRefusal(), Relevance(), Sensitive()]

2024-11-08 21:26:10 [debug    ] No entity types provided, using default default_entities=['CREDIT_CARD', 'CRYPTO', 'EMAIL_ADDRESS', 'IBAN_CODE', 'IP_ADDRESS', 'PERSON', 'PHONE_NUMBER', 'US_SSN', 'US_BANK_NUMBER', 'CREDIT_CARD_RE', 'UUID', 'EMAIL_ADDRESS_RE', 'US_SSN_RE']
2024-11-08 21:26:11 [debug    ] Initialized NER model          device=device(type='cpu') model=Model(path='Isotonic/deberta-v3-base_finetuned_ai4privacy_v2', subfolder='', revision='9ea992753ab2686be4a8f64605ccc7be197ad794', onnx_path='Isotonic/deberta-v3-base_finetuned_ai4privacy_v2', onnx_revision='9ea992753ab2686be4a8f64605ccc7be197ad794', onnx_subfolder='onnx', onnx_filename='model.onnx', kwargs={}, pipeline_kwargs={'batch_size': 1, 'device': device(type='cpu'), 'aggregation_strategy': 'simple'}, tokenizer_kwargs={'model_input_names': ['input_ids', 'attention_mask']})
2024-11-08 21:26:11 [debug    ] Loaded regex pattern           group_name=CREDIT_CARD_RE
2024-11-08 21:26:11 [debug    ] Loaded regex pattern        

In [5]:
prompt = "Make an SQL insert statement to add a new user to our database. Name is Phu Dang. Email is pndang@ucsd.edu "
"but also possible to contact him with dangnphu31@gmail.com email. Phone number is 669-274-8956 and "
"the IP address is 123.555.22.31. And credit card number is 5555-6666-8888-9999. "
"He works at RTI Properties."

'He works at RTI Properties.'

In [7]:
sanitized_prompt, results_valid, results_score = scan_prompt(input_scanners, prompt)
if any(results_valid.values()) is False:
    print(f"Prompt {prompt} is not valid, scores: {results_score}")
    exit(1)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


2024-11-08 21:36:29 [debug    ] removing element type: EMAIL_ADDRESS_RE, start: 91, end: 106, score: 0.75 from results list due to conflict
2024-11-08 21:36:29 [debug    ] Scanner completed              elapsed_time_seconds=0.931442 is_valid=False scanner=Anonymize
2024-11-08 21:36:30 [debug    ] Not toxicity found in the text results=[[{'label': 'toxicity', 'score': 0.0004069566202815622}, {'label': 'male', 'score': 0.00020465556008275598}, {'label': 'insult', 'score': 0.00013231522461865097}, {'label': 'psychiatric_or_mental_illness', 'score': 0.0001088515346054919}, {'label': 'female', 'score': 0.00010773175017675385}, {'label': 'christian', 'score': 6.688992289127782e-05}, {'label': 'muslim', 'score': 6.4440791902598e-05}, {'label': 'white', 'score': 5.628326107398607e-05}, {'label': 'obscene', 'score': 3.477228892734274e-05}, {'label': 'black', 'score': 3.4035267162835225e-05}, {'label': 'homosexual_gay_or_lesbian', 'score': 3.0160595997585915e-05}, {'label': 'threat', 'score': 2.

In [8]:
print(f"Prompt: {sanitized_prompt}")

Prompt: Make an SQL insert statement to add a new user to our database. Name is [REDACTED_PERSON_1]. Email is [REDACTED_EMAIL_ADDRESS_1] 


In [28]:
res = client.chat.completions.create(
    model=GPT_MODEL,
    messages=[
        {'role': 'system', 'content': 'YOu are a helpful assistant.'},
        {'role': 'user', 'content': sanitized_prompt}
    ],
    temperature=0,
    max_tokens=512,
)
res_text = res.choices[0].message.content
sanitized_res_text, results_valid, results_score = scan_output(
    output_scanners, sanitized_prompt, res_text
)
if any(results_valid.values()) is False:
    print(f"Output {res_text} is not valid, scores: {results_score}")
    exit(1)

2024-11-08 22:10:12 [debug    ] Replaced placeholder with real value placeholder=[REDACTED_EMAIL_ADDRESS_1]
2024-11-08 22:10:12 [debug    ] Replaced placeholder with real value placeholder=[REDACTED_PERSON_1]
2024-11-08 22:10:12 [debug    ] Scanner completed              elapsed_time_seconds=0.022002 is_valid=True scanner=Deanonymize
2024-11-08 22:10:13 [debug    ] No rejection detected          highest_score=0.0
2024-11-08 22:10:13 [debug    ] Scanner completed              elapsed_time_seconds=0.956642 is_valid=True scanner=NoRefusal
2024-11-08 22:10:14 [debug    ] Result is similar to the prompt similarity_score=np.float32(0.8352221)
2024-11-08 22:10:14 [debug    ] Scanner completed              elapsed_time_seconds=1.182117 is_valid=True scanner=Relevance


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


2024-11-08 22:10:14 [debug    ] Scanner completed              elapsed_time_seconds=0.550503 is_valid=False scanner=Sensitive
2024-11-08 22:10:14 [info     ] Scanned output                 elapsed_time_seconds=2.763967 scores={'Deanonymize': 0.0, 'NoRefusal': 0.0, 'Relevance': 0.0, 'Sensitive': 1.0}


In [34]:
print(res_text)

To create an SQL `INSERT` statement for adding a new user to your database, you'll need to know the structure of your database table. For this example, let's assume your table is named `users` and it has columns `id`, `name`, and `email`. The `id` column is an auto-incrementing primary key.

Here's how you can write the SQL `INSERT` statement:

```sql
INSERT INTO users (name, email) VALUES ('[REDACTED_PERSON_1]', '[REDACTED_EMAIL_ADDRESS_1]');
```

This statement will insert a new row into the `users` table with the specified name and email. Make sure to replace `[REDACTED_PERSON_1]` and `[REDACTED_EMAIL_ADDRESS_1]` with the actual name and email address when you execute the statement. If your table has additional columns that require values, you'll need to include those in the `INSERT` statement as well.


In [30]:
print(f"Output: {sanitized_res_text}\n")

Output: To create an SQL `INSERT` statement for adding a new user to your database, you'll need to know the structure of your database table. For this example, let's assume your table is named `users` and it has columns `id`, `name`, and `email`. The `id` column is an auto-incrementing primary key.

Here's how you can write the SQL `INSERT` statement:

```sql
INSERT INTO users (name, email) VALUES ('Phu Dang', 'pndang@ucsd.edu');
```

This statement will insert a new row into the `users` table with the specified name and email. Make sure to replace `Phu Dang` and `pndang@ucsd.edu` with the actual name and email address when you execute the statement. If your table has additional columns that require values, you'll need to include those in the `INSERT` statement as well.



In [32]:
results_valid

{'Deanonymize': True, 'NoRefusal': True, 'Relevance': True, 'Sensitive': False}