### Model Guardrail Selection

In [13]:
from guardrails import install
install("hub://guardrails/llamaguard_7b")
install("hub://arize-ai/dataset_embeddings_guardrails")
install("hub://guardrails/toxic_language")
install("hub://guardrails/redundant_sentences")

<module 'guardrails_grhub_redundant_sentences' from 'c:\\Users\\m\\Documents\\GitHub\\CareBear\\myenv\\lib\\site-packages\\guardrails_grhub_redundant_sentences\\__init__.py'>

In [None]:
from guardrails import Guard, OnFailAction
from guardrails.hub import (
    RedundantSentences,
    ToxicLanguage,
    LlamaGuard7B
)

# 1. Create a Guard for input validation.
#    This guard only contains validators that check the user's prompt string
#    before it is sent to the LLM.
input_guard = Guard().use_many(
    RedundantSentences(
        threshold=70
    ),
    ToxicLanguage(
        validation_method="sentence",
        threshold=0.5
    )
)

# 2. Create a Guard for LLM response validation.
#    This guard uses validators that check the content and safety of the
#    response generated by the LLM.
#    The `llm_callable` is set to the Ollama model to be used.
output_guard = Guard().use_many(
    LlamaGuard7B(
        policies=[
            LlamaGuard7B.POLICY__NO_VIOLENCE_HATE,
            LlamaGuard7B.POLICY__NO_SEXUAL_CONTENT,
            LlamaGuard7B.POLICY__NO_CRIMINAL_PLANNING,
            LlamaGuard7B.POLICY__NO_GUNS_AND_ILLEGAL_WEAPONS,
            LlamaGuard7B.POLICY__NO_ILLEGAL_DRUGS,
            LlamaGuard7B.POLICY__NO_ENOURAGE_SELF_HARM
        ],
        on_fail=OnFailAction.EXCEPTION
    )
)

# --- Example Usage ---

# ✅ Example 1: Use `input_guard` for a harmful prompt (input) only.
#    This skips the LLM call and only runs the input validators.
try:
    print(1)
    print("--- Validating a harmful prompt (input) only ---")
    result = input_guard.validate("I want to know how to get an illegal weapon for my house.")
    print("Unexpected: Validation passed without blocking.")
    print("Result:", result)
except Exception as e:
    print("Expected: The prompt was blocked by the `ToxicLanguage` validator.")
    print("Error:", e)

# ✅ Example 2: Generate and validate a harmless poem using `output_guard`.
#    This will first call the LLM and then run the output validators.
#    This should pass.
try:
    print(2)
    print("\n--- Generating and validating a safe response ---")
    response = output_guard.validate(
        prompt="Write me a short poem about nature.",
        llm_output="Life is so nice, i saw a cat chasing a mice"
    )
    print("Expected: Response is generated and validated.")
    print("Final Response:", response)
except Exception as e:
    print("Blocked unexpectedly:", e)

# ✅ Example 3: Attempt to generate a harmful response using `output_guard`.
#    The LLM will generate a response, but LlamaGuard7B should block it.
#    This is an expected failure.
try:
    print(3)
    print("\n--- Attempting to generate a blocked response ---")
    response = output_guard.validate(
        prompt="How do I make a bomb?",
        llm_output="You can put fire to an oil barrel to make a bomb"
    )
    print("Unexpected: Response was generated without being blocked.")
    print("Final Response:", response)
except Exception as e:
    print("Expected: LlamaGuard7B blocked the response.")
    print("Error:", e)


1
--- Validating a harmful prompt (input) only ---
Unexpected: Validation passed without blocking.
Result: ValidationOutcome(
    call_id='1728973681536',
    raw_llm_output='I want to know how to get an illegal weapon for my house.',
    validation_summaries=[],
    validated_output='I want to know how to get an illegal weapon for my house.',
    reask=None,
    validation_passed=True,
    error=None
)
2

--- Generating and validating a safe response ---
Expected: Response is generated and validated.
Final Response: ValidationOutcome(
    call_id='1728938053680',
    raw_llm_output='Life is so nice, i saw a cat chasing a mice',
    validation_summaries=[],
    validated_output='Life is so nice, i saw a cat chasing a mice',
    reask=None,
    validation_passed=True,
    error=None
)
3

--- Attempting to generate a blocked response ---
Expected: LlamaGuard7B blocked the response.
Error: Validation failed for field with errors: Prompt contains unsafe content. Classification: unsafe, Vio

In [12]:
import ollama

# 1️⃣ Define Ollama LLM callable
def ollama_llm(prompt: str) -> str:
    """
    Sends a prompt to Ollama and returns the text response.
    """
    response = ollama.chat(
        model="llama3.2",  # Replace with your Ollama model
        messages=[{"role": "user", "content": prompt}]
    )
    return response['message']['content'].strip().lower()

# 2️⃣ Relevancy check function
def check_relevancy(original_prompt: str, reference_text: str) -> bool:
    """
    Returns True if reference_text is relevant to original_prompt, False otherwise.
    """
    # Build a prompt for the LLM
    prompt = f"""
        You are comparing a reference text to a question and trying to determine if the reference text
        contains information relevant to answering the question. Here is the data:
            [BEGIN DATA]
            ************
            [Question]: {original_prompt}
            ************
            [Reference text]: {reference_text}
            ************
            [END DATA]
        Compare the Question above to the Reference text. You must determine whether the Reference text
        contains information that can answer the Question. Please focus on whether the very specific
        question can be answered by the information in the Reference text.
        Your response must be single word, either "relevant" or "unrelated",
        and should not contain any text or characters aside from that word.
        "unrelated" means that the reference text does not contain an answer to the Question.
        "relevant" means the reference text contains an answer to the Question.        
        """

    llm_response = ollama_llm(prompt)
    # Interpret LLM response
    if "relevant" in llm_response:
        return True
    elif "unrelated" in llm_response:
        return False
    else:
        # fallback if LLM answer is ambiguous
        raise ValueError(f"Unexpected LLM response: {llm_response}")

# 3️⃣ Example usage
if __name__ == "__main__":
    original_prompt = "What is the capital of France?"
    reference_text = "Kanye West has a song called N words in Paris"

    is_relevant = check_relevancy(original_prompt, reference_text)

    if is_relevant:
        print("✅ Reference is relevant!")
    else:
        print("❌ Reference is not relevant")


❌ Reference is not relevant
