# Run Comparison Models - LLM Groundedness Evaluator and Off-the-Shelf Groundedness Guardrail

This notebook establishes the required connections and sets up two comparison models: a prompted GPT-4o LLM evaluator and an off-the-shelf AWS Bedrock guardrail. For more information on the AWS Bedrock contextual grounding guardrail, see:

- https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-contextual-grounding-check.html

In [36]:
import os
import json
import sys
import boto3
from openai import OpenAI
from jinja2 import Environment, FileSystemLoader, select_autoescape
from dotenv import load_dotenv
from datetime import datetime
import pandas as pd
from sklearn.metrics import classification_report
import time

In [42]:
# Load the .env file before reading any credentials. Without this, a fresh
# kernel reaches the os.getenv calls below before any load_dotenv call has run
# and silently gets None for every value that lives only in .env.
load_dotenv(dotenv_path=".env", override=True)

# Explicitly extract the credentials
access_key = os.getenv("AWS_ACCESS_KEY_ID")
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
region = os.getenv("AWS_DEFAULT_REGION")

# Runtime client that the guardrail calls later in the notebook are made with
bedrock_client = boto3.client(
    "bedrock-runtime",
    region_name=region,
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
)

# STS client, used here only to check which identity the credentials resolve to
sts_client = boto3.client(
    "sts",
    region_name=region,
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
)

# NOTE(review): this prints the account id and IAM ARN into the saved cell
# output -- clear the output before sharing or committing the notebook.
print(sts_client.get_caller_identity())

{'UserId': 'AIDASYIUVQAL4IXIIR4FU', 'Account': '189558784023', 'Arn': 'arn:aws:iam::189558784023:user/TStone', 'ResponseMetadata': {'RequestId': 'd3149f8c-9ce8-4434-9d82-b59b4bcd29ce', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'd3149f8c-9ce8-4434-9d82-b59b4bcd29ce', 'content-type': 'text/xml', 'content-length': '403', 'date': 'Sun, 29 Jun 2025 14:08:11 GMT'}, 'RetryAttempts': 0}}


In [54]:
# Read settings from the local .env file; values there override the process environment
load_dotenv(dotenv_path=".env", override=True)

# === API clients ===
# NOTE(review): the key is stored on the OpenAI *class* and then passed to the
# constructor; the class-level assignment looks redundant -- confirm nothing
# else reads OpenAI.api_key before removing it.
OpenAI.api_key = os.getenv("OPENAI_API_KEY")
openai_client = OpenAI(api_key=OpenAI.api_key)
#bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1")

# === Prompt template (looked up relative to the notebook's working directory) ===
env = Environment(loader=FileSystemLoader("."), autoescape=select_autoescape())
template = env.get_template("groundedness_guardrail.j2")

# === Run configuration ===
GUARDRAIL_ID = "egi3t9xv4xej"
GUARDRAIL_VERSION = "1"
MODEL_NAME = "gpt-4o"
sample_size = 500
SAVE_INTERVAL = 40  # rows processed between checkpoint writes

# Alternative dataset (HaluEval), kept for switching by hand:
#input_file = "../data/halueval_groundedness.csv"
#output_file = "../data/halueval_bedrock_results.csv"

input_file = "../data/synthetic_groundedness_challenger_set_long.csv"
output_file = "../data/synthetic_groundedness_challenger_set_bedrock_results.csv"

### Define the LLM Evaluator for Groundedness Guardrail using a prompted GPT-4o model

In [22]:
# LLM Groundedness Guardrail
def call_llm_guardrail(row, openai_client, template, model="gpt-4"):
    """Score one row for groundedness with a prompted chat model.

    The prompt is rendered from ``template`` using the row's ``query``,
    ``context`` and ``response`` fields, sent as a single user message at
    temperature 0, and the reply is parsed as a JSON object with ``SCORE``
    and ``REASONING`` keys.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with ``query``,
            ``context`` and ``response`` entries.
        openai_client: Client exposing ``chat.completions.create``.
        template: Object with a ``render(dict)`` method returning the prompt.
        model: Model name forwarded to the completion call.

    Returns:
        pd.Series with ``llm_score`` and ``llm_reasoning``. Any API failure,
        unparseable reply, or reply that is not a JSON object yields
        ``llm_score == "FAIL"`` with the error text or raw reply as reasoning.
    """
    prompt = template.render({
        "user_query": row["query"],
        "retrieved_context": row["context"],
        "model_answer": row["response"]
    })

    try:
        response = openai_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )

        raw_output = response.choices[0].message.content.strip()

        # Strip markdown code fencing (```json ... ```) if the model added it.
        if raw_output.startswith("```"):
            raw_output = raw_output.replace("```json", "", 1).replace("```", "").strip()

        try:
            parsed = json.loads(raw_output)
        except json.JSONDecodeError:
            parsed = None

        # Valid JSON that is not an object (list, string, number, null) cannot
        # carry SCORE/REASONING keys, so treat it like unparseable output.
        if not isinstance(parsed, dict):
            parsed = {
                "REASONING": [raw_output],
                "SCORE": "FAIL"
            }

    except Exception as e:
        parsed = {
            "REASONING": [f"Error: {str(e)}"],
            "SCORE": "FAIL"
        }

    # REASONING may come back as a bare string, null, or a list with
    # non-string items; normalise so the join neither splits a string into
    # characters nor raises.
    reasoning = parsed.get("REASONING", ["Missing reasoning"])
    if reasoning is None:
        reasoning = ["Missing reasoning"]
    elif isinstance(reasoning, str) or not isinstance(reasoning, (list, tuple)):
        reasoning = [reasoning]

    return pd.Series({
        "llm_score": parsed.get("SCORE", "FAIL"),
        "llm_reasoning": " | ".join(str(item) for item in reasoning)
    })


def run_llm_guardrail_batch_with_resume(input_file, output_file, openai_client, template, model="gpt-4", save_interval=20):
    """Run the LLM guardrail over a CSV dataset, checkpointing to ``output_file``.

    If ``output_file`` already exists, its row count is taken as the number of
    input rows already processed and work resumes from the next row. Results
    are written back to ``output_file`` every ``save_interval`` rows and once
    more after the final row.

    Args:
        input_file: Path to the input CSV; rows are passed to
            ``call_llm_guardrail``.
        output_file: Path to the checkpoint/result CSV (read if present,
            overwritten on each save).
        openai_client: Client forwarded to ``call_llm_guardrail``.
        template: Prompt template forwarded to ``call_llm_guardrail``.
        model: Model name forwarded to ``call_llm_guardrail``.
        save_interval: Number of rows between checkpoint writes; must be >= 1.

    Returns:
        DataFrame holding the previously completed rows plus the newly
        processed ones, each with the guardrail result columns appended.

    Raises:
        ValueError: If ``save_interval`` is less than 1.
    """
    if save_interval < 1:
        raise ValueError("save_interval must be a positive integer")

    full_dataset = pd.read_csv(input_file)

    try:
        completed_results = pd.read_csv(output_file)
        resume_from_index = len(completed_results)
        print(f"Resuming from row {resume_from_index}")
        if not completed_results.empty:
            completed_results = completed_results.astype(full_dataset.dtypes.to_dict(), errors='ignore')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # A zero-byte checkpoint (e.g. from an interrupted write) is treated
        # the same as a missing one.
        completed_results = pd.DataFrame()
        resume_from_index = 0
        print("No existing output found. Starting fresh.")

    remaining_data = full_dataset.iloc[resume_from_index:]
    total_rows = len(full_dataset)

    # Buffer rows between checkpoints instead of concatenating a one-row frame
    # per iteration, which copies the whole result on every row.
    pending_rows = []

    for i, (_, row) in enumerate(remaining_data.iterrows(), start=resume_from_index):
        result = call_llm_guardrail(row, openai_client, template, model=model)
        pending_rows.append(pd.concat([row, result]))

        if (i + 1) % save_interval == 0 or (i + 1) == total_rows:
            new_block = pd.DataFrame(pending_rows)
            if completed_results.empty:
                completed_results = new_block.reset_index(drop=True)
            else:
                completed_results = pd.concat([completed_results, new_block], ignore_index=True)
            pending_rows = []
            completed_results.to_csv(output_file, index=False)
            print(f"Saved progress at row {i + 1}")

    return completed_results

In [23]:
# === Step 2: Run LLM Guardrail ===
# Checkpoint the LLM run to its own file. The Bedrock step resumes from
# `output_file`, so sharing that path would make it count the LLM rows as
# already completed and skip its own work when the notebook is run top to bottom.
llm_output_file = output_file.replace("_bedrock_results", "_llm_results")
if llm_output_file == output_file:
    # Fallback for output names that do not follow the "_bedrock_results" pattern
    _root, _ext = os.path.splitext(output_file)
    llm_output_file = f"{_root}_llm{_ext}"

df = run_llm_guardrail_batch_with_resume(
    input_file,
    llm_output_file,
    openai_client=openai_client,
    template=template,
    model=MODEL_NAME,
    save_interval=SAVE_INTERVAL,
)


No existing output found. Starting fresh.
Saved progress at row 40
Saved progress at row 80
Saved progress at row 120
Saved progress at row 160
Saved progress at row 200
Saved progress at row 240
Saved progress at row 280
Saved progress at row 320
Saved progress at row 360
Saved progress at row 400
Saved progress at row 440
Saved progress at row 480
Saved progress at row 520
Saved progress at row 560
Saved progress at row 600
Saved progress at row 640
Saved progress at row 680
Saved progress at row 720
Saved progress at row 760
Saved progress at row 770


### Define an Off-the-Shelf Option using an AWS Bedrock Groundedness Guardrail

In [55]:
# AWS Bedrock Guardrail
def call_bedrock_guardrail(row, guardrail_id, guardrail_version, bedrock_client):
    """Apply a Bedrock guardrail to one row and extract the grounding result.

    The row's ``context`` is sent as the grounding source, ``query`` as the
    query, and ``response`` as the content to check, with ``source="OUTPUT"``.

    Args:
        row: Mapping-like record with ``context``, ``query`` and ``response``.
        guardrail_id: Guardrail identifier passed to ``apply_guardrail``.
        guardrail_version: Guardrail version passed to ``apply_guardrail``.
        bedrock_client: Client exposing ``apply_guardrail``.

    Returns:
        pd.Series with ``bedrock_action``, ``grounding_score``,
        ``grounding_threshold``, ``grounding_decision_reason`` and
        ``blocked_output_text``. If the call raises, ``bedrock_action`` is
        ``"ERROR"`` and ``grounding_decision_reason`` holds the error text.
    """
    payload = {
        "source": "OUTPUT",
        "content": [
            {"text": {"text": row["context"], "qualifiers": ["grounding_source"]}},
            {"text": {"text": row["query"], "qualifiers": ["query"]}},
            {"text": {"text": row["response"]}}
        ]
    }

    try:
        response = bedrock_client.apply_guardrail(
            guardrailIdentifier=guardrail_id,
            guardrailVersion=guardrail_version,
            source=payload["source"],
            content=payload["content"]
        )
        outputs = response.get("outputs") or []
        blocked_output = outputs[0].get("text") if outputs else None

        grounding_score = None
        threshold = None
        reason = None
        for assessment in response.get("assessments", []):
            # ApplyGuardrail reports contextual grounding results under
            # "contextualGroundingPolicy" as a list of filters; the one with
            # type "GROUNDING" carries the grounding score (the "RELEVANCE"
            # filter is a separate check and is ignored here).
            policy = assessment.get("contextualGroundingPolicy")
            if policy:
                for grounding_filter in policy.get("filters", []):
                    if grounding_filter.get("type") == "GROUNDING":
                        grounding_score = grounding_filter.get("score")
                        threshold = grounding_filter.get("threshold")
                        reason = grounding_filter.get("action")
            elif "groundingPolicy" in assessment:
                # Flat shape the original code looked for; kept as a fallback.
                gp = assessment["groundingPolicy"]
                grounding_score = gp.get("score")
                threshold = gp.get("threshold")
                reason = gp.get("action")

        return pd.Series({
            "bedrock_action": response.get("action", "UNKNOWN"),
            "grounding_score": grounding_score,
            "grounding_threshold": threshold,
            "grounding_decision_reason": reason,
            "blocked_output_text": blocked_output
        })

    except Exception as e:
        # Best-effort: record the failure on the row rather than aborting the batch.
        return pd.Series({
            "bedrock_action": "ERROR",
            "grounding_score": None,
            "grounding_threshold": None,
            "grounding_decision_reason": str(e),
            "blocked_output_text": None
        })


def run_bedrock_guardrail_batch(df: pd.DataFrame, guardrail_id: str, guardrail_version: str, bedrock_client) -> pd.DataFrame:
    """Apply the Bedrock guardrail to every row of ``df`` in one pass.

    Returns a new frame made of ``df`` with the columns produced by
    ``call_bedrock_guardrail`` appended on the right.

    NOTE(review): a later cell in this notebook defines another function with
    this same name but a file-based signature; once that cell runs, it
    replaces this definition.
    """
    guardrail_columns = df.apply(
        call_bedrock_guardrail,
        axis=1,
        args=(guardrail_id, guardrail_version, bedrock_client),
    )
    return pd.concat([df, guardrail_columns], axis=1)


In [58]:
# === Incremental Batch Processor ===
def run_bedrock_guardrail_batch(input_file, output_file, guardrail_id, guardrail_version, bedrock_client, save_interval=20):
    """Run the Bedrock guardrail over a CSV dataset, checkpointing to ``output_file``.

    If ``output_file`` already exists, its row count is taken as the number of
    input rows already processed and work resumes from the next row. Results
    are written back to ``output_file`` every ``save_interval`` rows and once
    more after the final row.

    Args:
        input_file: Path to the input CSV; rows are passed to
            ``call_bedrock_guardrail``.
        output_file: Path to the checkpoint/result CSV (read if present,
            overwritten on each save).
        guardrail_id: Guardrail identifier forwarded to ``call_bedrock_guardrail``.
        guardrail_version: Guardrail version forwarded to ``call_bedrock_guardrail``.
        bedrock_client: Client forwarded to ``call_bedrock_guardrail``.
        save_interval: Number of rows between checkpoint writes; must be >= 1.

    Returns:
        DataFrame holding the previously completed rows plus the newly
        processed ones, each with the guardrail result columns appended.

    Raises:
        ValueError: If ``save_interval`` is less than 1.
    """
    if save_interval < 1:
        raise ValueError("save_interval must be a positive integer")

    full_dataset = pd.read_csv(input_file)

    try:
        completed_results = pd.read_csv(output_file)
        resume_from_index = len(completed_results)
        print(f"Resuming from row {resume_from_index}")
        if not completed_results.empty:
            completed_results = completed_results.astype(full_dataset.dtypes.to_dict(), errors="ignore")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # A zero-byte checkpoint (e.g. from an interrupted write) is treated
        # the same as a missing one.
        completed_results = pd.DataFrame()
        resume_from_index = 0
        print("No existing output found. Starting fresh.")

    remaining_data = full_dataset.iloc[resume_from_index:]
    total_rows = len(full_dataset)

    # Buffer rows between checkpoints instead of concatenating a one-row frame
    # per iteration, which copies the whole result on every row.
    pending_rows = []

    for i, (_, row) in enumerate(remaining_data.iterrows(), start=resume_from_index):
        result = call_bedrock_guardrail(row, guardrail_id, guardrail_version, bedrock_client)
        pending_rows.append(pd.concat([row, result]))

        if (i + 1) % save_interval == 0 or (i + 1) == total_rows:
            new_block = pd.DataFrame(pending_rows)
            if completed_results.empty:
                completed_results = new_block.reset_index(drop=True)
            else:
                completed_results = pd.concat([completed_results, new_block], ignore_index=True)
            pending_rows = []
            completed_results.to_csv(output_file, index=False)
            print(f"✅ Saved progress at row {i + 1} / {total_rows}")

    return completed_results


In [59]:
# === Step 3: Run Bedrock Guardrail ===
# Processes input_file with the configured guardrail, checkpointing to output_file.
df_br = run_bedrock_guardrail_batch(
    input_file,
    output_file,
    guardrail_id=GUARDRAIL_ID,
    guardrail_version=GUARDRAIL_VERSION,
    bedrock_client=bedrock_client,
    save_interval=SAVE_INTERVAL,
)

No existing output found. Starting fresh.
✅ Saved progress at row 40 / 770
✅ Saved progress at row 80 / 770
✅ Saved progress at row 120 / 770
✅ Saved progress at row 160 / 770
✅ Saved progress at row 200 / 770
✅ Saved progress at row 240 / 770
✅ Saved progress at row 280 / 770
✅ Saved progress at row 320 / 770
✅ Saved progress at row 360 / 770
✅ Saved progress at row 400 / 770
✅ Saved progress at row 440 / 770
✅ Saved progress at row 480 / 770
✅ Saved progress at row 520 / 770
✅ Saved progress at row 560 / 770
✅ Saved progress at row 600 / 770
✅ Saved progress at row 640 / 770
✅ Saved progress at row 680 / 770
✅ Saved progress at row 720 / 770
✅ Saved progress at row 760 / 770
✅ Saved progress at row 770 / 770


In [None]:
# === Step 5: Prediction and Evaluation  ===
# (currently disabled)
#df["llm_guardrail_pred"] = df["llm_score"].map(lambda x: 1 if str(x).upper() == "PASS" else 0)
#df["bedrock_pred"] = df["grounding_score"].apply(lambda x: 1 if x is not None and x >= 0.5 else 0)
#df["label"] = df["label"].astype(int)

#print("\n📊 Model Evaluation Summary:")
#for model_name in ["llm_guardrail_pred", "bedrock_pred"]:
#    print(f"\n🔍 {model_name}:\n")
#    print(classification_report(df["label"], df[model_name], target_names=["FAIL", "PASS"]))

# === Step 6: Save Results ===
# NOTE(review): `df` is the output of the LLM guardrail run only; the Bedrock
# results live in `df_br` and are not part of this file.
results_path = "results/llm_bedrock_evaluation.csv"
# Create the target directory first; to_csv does not create missing folders.
os.makedirs(os.path.dirname(results_path), exist_ok=True)
df.to_csv(results_path, index=False)
print(f"\n✅ Evaluation complete. Results saved to '{results_path}'")