# Groundedness Dataset Preparation using Benchmark HaluEval Dataset

This notebook evaluates groundedness guardrails (a prompted LLM and an AWS Bedrock guardrail) on a groundedness dataset generated from QA-style JSONL input files obtained from:
 
- https://github.com/RUCAIBox/HaluEval/blob/main/README.md
- https://github.com/RUCAIBox/HaluEval/blob/main/data/qa_data.json

In [None]:
import os
import json
import sys
import boto3
from openai import OpenAI
from jinja2 import Environment, FileSystemLoader, select_autoescape
from dotenv import load_dotenv
from datetime import datetime
import pandas as pd
from sklearn.metrics import classification_report

In [None]:
# Make modules that sit next to this file importable. Notebook kernels do not
# define __file__, so fall back to the current working directory in that case
# instead of failing with NameError.
try:
    _base_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    _base_dir = os.getcwd()
sys.path.append(_base_dir)

# === Load environment variables ===
# Must run before OPENAI_API_KEY is read from the environment below.
load_dotenv()

# === AWS + OpenAI clients ===
bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1")
openai_api_key = os.getenv("OPENAI_API_KEY")
openai_client = OpenAI(api_key=openai_api_key)

# === Load Prompt Template ===
# Templates are resolved relative to the "src" directory.
env = Environment(
    loader=FileSystemLoader("src"),
    autoescape=select_autoescape()
)
template = env.get_template("groundedness_guardrail.j2")

# === Configuration ===
GUARDRAIL_ID = "egi3t9xv4xej"
GUARDRAIL_VERSION = "1"
MODEL_NAME = "gpt-4"
sample_size = 500
input_json = "inputs/qa_data.json"
output_csv = "inputs/halueval_groundedness.csv"

### Define the LLM Evaluator for the Groundedness Guardrail using a prompted GPT-4 model

In [None]:

# LLM Groundedness Guardrail
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    if not text.startswith("```"):
        return text
    # Drop the opening fence line (which may carry a language tag such as "json").
    lines = text.splitlines()[1:]
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]
    return "\n".join(lines).strip()


def call_llm_guardrail(row, openai_client, template, model="gpt-4"):
    """Score one row for groundedness with a prompted chat model.

    Args:
        row: Mapping-like record (dict or pandas Series) with "query",
            "context" and the model answer under "response" (or "answer"
            when "response" is absent).
        openai_client: Client exposing ``chat.completions.create``.
        template: Object whose ``render`` method builds the prompt from the
            keys "user_query", "retrieved_context" and "model_answer".
        model: Model name passed to the chat completion call.

    Returns:
        pandas Series with "llm_score" and "llm_reasoning". Any failure while
        calling the model or parsing its reply yields a "FAIL" score with the
        error text as reasoning, so one bad row does not abort a batch.

    Raises:
        KeyError: If the row lacks a required column (raised before the API
            call is attempted).
    """
    # The Bedrock guardrail in this file reads the answer from "answer";
    # accept either column so both guardrails can score the same frame.
    answer_key = "response" if "response" in row or "answer" not in row else "answer"
    prompt = template.render({
        "user_query": row["query"],
        "retrieved_context": row["context"],
        "model_answer": row[answer_key]
    })
    try:
        response = openai_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        raw_output = response.choices[0].message.content.strip()
        # Replies wrapped in a ```json fence are still valid verdicts; unwrap
        # them instead of scoring them as FAIL.
        candidate = _strip_code_fence(raw_output)
        if candidate.startswith("{"):
            parsed = json.loads(candidate)
        else:
            parsed = {
                "REASONING": [raw_output],
                "SCORE": "FAIL"
            }
    except Exception as e:
        # Deliberate best-effort: record the failure on the row and continue.
        parsed = {
            "REASONING": [f"Error: {str(e)}"],
            "SCORE": "FAIL"
        }

    # REASONING may come back as a list, a single string, or be missing/null.
    # Normalise to a list so join() never iterates over the characters of a
    # string or trips over non-string items.
    reasoning = parsed.get("REASONING")
    if reasoning is None:
        reasoning = ["Missing reasoning"]
    elif isinstance(reasoning, str) or not isinstance(reasoning, (list, tuple)):
        reasoning = [reasoning]

    return pd.Series({
        "llm_score": parsed.get("SCORE", "FAIL"),
        "llm_reasoning": " | ".join(str(item) for item in reasoning)
    })


def run_llm_guardrail_batch(df: pd.DataFrame, openai_client=None, template=None, model="gpt-4") -> pd.DataFrame:
    """Apply the LLM guardrail to every row and append its output columns.

    Each row of *df* is passed to ``call_llm_guardrail`` together with the
    given client, template and model; the resulting columns are concatenated
    to the right of the original frame.
    """
    llm_columns = df.apply(
        lambda record: call_llm_guardrail(record, openai_client, template, model),
        axis=1,
    )
    combined = pd.concat([df, llm_columns], axis=1)
    return combined


### Define an Off-the-Shelf Option using an AWS Bedrock Groundedness Guardrail

In [None]:
# AWS Bedrock Guardrail
def call_bedrock_guardrail(row, guardrail_id, guardrail_version, bedrock_client):
    """Score one row with an AWS Bedrock contextual-grounding guardrail.

    Args:
        row: Mapping-like record (dict or pandas Series) with "context",
            "query" and the model answer under "answer" (or "response" when
            "answer" is absent).
        guardrail_id: Guardrail identifier passed to ``apply_guardrail``.
        guardrail_version: Guardrail version passed to ``apply_guardrail``.
        bedrock_client: Client exposing ``apply_guardrail``.

    Returns:
        pandas Series with "bedrock_action", "grounding_score",
        "grounding_threshold", "grounding_decision_reason" and
        "blocked_output_text". If the call or response handling fails, the
        action is "ERROR" and the reason column holds the exception text.

    Raises:
        KeyError: If the row lacks a required column (raised while building
            the request, before the API call is attempted).
    """
    # The LLM guardrail in this file reads the answer from "response";
    # accept either column so both guardrails can score the same frame.
    answer_key = "answer" if "answer" in row or "response" not in row else "response"
    payload = {
        "source": "OUTPUT",
        "content": [
            {"text": {"text": row["context"], "qualifiers": ["grounding_source"]}},
            {"text": {"text": row["query"], "qualifiers": ["query"]}},
            {"text": {"text": row[answer_key]}}
        ]
    }

    try:
        response = bedrock_client.apply_guardrail(
            guardrailIdentifier=guardrail_id,
            guardrailVersion=guardrail_version,
            source=payload["source"],
            content=payload["content"]
        )
        outputs = response.get("outputs", [{}])
        blocked_output = outputs[0].get("text") if outputs else None

        grounding_score = None
        threshold = None
        reason = None
        for assessment in response.get("assessments", []):
            # Legacy/flat shape kept for backward compatibility.
            if "groundingPolicy" in assessment:
                legacy = assessment["groundingPolicy"]
                grounding_score = legacy.get("score")
                threshold = legacy.get("threshold")
                reason = legacy.get("action")

            # ApplyGuardrail reports contextual grounding results as a list
            # of filters; the GROUNDING entry carries the groundedness score
            # (the RELEVANCE entry is a separate check and is ignored here).
            policy = assessment.get("contextualGroundingPolicy") or {}
            for grounding_filter in policy.get("filters", []):
                if grounding_filter.get("type") == "GROUNDING":
                    grounding_score = grounding_filter.get("score")
                    threshold = grounding_filter.get("threshold")
                    reason = grounding_filter.get("action")

        return pd.Series({
            "bedrock_action": response.get("action", "UNKNOWN"),
            "grounding_score": grounding_score,
            "grounding_threshold": threshold,
            "grounding_decision_reason": reason,
            "blocked_output_text": blocked_output
        })

    except Exception as e:
        # Deliberate best-effort: record the failure on the row and continue.
        return pd.Series({
            "bedrock_action": "ERROR",
            "grounding_score": None,
            "grounding_threshold": None,
            "grounding_decision_reason": str(e),
            "blocked_output_text": None
        })


def run_bedrock_guardrail_batch(df: pd.DataFrame, guardrail_id: str, guardrail_version: str, bedrock_client) -> pd.DataFrame:
    """Apply the Bedrock guardrail to every row and append its output columns.

    Each row of *df* is passed to ``call_bedrock_guardrail`` together with the
    guardrail id, version and client; the resulting columns are concatenated
    to the right of the original frame.
    """
    bedrock_columns = df.apply(
        lambda record: call_bedrock_guardrail(
            record, guardrail_id, guardrail_version, bedrock_client
        ),
        axis=1,
    )
    combined = pd.concat([df, bedrock_columns], axis=1)
    return combined

In [None]:
# === Step 1: Load Dataset ===
# Read the prepared groundedness dataset from the configured CSV path.
df = pd.read_csv(output_csv)

# === Step 2: Run LLM Guardrail ===
# A copy of the loaded frame is handed to the batch runner; the result
# (original columns plus the LLM guardrail columns) replaces `df`.
df = run_llm_guardrail_batch(
    df.copy(),
    openai_client=openai_client,
    template=template,
    model=MODEL_NAME,
)


In [None]:
# === Step 3: Run Bedrock Guardrail ===
# Append the Bedrock guardrail columns to the frame produced by the previous step.
df = run_bedrock_guardrail_batch(
    df,
    guardrail_id=GUARDRAIL_ID,
    guardrail_version=GUARDRAIL_VERSION,
    bedrock_client=bedrock_client,
)

In [None]:
# === Step 4: Prediction and Evaluation  ===
# NOTE: evaluation is currently disabled; the lines below are kept as the
# intended scoring recipe and are not executed.
#df["llm_guardrail_pred"] = df["llm_score"].map(lambda x: 1 if str(x).upper() == "PASS" else 0)
#df["bedrock_pred"] = df["grounding_score"].apply(lambda x: 1 if x is not None and x >= 0.5 else 0)
#df["label"] = df["label"].astype(int)

#print("\n📊 Model Evaluation Summary:")
#for model_name in ["llm_guardrail_pred", "bedrock_pred"]:
#    print(f"\n🔍 {model_name}:\n")
#    print(classification_report(df["label"], df[model_name], target_names=["FAIL", "PASS"]))

# === Step 5: Save Results ===
results_path = "results/llm_bedrock_evaluation.csv"
# Create the output directory first: to_csv does not create missing parent
# directories, and failing here would discard the results of every API call
# made in the previous steps.
results_dir = os.path.dirname(results_path)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)
df.to_csv(results_path, index=False)
print(f"\n✅ Evaluation complete. Results saved to '{results_path}'")