In [2]:
!pip install -U langchain langchain-core langchain-aws datasets



In [3]:
import time
import boto3
import botocore
import pandas as pd
from botocore.config import Config
from datasets import Dataset

from langchain_aws import ChatBedrock
from langchain_aws.retrievers.bedrock import AmazonKnowledgeBasesRetriever
from langchain_aws.embeddings import BedrockEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [4]:
# AWS credentials are deliberately NOT written into this notebook.
# boto3's normal credential lookup (environment variables, shared
# config/credentials files, an attached role, ...) is tried first. Only when
# that finds nothing are the keys prompted for, so secrets never end up in the
# saved .ipynb or in version control.

import os
from getpass import getpass

# Region is pinned for every client that relies on the ambient AWS configuration.
os.environ["AWS_DEFAULT_REGION"] = "us-west-2"

if boto3.Session().get_credentials() is None:
    # getpass hides the typed value and keeps it out of the cell output.
    os.environ["AWS_ACCESS_KEY_ID"] = getpass("AWS access key ID: ")
    os.environ["AWS_SECRET_ACCESS_KEY"] = getpass("AWS secret access key: ")

In [5]:
# Region passed explicitly to every boto3 client created in this notebook.
REGION = "us-west-2"

# ARN of the Bedrock inference profile used as `model_id` for both the
# generation and the evaluation LLM. It is assembled from two pieces only to
# keep the lines short.
_PROFILE_ARN_PREFIX = "arn:aws:bedrock:us-west-2:471112956049:"
_PROFILE_RESOURCE = "inference-profile/us.anthropic.claude-haiku-4-5-20251001-v1:0"
CLAUDE_HAIKU_PROFILE = _PROFILE_ARN_PREFIX + _PROFILE_RESOURCE

# botocore settings shared by the runtime clients: 120-second connect/read
# timeouts and a retry budget of 3.
cfg = Config(
    retries={"max_attempts": 3},
    connect_timeout=120,
    read_timeout=120,
)

In [6]:
# Keyword arguments common to the two runtime clients (region + timeouts/retries).
_runtime_client_kwargs = {"region_name": REGION, "config": cfg}

# Used below to list the Knowledge Bases in the account; keeps botocore defaults.
bedrock_agent = boto3.client("bedrock-agent", region_name=REGION)

# Runtime clients for Knowledge Base retrieval and for model invocation.
bedrock_agent_runtime = boto3.client("bedrock-agent-runtime", **_runtime_client_kwargs)
bedrock_runtime = boto3.client("bedrock-runtime", **_runtime_client_kwargs)

In [7]:
# Resolve which Knowledge Base to evaluate.
#
# "Whatever the API lists first" is ambiguous as soon as the account holds more
# than one Knowledge Base, so an explicit KNOWLEDGE_BASE_ID environment variable
# takes precedence. Without it the first listed Knowledge Base is still used,
# but the alternatives are printed so that a wrong pick is visible.
kb_id = os.environ.get("KNOWLEDGE_BASE_ID", "").strip()

if not kb_id:
    resp = bedrock_agent.list_knowledge_bases(maxResults=10)
    kb_summaries = resp.get("knowledgeBaseSummaries", [])

    if not kb_summaries:
        raise RuntimeError("No Knowledge Bases found.")

    if len(kb_summaries) > 1:
        print(
            "Multiple Knowledge Bases found; defaulting to the first. "
            "Set KNOWLEDGE_BASE_ID to choose another:"
        )
        for summary in kb_summaries:
            print(
                f"  {summary['knowledgeBaseId']}  "
                f"{summary.get('name', '')}  [{summary.get('status', '?')}]"
            )

    kb_id = kb_summaries[0]["knowledgeBaseId"]

print("Using Knowledge Base:", kb_id)

Using Knowledge Base: 5O0E4XEJBP


In [8]:
# LLM that writes the answers inside the RAG chain (temperature 0.2).
llm_for_generation = ChatBedrock(
    client=bedrock_runtime,
    model_id=CLAUDE_HAIKU_PROFILE,
    provider="anthropic",
    temperature=0.2,
)

In [9]:
# LLM handed to ragas as the judge: temperature 0.0 and a 3072-token output cap.
llm_for_evaluation = ChatBedrock(
    client=bedrock_runtime,
    model_id=CLAUDE_HAIKU_PROFILE,
    provider="anthropic",
    temperature=0.0,
    model_kwargs={"max_tokens": 3072},
)

In [10]:
# Embedding model handed to ragas alongside the judge LLM.
bedrock_embeddings = BedrockEmbeddings(
    client=bedrock_runtime,
    model_id="amazon.titan-embed-text-v1",
)

In [11]:
# Semantic search over the Knowledge Base, returning up to 10 passages per query.
#
# The already-configured `bedrock_agent_runtime` client is passed in explicitly
# so retrieval runs with the notebook's REGION and the timeout/retry settings
# in `cfg`, instead of the retriever creating a separate client from ambient
# AWS configuration.
retriever = AmazonKnowledgeBasesRetriever(
    knowledge_base_id=kb_id,
    client=bedrock_agent_runtime,
    retrieval_config={
        "vectorSearchConfiguration": {
            "numberOfResults": 10,
            "overrideSearchType": "SEMANTIC",
        }
    },
)

In [12]:
# Instructions for the answering model. `{context}` receives the retrieved
# passages and `{question}` the user's question.
PROMPT_TEMPLATE = """
You are a behavioral health coach who has been provided mental health information about a specific patient.
You should speak in a compassionate, professional tone to support the user. Make sure to not share
information about the patient in the context, and only focus on providing advice.

Context:
{context}

Question: {question}

Make sure the response contains:
-Actionable advice
-Less than 150 words

If information about the question is not available in the provided context, explicitly state:
"This information is not provided." Stick to the responses provided in the context.
"""

# The declared variables must match the placeholders in the template above.
prompt = PromptTemplate(input_variables=["context", "question"], template=PROMPT_TEMPLATE)

def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents in their original order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    passages = [doc.page_content for doc in docs]
    return separator.join(passages)

# RAG pipeline: the incoming question is sent to the retriever (whose documents
# are flattened to text by `format_docs`) and is also passed through unchanged;
# both values fill the prompt, which goes to the model, whose reply is reduced
# to a plain string.
_retrieve_as_text = retriever | format_docs
_prompt_inputs = {"context": _retrieve_as_text, "question": RunnablePassthrough()}

chain = _prompt_inputs | prompt | llm_for_generation | StrOutputParser()

In [13]:
# Evaluation questions, sent verbatim to the RAG chain (order matters: the
# reference answers are matched to them by position).
questions = [
    "Which green tea do you recommend?",
    "What is good for dandruff?",
    "What are some places I could go for a meditation retreat?",
    "what are healthy alternatives for dessert?",
    "What helps headaches?",
    "How does green tea help?",
    "What is light therapy and what are the instructions to do it?",
    "What percentage of saturated fats can I eat?",
    "What is a meditation for confidence",
    "What are the types of PCOS?",
]

In [14]:
# Reference answers, matched to `questions` by position.
# The text is kept plain ASCII: a typographic ligature pasted from a source
# document (as previously in "muffins") becomes a different character sequence
# for the embedding and LLM-based metrics that compare answers against this
# reference.
ground_truth = [
    "Allegro Tea or Yamamotoyama is good for green tea",
    "Biossance which is turmeric-based is good and Avalon also has anti-dandruff treatment",
    "Some retreat spots are Land Of Medicine Buddha, Spirit Rock, Escalon, and Mt. Madonna",
    "Daily Harvest has good options for healthy alternatives. There are also nut cookies just made from nuts, and almond flour blueberry muffins with honey",
    "1. Lower the light on your screen. 2. A protector for your laptop screen for the LED light. 3. Taking breaks is important. 4. Look away from the screen every 20 minutes and look at something far away. 5. You can take Phyto Ultra Comfort supplement. 6. EFT tapping.",
    "Green tea helps prevent cancer and helps the brain",
    "Light therapy is helpful for sleep and energy. It can also be helpful for mood in the context of seasonal affective disorder. In the morning it can be helpful to sit next to the light therapy for 15-45 minutes while keeping the light box an arms distance away, and keep it on your side.",
    "You can eat 20% saturated fat daily",
    "Some meditations for confidence include manipura chakra, yoga nidra for confidence, and Prana Nidra Healing",
    "The types of PCOS include glucose intolerance, inflammation, and hormonal imbalances.",
]

In [15]:
# Generate an answer for every question and record the passages it was built from.
#
# Retrieval happens exactly once per question: the same documents fill the
# prompt and the `contexts` column. Calling `chain.invoke(q)` and then
# `retriever.invoke(q)` would query the Knowledge Base twice and would not
# guarantee that the recorded contexts are the ones the answer was generated from.

# Pause between questions, in seconds (presumably to stay under Bedrock rate
# limits - TODO confirm the value that is actually required).
SECONDS_BETWEEN_QUESTIONS = 20

# Same prompt -> model -> string-parser steps as `chain`, minus its built-in retrieval.
answer_from_docs = prompt | llm_for_generation | StrOutputParser()

answers = []
contexts = []

for i, q in enumerate(questions, 1):
    print(f"Processing question {i}/{len(questions)}")
    docs = retriever.invoke(q)
    ctx = [d.page_content for d in docs]
    answer = answer_from_docs.invoke({"context": format_docs(docs), "question": q})

    answers.append(answer)
    contexts.append(ctx)

    # Nothing follows the last question, so there is nothing to wait for.
    if i < len(questions):
        time.sleep(SECONDS_BETWEEN_QUESTIONS)

Processing question 1/10
Processing question 2/10
Processing question 3/10
Processing question 4/10
Processing question 5/10
Processing question 6/10
Processing question 7/10
Processing question 8/10
Processing question 9/10
Processing question 10/10


In [16]:
# Assemble the evaluation table: one row per question, with the generated
# answer, the retrieved passages and the reference answer in parallel columns.
_dataset_columns = {
    "question": questions,
    "answer": answers,
    "contexts": contexts,
    "ground_truth": ground_truth,
}
dataset = Dataset.from_dict(_dataset_columns)

print("Dataset ready with columns:", dataset.column_names)

Dataset ready with columns: ['question', 'answer', 'contexts', 'ground_truth']


In [18]:
!pip install -U ragas pandas openpyxl



In [19]:
from ragas import evaluate
from ragas.metrics import (
    answer_correctness,
    answer_relevancy,
    answer_similarity,
    context_entity_recall,
    context_precision,
    context_recall,
    faithfulness,
)

# Metrics computed for every row of the dataset, in this order.
metrics = [
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
    context_entity_recall,
    answer_similarity,
    answer_correctness,
]

  `sender=` as a single positional argument and any \*\*kwargs that
  `sender=` as a single positional argument and any \*\*kwargs that
  """Emit this signal on behalf of *sender*, passing on \*\*kwargs.
  from ragas.metrics import (faithfulness, answer_relevancy, context_precision, context_recall, \
  from ragas.metrics import (faithfulness, answer_relevancy, context_precision, context_recall, \
  from ragas.metrics import (faithfulness, answer_relevancy, context_precision, context_recall, \
  from ragas.metrics import (faithfulness, answer_relevancy, context_precision, context_recall, \
  from ragas.metrics import (faithfulness, answer_relevancy, context_precision, context_recall, \
  from ragas.metrics import (faithfulness, answer_relevancy, context_precision, context_recall, \
  from ragas.metrics import (faithfulness, answer_relevancy, context_precision, context_recall, \


In [20]:
# Score the dataset with every metric, using the evaluation LLM as judge and
# the Bedrock embeddings model for the embedding side.
results = evaluate(
    dataset=dataset,
    metrics=metrics,
    llm=llm_for_evaluation,
    embeddings=bedrock_embeddings,
)

# Tabular view of the scores, exported to Excel in the next cell.
df = results.to_pandas()

Evaluating:   0%|          | 0/70 [00:00<?, ?it/s]



In [None]:
# Write the scores to an Excel workbook in the current working directory
# (the DataFrame index is not exported).
output_file = "ragas_results.xlsx"
df.to_excel(output_file, index=False)
print(f"Saved to {output_file}")

Saved to ragas_results.xlsx
