# Knowledge bases for Amazon Bedrock

In [None]:
import json

import nest_asyncio
import pandas as pd
from dotenv import load_dotenv

# Patch asyncio so an event loop can be re-entered from inside the notebook.
# NOTE(review): presumably needed by the async ragas evaluation further down
# - confirm.
nest_asyncio.apply()

# Load settings from a .env file into the process environment; override=True
# lets values from the file replace variables that are already set.
load_dotenv(override=True)

In [None]:
import boto3

# Bedrock agent runtime client used by retrieve_and_generate(). No region or
# credentials are passed here, so boto3 resolves them from its own configuration.
client = boto3.client("bedrock-agent-runtime")

In [None]:
# Identifier of the knowledge base that retrieve_and_generate() queries.
knowledge_base_id = "LABZ5ZLX8R"
# ARN of the foundation model passed as "modelArn" in the same request.
model_arn = "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0"

In [None]:
def retrieve_and_generate(question: str):
    """Send one question to the Bedrock knowledge base and return the response.

    The request is issued through the module-level ``client`` and targets the
    knowledge base and model named by the module-level ``knowledge_base_id``
    and ``model_arn``. It asks for the ``QUERY_DECOMPOSITION`` query
    transformation and overrides the vector search type to ``HYBRID``.

    Args:
        question: Text of the question to ask.

    Returns:
        Whatever ``client.retrieve_and_generate`` returns, unmodified. Other
        cells in this notebook read ``["output"]["text"]`` and ``["citations"]``
        from it.
    """
    kb_settings = {
        "knowledgeBaseId": knowledge_base_id,
        "modelArn": model_arn,
        "orchestrationConfiguration": {
            "queryTransformationConfiguration": {"type": "QUERY_DECOMPOSITION"}
        },
        "retrievalConfiguration": {
            "vectorSearchConfiguration": {"overrideSearchType": "HYBRID"}
        },
    }

    return client.retrieve_and_generate(
        input={"text": question},
        retrieveAndGenerateConfiguration={
            "knowledgeBaseConfiguration": kb_settings,
            "type": "KNOWLEDGE_BASE",
        },
    )

In [None]:
# Load the test dataset; it must contain a "question" column.
df = pd.read_json("testdataset.json")

# Series of questions that will be sent to the knowledge base.
question = df["question"]

In [None]:
# Trial run: ask the question stored under index label 0 and display the raw
# response as the cell output.
retrieve_and_generate(question=question[0])

In [None]:
# Ask the knowledge base every question in turn, keeping the generated text and
# the citation metadata in the same order as the questions.
answers, citations = [], []

for query in question:
    response = retrieve_and_generate(question=query)
    generated_text = response["output"]["text"]

    answers.append(generated_text)
    citations.append(response["citations"])

    # Echo the question, its answer and a divider so progress is visible.
    print(query, generated_text, "---", sep="\n")

In [None]:
# Store the generated answers and their citations as new columns next to the
# questions they belong to (both lists are in question order).
df["kb_answers"] = answers
df["kb_citations"] = citations

In [None]:
# Persist the answered dataset; force_ascii=False writes non-ASCII characters
# as-is instead of escaping them.
df.to_json("answer_data_kb_for_bedrock.json", force_ascii=False)

# Evaluation

In [None]:
# Start the evaluation from the file written in the previous section, replacing
# the in-memory frame.
df = pd.read_json("answer_data_kb_for_bedrock.json")
# Preview the first rows as the cell output.
df.head()

In [None]:
def _citation_bodies(row_citations: list) -> list:
    """Return the ``body`` field of every reference retrieved for one answer.

    Args:
        row_citations: The citation list stored for one question; each item
            carries a ``retrievedReferences`` list.

    Returns:
        A flat list of bodies, ordered by citation and then by reference.
    """
    # NOTE(review): assumes every retrieved chunk's text is a JSON document
    # with a "body" key - confirm against the data ingested into the knowledge
    # base.
    return [
        json.loads(reference["content"]["text"])["body"]
        for citation in row_citations
        for reference in citation["retrievedReferences"]
    ]


# Columns handed to the evaluation dataset.
question = df["question"]
answer = df["kb_answers"]
ground_truth = df["ground_truth"]

# One list of context passages per question. The comprehension keeps its loop
# variable local, so the notebook-level `citations` list built while querying
# Bedrock is no longer overwritten and no temporaries leak into the namespace.
context = [_citation_bodies(row_citations) for row_citations in df["kb_citations"]]

In [None]:
from datasets import Dataset

# Package the four columns as a Dataset, which is the input passed to ragas
# `evaluate` below. "contexts" holds one list of passages per question.
eval_data = Dataset.from_dict(
    {
        "question": question,
        "answer": answer,
        "ground_truth": ground_truth,
        "contexts": context,
    }
)

In [None]:
import os

from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings

# Chat model and embedding model handed to the evaluation below. Deployment
# names come from environment variables; an unset variable yields None.
llm = AzureChatOpenAI(
    azure_deployment=os.environ.get("AZURE_DEPLOYMENT_GPT4OMINI"),
)
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=os.environ.get("AZURE_DEPLOYMENT_EMBEDDINGS"),
)

In [None]:
from ragas.metrics import (
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
)
from ragas.run_config import RunConfig

from ragas import evaluate

# Generous wait/retry budget for the evaluation calls.
# NOTE(review): max_wait is presumably in seconds - confirm against the
# installed ragas version.
run_config = RunConfig(max_wait=600, max_retries=100)

# Score the dataset on four ragas metrics, using the Azure OpenAI chat model and
# embeddings configured above.
result = evaluate(
    eval_data,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
    llm=llm,
    embeddings=embeddings,
    run_config=run_config,
    # NOTE(review): presumably keeps the run going when a single sample fails
    # instead of raising - confirm against the ragas docs.
    raise_exceptions=False,
)

In [None]:
# Display the evaluation result as the cell output.
result

In [None]:
# Convert the evaluation result to a DataFrame and preview the first rows.
eval_df = result.to_pandas()
eval_df.head()

In [None]:
# Persist the evaluation table; force_ascii=False writes non-ASCII characters
# as-is instead of escaping them.
eval_df.to_json("eval_data_kb_for_bedrock.json", force_ascii=False)