# Kendra

In [None]:
import nest_asyncio
import pandas as pd
from dotenv import load_dotenv

# Patch asyncio so an event loop can be re-entered from inside the notebook's
# already-running loop (presumably needed by the async evaluation run later in
# this notebook — confirm).
nest_asyncio.apply()

# Load variables from a .env file; override=True lets values from .env replace
# variables that are already set in the process environment.
load_dotenv(override=True)

In [None]:
import boto3

# Service clients shared by the helper functions in this notebook. No region
# or credentials are passed, so boto3 resolves them from its default
# configuration.
bedrock_runtime = boto3.client("bedrock-runtime")
kendra = boto3.client("kendra")

In [None]:
# Kendra index searched by fetching_relevant_documents().
kendra_index_id = "e2b2ac6f-0b68-4bd0-8792-96729e04feb7"
# Bedrock model used for both search-query generation and answer generation.
model_id = "cohere.command-r-plus-v1:0"

In [None]:
def generate_search_query(question: str) -> list[str]:
    """Ask the Bedrock model to turn a question into search queries.

    The request sets ``search_queries_only`` in the model-specific request
    fields and asks for the ``/search_queries`` field to be returned through
    ``additionalModelResponseFieldPaths``.

    Args:
        question: The user's natural-language question.

    Returns:
        The ``text`` of every search query in the response, in response
        order. An empty list when the response carries no search queries.
    """
    result = bedrock_runtime.converse(
        modelId=model_id,
        additionalModelRequestFields={"search_queries_only": True},
        additionalModelResponseFieldPaths=["/search_queries"],
        messages=[
            {
                "role": "user",
                "content": [{"text": question}],
            }
        ],
    )

    # Guard against the extra-fields section (or its search_queries entry)
    # being absent or null, so one such response yields "no queries" instead
    # of a KeyError/TypeError that would abort a whole batch run.
    # NOTE(review): confirm under which conditions Bedrock omits the field.
    extra_fields = result.get("additionalModelResponseFields") or {}
    search_queries = extra_fields.get("search_queries") or []

    return [entry["text"] for entry in search_queries]

In [None]:
def fetching_relevant_documents(
    queries: list[str], *, language_code: str = "ja"
) -> list[dict]:
    """Retrieve passages from the Kendra index for each search query.

    Every query is sent to ``kendra.retrieve`` against ``kendra_index_id``
    with an attribute filter on ``_language_code``. Result items from all
    queries are concatenated in query order; items returned for more than one
    query are not de-duplicated.

    Args:
        queries: Search query strings to run, one Kendra request per query.
        language_code: Value matched against the ``_language_code`` attribute.
            Defaults to ``"ja"``.

    Returns:
        A list of dicts, one per result item, reduced to the keys ``Id``,
        ``DocumentId``, ``DocumentTitle``, ``Content`` and ``DocumentURI``
        (only those present on the item). Empty when ``queries`` is empty.
    """
    # Fields kept from each result item; defined once rather than per query.
    wanted_keys = (
        "Id",
        "DocumentId",
        "DocumentTitle",
        "Content",
        "DocumentURI",
    )

    items = []
    for query in queries:
        response = kendra.retrieve(
            IndexId=kendra_index_id,
            QueryText=query,
            AttributeFilter={
                "EqualsTo": {
                    "Key": "_language_code",
                    "Value": {"StringValue": language_code},
                }
            },
        )

        # Drop every field except the wanted ones, preserving item order.
        items.extend(
            {key: value for key, value in item.items() if key in wanted_keys}
            for item in response["ResultItems"]
        )

    return items

In [None]:
def generating_response(question: str, documents: list[dict]) -> str:
    """Generate an answer to a question using the supplied documents.

    The documents are forwarded to the model unchanged through the
    model-specific ``documents`` request field.

    Args:
        question: The user's natural-language question.
        documents: Documents to pass to the model, as dicts such as those
            returned by ``fetching_relevant_documents``.

    Returns:
        The text of the first content block of the model's output message.
    """
    result = bedrock_runtime.converse(
        modelId=model_id,
        additionalModelRequestFields={"documents": documents},
        messages=[
            {
                "role": "user",
                "content": [{"text": question}],
            }
        ],
    )

    # Only the first content block is read; any further blocks are ignored.
    message = result["output"]["message"]
    return message["content"][0]["text"]

In [None]:
# Load the test set; its "question" column drives the pipeline.
df = pd.read_json("testdataset.json")

question = df["question"]

In [None]:
# Try the first pipeline step on the first question and display the queries.
queries = generate_search_query(question[0])
queries

In [None]:
# Retrieve documents for those queries and display them.
relevant_documents = fetching_relevant_documents(queries)
relevant_documents

In [None]:
# Generate an answer for the first question from the retrieved documents and
# display it.
response = generating_response(question=question[0], documents=relevant_documents)
response

In [None]:
# Run the whole pipeline (query generation -> retrieval -> answer generation)
# for every question, collecting the answer and the retrieved documents in
# question order.
answers, citations = [], []

for q in question:
    queries = generate_search_query(question=q)
    relevant_documents = fetching_relevant_documents(queries)
    answer = generating_response(question=q, documents=relevant_documents)

    answers.append(answer)
    citations.append(relevant_documents)

    # Progress output: question, answer and a separator, one per line.
    print(q, answer, "---", sep="\n")

In [None]:
# Attach the generated answers and their retrieved documents to the test set;
# both lists are in the same order as the rows of df.
df["kendra_answers"] = answers
df["kendra_citations"] = citations

In [None]:
# Persist the answers; force_ascii=False writes non-ASCII characters as-is
# instead of \u-escaping them.
df.to_json("answer_data_kendra.json", force_ascii=False)

# Evaluation

In [None]:
# Reload the saved answers (presumably so the evaluation cells can run without
# regenerating them — confirm) and preview the first rows.
df = pd.read_json("answer_data_kendra.json")
df.head()

In [None]:
# Columns consumed by the evaluation dataset.
question = df["question"]
answer = df["kendra_answers"]
ground_truth = df["ground_truth"]

# One list of passage texts per question: the "Content" field of every
# document stored for that row. Built with a comprehension so no loop
# variable rebinds the module-level `citations` name.
context = [
    [citation["Content"] for citation in row_citations]
    for row_citations in df["kendra_citations"]
]

In [None]:
from datasets import Dataset

# Assemble the evaluation dataset: one row per question with the generated
# answer, the reference answer and the list of retrieved passage texts.
eval_data = Dataset.from_dict(
    {
        "question": question,
        "answer": answer,
        "ground_truth": ground_truth,
        "contexts": context,
    }
)

In [None]:
import os

from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings

# Deployment names are read from the environment; os.environ.get returns None
# when a variable is unset, and that None is passed straight through.
llm = AzureChatOpenAI(
    azure_deployment=os.environ.get("AZURE_DEPLOYMENT_GPT4OMINI"),
)
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=os.environ.get("AZURE_DEPLOYMENT_EMBEDDINGS"),
)

In [None]:
from ragas.metrics import (
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
)
from ragas.run_config import RunConfig

from ragas import evaluate

# Generous retry settings for the evaluation calls (max_wait is presumably in
# seconds — confirm against the installed ragas RunConfig).
run_config = RunConfig(max_wait=600, max_retries=100)

# Score the dataset on four metrics using the Azure OpenAI chat model and
# embeddings configured earlier in this notebook.
# NOTE(review): raise_exceptions=False is expected to keep the run going when
# an individual evaluation fails instead of raising — confirm how failed rows
# are reported by the installed ragas version.
result = evaluate(
    eval_data,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
    llm=llm,
    embeddings=embeddings,
    run_config=run_config,
    raise_exceptions=False,
)

In [None]:
# Display the evaluation result in the notebook output.
result

In [None]:
# Convert the evaluation result to a DataFrame and preview the first rows.
eval_df = result.to_pandas()
eval_df.head()

In [None]:
# Persist the evaluation results; force_ascii=False writes non-ASCII
# characters as-is instead of \u-escaping them.
eval_df.to_json("eval_data_kendra.json", force_ascii=False)