### LLMs

#### AWS

In [7]:
import os
import boto3
import json

from dotenv import load_dotenv, find_dotenv

# Load variables from the nearest .env file into the process environment
# before anything below reads them.
_ = load_dotenv(find_dotenv())

# AWS credentials are taken from the environment; os.getenv returns None for
# any variable that is not set.
aws_credentials = {
    "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
    "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
    "aws_session_token": os.getenv("AWS_SESSION_TOKEN"),
}

# Bedrock runtime client used by the LangChain model wrapper further down.
bedrock_runtime = boto3.client(
    "bedrock-runtime",
    region_name="us-east-1",
    **aws_credentials,
)

In [None]:
from langchain_aws import ChatBedrock, BedrockLLM

# Cross-region inference profile id for Claude 3.5 Sonnet v2.
model_id = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"

# Claude 3.x models are served through Bedrock's Messages (chat) API, which
# the text-completion wrapper `BedrockLLM` does not speak. `ChatBedrock` is the
# chat-model wrapper intended for these models; it reuses the same boto3
# client and keeps the `claude_3` name so later cells are unaffected.
claude_3 = ChatBedrock(
    client=bedrock_runtime,
    model_id=model_id,
)

# from langchain_core.output_parsers import StrOutputParser
# from langchain_core.prompts import ChatPromptTemplate

# # Invoke Example
# messages = [
#     ("human", "{question}"),
# ]

# prompt = ChatPromptTemplate.from_messages(messages)

# chain = prompt | claude_3 | StrOutputParser()

# # Chain Invoke
# response = chain.invoke({"question": "Hi"})
# print(response)

### Document Loader - LlamaParse

In [18]:
import os
import nest_asyncio
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader
from llama_index.llms.langchain import LangChainLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# Patch asyncio so nested event loops are allowed; the async LlamaIndex /
# LlamaParse calls below run inside the notebook's already-running loop.
nest_asyncio.apply()

# Wrap the LangChain model so LlamaIndex components can use it as their
# default LLM.
Settings.llm = LangChainLLM(llm=claude_3)

# Local HuggingFace embedding model used to build and query the vector index.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Register it as the global default as well: any component that is not handed
# `embed_model` explicitly (e.g. an index reloaded from disk) would otherwise
# resolve LlamaIndex's built-in default embedding model instead of this one.
Settings.embed_model = embed_model

In [None]:
# LlamaParse performs the PDF extraction; result_type can be "markdown" or "text".
parser = LlamaParse(result_type="text")

# Route every .pdf file through the LlamaParse parser.
file_extractor = {".pdf": parser}

# Read the single source PDF and materialise it as LlamaIndex documents.
reader = SimpleDirectoryReader(
    input_files=["../data/OJ_L_202401689_EN_TXT.pdf"],
    file_extractor=file_extractor,
)
documents = reader.load_data()

### Dataset Generation

In [None]:
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core import VectorStoreIndex
from llama_index.core.evaluation import generate_question_context_pairs
from llama_index.core import StorageContext

# Split the parsed documents into nodes using a chunk size of 256.
node_parser = SimpleNodeParser.from_defaults(chunk_size=256)
nodes = node_parser.get_nodes_from_documents(documents)

# Embed the nodes with the local embedding model and persist the index so the
# evaluation section can reload it without re-embedding.
vector_index = VectorStoreIndex(nodes, embed_model=embed_model)
vector_index.storage_context.persist(persist_dir="../persist/retreival_eval")

# generate_question_context_pairs drives the model through the LlamaIndex LLM
# interface (`llm.complete(...)`), which the raw LangChain object `claude_3`
# does not implement. Use the LangChainLLM wrapper registered in Settings.llm.
qa_dataset = generate_question_context_pairs(nodes, llm=Settings.llm)

# Make sure the target folder exists before writing, so a long generation run
# is not lost to a missing directory.
os.makedirs("../datasets", exist_ok=True)
qa_dataset.save_json("../datasets/retreival_eval.json")

### Evaluation

In [11]:
from llama_index.core import load_index_from_storage, StorageContext

# Rebuild the storage context from the directory the index was persisted to.
storage_context = StorageContext.from_defaults(persist_dir="../persist/retreival_eval")

# Reload the index with the SAME embedding model that produced the stored
# vectors. Without `embed_model`, the reloaded index falls back to the globally
# configured default embedding model, so query embeddings would not match the
# stored bge-small vectors.
vector_index = load_index_from_storage(storage_context, embed_model=embed_model)

In [None]:
from llama_index.core.evaluation import EmbeddingQAFinetuneDataset
import nest_asyncio

# Allow nested event loops so the async evaluation calls can run in the notebook.
nest_asyncio.apply()

# Load the question/context pairs written by the "Dataset Generation" section.
# The expected node ids in the dataset must come from the same nodes as the
# persisted index loaded above, so read the file that section saved rather
# than an unrelated dataset this notebook never produces.
qa_dataset = EmbeddingQAFinetuneDataset.from_json("../datasets/retreival_eval.json")

In [13]:
# Retriever 1 (baseline): embedding-similarity search over the vector index,
# keeping the 3 best-scoring nodes per query.
retriever1 = vector_index.as_retriever(
    similarity_top_k=3,
)

In [14]:
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import QueryFusionRetriever

# Dense leg of the hybrid search: embedding similarity, top 3 nodes.
vector_retriever = vector_index.as_retriever(similarity_top_k=3)

# Sparse leg: BM25 keyword scoring over the same docstore, top 3 nodes.
bm25_retriever = BM25Retriever.from_defaults(
    docstore=vector_index.docstore,
    similarity_top_k=3,
)

# Retriever 2 (hybrid): fuse the dense and sparse results into one top-3 list.
fusion_sources = [vector_retriever, bm25_retriever]
retriever2 = QueryFusionRetriever(
    fusion_sources,
    similarity_top_k=3,
    # num_queries=1 disables query generation: only the original query is used.
    num_queries=1,
    use_async=True,
    verbose=True,
)

In [15]:
from llama_index.postprocessor.cohere_rerank import CohereRerank

# Cohere reranker configured to keep the 3 best nodes; the API key must be
# present in the environment (a KeyError is raised otherwise).
api_key = os.environ["COHERE_API_KEY"]
cohere_rerank = CohereRerank(api_key=api_key, top_n=3)

# Retriever 3: fetch 5 candidates by embedding similarity for the reranker.
# NOTE(review): confirm that `as_retriever` actually honours
# `node_postprocessors` -- post-processors are normally applied by query
# engines / evaluators, and the retriever may silently ignore this argument.
retriever3 = vector_index.as_retriever(
    node_postprocessors=[cohere_rerank],
    similarity_top_k=5,
)

In [None]:
from llama_index.core.evaluation import RetrieverEvaluator

# Define the evaluation metrics
metrics = ["precision", "recall", "hit_rate", "ap", "mrr", "ndcg"]

# Create retriever evaluators for each retriever
retriever_evaluator1 = RetrieverEvaluator.from_metric_names(
    metrics, retriever=retriever1
)
retriever_evaluator2 = RetrieverEvaluator.from_metric_names(
    metrics, retriever=retriever2
)
retriever_evaluator3 = RetrieverEvaluator.from_metric_names(
    metrics, retriever=retriever3
)

# Evaluate the dataset using the retriever evaluators
eval_results1 = await retriever_evaluator1.aevaluate_dataset(
    qa_dataset, show_progress=True, workers=1
)
eval_results2 = await retriever_evaluator2.aevaluate_dataset(
    qa_dataset, show_progress=True, workers=1
)
eval_results3 = await retriever_evaluator3.aevaluate_dataset(
    qa_dataset, show_progress=True, workers=1
)

In [9]:
import pandas as pd

def display_results(name, eval_results, metric_names=None):
    """Summarise per-query evaluation results as a one-row DataFrame.

    Args:
        name: Label for the retriever; stored in the ``retrievers`` column.
        eval_results: Iterable of result objects exposing a
            ``metric_vals_dict`` mapping of metric name -> value.
        metric_names: Optional list of metric columns to report. When
            omitted, every metric present in the results is reported, in the
            order it appears in the results. This removes the dependency on a
            notebook-global ``metrics`` list defined in another cell.

    Returns:
        A DataFrame with a single row: the ``retrievers`` label followed by
        the mean of each reported metric over all results. Empty results give
        a frame that contains only the ``retrievers`` column.
    """
    per_query = pd.DataFrame([result.metric_vals_dict for result in eval_results])

    if metric_names is None:
        metric_names = list(per_query.columns)

    summary = {"retrievers": [name]}
    for metric_name in metric_names:
        summary[metric_name] = [per_query[metric_name].mean()]

    return pd.DataFrame(summary)

In [None]:
# One summary row per retrieval strategy.
metric_df1 = display_results("Semantic Search", eval_results1)
metric_df2 = display_results("Hybrid Search", eval_results2)
metric_df3 = display_results("With Reranker", eval_results3)

# Stack the rows into a single comparison table.
results = pd.concat([metric_df1, metric_df2, metric_df3], ignore_index=True)

# Create the output folder if needed so the export does not fail on a fresh
# checkout, then write the comparison table.
results_path = "../data/results/retrieval-evaluation.csv"
os.makedirs(os.path.dirname(results_path), exist_ok=True)
results.to_csv(results_path, index=False)

# Show the comparison table as the cell output.
results