In [None]:
from dotenv import load_dotenv, find_dotenv

# Locate a .env file and load its variables into the process environment.
# The return value is bound to `_` so the notebook does not echo it as output.
_ = load_dotenv(find_dotenv())  # read local .env file

In [None]:
from pathlib import Path
from tqdm import tqdm
import sys

# Put the parent of the current working directory on the import path
# (presumably so the `src` package used by later cells can be imported).
# Guarded so that re-running this cell does not append duplicate entries.
_project_root = Path("..").resolve().as_posix()
if _project_root not in sys.path:
    sys.path.append(_project_root)

## Prepare LLMs

In [None]:
from src.generator_builder import LLMConfig
from llama_index.core import Settings

# Deployment names and API version handed to LLMConfig.from_azure_deployments.
llms = LLMConfig.from_azure_deployments(
    api_version="2023-07-01-preview",
    embedding="text-embedding-ada-002",
    heavy="gpt4o",
    light="gpt-35-turbo-1106",
)

# Use the config's embedding model and light LLM as the llama_index defaults.
Settings.embed_model = llms.embed_model
Settings.llm = llms.llamaindex_light

## Assemble the RAG

In [None]:
from src.index_builder import build_index

# Both paths are resolved against the current working directory before being
# handed to the project's build_index helper.
index, full_nodes_dict = build_index(
    lib_path=Path("../../docs_md").resolve(),
    collection_name="index",
    persist_path=Path("index_storage_updated").resolve(),
)

In [None]:
from src.generator_builder import build_generator

# Wire the index, the node dictionary returned with it, and the LLM config
# into the generator used by the query cells.
generator = build_generator(
    llm_config=llms,
    full_nodes_dict=full_nodes_dict,
    index=index,
)

## Run some queries

In [None]:
# Single-turn streaming query, started with an empty chat history.
response = generator.stream_chat(
    chat_history=[],
    message="Tell me about a basic select",
)

# Print the streamed answer, then reset the generator before the next query.
response.print_response_stream()
generator.reset()

In [None]:
from llama_index.core.base.llms.types import ChatMessage, MessageRole

# Non-streaming query with history: replay one earlier user/assistant exchange
# and then ask a follow-up question.
response = generator.chat(
    chat_history=[
        ChatMessage(
            role=MessageRole.USER,
            content="Tell me about a basic select",
        ),
        ChatMessage(
            role=MessageRole.ASSISTANT,
            # Adjacent string literals are concatenated by the parser, so the
            # two sentences are separated by a single space. A backslash
            # continuation inside the quotes would instead embed the next
            # source line's indentation in the message text.
            content=(
                "A basic `select` in EdgeDB is a command used to retrieve or compute a set of values from the database. "
                "It can be used to select primitive values, objects, or computed results."
            ),
        ),
    ],
    message="What about insert?",
)

print(response.response)
generator.reset()

## Run benchmarks

In [None]:
# Benchmark query file, resolved against the current working directory.
query_path = Path("benchmarks/devon_0229.txt").resolve()

In [None]:
# Read one query per line. Whitespace-only lines are dropped so that no empty
# query ends up in the list, and the encoding is pinned so the result does not
# depend on the platform's default.
with query_path.open("r", encoding="utf-8") as f:
    queries = [query for query in (line.strip() for line in f) if query]

# Alternative loader for multi-line queries split on an explicit marker:
# with query_path.open("r") as f:
#     hard_queries = f.read().split(">>>SEPARATOR<<<")

In [None]:
# Show the loaded queries as the cell output.
queries

In [None]:
from src.eval import run_queries

In [None]:
from datetime import datetime

# The file name carries the current local time, down to the second.
timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

# Path handed to run_queries (presumably where results are written - confirm
# in src.eval).
eval_output_path = Path(f"eval_devon_0229_{timestamp}.jsonl").resolve()
responses = run_queries(generator, queries, eval_output_path)

In [None]:
# For every result print the query, the generated answer and the text of each
# source node, followed by a dashed separator.
for response in responses:
    report_parts = [
        f"QUERY: {response.query}\n",
        f"RESPONSE: {response.response}\n",
        *(
            f"CONTEXT {i}: {node.text}\n"
            for i, node in enumerate(response.source_nodes)
        ),
        "\n\n",
        "----------------------------------",
        "\n\n",
    ]
    # print() joins the parts with "\n" and appends a final newline, which is
    # exactly the output of one print() call per part.
    print(*report_parts, sep="\n")

## Evaluate results using an LLM

In [None]:
from src.eval import Evaluator

# Evaluator backed by the heavy LLM from the config.
evaluator = Evaluator(llm=llms.llamaindex_heavy)

In [None]:
# Jupyter already runs an asyncio event loop. nest_asyncio patches asyncio so
# that code in later cells can run on that same loop re-entrantly
# (presumably needed by the evaluation calls below - confirm in src.eval).
import nest_asyncio

nest_asyncio.apply()

In [None]:
from src.eval import PydanticResponse, run_queries

# NOTE(review): this rebinds `responses` to a previously saved run. The file
# name is hard-coded and does not match the `eval_devon_0229_*` name used
# earlier in this notebook - confirm this is the run you mean to evaluate.
responses_path = Path("eval_new_hard_2024-02-26-14-21-28-2.jsonl")

# One JSON document per line. Whitespace-only lines (for example a trailing
# blank line at the end of the file) are skipped instead of being handed to
# the parser, and the encoding is pinned so reading does not depend on the
# platform's default.
with responses_path.open("r", encoding="utf-8") as f:
    responses = [PydanticResponse.parse_raw(raw) for raw in f if raw.strip()]

len(responses)

In [None]:
# Evaluate every loaded response; the results keep the order of `responses`.
eval_results = [evaluator.evaluate_response(response=response) for response in responses]

In [None]:
# Split the evaluation results by their faithfulness verdict.
# NOTE(review): the buckets are keyed on "faithfulness" so that the key agrees
# with the names `faithful` / `unfaithful`. If a split by answer relevancy was
# intended, switch the key to "answer_relevancy" and rename the buckets.
faithful = [eval_result for eval_result in eval_results if eval_result["faithfulness"].passing]
unfaithful = [eval_result for eval_result in eval_results if not eval_result["faithfulness"].passing]

# Unfaithful results whose response text is the system's apology message, i.e.
# cases where no real answer was produced. `or ""` keeps the membership test
# from raising when a result carries no response text.
non_response = [
    eval_result
    for eval_result in unfaithful
    if "Sorry, the system was unable to produce a faithful response"
    in (eval_result["faithfulness"].response or "")
]
len(faithful)