In [None]:
import os
import sys
import random
from langchain_community.vectorstores import Chroma
from IPython.display import display, Markdown

# Resolve the kit and repository roots relative to the kernel's working
# directory: the kit is the parent of the cwd, the repo is the parent of the kit.
current_dir = os.getcwd()
kit_dir = os.path.abspath(os.path.join(current_dir, ".."))
repo_dir = os.path.abspath(os.path.join(kit_dir, ".."))

# Add both roots to the import path. The membership check keeps sys.path free
# of duplicate entries when this cell is re-run in the same kernel.
for _project_path in (kit_dir, repo_dir):
    if _project_path not in sys.path:
        sys.path.append(_project_path)

# Project-local imports; presumably resolved through the paths added above,
# so they have to stay below the sys.path setup.
from complex_rag.src.document_retrieval import DocumentRetrieval
from complex_rag.src.rag import COMPLEXRAG

CONFIG_PATH = os.path.join(kit_dir, "config.yaml")
PROMPTS_PATH = os.path.join(kit_dir, "prompts")

from datasets import load_dataset

# Number of leading training rows used as the question/answer sample pool.
N_SAMPLES = 100

rag_dataset = load_dataset("neural-bridge/rag-dataset-12000")
random.seed(123)

# Slice the split once: slicing a Dataset returns a dict of column -> list, so
# only the first N_SAMPLES rows are materialised instead of two full columns.
_train_head = rag_dataset["train"][:N_SAMPLES]
questions = _train_head["question"]
answers = _train_head["answer"]

In [None]:
# Create the retrieval helper (constructed with no arguments here).
documentRetrieval = DocumentRetrieval()
# get_config_info() is unpacked positionally: the third- and second-to-last
# entries are kept as embedding_model_info and retrieval_info; every other
# entry is discarded into `_`.
# NOTE(review): binding `_` in a notebook shadows IPython's last-output
# variable — confirm this is acceptable.
*_, embedding_model_info, retrieval_info, _ = documentRetrieval.get_config_info()
# Embedding model object; reused by the vector store and RAG cells further down.
embeddings = documentRetrieval.load_embedding_model()

In [None]:
# Directory of the persisted Chroma store. Built with os.path.join, like
# CONFIG_PATH and PROMPTS_PATH, instead of concatenating with a hard-coded "/".
vectorstore_dir = os.path.join(
    kit_dir, "data", "rag_dataset_12000_train_100samples_1000_100.chromadb"
)

# Open the persisted store with the embedding model loaded earlier.
# NOTE(review): presumably Chroma creates an empty store when the directory is
# missing — confirm the data has been generated before running this cell.
vectorstore = Chroma(
    persist_directory=vectorstore_dir,
    embedding_function=embeddings,
)

In [None]:
# Create the RAG object from the config file, the prompt directory, the
# embedding model and the vector store prepared in the earlier cells.
rag = COMPLEXRAG(
    vectorstore=vectorstore,
    embeddings=embeddings,
    prompts_path=PROMPTS_PATH,
    config=CONFIG_PATH,
)

rag.initialize()  # chains are initialised before any node is created

workflow = rag.create_rag_nodes()  # nodes of the workflow
print(workflow)

app = rag.build_rag_graph(workflow)  # graph assembled from those nodes

In [None]:
# Show the graph object built in the previous cell; the output is whatever
# rag.display_graph produces (presumably a rendered diagram — confirm).
rag.display_graph(app)

In [None]:
# Pick a random sample. random.randrange excludes the upper bound, so idx is
# always a valid index; random.randint(0, n) includes n and could raise
# IndexError on questions[idx].
idx = random.randrange(len(questions))

# Ask the sampled question through the graph application.
response = rag.call_rag(app, question=questions[idx])

# Show the generated answer followed by the dataset's reference answer.
display(Markdown("---Response---"))
display(Markdown(response["answer"]))
display(Markdown("---Groud Truth---"))
display(Markdown(answers[idx]))

In [None]:
# Draw another random sample. random.randrange excludes the upper bound, so
# idx is always a valid index; random.randint(0, n) includes n and could raise
# IndexError on questions[idx].
idx = random.randrange(len(questions))

# Ask the sampled question through the graph application.
response = rag.call_rag(app, question=questions[idx])

# Show the generated answer followed by the dataset's reference answer.
display(Markdown("---Response---"))
display(Markdown(response["answer"]))
display(Markdown("---Groud Truth---"))
display(Markdown(answers[idx]))

In [None]:
# Draw a further random sample. random.randrange excludes the upper bound, so
# idx is always a valid index; random.randint(0, n) includes n and could raise
# IndexError on questions[idx].
idx = random.randrange(len(questions))

# Ask the sampled question through the graph application.
response = rag.call_rag(app, question=questions[idx])

# Show the generated answer followed by the dataset's reference answer.
display(Markdown("---Response---"))
display(Markdown(response["answer"]))
display(Markdown("---Groud Truth---"))
display(Markdown(answers[idx]))

In [None]:
# Ask a hand-written question instead of one sampled from the dataset, so
# there is no reference answer to display for this cell.
response = rag.call_rag(
    app,
    question="Why is there a monster truck in my driveway?",
)

# Heading first, then the generated answer.
display(Markdown("---Response---"))
display(Markdown(response["answer"]))