# Exercise - Knowledge Base Agent - STARTER

In this exercise, you’ll implement and evaluate a RAG (Retrieval-Augmented Generation) pipeline, using RAGAS metrics and MLflow for logging the process.


**Challenge**

Your task is to create a LangGraph Workflow that includes:

- A RAG pipeline for information retrieval.
- An LLM-based judge for evaluation.
- RAGAS metrics for quality assessment.
- MLflow logging for observability.

The workflow should:

- Retrieve, augment, and generate answers.
- Evaluate the answers using RAGAS.
- Log performance metrics in MLflow.

## 0. Import the necessary libs

In [1]:
!pip install pysqlite3-binary
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
!pip install chromadb
!pip install langchain-chroma

Collecting pysqlite3-binary
  Downloading pysqlite3_binary-0.5.4.post2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (4.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m42.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pysqlite3-binary
Successfully installed pysqlite3-binary-0.5.4.post2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting chromadb
  Downloading chromadb-1.3.7-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.7/21.7 MB[0m [31m48.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting build>=1.0.3
  Downloading build-1.3.0-py3-none-any.whl (23 kB)
Collecting opentelemetry-sdk>=1.2.0
 

Collecting sympy
  Downloading sympy-1.14.0-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m115.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting protobuf
  Downloading protobuf-6.33.2-cp39-abi3-manylinux2014_x86_64.whl (323 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.3/323.3 kB[0m [31m41.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting coloredlogs
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting flatbuffers
  Downloading flatbuffers-25.9.23-py2.py3-none-any.whl (30 kB)
Collecting importlib-metadata<8.8.0,>=6.0
  Downloading importlib_metadata-8.7.0-py3-none-any.whl (27 kB)
Collecting opentelemetry-exporter-otlp-proto-common==1.39.1
  Downloading opentelemetry_exporter_otlp_proto_common-1.39.1-py3-none-any.whl (18 kB)
Collecting opentelemetry

Collecting langchain-chroma
  Downloading langchain_chroma-1.1.0-py3-none-any.whl (12 kB)
Collecting langchain-core<2.0.0,>=1.1.3
  Downloading langchain_core-1.2.0-py3-none-any.whl (475 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.9/475.9 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting uuid-utils<1.0,>=0.12.0
  Downloading uuid_utils-0.12.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (343 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m343.7/343.7 kB[0m [31m39.7 MB/s[0m eta [36m0:00:00[0m
Collecting langsmith<1.0.0,>=0.3.45
  Downloading langsmith-0.4.59-py3-none-any.whl (413 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.1/413.1 kB[0m [31m49.6 MB/s[0m eta [36m0:00:00[0m


Collecting zstandard>=0.23.0
  Downloading zstandard-0.25.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m89.4 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
Installing collected packages: zstandard, uuid-utils, langsmith, langchain-core, langchain-chroma
  Attempting uninstall: langsmith
    Found existing installation: langsmith 0.1.147
    Uninstalling langsmith-0.1.147:
      Successfully uninstalled langsmith-0.1.147
  Attempting uninstall: langchain-core
    Found existing installation: langchain-core 0.3.21
    Uninstalling langchain-core-0.3.21:
      Successfully uninstalled langchain-core-0.3.21
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langgraph 0.2.53 requires langchain-core!=0.3.0,!=0.3.1,!=0.3.10,!=0.3.11,!=0.3.12,!=0.3.13,!=0.3.1

In [2]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-3.7.0-py3-none-any.whl (8.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m49.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pandas<3
  Downloading pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m91.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting mlflow-skinny==3.7.0
  Downloading mlflow_skinny-3.7.0-py3-none-any.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m91.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic!=1.10.0,<2
  Downloading alembic-1.17.2-py3-none-any.whl (248 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m248.6/248.6 kB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cryptography<47,>=43.0.0
  Downloading cryptography-46.0.3-cp38-abi3-manylinux_2_28_

Collecting graphql-core<3.3,>=3.1
  Downloading graphql_core-3.2.7-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
Collecting graphql-relay<3.3,>=3.1
  Downloading graphql_relay-3.2.0-py3-none-any.whl (16 kB)
Collecting pyparsing>=3
  Downloading pyparsing-3.2.5-py3-none-any.whl (113 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m113.9/113.9 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cycler>=0.10
  Downloading cycler-0.12.1-py3-none-any.whl (8.3 kB)
Collecting kiwisolver>=1.3.1
  Downloading kiwisolver-1.4.9-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m83.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting contourpy>=1.0.1
  Downloading contourpy-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (325 kB)
[2K     [90m━━━━━━━━━━━━━━

In [None]:
!pip install ragas

Collecting ragas
  Downloading ragas-0.4.1-py3-none-any.whl (419 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m419.9/419.9 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting diskcache>=5.6.3
  Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
Collecting instructor
  Downloading instructor-1.13.0-py3-none-any.whl (160 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.9/160.9 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting networkx
  Downloading networkx-3.4.2-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting scikit-network
  Downloading scikit_network-0.33.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

  Downloading langchain_core-0.3.79-py3-none-any.whl (449 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m449.8/449.8 kB[0m [31m52.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_core-0.3.78-py3-none-any.whl (449 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m449.6/449.6 kB[0m [31m54.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_core-0.3.77-py3-none-any.whl (449 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m449.5/449.5 kB[0m [31m53.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_core-0.3.76-py3-none-any.whl (447 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m447.5/447.5 kB[0m [31m54.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_core-0.3.75-py3-none-any.whl (443 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m444.0/444.0 kB[0m [31m56.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_core-0.3.74-py3-none-any.whl (4

[?25h  Downloading langchain_openai-0.3.22-py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.3/65.3 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_openai-0.3.21-py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.2/65.2 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_openai-0.3.20-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.5/64.5 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_openai-0.3.19-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.5/64.5 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_openai-0.3.18-py3-none-any.whl (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.4/63.4 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_openai-0.3.17-py3-no

[?25h  Downloading langchain_openai-0.1.16-py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.1/46.1 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_openai-0.1.15-py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.1/46.1 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_openai-0.1.14-py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.9/45.9 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_openai-0.1.13-py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.9/45.9 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_openai-0.1.12-py3-none-any.whl (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading langchain_openai-0.1.11-py3-none-an

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m74.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.3.12-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m61.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-community
  Downloading langchain_community-0.3.11-py3-none-any.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m91.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.3.11-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m62.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-community
  Downloading langchain_community-0.3.10-py3-none-any.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m85.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  

In [None]:
import mlflow
from mlflow import log_params, log_metrics
from typing import List, Dict
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langgraph.graph import START, END, StateGraph
from langgraph.graph.message import MessagesState
from langchain.prompts import ChatPromptTemplate
from ragas import evaluate
from datasets import Dataset
from IPython.display import Image, display

## 1. Instantiate Chat Model with your API Key

To be able to connect with OpenAI, you need to instantiate a ChatOpenAI client, passing your OpenAI key.

You can pass the `api_key` argument directly.
```python
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.0,
    api_key="voc-",
)
```

In [None]:
import os

# Prefer a key supplied through the environment so the real secret never has
# to be written into the notebook; the literal is only a placeholder fallback.
# A key stored in a .env file is visible here only if load_dotenv() has
# already been executed in this kernel.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "voc-*")

In [None]:
from dotenv import load_dotenv
# Load variables from a local .env file (if one exists) into the process
# environment.
load_dotenv()

In [None]:
# Chat model that the `generate` node calls to produce the answer.
llm = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    model="gpt-4o-mini",
    temperature=0.0,
)

In [None]:
# Judge model: handed to the RAGAS evaluation to score the responses.
llm_judge = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    model="gpt-4o",
    temperature=0.0,
)

In [None]:
# Embedding model used by the Chroma vector store.
embeddings_fn = OpenAIEmbeddings(
    api_key=OPENAI_API_KEY,
    model="text-embedding-3-large",
)

## 2. MLFlow

In [None]:
# Select the MLflow experiment under which the runs below are recorded.
mlflow.set_experiment(experiment_name="udacity")

In [None]:
# Model names to record as parameters of the run.
run_params = {
    "embeddings_model": embeddings_fn.model,
    "llm_model": llm.model_name,
    "llm_judge_model": llm_judge.model_name,
}

# Open a named run, log the parameters, and show the run's info.
with mlflow.start_run(run_name="l4_exercise_02") as run:
    log_params(run_params)
    print(run.info)

In [None]:
# Keep the run id so the workflow can later log metrics to this same run.
mlflow_run_id = run.info.run_id

In [None]:
# Client used below to look up the run by its id.
mflow_client = mlflow.tracking.MlflowClient()

In [None]:
# Display the run as stored so far (the parameters logged above).
mflow_client.get_run(mlflow_run_id)

## 3. Load and Process Documents

In [None]:
# Read the PDF into a list of documents.
file_path = "compact-guide-to-large-language-models.pdf"
loader = PyPDFLoader(file_path)
pages = list(loader.load())

# Cut the documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
all_splits = text_splitter.split_documents(pages)

# Create the Chroma collection and add every chunk to it.
vector_store = Chroma(
    embedding_function=embeddings_fn,
    collection_name="udacity",
)
_ = vector_store.add_documents(documents=all_splits)


## 4. Define State Schema

We define a State Schema for managing:

- MLFlow Run id
- User query
- Ground Truth
- Retrieved documents
- Generated answer
- Evaluation Report

run_id(str), ground_truth(str), evaluation(Dict), question(str), documents(List) and answer(str)

In [None]:
# State schema shared by all nodes of the workflow.
class State(MessagesState):
    """State passed between the nodes of the RAG workflow.

    Extends ``MessagesState``; the nodes also read and write a ``messages``
    entry that is not declared here (presumably provided by the base class).
    """
    # Id of the MLflow run that receives the evaluation metrics.
    run_id: str
    # Reference answer the generated answer is evaluated against.
    ground_truth: str
    # User query.
    question: str
    # Chunks returned by the retrieval step.
    documents: List[Document]
    # Text of the answer generated by the LLM.
    answer: str
    # Result of the RAGAS evaluation.
    evaluation: Dict


## 5. RAG Nodes

The agent should:
- fetch relevant document chunks based on the user query
- combine the retrieved documents and use them as context
- invoke the LLM to generate a response
- evaluate the pipeline based on the ground_truth

In [None]:
def retrieve(state: State):
    """Search the vector store for chunks similar to the question.

    Reads ``state["question"]`` and returns a partial state update with the
    search hits stored under ``"documents"``.
    """
    hits = vector_store.similarity_search(state["question"])
    return {"documents": hits}

In [None]:
def augment(state: State):
    """Build the chat prompt from the question and the retrieved chunks.

    The page contents of ``state["documents"]`` are joined with blank lines
    and inserted, together with ``state["question"]``, into a system + human
    prompt. Returns a partial state update with the resulting messages under
    ``"messages"``.
    """
    question = state["question"]
    context = "\n\n".join(chunk.page_content for chunk in state["documents"])

    system_turn = ("system", "You are an assistant for question-answering tasks.")
    human_turn = (
        "human",
        "Use the following pieces of retrieved context to answer the question. "
        "If you don't know the answer, just say that you don't know. "
        "Use three sentences maximum and keep the answer concise. "
        "\n# Question: \n-> {question} "
        "\n# Context: \n-> {context} "
        "\n# Answer: ",
    )
    prompt = ChatPromptTemplate([system_turn, human_turn])

    prompt_value = prompt.invoke({"question": question, "context": context})
    return {"messages": prompt_value.to_messages()}

In [None]:
def generate(state: State):
    """Send the accumulated messages to the chat model.

    Returns a partial state update holding the reply text under ``"answer"``
    and the reply message itself under ``"messages"``.
    """
    reply = llm.invoke(state["messages"])
    return dict(answer=reply.content, messages=reply)

In [None]:
def evaluate_rag(state: State):
    """Score the generated answer with RAGAS and log the scores to MLflow.

    A one-row dataset is built from the question, the generated answer, the
    page contents of the retrieved documents and the ground truth. It is
    evaluated with ``llm_judge`` as the judge model and ``embeddings_fn`` as
    the embedding model, and the four scores are logged to the MLflow run
    identified by ``state["run_id"]``.

    Returns a partial state update with the evaluation result under
    ``"evaluation"``.
    """
    dataset = Dataset.from_dict(
        {
            "question": [state["question"]],
            "answer": [state["answer"]],
            "contexts": [[doc.page_content for doc in state["documents"]]],
            "ground_truth": [state["ground_truth"]],
        }
    )

    # Pass the notebook's embeddings explicitly: without them ragas falls back
    # to its own default embeddings instead of the configured `embeddings_fn`.
    evaluation_results = evaluate(
        dataset=dataset,
        llm=llm_judge,
        embeddings=embeddings_fn,
    )
    print(evaluation_results)

    # Each metric entry is a list of scores; the dataset has exactly one row,
    # so index 0 is the score for this question.
    metric_names = (
        "faithfulness",
        "context_precision",
        "context_recall",
        "answer_relevancy",
    )
    scores = {name: evaluation_results[name][0] for name in metric_names}

    # Start a run with the existing run id so the metrics are logged to the
    # run created earlier.
    with mlflow.start_run(run_id=state["run_id"]):
        log_metrics(scores)

    return {"evaluation": evaluation_results}

## 6. Build the LangGraph Workflow

In [None]:
# Linear pipeline: START -> retrieve -> augment -> generate -> evaluate_rag -> END
workflow = StateGraph(State)

pipeline_nodes = [
    ("retrieve", retrieve),
    ("augment", augment),
    ("generate", generate),
    ("evaluate_rag", evaluate_rag),
]
for node_name, node_fn in pipeline_nodes:
    workflow.add_node(node_name, node_fn)

# Connect every step to the one that follows it.
pipeline_order = [START, "retrieve", "augment", "generate", "evaluate_rag", END]
for edge_start, edge_end in zip(pipeline_order, pipeline_order[1:]):
    workflow.add_edge(edge_start, edge_end)

In [None]:
graph = workflow.compile()

# Render the compiled graph as a Mermaid PNG and show it inline.
graph_png = graph.get_graph().draw_mermaid_png()
display(Image(graph_png))

## 7. Invoke the Workflow with a Query

In [None]:
# Evaluation set: each entry pairs a question with its expected answer.
reference = [
    dict(
        question="What are Open source models?",
        ground_truth=(
            "Open-source models are AI or machine learning models whose code, "
            "architecture, and in some cases, training data and weights, are "
            "publicly available for use, modification, and distribution. "
            "They enable collaboration, transparency, and innovation by "
            "allowing developers to fine-tune, deploy, or improve them "
            "without proprietary restrictions."
        ),
    )
]

In [None]:
# Run the workflow on the first reference entry, logging to the MLflow run
# created earlier.
first_case = reference[0]
output = graph.invoke(
    dict(
        question=first_case["question"],
        ground_truth=first_case["ground_truth"],
        run_id=mlflow_run_id,
    )
)

## 8. Inspect in MLFlow

In [None]:
# Fetch the run again by its id to inspect the metrics that the
# `evaluate_rag` node logged to it.
mflow_client.get_run(mlflow_run_id)

## 9. Experiment

Now that you understand how it works, experiment with new things.

- Change RAG parameters: embedding model, chunk_size, chunk_overlap...
- Create multiple runs
- Improve your reference with more questions and ground_truth answers
- Use the results to understand which parameters work best
- Create an Agent that picks the best combination