# Simple RAG pipeline
Let's first load the `finepersonas-v0.1-tiny` dataset and save each persona as a text file in the `data` folder.

In [None]:
from datasets import load_dataset
from pathlib import Path

# Download the "train" split of the tiny FinePersonas dataset.
dataset = load_dataset("dvilasuero/finepersonas-v0.1-tiny", split="train")

# Write every persona description to its own file: data/persona_<index>.txt.
data_dir = Path("data")
data_dir.mkdir(parents=True, exist_ok=True)
for i, persona in enumerate(dataset):
    # Persona text may contain non-ASCII characters. An explicit UTF-8
    # encoding keeps the files identical on every platform instead of
    # depending on the locale's default encoding.
    (data_dir / f"persona_{i}.txt").write_text(persona["persona"], encoding="utf-8")

Let's now load the persona descriptions from the data directory with a `SimpleDirectoryReader`.

In [None]:
from llama_index.core import SimpleDirectoryReader

# Point a reader at the data/ directory and load its files as documents.
reader = SimpleDirectoryReader(input_dir="data")
documents = reader.load_data()
# Last expression of the cell: displays how many documents were loaded.
len(documents)

Let's now create an `IngestionPipeline` to create nodes from the documents and divide them into smaller chunks ready for the `QueryEngine`. We'll use `SentenceSplitter` to split documents into natural sentences and then `OllamaEmbedding` to create embeddings.

In [None]:
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline

# from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding

# Ingestion pipeline with two transformations: split the documents into
# chunks, then embed each chunk with the "nomic-embed-text" Ollama model.
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        OllamaEmbedding(model_name="nomic-embed-text"),
        # HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-small-en-v1.5"), # for HuggingFace
    ]
)

# Run the pipeline asynchronously on the first 10 documents only.
# Top-level `await` needs an async-aware environment such as a notebook;
# `pipeline.run(...)` is the synchronous alternative.
nodes = await pipeline.arun(documents=documents[:10])
# Last expression of the cell: displays the resulting nodes.
nodes

We'll now attach a vector store to the pipeline to populate it. We'll use `Chroma` for this task. Let's run the pipeline again with the vector store attached.

In [None]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

# Open a Chroma client persisted under ./alfred_chroma_db and fetch the
# "alfred" collection, creating it if it does not exist yet.
db = chromadb.PersistentClient("./alfred_chroma_db")
chroma_collection = db.get_or_create_collection(name="alfred")
# Wrap the collection so LlamaIndex can use it as a vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Same transformations as before, now with the vector store attached to the
# pipeline.
# NOTE(review): the collection is persisted on disk, so re-running this cell
# presumably adds the same chunks again -- confirm, and reset the collection
# if duplicates matter.
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        OllamaEmbedding(model_name="nomic-embed-text"),
        # HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-small-en-v1.5"), # for HuggingFace
    ],
    vector_store=vector_store
)

# Ingest the first 10 documents (top-level `await`, as in a notebook).
nodes = await pipeline.arun(documents=documents[:10])
# Last expression of the cell: displays how many nodes were produced.
len(nodes)

Let's now create a `VectorStoreIndex` and use it to query the documents by passing the vector store and using the `from_vector_store()` method.

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.ollama import OllamaEmbedding

# for HuggingFace
# from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding
# embed_model = HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Use the same embedding model name ("nomic-embed-text") as the ingestion
# pipeline for the index built on top of the vector store.
embed_model = OllamaEmbedding(model_name="nomic-embed-text")
# Build the index directly on top of the existing vector store.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model
)

We'll now query the index by creating a `QueryEngine` from the index.

In [None]:
from llama_index.llms.ollama import Ollama
import nest_asyncio

# for HuggingFace
# from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
# llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")

# Patch asyncio to allow nested event loops before issuing the query below.
# NOTE(review): presumably needed because the notebook already runs an event
# loop -- confirm.
nest_asyncio.apply()
# Local Ollama model used to generate the answers.
llm = Ollama(model="qwen2.5-coder")
# Query engine over the index, using the "tree_summarize" response mode.
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="tree_summarize"
)

# Send a question to the query engine.
response = query_engine.query("Respond using a persona that describes author and travel experiences?")
# Last expression of the cell: displays the response.
response

## Evaluation and observability
Let's now use LlamaIndex's built-in evaluation tools to assess the quality of the response. These evaluators leverage LLMs to analyze responses across different dimensions. We can now check whether the response is faithful to the retrieved persona context, i.e. supported by it rather than made up.

In [None]:
from llama_index.core.evaluation import FaithfulnessEvaluator

# Faithfulness evaluator that uses the same `llm` as the query engine.
evaluator = FaithfulnessEvaluator(llm=llm)
# Evaluate the `response` object returned by the previous query.
eval_result = evaluator.evaluate_response(response=response)
# Last expression of the cell: displays the evaluation's `passing` result.
eval_result.passing

We can also check the response using the Arize Phoenix tool and LlamaTrace.

In [None]:
import llama_index
import os

# Prefer a key supplied through the PHOENIX_API_KEY environment variable so
# the real secret never has to be pasted into the notebook. Without it, fall
# back to the placeholder, which must then be replaced by hand.
PHOENIX_API_KEY = os.environ.get("PHOENIX_API_KEY", "<PHOENIX_API_KEY>")
# Pass the key through the OTLP exporter headers environment variable.
os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}"
# Register Arize Phoenix as the global handler, sending traces to LlamaTrace.
llama_index.core.set_global_handler(
    "arize_phoenix",
    endpoint="https://llamatrace.com/v1/traces",
)

# Issue a query after the handler is registered.
response = query_engine.query(
    "What is the name of someone who is interested in AI and technology?"
)
# Last expression of the cell: displays the response.
response