In [5]:
!pip install llama-index datasets llama-index-callbacks-arize-phoenix arize-phoenix llama-index-vector-stores-chroma llama-index-llms-huggingface-api llama-index-embeddings-huggingface -U -q


[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
# Create a QueryEngine for retrieval augmented generation
# Setting up the persona database
from datasets import load_dataset
from pathlib import Path

# Load the dataset
dataset = load_dataset(path="dvilasuero/finepersonas-v0.1-tiny", split="train")

# Create the data directory if it doesn't exist
Path("data").mkdir(parents=True, exist_ok=True)

# Iterate through the dataset and write each persona to a file with UTF-8 encoding
print("Writing persona data to files...")
for i, persona in enumerate(dataset):
    # Specify encoding='utf-8' when opening the file
    file_path = Path("data") / f"persona_{i}.txt"
    try:
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(persona["persona"])
        print(f"Successfully wrote {file_path}")
    except Exception as e:
        print(f"Error writing to {file_path}: {e}")

print("Finished writing persona data.")

Writing persona data to files...
Successfully wrote data\persona_0.txt
Successfully wrote data\persona_1.txt
Successfully wrote data\persona_2.txt
Successfully wrote data\persona_3.txt
Successfully wrote data\persona_4.txt
Successfully wrote data\persona_5.txt
Successfully wrote data\persona_6.txt
Successfully wrote data\persona_7.txt
Successfully wrote data\persona_8.txt
Successfully wrote data\persona_9.txt
Successfully wrote data\persona_10.txt
Successfully wrote data\persona_11.txt
Successfully wrote data\persona_12.txt
Successfully wrote data\persona_13.txt
Successfully wrote data\persona_14.txt
Successfully wrote data\persona_15.txt
Successfully wrote data\persona_16.txt
Successfully wrote data\persona_17.txt
Successfully wrote data\persona_18.txt
Successfully wrote data\persona_19.txt
Successfully wrote data\persona_20.txt
Successfully wrote data\persona_21.txt
Successfully wrote data\persona_22.txt
Successfully wrote data\persona_23.txt
Successfully wrote data\persona_24.txt
Su

In [7]:
# Loading and embedding persona documents
from llama_index.core import SimpleDirectoryReader

reader = SimpleDirectoryReader(input_dir="data")
documents = reader.load_data()
len(documents)

5000

In [8]:
# Now we have a list of Document objects, we can use the IngestionPipeline to create nodes from the documents and prepare them for the QueryEngine. We will use the SentenceSplitter to split the documents into smaller chunks and the HuggingFaceEmbedding to embed the chunks.
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline

# create the pipeline with transformations
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ]
)

# run the pipeline sync or async
nodes = await pipeline.arun(documents=documents[:10])
nodes

Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 


[TextNode(id_='a5d0e4af-5ee4-48ff-b3e9-c4c5427985b9', embedding=[-0.028494413942098618, -0.0076458570547401905, 0.04205216467380524, 0.0009117340669035912, 0.03986470773816109, -0.042177438735961914, -0.03446083515882492, 0.00377092813141644, -0.058626409620046616, -0.07370070368051529, 0.019474472850561142, -0.012607400305569172, -0.02480388432741165, 0.011020636186003685, -0.0018131416290998459, 0.015286014415323734, 0.015409273095428944, 0.06909511238336563, 0.00890304148197174, 0.017028115689754486, 0.019885893911123276, -0.07081490010023117, 0.06141560524702072, -0.05413505434989929, -0.01749911904335022, 0.026480823755264282, 0.027789946645498276, -0.016359681263566017, -0.03710777312517166, -0.14235733449459076, -0.028282804414629936, -0.01448756828904152, -0.010285651311278343, 0.010946129448711872, 0.04783603921532631, 0.005421118810772896, 0.009371841326355934, 0.0739346519112587, 0.003909273073077202, 0.029446164146065712, 0.002321752719581127, 0.017086472362279892, -0.04479

In [9]:
# Storing and indexing documents
# Since we are using an ingestion pipeline, we can directly attach a vector store to the pipeline to populate it. In this case, we will use Chroma to store our documents. Let's run the pipeline again with the vector store attached. The IngestionPipeline caches the operations so this should be fast!
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

db = chromadb.PersistentClient(path="./alfred_chroma_db")
chroma_collection = db.get_or_create_collection(name="alfred")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ],
    vector_store=vector_store,
)

nodes = await pipeline.arun(documents=documents[:10])
len(nodes)

Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 


10

In [10]:
# Querying the index
# Now that we have our index, we can use it to query the documents. Let's create a QueryEngine from the index and use it to query the documents using a specific response mode.
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store, embed_model=embed_model
)

In [11]:
# Now that we have our index, we can use it to query the documents. Let's create a QueryEngine from the index and use it to query the documents using a specific response mode.
from langchain_community.llms.ollama import Ollama
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import nest_asyncio
from smolagents import LiteLLMModel
import os
from huggingface_hub import InferenceClient
llm = Ollama(model="qwen2.5:latest", base_url="http://localhost:11434")

nest_asyncio.apply()  # This is needed to run the query engine

query_engine = index.as_query_engine(
    llm=llm,
    response_mode="tree_summarize",
)
response = query_engine.query(
    "Respond using a persona that describes author and travel experiences?"
)
response

  llm = Ollama(model="qwen2.5:latest", base_url="http://localhost:11434")
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 


Response(response="From my anthropologist or cultural expert's perspective who has delved deeply into Cypriot culture, history, and society, I would like to share some insights as if I were recounting from personal experience:\n\n---\n\nIn the heart of the Mediterranean, lies a land rich with stories waiting to be unraveled—Cyprus. Over the years, I have had the privilege to immerse myself in this unique island's culture, living among its people and participating in their daily life. The essence of Cypriot society is deeply rooted in traditions that span millennia, blending Eastern and Western influences into a harmonious whole.\n\nMy journey began with extensive research at various academic institutions, but it was the time I spent living within the community that truly enriched my understanding. Moving from the bustling streets of Nicosia to the quiet villages nestled among olive groves, each experience offered new insights into the Cypriot way of life. The warm hospitality and deep-

In [12]:
# Evaluation and observability
# LlamaIndex provides built-in evaluation tools to assess response quality. These evaluators leverage LLMs to analyze responses across different dimensions. We can now check if the query is faithful to the original persona.
from llama_index.core.evaluation import FaithfulnessEvaluator
from llama_index.core.base.llms.types import LLMMetadata

if llm.metadata is None:
    print("Warning: llm.metadata is None. Manually populating LLMMetadata.")
    llm.metadata = LLMMetadata(model_name="qwen2.5:latest", llm_name="Ollama")
elif llm.metadata.model_name is None:
    print("Warning: llm.metadata.model_name is None. Manually setting model_name.")
    llm.metadata.model_name = "qwen2.5:latest"
    if llm.metadata.llm_name is None:
        llm.metadata.llm_name = "Ollama" # Also set llm_name for good measure
print(f"LLM metadata (after check): {llm.metadata}")
print(f"LLM model_name (after check): {llm.metadata.model_name}")

print("\n--- Evaluating response faithfulness ---")
# Sanity check: Print metadata before passing to evaluator
if hasattr(llm, 'metadata') and llm.metadata is not None:
    print(f"LLM metadata exists: {llm.metadata}")
    print(f"LLM model_name: {llm.metadata.model_name}")
else:
    print("LLM metadata is None or missing.")
    print("The FaithfulnessEvaluator requires the LLM's metadata to be correctly populated.")
    print("Please ensure all llama-index related packages are up to date.")


try:
    # FaithfulnessEvaluator expects an LLM instance.
    # The 'llm' variable from above (Ollama instance) is correctly passed here.
    evaluator = FaithfulnessEvaluator(llm=llm)
    eval_result = evaluator.evaluate_response(response=response) # Pass the generated response
    print(f"Faithfulness Evaluation Result (Passing): {eval_result.passing}")
    print(f"Faithfulness Evaluation Feedback: {eval_result.feedback}")
except AttributeError as e:
    print(f"AttributeError during evaluation: {e}")
    print("This often indicates an issue with the LLM object's metadata attribute.")
    print("Please ensure your LlamaIndex and related LLM packages are up to date.")
except Exception as e:
    print(f"An unexpected error occurred during evaluation: {e}")


LLM metadata (after check): context_window=3900 num_output=256 is_chat_model=False is_function_calling_model=False model_name='qwen2.5:latest' system_role=<MessageRole.SYSTEM: 'system'>
LLM model_name (after check): qwen2.5:latest

--- Evaluating response faithfulness ---
LLM metadata exists: context_window=3900 num_output=256 is_chat_model=False is_function_calling_model=False model_name='qwen2.5:latest' system_role=<MessageRole.SYSTEM: 'system'>
LLM model_name: qwen2.5:latest


Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 


Faithfulness Evaluation Result (Passing): True
Faithfulness Evaluation Feedback: YES


In [13]:
!pip install arize-phoenix-otel




[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [14]:
from phoenix.otel import register

tracer_provider = register(
  project_name="test-llama-index",
  endpoint="https://app.phoenix.arize.com/v1/traces",
  auto_instrument=True
)

Overriding of current TracerProvider is not allowed
Attempting to instrument while already instrumented
Attempting to instrument while already instrumented


OpenTelemetry Tracing Details
|  Phoenix Project: test-llama-index
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: https://app.phoenix.arize.com/v1/traces
|  Transport: HTTP + protobuf
|  Transport Headers: {'authorization': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



In [15]:
import llama_index
import os

PHOENIX_API_KEY = os.environ["PHOENIX_API_KEY"] if "PHOENIX_API_KEY" in os.environ else "your_phoenix_api_key_here"
os.environ["PHOENIX_API_KEY"] = f"api_key={PHOENIX_API_KEY}"
llama_index.core.set_global_handler(
    "arize_phoenix", endpoint="https://llamatrace.com/v1/traces"
)

Attempting to instrument while already instrumented


In [16]:
response = query_engine.query(
    "What is the name of the someone that is interested in AI and techhnology?"
)
response

Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 
Failed to export batch code: 401, reason: 


Response(response='Based on the provided context, there is no mention of anyone specifically interested in AI and technology. The description refers to an anthropologist or cultural expert focused on Cypriot culture, history, and society. Therefore, we cannot determine a name for someone interested in AI and technology from this information alone.', source_nodes=[NodeWithScore(node=TextNode(id_='bfb2f869-19ea-4602-b09a-c5872e1bd173', embedding=None, metadata={'file_path': 'D:\\4thYear\\huggingface_course\\LLamaIndex\\data\\persona_1.txt', 'file_name': 'persona_1.txt', 'file_type': 'text/plain', 'file_size': 266, 'creation_date': '2025-06-15', 'last_modified_date': '2025-06-15'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: Relat

In [16]:
# This covers basics of using LlamaIndex with RAG, including loading data, embedding, indexing, querying, and evaluating responses.