In [None]:
#! pip install llama-index-llms-huggingface-api llama-index-embeddings-huggingface

In [None]:
# ! pip install python-dotenv

In [1]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv

# Load environment variables through python-dotenv before reading the token.
load_dotenv()

# Hugging Face access token; os.getenv yields None when HF_TOKEN is not set.
hf_token = os.getenv("HF_TOKEN")

# Chat model reached through the Hugging Face Inference API wrapper.
llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    token=hf_token,
    provider="auto",
    temperature=0.7,
    max_tokens=100,
)

# Smoke test: send a single prompt and print the completion.
response = llm.complete("Hello, how are you?")
print(response)

  from .autonotebook import tqdm as notebook_tqdm


Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?


In [None]:
#! pip install datasets

Extracting the personas of the people attending Alfred's party and storing them locally

In [2]:
from datasets import load_dataset
from pathlib import Path

# Fetch the "train" split of the tiny FinePersonas dataset.
dataset = load_dataset(path="dvilasuero/finepersonas-v0.1-tiny", split="train")

# Write one text file per persona so a directory reader can load them later.
persona_dir = Path("data")
persona_dir.mkdir(parents=True, exist_ok=True)
for i, persona in enumerate(dataset):
    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # can fail on, or silently mangle, non-ASCII characters in persona text.
    (persona_dir / f"persona_{i}.txt").write_text(persona["persona"], encoding="utf-8")

Loading the personas using SimpleDirectoryReader

In [3]:
from llama_index.core import SimpleDirectoryReader

# Read the persona files written to data/ back in as LlamaIndex documents.
reader = SimpleDirectoryReader(input_dir="data")
documents = reader.load_data()

# Cell output: number of documents loaded.
len(documents)

5000

After loading, split the documents into smaller chunks (nodes), embed them, and store them in a Chroma vector store

In [None]:
#! pip install chromadb llama-index-vector-stores-chroma

In [4]:
import chromadb
from llama_index.core import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.vector_stores.chroma import ChromaVectorStore

db= chromadb.PersistentClient(path="./alfred_chroma_db")
chroma_collection = db.get_or_create_collection(name="alfred")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_overlap=0),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ],
    vector_store=vector_store  
)

nodes = await pipeline.arun(documents=documents)  
print("Number of nodes ingested:", len(nodes))

2025-11-20 15:00:56,903 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2025-11-20 15:00:57,004 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
2025-11-20 15:01:02,264 - INFO - 1 prompt is loaded, with the key: query


Number of nodes ingested: 5000


In [5]:
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


# Same embedding model name as the one used in the ingestion pipeline.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Build the index on top of the existing vector store.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model,
)

2025-11-20 15:01:14,466 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
2025-11-20 15:01:19,904 - INFO - 1 prompt is loaded, with the key: query


In [None]:
# pip install llama-index huggingface-hub transformers torch

In [6]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import nest_asyncio

# Needed to run the query engine from inside the notebook's event loop.
nest_asyncio.apply()

# From here on `llm` refers to the SmolLM3 model.
llm = HuggingFaceInferenceAPI(
    model_name="HuggingFaceTB/SmolLM3-3B",
    token=os.getenv("HF_TOKEN"),
    temperature=0.7,
    max_tokens=200,
)

# Query engine over the persona index, using the "tree_summarize" response mode.
query_engine = index.as_query_engine(llm=llm, response_mode="tree_summarize")

response = query_engine.query(
    "Respond using a persona that describes author and travel experiences?"
)

# Cell output: the Response object.
response

Response(response='<think>\nOkay, let\'s see. The user wants me to respond using a persona that describes an author and their travel experiences. The context mentions a travel blogger focused on cultural exploration and language, specifically Eastern European history and a background in linguistics or education.\n\nFirst, I need to create a persona that fits these elements. The persona should sound like someone who\'s not just a traveler but also someone who deeply understands the cultural and historical aspects of Eastern Europe. They probably have a rich background in languages and education, so their writing might be detailed and informative.\n\nI should start with a name that sounds authentic, maybe a blend of Eastern European and modern elements. Let\'s go with "Ivan Petrovich" – sounds Russian, but not too traditional. Then a title, maybe something like "Cultural Navigator" to highlight their expertise.\n\nNext, the travel experiences should reflect their interests. They might vi

Evaluation

In [7]:
from llama_index.core.evaluation import FaithfulnessEvaluator

# The same LLM that produced the answer is used here as the judge.
evaluator = FaithfulnessEvaluator(llm=llm)
eval_result = evaluator.evaluate_response(response=response)

# Cell output: the evaluator's pass/fail verdict for the response above.
eval_result.passing

False

Function tools in LlamaIndex

In [13]:
from llama_index.core.tools import FunctionTool

def get_weather(location: str) -> str:
    """Return a fixed "sunny" report for ``location`` and log the lookup to stdout."""
    print(f"Getting weather for {location}")
    report = f"The weather in {location} is sunny"
    return report

# Expose get_weather as a tool. The description is what an LLM reads when
# deciding whether to call the tool, so it must say what the tool really does
# (the previous text spoke of a "descrption of a location").
tool = FunctionTool.from_defaults(
    get_weather,
    name="weather_tool",
    description="Looks up the weather for a given location.",
)

# Direct call, without an agent, to check the wiring.
tool.call("Kathmandu")

Getting weather for Kathmandu


ToolOutput(blocks=[TextBlock(block_type='text', text='The weather in Kathmandu is sunny')], tool_name='weather_tool', raw_input={'args': ('Kathmandu',), 'kwargs': {}}, raw_output='The weather in Kathmandu is sunny', is_error=False)

Creating a QueryEngineTool

In [15]:
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.tools import QueryEngineTool
from llama_index.vector_stores.chroma import ChromaVectorStore

# Re-open the persisted "alfred" collection and wrap it as a vector store.
db = chromadb.PersistentClient(path="./alfred_chroma_db")
chroma_collection = db.get_or_create_collection(name="alfred")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Index over the stored vectors, embedding with bge-small.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model,
)

# LLM that writes the answer from the retrieved nodes.
llm = HuggingFaceInferenceAPI(
    model_name="HuggingFaceTB/SmolLM3-3B",
    token=os.getenv("HF_TOKEN"),
    temperature=0.7,
    max_tokens=200,
)

query_engine = index.as_query_engine(llm=llm)

tool= QueryEngineTool.from_defaults(
    query_engine=query_engine,
    name="name",
    description="some desc"
)

await tool.acall("Responds about research on the impact of AI on the future of work and society?")

2025-11-20 15:36:59,355 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
2025-11-20 15:37:03,873 - INFO - 1 prompt is loaded, with the key: query


ToolOutput(blocks=[TextBlock(block_type='text', text="<think>\nOkay, let's see. The user is asking about research on the impact of AI on the future of work and society, and they provided a context about a machine learning researcher focused on natural language processing and AI prompts. Hmm, the context mentions developing AI prompts to enhance performance, decision-making, and ethics. But how does that relate to the impact on work and society?\n\nWait, the user wants to know if the researcher's work is related to that topic. The context doesn't explicitly mention work or society. The researcher is working on AI prompts for enhancing AI's performance, decisions, and ethics. Maybe the researcher's work could indirectly affect work and society through better AI systems. For example, if AI is more efficient in decision-making, it might lead to more job opportunities or changes in the workforce. But the context doesn't specify that. The answer needs to be based strictly on the given inform

Agents in LlamaIndex

In [23]:
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.tools import FunctionTool

# define sample Tool -- type annotations, function names, and docstrings, are all included in parsed schemas!
def multiply(a: int, b: int) -> int:
    """Multiplies two integers and returns the resulting integer"""
    # The docstring is part of the parsed tool schema, so it is kept verbatim.
    product = a * b
    return product

# LLM used by the agent
llm = HuggingFaceInferenceAPI(
    model_name="HuggingFaceTB/SmolLM3-3B",
    token=os.getenv("HF_TOKEN"),
    temperature=0.7,
    max_tokens=200,
)

# Agent workflow whose only tool is `multiply`
multiply_tool = FunctionTool.from_defaults(multiply)
agent = AgentWorkflow.from_tools_or_functions([multiply_tool], llm=llm)


RAG with agents
Provide the prompt + query + relevant data to the LLM; the LLM then provides the response to the user

In [24]:
from llama_index.core.tools import QueryEngineTool

# Query engine over the persona index; retrieves the 3 most similar nodes per
# query (as shown in the Components in LlamaIndex section).
query_engine = index.as_query_engine(llm=llm, similarity_top_k=3)

# The agent picks tools by name and description, so both describe the persona
# database instead of being placeholders.
query_engine_tool = QueryEngineTool.from_defaults(
    query_engine=query_engine,
    name="persona_search",
    description=(
        "Answers natural-language questions using a database of persona "
        "descriptions."
    ),
    return_direct=False,
)

# Agent whose only tool is the persona query engine.
query_engine_agent = AgentWorkflow.from_tools_or_functions(
    [query_engine_tool],
    llm=llm,
    system_prompt="You are a helpful assistant that has access to a database containing persona descriptions. ",
)

In [26]:
from llama_index.core.agent.workflow import (
    AgentWorkflow,
    FunctionAgent,
    ReActAgent,
)

# Define some tools
def add(a: int, b: int) -> int:
    """Add two numbers."""
    # Docstring kept verbatim: `add` is handed to an agent as a tool.
    total = a + b
    return total


def subtract(a: int, b: int) -> int:
    """Subtract two numbers."""
    # Docstring kept verbatim: `subtract` is handed to an agent as a tool.
    difference = a - b
    return difference


# Create agent configs
# NOTE: we can use FunctionAgent or ReActAgent here.
# FunctionAgent works for LLMs with a function calling API.
# ReActAgent works for any LLM.
calculator_agent = ReActAgent(
    name="calculator",
    description="Performs basic arithmetic operations",
    system_prompt="You are a calculator assistant. Use your tools for any math operation.",
    tools=[add, subtract],
    llm=llm,
)

query_agent = ReActAgent(
    name="info_lookup",
    description="Looks up information about XYZ",
    system_prompt="Use your tool to query a RAG system to answer information about XYZ",
    tools=[query_engine_tool],
    llm=llm
)

# Create and run the workflow
agent = AgentWorkflow(
    agents=[calculator_agent, query_agent], root_agent="calculator"
)

# Run the system
response = await agent.run(user_msg="Can you add 5 and 3?")
print (response)

<think>
Okay, let's see. The user is asking to add 5 and 3. I need to figure out which tool to use here. The available tools are add, subtract, and handoff. Since the task is to add two numbers, the add tool should be the right choice.

First, I'll start with a thought to note that I need to use a tool. Then, I'll use the add tool. The parameters for the add tool are a and b, both integers. The numbers here are 5 and 3. So the action input should be a JSON object with a: 5 and b: 3. 

Wait, the action input format needs to be in the correct JSON structure. The user mentioned that the Action Input should be in JSON format representing the kwargs. So for the add tool, the input would be {"a": 5, "b": 3}. 

After sending the action input, the tool should respond with the result. Let me check if there's any reason to handoff here. Since the task is straightforward addition, there's no need to handoff to another agent. 

So the response should be the sum of 5 and 3, which is 8. Then, I


Workflow creation in LlamaIndex

In [27]:
from llama_index.core.workflow import StartEvent, StopEvent, Workflow, step

class MyWorkflow(Workflow):
    """Minimal workflow: one step that goes straight from start to stop."""

    @step
    async def my_step(self, ev: StartEvent) -> StopEvent:
        """Ignore the start event's contents and finish with a fixed greeting."""
        greeting = "Hello, world!"
        return StopEvent(result=greeting)


w = MyWorkflow(timeout=10, verbose=False)
result = await w.run()

In [28]:
from llama_index.core.workflow import Event

class ProcessingEvent(Event):
    """Custom event carrying one text payload between workflow steps."""
    # Text produced by the emitting step and read by the receiving step.
    intermediate_result: str

class MultiStepWorkflow(Workflow):
    """Two-step workflow: StartEvent -> ProcessingEvent -> StopEvent."""

    @step
    async def step_one(self, ev: StartEvent) -> ProcessingEvent:
        """Emit a ProcessingEvent announcing that the first step is done."""
        status = "Step 1 complete"
        return ProcessingEvent(intermediate_result=status)

    @step
    async def step_two(self, ev: ProcessingEvent) -> StopEvent:
        """Finish the workflow with a message built from the intermediate result."""
        return StopEvent(result=f"Finished processing: {ev.intermediate_result}")

w = MultiStepWorkflow(timeout=10, verbose=False)
result = await w.run()
result

'Finished processing: Step 1 complete'

In [29]:
from llama_index.core.workflow import Event
import random


class ProcessingEvent(Event):
    """Custom event carrying one text payload between workflow steps."""
    # NOTE(review): a class with this name and field is also declared in an
    # earlier cell; this definition replaces it in the kernel namespace.
    # Text produced by the emitting step and read by the receiving step.
    intermediate_result: str


class LoopEvent(Event):
    """Custom event that a step can return to route the workflow back to a step accepting it."""
    # Message attached to the loop-back.
    loop_output: str


class MultiStepWorkflow(Workflow):
    """Workflow whose first step randomly either proceeds or loops back to itself.

    NOTE(review): an earlier cell also defines a class named MultiStepWorkflow;
    this definition replaces it in the kernel namespace.
    """

    @step
    async def step_one(self, ev: StartEvent | LoopEvent) -> ProcessingEvent | LoopEvent:
        """Flip a coin: move on with a ProcessingEvent, or retry via a LoopEvent."""
        if random.randint(0, 1) != 0:
            print("Good thing happened")
            return ProcessingEvent(intermediate_result="First step complete.")
        # A draw of 0 sends the workflow back into this same step.
        print("Bad thing happened")
        return LoopEvent(loop_output="Back to step one.")

    @step
    async def step_two(self, ev: ProcessingEvent) -> StopEvent:
        """Finish the workflow with a message built from the intermediate result."""
        return StopEvent(result=f"Finished processing: {ev.intermediate_result}")


w = MultiStepWorkflow(verbose=False)
result = await w.run()
result

Good thing happened


'Finished processing: First step complete.'