In [1]:
import os
import dotenv
dotenv.load_dotenv()

from llama_index.llms.azure_openai import AzureOpenAI

llm = AzureOpenAI(
    model="gpt-35-turbo-16k",
    deployment_name=os.environ.get("AZURE_OPENAI_DEPLOYMENT"),
    api_key=os.environ.get("AZURE_OPENAI_KEY"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_version=os.environ.get("AZURE_OPENAI_VERSION"),
)

llm.complete("Hello, how are you?")

CompletionResponse(text="Hello! I'm doing well, thank you. How can I assist you today?", additional_kwargs={}, raw=ChatCompletion(id='chatcmpl-Be1SW2MVwHzOzV8ID7BUeCEkFwCrx', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="Hello! I'm doing well, thank you. How can I assist you today?", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None, annotations=[]), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'protected_material_code': {'filtered': False, 'detected': False}, 'protected_material_text': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})], created=1748878428, model='gpt-4.1-mini-2025-04-14', object='chat.completion', service_tier=None, system_fingerprint='fp_178c8d546f', usage=CompletionUsage(completion_tokens=17, prompt_tokens=13

## RAG

Read data

In [2]:
from llama_index.core import SimpleDirectoryReader

reader = SimpleDirectoryReader(input_dir="sample_data")
documents = reader.load_data()
print(documents)

[Document(id_='2dc349a1-2d75-4363-8d18-ba198664b331', embedding=None, metadata={'page_label': '1', 'file_name': 'DLC-02-E.pdf', 'file_path': '/Users/admin/src/edu/hf_agents/sample_data/DLC-02-E.pdf', 'file_type': 'application/pdf', 'file_size': 9225859, 'creation_date': '2025-06-02', 'last_modified_date': '2025-06-02'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}'), Document(id_='d355fa27-ca79-4e7f-b85b-a633270a0749', embedding=None, metadata={'page_label': '2', 'file_name': 'DLC-02-E.pdf', '

Index (vectorize)

In [3]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

db = chromadb.PersistentClient(path="./alfred_chroma_db")
chroma_collection = db.get_or_create_collection("alfred")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=1000, chunk_overlap=50),
        HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5"),
    ],
    vector_store=vector_store,
)

nodes = await pipeline.arun(documents=documents)

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
retriever = index.as_retriever(
)
nodes = retriever.retrieve("Does the device support Modbus?")
for node in nodes:
    print(node)

Node ID: d84892b6-d9a0-467e-bbc4-8caca67855a5
Text: 151 5 5 Modbus TCP interface. M Clientodbus MS e r v e rodbus
request response This device supports Modbus TCP communication
interface. When transmitting data, all Word data must comply with the
High Byte transmissionprinciple. The external software (Client) can
use this interface to control andmonitor THE DLC-02(Server), including
the minimum b...
Score:  0.556



In [5]:

query_engine = index.as_query_engine(
    llm=llm,
    response_mode="tree_summarize",
)
response = query_engine.query("Does the device support Modbus?")
print(response)

Yes, the device supports Modbus TCP communication interface.


## Eval

These evaluators leverage LLMs to analyze responses across different dimensions. Let’s look at the three main evaluators available:

- FaithfulnessEvaluator: Evaluates the faithfulness of the answer by checking if the answer is supported by the context.
- AnswerRelevancyEvaluator: Evaluate the relevance of the answer by checking if the answer is relevant to the question.
- CorrectnessEvaluator: Evaluate the correctness of the answer by checking if the answer is correct.

In [15]:
from llama_index.core.evaluation import FaithfulnessEvaluator

evaluator = FaithfulnessEvaluator(llm=llm)

# Use the async version
eval_result = await evaluator.aevaluate_response(response=response)
print(eval_result.passing)
print("CONTEXT:")
for context in eval_result.contexts:
    print(context)

True
CONTEXT:
151
5
5 Modbus TCP interface.
M Clientodbus MS e r v e rodbus
request
response
This device supports Modbus TCP communication interface. When
transmitting data, all Word data must comply with the High Byte transmissionprinciple. The external software (Client) can use this interface to control andmonitor THE DLC-02(Server), including the minimum brightness level,maximum brightness level, system power-on behavior, dimming time andother DALI instructions.
IP address: Refer to section 4.1.3 or 6.4.2, communication port: 502
51. Communication Timing
a) The maximum response time between registers 40001 and 40006 and
registers 30001 to 30004 shall be determined according to DALIinstruction sending time.
b) The maximum response time of registers 41001 to 41004 shall be
determined according to DALI instruction sending time.
c) The maximum response time of registers 42001 to 42002 and registers
32001 to 32004 shall be determined according to DALI instructionsending time.
d) The maxi

## Tools

- FunctionTool: Convert any Python function into a tool that an agent can use. It automatically figures out how the function works.
- QueryEngineTool: A tool that lets agents use query engines. Since agents are built on query engines, they can also use other agents as tools.
- Toolspecs: Sets of tools created by the community, which often include tools for specific services like Gmail.
- Utility Tools: Special tools that help handle large amounts of data from other tools.

In [16]:
from llama_index.core.tools import FunctionTool

def get_weather(location: str) -> str:
    """Useful for getting the weather for a given location."""
    print(f"Getting weather for {location}")
    return f"The weather in {location} is sunny"

tool = FunctionTool.from_defaults(
    get_weather,
    name="my_weather_tool",
    description="Useful for getting the weather for a given location.",
)
tool.call("New York")

Getting weather for New York


ToolOutput(content='The weather in New York is sunny', tool_name='my_weather_tool', raw_input={'args': ('New York',), 'kwargs': {}}, raw_output='The weather in New York is sunny', is_error=False)

In [None]:
from llama_index.core.tools import QueryEngineTool
tool = QueryEngineTool.from_defaults(query_engine, name="some useful name", description="some useful description")

# The above query engine, using LLM to synthesize the response

print(tool.call("DALI, what is it?"))

DALI is a communication protocol used for controlling lighting devices. It involves sending commands and receiving data through specific registers, with data formatted in bytes that include a command frame header, serial number, bus ID, and status byte. The bus ID distinguishes between different DALI buses, such as DALI A and DALI B. The status byte indicates the type of DALI data returned or error messages. Control gear registers allow for various functions such as turning lights on or off, setting brightness levels, triggering scenes, locking or unlocking motion sensor controls, and changing color values. Addresses can be assigned to single devices, groups, or broadcasts, enabling flexible control over lighting systems.


Toolspect - set of related tools, e.g. for Gmail:

```
from llama_index.tools.google import GmailToolSpec

tool_spec = GmailToolSpec()
tool_spec_list = tool_spec.to_tool_list()

## Agents

In [21]:

from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.tools import FunctionTool

# define sample Tool -- type annotations, function names, and docstrings, are all included in parsed schemas!
def multiply(a: int, b: int) -> int:
    """Multiplies two integers and returns the resulting integer"""
    return a * b


# initialize agent
agent = AgentWorkflow.from_tools_or_functions(
    [FunctionTool.from_defaults(multiply)],
    llm=llm
)

# stateless
response = await agent.run("What is 2 times 2?")
print(response)

# remembering state
from llama_index.core.workflow import Context

ctx = Context(agent)

response = await agent.run("My name is Bob.", ctx=ctx)
print(response)
response = await agent.run("What was my name again?", ctx=ctx)
print(response)

2 times 2 is 4.
Hello Bob! How can I assist you today?
Your name is Bob. How can I help you further, Bob?


Multi-agent

In [24]:
query_engine_tool = QueryEngineTool.from_defaults(
    query_engine=query_engine,
    name="name",
    description="a specific description",
    return_direct=False,
)
query_engine_agent = AgentWorkflow.from_tools_or_functions(
    [query_engine_tool],
    llm=llm,
    system_prompt="You are a helpful assistant that has access to a database containing persona descriptions. "
)

from llama_index.core.agent.workflow import (
    AgentWorkflow,
    FunctionAgent,
    ReActAgent,
)

# Define some tools
def add(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b


def subtract(a: int, b: int) -> int:
    """Subtract two numbers."""
    return a - b


# Create agent configs
# NOTE: we can use FunctionAgent or ReActAgent here.
# FunctionAgent works for LLMs with a function calling API.
# ReActAgent works for any LLM.
calculator_agent = ReActAgent(
    name="calculator",
    description="Performs basic arithmetic operations",
    system_prompt="You are a calculator assistant. Use your tools for any math operation.",
    tools=[add, subtract],
    llm=llm,
)

# query_agent = ReActAgent(
#     name="info_lookup",
#     description="Looks up information about XYZ",
#     system_prompt="Use your tool to query a RAG system to answer information about XYZ",
#     tools=[query_engine_tool],
#     llm=llm
# )

query_agent = FunctionAgent(
    name="info_lookup",
    description="Looks up information about XYZ",
    system_prompt="Use your tool to query a RAG system to answer information about XYZ",
    tools=[query_engine_tool],
    llm=llm
)

# Create and run the workflow
agent = AgentWorkflow(
    agents=[calculator_agent, query_agent], root_agent="calculator"
)

# Run the system
response = await agent.run(user_msg="Tell me about devices available in the database")
print(response)

The devices available in the database include those compatible with version-specific features and equipped with displays and operating elements. They have status LEDs and mechanical specifications, supporting installation and settings such as mounting, electrical configuration, and wiring. These devices interface with software managing virtual lamps, input devices, groups, scenes, sensor interactions, combination outputs, relays, timers, sequences, and human-centric lighting. They also support communication via Modbus TCP interface with defined protocols and commands. Maintenance features include firmware updates and FAQs, and the devices have LCD displays providing bus information, tests, and system management options. If you want details about a specific device or category, please let me know!
