In [1]:
# !apt-get install -y bubblewrap
# # install a branch of llama stack
# !pip install llama-stack

In [3]:
# !llama stack build --template together --image-type venv
!export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16"

In [4]:
import os
os.environ['INFERENCE_MODEL']="meta-llama/Llama-3.2-3B-Instruct"
def create_http_client():
    from llama_stack_client import LlamaStackClient
    return LlamaStackClient(base_url=f"http://127.0.0.1:11434")

create_http_client()

def create_library_client(template="ollama"):
    from llama_stack import LlamaStackAsLibraryClient
    client = LlamaStackAsLibraryClient(template)
    client.initialize()
    return client

client = create_library_client()



In [5]:
import os
from termcolor import cprint
from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.client_tool import ClientTool
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types import CompletionMessage
from llama_stack_client.types.agent_create_params import AgentConfig
from llama_stack_client.types.shared.tool_response_message import ToolResponseMessage
from llama_stack_client.types import Document

In [6]:
# !llama-stack-client providers list

In [4]:
# import os
# from termcolor import cprint

# from llama_stack_client.lib.agents.agent import Agent
# from llama_stack_client.lib.agents.event_logger import EventLogger
# from llama_stack_client.types.agent_create_params import AgentConfig
# from llama_stack_client.types import Document

# client = create_library_client()

In [7]:
# List available models
models = client.models.list()
print("--- Available models: ---")
for m in models:
    print(f"- {m.identifier}")
print()

--- Available models: ---
- all-MiniLM-L6-v2
- meta-llama/Llama-3.2-3B-Instruct



In [14]:
# pip install pymupdf4llm==0.0.17

In [11]:
import pymupdf4llm

def process_texts(texts, chunk_size=100, overlap=30):
    """Process a list of texts, splitting them into chunks of specified size with overlap,
    and accumulating shorter texts."""
    accumulated_words = []  # Accumulate words from texts shorter than chunk_size
    final_chunks = []  # Store the final chunks of text

    for text in texts.split():
        accumulated_words.append(text)

        while len(accumulated_words) >= chunk_size:
            # Take the first chunk_size words for the current chunk
            chunk = " ".join(accumulated_words[:chunk_size])
            final_chunks.append(chunk)
            # Remove words from the start of the accumulated_words, considering overlap
            accumulated_words = accumulated_words[chunk_size - overlap:]

    # If there are any remaining words, form the last chunk
    if accumulated_words:
        final_chunks.append(" ".join(accumulated_words))

    return final_chunks


md_text = pymupdf4llm.to_markdown('/workspace/llama3-report.pdf',show_progress=False)
all_chunks = process_texts(md_text, chunk_size=500, overlap=50)

In [12]:
# all_chunks

In [15]:
documents = [
    Document(
        document_id=f"num-{i}",
        content=f"{chk}",
        mime_type="text/plain",
        metadata={},
    )
    for i, chk in enumerate(all_chunks)
]



# Register a vector database
vector_db_id = "test-vector-db"
client.vector_dbs.register(
    vector_db_id=vector_db_id,
    embedding_model="BAAI/bge-large-en-v1.5",
    embedding_dimension=1024,
    provider_id="chromadb"

)

# Insert the documents into the vector database
client.tool_runtime.rag_tool.insert(
    documents=documents,
    vector_db_id=vector_db_id,
    chunk_size_in_tokens=512,
)

ValueError: Model BAAI/bge-large-en-v1.5 not found

In [10]:



agent_config = AgentConfig(
    model=os.environ["INFERENCE_MODEL"],
    # Define instructions for the agent ( aka system prompt)
    instructions="You are a helpful assistant. Provide answer in detailed style.write your answer as topic , subtopic, expain each term in detail.",
    enable_session_persistence=False,
    sampling_params={
                "max_tokens": 2000},
    tool_choice="auto",
    # Define tools available to the agent
    toolgroups = [
        {
          "name": "builtin::rag",
          "args" : {
            "vector_db_ids": [vector_db_id],
          }
        }
    ],
)

rag_agent = Agent(client, agent_config)


Batches:   0%|          | 0/11 [00:00<?, ?it/s]

In [11]:
session_id = rag_agent.create_session("test4-session")

user_prompts = [
    "all training methods availalbe",
]

# Run the agent loop by calling the `create_turn` method
for prompt in user_prompts:
    cprint(f'User> {prompt}', 'green')
    response = rag_agent.create_turn(
        messages=[{"role": "user", "content": prompt}],
        session_id=session_id,
    )
    for log in EventLogger().log(response):
        log.print()

[32mUser> all training methods availalbe[0m
[30m[0m

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[32mtool_execution> Tool:query_from_memory Args:{}[0m
[36mtool_execution> fetched 8414 bytes from memory[0m
[33minference> [0m[33m**[0m[33mTraining[0m[33m Methods[0m[33m**

[0m[33mThere[0m[33m are[0m[33m several[0m[33m training[0m[33m methods[0m[33m that[0m[33m can[0m[33m be[0m[33m used[0m[33m to[0m[33m improve[0m[33m the[0m[33m performance[0m[33m of[0m[33m a[0m[33m model[0m[33m,[0m[33m particularly[0m[33m in[0m[33m the[0m[33m context[0m[33m of[0m[33m mathematical[0m[33m problem[0m[33m-solving[0m[33m.[0m[33m The[0m[33m following[0m[33m are[0m[33m some[0m[33m of[0m[33m the[0m[33m most[0m[33m common[0m[33m training[0m[33m methods[0m[33m:

[0m[33m1[0m[33m.[0m[33m **[0m[33mSup[0m[33mervised[0m[33m Learning[0m[33m**:[0m[33m This[0m[33m method[0m[33m involves[0m[33m training[0m[33m the[0m[33m model[0m[33m on[0m[33m labeled[0m[33m data[0m[33m,[0m[33m where[0m[33m the[0

In [13]:
# response = rag_agent.create_turn(
#     messages=[{
#         "role": "user",
#         "content": "hi there"
#     }],
#     session_id=session_id
# )

# full_response = ""
# retrieval_response = ""
# for log in EventLogger().log(response):
#     # print(log.role)
#     # print(dir(log))
#     # log.print()
#     if log.role == "tool_execution":
#         print(log)
#         print(log.content,end='')
#         retrieval_response += log.content.replace("====", "").strip()
#     else:
        
#         print(log.content,end='')
#         full_response += log.content


In [14]:
# retrieval_response


In [15]:
# rs.to_dict()['event'].keys()

In [16]:
# rs.to_dict()

In [12]:
# messages =[]
# while True:
#     user_input = input('User: ').strip()
#     if user_input.lower()=='q':
#         break
#     messages.append({
#         "role": "user",
#         "content": f"{user_input}"
#     }
#     # Query with RAG
#     response = agent.create_turn(
#         messages=[{
#             "role": "user",
#             "content": "What are the key topics in the documents?"
#         }],
#         session_id=session_id
#     )

## Custom tools

In [8]:
pip -q install duckduckgo-search

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [9]:
from duckduckgo_search import DDGS
results = DDGS().text("python programming", max_results=5)
print(results)

[{'title': 'Welcome to Python.org', 'href': 'https://www.python.org/', 'body': "Python is a programming language that lets you work quickly and integrate systems more effectively. Learn More. Get Started. Whether you're new to programming or an experienced developer, it's easy to learn and use Python. Start with our Beginner's Guide. Download."}, {'title': 'Python For Beginners | Python.org', 'href': 'https://www.python.org/about/gettingstarted/', 'body': 'Learn how to get started with Python, a popular and easy-to-use programming language. Find out how to install, edit, and use Python, and explore its libraries, documentation, and community resources.'}, {'title': 'Python Tutorial - W3Schools', 'href': 'https://www.w3schools.com/python/', 'body': 'Python is a popular programming language. Python can be used on a server to create web applications. Start learning Python now » ...'}, {'title': 'Python Tutorial | Learn Python Programming Language', 'href': 'https://www.geeksforgeeks.org/p

In [27]:
# https://github.com/meta-llama/llama-stack/blob/main/tests/client-sdk/agents/test_agents.py#L13

In [None]:
# class TestClientTool(ClientTool):
#     """Tool to give boiling point of a liquid
#     Returns the correct value for polyjuice in Celcius and Fahrenheit
#     and returns -1 for other liquids
#     """

#     def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]:
#         assert len(messages) == 1, "Expected single message"

#         message = messages[0]

#         tool_call = message.tool_calls[0]

#         try:
#             response = self.run_impl(**tool_call.arguments)
#             response_str = json.dumps(response, ensure_ascii=False)
#         except Exception as e:
#             response_str = f"Error when running tool: {e}"

#         message = ToolResponseMessage(
#             role="tool",
#             call_id=tool_call.call_id,
#             tool_name=tool_call.tool_name,
#             content=response_str,
#         )
#         return [message]

#     def get_name(self) -> str:
#         return "get_boiling_point"

#     def get_description(self) -> str:
#         return "Get the boiling point of imaginary liquids (eg. polyjuice)"

#     def get_params_definition(self) -> Dict[str, Parameter]:
#         return {
#             "liquid_name": Parameter(
#                 name="liquid_name",
#                 parameter_type="string",
#                 description="The name of the liquid",
#                 required=True,
#             ),
#             "celcius": Parameter(
#                 name="celcius",
#                 parameter_type="boolean",
#                 description="Whether to return the boiling point in Celcius",
#                 required=False,
#             ),
#         }

#     def run_impl(self, liquid_name: str, celcius: bool = True) -> int:
#         if liquid_name.lower() == "polyjuice":
#             if celcius:
#                 return -100
#             else:
#                 return -212
#         else:
#             return -1


In [114]:
#https://github.com/meta-llama/llama-stack/blob/main/tests/client-sdk/agents/test_agents.py#L13

from typing import Dict
from llama_stack_client.types.tool_def_param import Parameter
from llama_stack_client.types.shared.completion_message import CompletionMessage
from llama_stack_client.types import ToolResponseMessage

class WebSearchTool(ClientTool):
    def __init__(self):
        self.engine = DDGS()

    def get_name(self) -> str:
        return "web_search"

    def get_description(self) -> str:
        return "Search the web for a given query"

    def run_impl(self, query: str):
        return self.engine.text(query,max_results=5)

    def get_params_definition(self) -> Dict[str, Parameter]:
        return {
            "query": Parameter(
                name="query",
                parameter_type="string",
                description="The query to search for internet",
                required=True,
            )
        }

    def run(self, messages):
        query = None
        for message in messages:
            if isinstance(message, CompletionMessage) and message.tool_calls:
                for tool_call in message.tool_calls:
                    if "query" in tool_call.arguments:
                        query = tool_call.arguments["query"]
                        call_id = tool_call.call_id

        if query:
            search_result = self.run_impl(query)
            return [
                ToolResponseMessage(
                    call_id=tool_call.call_id,
                    role="tool",
                    content=self._format_response_for_agent(search_result),
                    tool_name=tool_call.tool_name,
                )
            ]

        return [
            ToolResponseMessage(
                call_id=tool_call.call_id,
                role="tool",
                content="No query provided.",
                tool_name=tool_call.tool_name,
            )
        ]


    # def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]:
    #     assert len(messages) == 1, "Expected single message"

    #     message = messages[0]

    #     tool_call = message.tool_calls[0]

    #     try:
    #         response = self.run_impl(**tool_call.arguments)
    #         response_str = json.dumps(response, ensure_ascii=False)
    #     except Exception as e:
    #         response_str = f"Error when running tool: {e}"

    #     message = ToolResponseMessage(
    #         role="tool",
    #         call_id=tool_call.call_id,
    #         tool_name=tool_call.tool_name,
    #         content=response_str,
    #     )
    #     return [message]
    def _format_response_for_agent(self, search_result):
        parsed_result = search_result
        formatted_result = "Search Results with Citations:\n\n"
        for i, result in enumerate(parsed_result):
            formatted_result += (
                f"{i}. {result.get('title', 'No Title')}\n"
                f"   URL: {result.get('href', 'No URL')}\n"
                f"   Description: {result.get('body', 'No Description')}\n\n"
            )
        return formatted_result


    

In [67]:
web_search_tool = WebSearchTool()
result = web_search_tool.run_impl('python programming')
result

[{'title': 'Welcome to Python.org',
  'href': 'https://www.python.org/',
  'body': "Python is a programming language that lets you work quickly and integrate systems more effectively. Learn More. Get Started. Whether you're new to programming or an experienced developer, it's easy to learn and use Python. Start with our Beginner's Guide. Download."},
 {'title': 'Python For Beginners | Python.org',
  'href': 'https://www.python.org/about/gettingstarted/',
  'body': 'Learn how to get started with Python, a popular and easy-to-use programming language. Find out how to install, edit, and use Python, and explore its libraries, documentation, and community resources.'},
 {'title': 'Python Tutorial - W3Schools',
  'href': 'https://www.w3schools.com/python/',
  'body': 'Python is a popular programming language. Python can be used on a server to create web applications. Start learning Python now » ...'},
 {'title': 'Python Tutorial | Learn Python Programming Language',
  'href': 'https://www.ge

In [49]:
def execute_search(query: str):
    web_search_tool = WebSearchTool()
    result = web_search_tool.run_impl(query)
    print("Search Results:", result)

execute_search(query = 'python programming')

Search Results: [{'title': 'Welcome to Python.org', 'href': 'https://www.python.org/', 'body': "Python is a programming language that lets you work quickly and integrate systems more effectively. Learn More. Get Started. Whether you're new to programming or an experienced developer, it's easy to learn and use Python. Start with our Beginner's Guide. Download."}, {'title': 'Python For Beginners | Python.org', 'href': 'https://www.python.org/about/gettingstarted/', 'body': 'Learn how to get started with Python, a popular and easy-to-use programming language. Find out how to install, edit, and use Python, and explore its libraries, documentation, and community resources.'}, {'title': 'Python Tutorial | Learn Python Programming Language', 'href': 'https://www.geeksforgeeks.org/python-programming-language-tutorial/', 'body': 'A comprehensive guide to learn Python, a popular and versatile programming language for web development, data science, AI and more. Covers Python fundamentals, data ty

In [68]:
instructions = """You are a knowledgeable and detailed assistant focused on providing comprehensive, well-structured responses. Follow these guidelines:

Response Structure:
1. Begin with a brief overview of the topic
2. Break down the response into clearly defined sections using headers
3. For each major topic, provide:
   - A clear definition
   - Key concepts and principles
   - Real-world examples or applications
   - Common misconceptions (if applicable)
   - Related topics or connections

Formatting Requirements:
- Use clear hierarchical headers for organization
- Include subsections when topics need further breakdown
- Format technical terms in italics or bold where appropriate
- Use numbered lists for sequential information
- Use bullet points for related but non-sequential items

Content Guidelines:
- Provide detailed explanations that balance depth with clarity
- Include relevant historical context when applicable
- Cite specific examples to illustrate concepts
- Address both basic and advanced aspects of the topic
- Explain technical terms and jargon
- Highlight practical applications and relevance

Additional Requirements:
- Note any areas where information might be incomplete or uncertain
- Suggest relevant follow-up topics for further learning
- When appropriate, include:
  * Formulas or equations
  * Statistics or data
  * Current developments in the field
  * Different schools of thought or approaches

If research is needed:
- Frame potential search queries clearly
- Identify key terms and concepts to investigate
- Suggest reliable sources or types of resources
- Note areas where additional research would be beneficial"""


In [115]:
import json
client_tool = WebSearchTool()

agent_config = AgentConfig(
    model=os.environ["INFERENCE_MODEL"],
    # Define instructions for the agent ( aka system prompt)
        instructions="You are a helpful assistant! If you call builtin tools like web search",    
    enable_session_persistence=False,
    sampling_params={
                "max_tokens": 2000},
    # Define tools available to the agent
    toolgroups = ["builtin::websearch"],
    client_tools = [client_tool.get_tool_definition()],
    tool_choice="auto",
    tool_prompt_format="python_list"
)

agent = Agent(client, agent_config, client_tools=(client_tool,))

# Create a session for interaction and print the session ID
session_id = agent.create_session("test2-session")
print(f"Created session_id={session_id} for Agent({agent.agent_id})")

Created session_id=b859c812-0f92-4978-b943-1ed2ed13cc44 for Agent(808277f4-82e5-46e6-9c41-ffdd1c586f54)


In [116]:
response = agent.create_turn(
        messages=[
            {
                "role": "user",
                "content": """hi top new in 30 january 2025 ,search from web""",
            }
        ],
        session_id=session_id,  # Use the created session ID
    )

for log in EventLogger().log(response):
    log.print()
    # print(log.role)
    if log.role == "inference":
        # print(log)
        print('here is Inference\n')
        print(log.content, end='')
    elif log.role =='CustomTool':
        print('CustomTool\n')   
        print(log.content, end='')


[30m[0m[33minference> [0mhere is Inference

[33m[[0m[33mweb[0m[33m_search[0m[33m(query[0m[33m="[0m[33mtop[0m[33m news[0m[33m January[0m[33m [0m[33m30[0m[33m,[0m[33m [0m[33m202[0m[33m5[0m[33m")][0m[97m[0m
[32mCustomTool> Search Results with Citations:

0. US News Today Live Updates on January 30, 2025 - Mint
   URL: https://www.livemint.com/news/us-news/latest-us-news-today-on-january-30-2025-live-updates-11738175730553.html
   Description: US News Today Live Updates on January 30, 2025: Stay informed on the latest developments and key stories shaping the United States. ... Top Gainers Top Losers. Tata Motors share price; 752.45 3.29 ...

1. January 2025 News Archive - The Wall Street Journal
   URL: https://www.wsj.com/news/archive/2025/january
   Description: WSJ's digital archive of news articles and top headlines from January 2025

2. Portal:Current events/January 2025 - Wikipedia
   URL: https://en.wikipedia.org/wiki/Portal:Current_events/Januar

In [101]:
dir(agent)


['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_create_agent',
 '_has_tool_call',
 '_run_tool',
 'agent_config',
 'agent_id',
 'client',
 'client_tools',
 'create_session',
 'create_turn',
 'memory_bank_id',
 'session_id',
 'sessions']

In [102]:
agent.sessions

['41b103d4-75fa-407e-89a5-909c149754dd']

In [105]:
agent.memory_bank_id