In [1]:
# Reload imported modules automatically so edits to local .py files are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
# Question reused by the retrieval cell and the traced query-engine cell below.
query = "What is Alita? How does it work?"

In [3]:
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
from dotenv import load_dotenv, find_dotenv
import os

# Load the nearest .env file so settings such as LLAMA_CLOUD_API_KEY can be read from the environment.
_ = load_dotenv(find_dotenv())

# Handle to the "alita-index" index in the "Default" LlamaCloud project.
alita_index = LlamaCloudIndex(
    name="alita-index",
    project_name="Default",
    organization_id="bf9b425c-54cb-4182-a93f-8ac6aed04348",
    api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
)

# Run `query` through the index's default retriever; the result is inspected in the next cell.
nodes = alita_index.as_retriever().retrieve(query)

In [4]:
# Show the raw retrieval result (a list of NodeWithScore objects, per the recorded output).
nodes

[NodeWithScore(node=TextNode(id_='2c753523-8829-4ad7-b85e-b648f422f5ae', embedding=None, metadata={'id': 'alita_paper.pdf', 'file_size': 1113373, 'last_modified_at': '2025-08-24T05:14:27', 'file_path': 'alita_paper.pdf', 'file_name': 'alita_paper.pdf', 'external_file_id': 'alita_paper.pdf', 'file_id': 'bcee7eb9-6479-46a6-9122-fd5b454699ef', 'pipeline_file_id': 'd149ed00-0d38-4815-a641-d388a45063aa', 'pipeline_id': 'e4c3c794-6d8a-450a-b3b3-2d1dee470c2e', 'page_label': 1, 'start_page_index': 0, 'start_page_label': 1, 'end_page_index': 0, 'end_page_label': 1, 'document_id': 'c9f76edb0a0d2f097f23ba53edef10f99a6924dfccb183a929', 'start_char_idx': 1, 'end_char_idx': 2846}, excluded_embed_metadata_keys=['file_id', 'pipeline_file_id', 'id', 'file_size', 'last_modified_at', 'file_id', 'pipeline_file_id', 'id', 'file_size', 'last_modified_at', 'start_page_index', 'start_page_label', 'page_label', 'end_page_index', 'end_page_label', 'document_id', 'file_id', 'pipeline_file_id'], excluded_llm_meta

In [10]:
from llama_index.llms.ollama import Ollama
from langsmith import traceable
from IPython.display import display, Markdown

# LLM served through Ollama; temperature=0 is presumably chosen to keep answers as repeatable as possible.
llm = Ollama(model="gpt-oss:20b", temperature=0)
# Query engine over the Alita index that uses `llm` instead of the library default.
alita_query_engine = alita_index.as_query_engine(llm=llm)

# LangSmith's decorator takes the run kind as `run_type`; `type` is not one of its
# options, so passing it would leave the run unlabelled as a tool.
@traceable(run_type="tool", name="alita")
def alita_knowledge_base(query: str):
    """Answer ``query`` with the Alita RAG query engine inside a traced run.

    Args:
        query: Natural-language question forwarded to ``alita_query_engine``.

    Returns:
        Whatever ``alita_query_engine.query`` returns for the question.
    """
    return alita_query_engine.query(query)
    
# Go through the traced wrapper rather than calling alita_query_engine.query(query) directly.
response = alita_knowledge_base(query=query)

# Render the answer as Markdown in the cell output.
display(Markdown(f"**Response:** {response}"))

**Response:** **Alita** is a general‑purpose AI agent that focuses on two core ideas:  
1. **Minimal predefinition** – it starts with only a handful of built‑in components (a web‑search agent and a manager that orchestrates everything).  
2. **Maximal self‑evolution** – it can create, refine, and reuse its own tools on the fly by generating *Model Context Protocols* (MCPs) from open‑source resources.

---

### How it works

1. **Question intake** – The manager receives a user query and builds an augmented prompt.  
2. **Iterative reasoning** – Using a Code‑ReAct style loop, the manager analyses the task, decomposes it into subtasks, and decides whether new tools are needed.  
3. **Tool discovery & creation**  
   * The web agent searches the internet for relevant libraries or code snippets.  
   * The manager synthesizes these findings into executable scripts.  
   * Scripts that prove useful are wrapped into MCPs, stored in an internal “MCP Box,” and can be invoked later.  
4. **Execution** – Generated tools run in isolated virtual environments; any errors are fed back to the manager for correction.  
5. **Output** – Once all subtasks are resolved, the manager aggregates the results and returns the final answer.

Through this cycle, Alita continually expands its own capability set without relying on a large, hand‑crafted toolbox, enabling it to tackle a wide range of tasks with high adaptability and scalability.

In [6]:
# Handle to the "mcp-zero-index" index in the "Default" LlamaCloud project.
mcp_zero_index = LlamaCloudIndex(
    name="mcp-zero-index",
    project_name="Default",
    organization_id="bf9b425c-54cb-4182-a93f-8ac6aed04348",
    api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
)

In [None]:
from llama_cloud import CompositeRetrievalMode
from llama_index.indices.managed.llama_cloud import (
    LlamaCloudCompositeRetriever,
)

# Composite retriever spanning both knowledge bases.
# NOTE(review): create_if_not_exists=True presumably creates the named retriever on first use,
# and rerank_top_n=6 presumably caps the reranked results - confirm against the LlamaCloud docs.
retriever = LlamaCloudCompositeRetriever(
    name="Alita and MCP Zero Retriever",
    api_key=os.environ.get("LLAMA_CLOUD_API_KEY"),
    create_if_not_exists=True,
    mode=CompositeRetrievalMode.FULL,
    rerank_top_n=6,
)
# Register each index together with a description of what it covers.
retriever.add_index(
    alita_index, description="Knowledge base for the Alita paradigm for agents"
)
# Keep this call last: as the cell's final expression, its return value is what gets displayed.
retriever.add_index(
    mcp_zero_index, description="Knowledge base of the (model context protocol) MCP zero paradigm"
)

Retriever(name='Alita and MCP Zero Retriever', pipelines=[RetrieverPipeline(name='alita-paper', description='Knowledge base for the Alita paradigm for agents', pipeline_id='6e287db8-a658-48c2-837f-1e13c85edc84', preset_retrieval_parameters=PresetRetrievalParams(dense_similarity_top_k=30, dense_similarity_cutoff=0.0, sparse_similarity_top_k=30, enable_reranking=True, rerank_top_n=6, alpha=0.5, search_filters=None, search_filters_inference_schema=None, files_top_k=1, retrieval_mode=<RetrievalMode.CHUNKS: 'chunks'>, retrieve_image_nodes=False, retrieve_page_screenshot_nodes=False, retrieve_page_figure_nodes=False, class_name='base_component')), RetrieverPipeline(name='mcp-zero-paper', description='Knowledge base of the (model context protocol) MCP zero paradigm', pipeline_id='1d5c48e0-9849-49a6-a59d-0af4eb09f794', preset_retrieval_parameters=PresetRetrievalParams(dense_similarity_top_k=30, dense_similarity_cutoff=0.0, sparse_similarity_top_k=30, enable_reranking=True, rerank_top_n=6, alph

In [8]:
%%time
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine

# NOTE(review): `llm` must already be bound by an earlier cell. The recorded execution counts
# (In [8] here, In [10] for the Ollama cell) are out of order - confirm this survives Restart & Run All.
response_synthesizer = get_response_synthesizer(llm=llm)

# Query engine that pairs the composite retriever with the synthesizer built above.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

# NOTE(review): this rebinds `response`, replacing the answer produced by the Alita-only cell.
response = query_engine.query(
    "What is Alita and what is MCP Zero? Can Alita and MCP zero work together?"
)

CPU times: user 21.7 ms, sys: 5.01 ms, total: 26.7 ms
Wall time: 32.8 s


In [9]:
# Render the most recent `response` as Markdown in the cell output.
display(Markdown(f"**Response:** {response}"))

**Response:** **Alita** is a general‑purpose agent that focuses on minimal pre‑defined tooling.  
It can generate its own MCPs (Model‑Context Protocols) on the fly, allowing it to add new capabilities during a task and to share those MCPs with other agents. The agent’s design emphasizes self‑evolution and low upfront complexity while still achieving strong performance on a wide range of problems.

**MCP Zero** is a technique for efficient, on‑demand tool discovery.  
It asks the language model to explicitly request the tool it needs, then matches that request against a lightweight, semantically indexed collection of existing MCPs. The approach cuts prompt length dramatically, keeps retrieval accuracy high even when the tool pool is large, and maintains consistency across multi‑turn interactions.

**Combining the two** is natural.  
MCP Zero can first search the existing MCP collection and supply the best match to Alita. If no suitable MCP is found, Alita can invoke its tool‑creation workflow to build a new MCP, register it, and then MCP Zero can retrieve it in subsequent steps. This creates a virtuous loop where the agent actively discovers and, when necessary, creates the tools it needs, leading to a self‑sustaining, cost‑aware agent ecosystem.

## Test Llama 3.1

In [11]:
import os
from dotenv import load_dotenv, find_dotenv
from llama_index.llms.openai_like import OpenAILike
_ = load_dotenv(find_dotenv())  # make values from the local .env visible to os.getenv

# Resolve the endpoint up front: formatting an unset variable straight into the
# f-string would silently yield the unusable base URL "None/v1".
llama3_endpoint_url = os.getenv("llama3_endpoint_url")
if not llama3_endpoint_url:
    raise RuntimeError(
        "Environment variable 'llama3_endpoint_url' is not set; "
        "cannot build the Llama 3.1 API base URL."
    )

# OpenAI-compatible client for the hosted Llama 3.1 8B Instruct endpoint.
# NOTE(review): this rebinds `llm`, which earlier cells bound to the Ollama model.
llm = OpenAILike(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    api_key=os.getenv("BENTO_CLOUD_API_KEY"),
    # Strip a trailing slash so the result is ".../v1" rather than "...//v1".
    api_base=f"{llama3_endpoint_url.rstrip('/')}/v1",
    is_chat_model=True,
    is_function_calling_model=True,
    temperature=0,
    timeout=600,
)

In [12]:
# Smoke-test the endpoint with a trivial prompt; the CompletionResponse is shown as the cell output.
llm.complete("Hi!")

CompletionResponse(text="It's nice to meet you. Is there something I can help you with or would you like to chat?", additional_kwargs={'prompt_tokens': 37, 'completion_tokens': 23, 'total_tokens': 60}, raw=ChatCompletion(id='chatcmpl-9a5b68fc0cdcecf38ca3f2774e120f6d', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="It's nice to meet you. Is there something I can help you with or would you like to chat?", refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[], reasoning_content=None), stop_reason=None)], created=1756476633, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=23, prompt_tokens=37, total_tokens=60, completion_tokens_details=None, prompt_tokens_details=None), prompt_logprobs=None, kv_transfer_params=None), logprobs=None, delta=None)