In [1]:
import os
import asyncio
import json
import nest_asyncio
import dotenv
dotenv.load_dotenv()



GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
GOOGLE_SEARCH_KEY = os.getenv('GOOGLE_SEARCH_KEY')
GOOGLE_SEARCH_ENGINE = os.getenv('GOOGLE_SEARCH_ENGINE')

nest_asyncio.apply()

In [2]:
from llama_index.embeddings.cohere import CohereEmbedding
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.core import VectorStoreIndex
import google.genai.types as types

config = types.GenerateContentConfig(
    thinking_config=types.ThinkingConfig(thinking_budget=0),
    max_output_tokens=1024,
    temperature=1,
)

llm = GoogleGenAI(
    model="gemini-2.5-flash",
    generation_config=config,
    )

Settings.embed_model = CohereEmbedding(
    model_name="embed-english-v3.0",
    input_type="search_document",
    api_key=COHERE_API_KEY
)
Settings.llm = llm
Settings.text_splitter = SentenceSplitter(chunk_size=300, chunk_overlap=50)

In [3]:
from llama_index.core.agent.workflow import ReActAgent
from llama_index.core.workflow import Context

# define sample Tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers and returns the result integer"""
    return a * b

# initialize ReAct agent
agent = ReActAgent(tools=[multiply], verbose=True)

# Create a context to store the conversation history/session state
ctx = Context(agent)

In [4]:
agent

ReActAgent(name='Agent', description='An agent that can perform a task', system_prompt=None, tools=[<llama_index.core.tools.function_tool.FunctionTool object at 0x000001FEC59F09E0>], tool_retriever=None, can_handoff_to=None, llm=GoogleGenAI(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x000001FEDAC07380>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x000001FED492ED40>, completion_to_prompt=<function default_completion_to_prompt at 0x000001FED5C8A700>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, model='gemini-2.5-flash', temperature=0.1, context_window=None, max_retries=3, is_function_calling_model=True, cached_content=None, built_in_tool=None), initial_state={}, state_prompt='Current state:\n{state}\n\nCurrent message:\n{msg}\n', output_cls=None, structured_output_fn=None, streaming=True, reasoning_key='current_reasoning', output_parser=<llama_index.core.agent.re

In [5]:
from llama_index.core.agent.workflow import AgentStream, ToolCallResult

handler = agent.run("What is the multiplication of 43 and 45?", ctx=ctx)

async for ev in handler.stream_events():
    # if isinstance(ev, ToolCallResult):
    #     print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler

Thought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: multiply
Action Input: {"a": 43, "b": 45}Thought: I can answer without using any more tools. I'll use the user's language to answer
Answer: The multiplication of 43 and 45 is 1935.

In [6]:
print(str(response))

The multiplication of 43 and 45 is 1935.


## Define Google Search Tool


In [6]:
from llama_index.tools.google import GoogleSearchToolSpec # Get this from llamaHUb

tool_spec = GoogleSearchToolSpec(key=GOOGLE_SEARCH_KEY, engine=GOOGLE_SEARCH_ENGINE)

In [7]:
import os
from llama_index.tools.google import GoogleSearchToolSpec


try:
    print("Initializing Google Search Tool...")
    tool_spec = GoogleSearchToolSpec(key=GOOGLE_SEARCH_KEY, engine=GOOGLE_SEARCH_ENGINE)
    
    print("Sending request to Google...")
    results = tool_spec.google_search(query="Llama 4 model parameters")
    
    if results:
        print("\n✅ SUCCESS! Google returned data.")
        print("-" * 30)
        # Since 'results' is a list of strings, we just print the first one directly
        print(f"First Result: {results[0]}") 
        print("-" * 30)
    else:
        print("\n⚠️ Connection worked, but 0 results found.")

except Exception as e:
    print(f"\n❌ FAILURE: {e}")

Initializing Google Search Tool...
Sending request to Google...

✅ SUCCESS! Google returned data.
------------------------------
First Result: N
------------------------------


In [8]:
# Import and initialize our tool spec
from llama_index.core.tools.tool_spec.load_and_search import LoadAndSearchToolSpec

# Wrap the google search tool to create an index on top of the returned Google search
wrapped_search_tool = LoadAndSearchToolSpec.from_defaults(
    tool_spec.to_tool_list()[0],
).to_tool_list()

In [9]:
from llama_index.core.agent.workflow import FunctionAgent

# System prompt encouraging tool usage
system_prompt = """You are a helpful assistant that can search the web for current information.
When you don't have information about recent events or models released after your knowledge cutoff,
use the available search tools to find accurate, up-to-date information."""

# Create agent with proper configuration
search_agent = FunctionAgent(
    tools=wrapped_search_tool,
    llm=Settings.llm,
    system_prompt=system_prompt,
    verbose=True
)

ctx = Context(search_agent)

handler = search_agent.run("How many parameters LLaMA 4 model has? List the models with parameters", ctx=ctx)

async for ev in handler.stream_events():
    if isinstance(ev, ToolCallResult):
        print(f"\nCall {ev.tool_name} with {ev.tool_kwargs}\nReturned: {ev.tool_output}")
    if isinstance(ev, AgentStream):
        print(f"{ev.delta}", end="", flush=True)

response = await handler


Call google_search with {'query': 'LLaMA 4 model parameters'}
Returned: Content loaded! You can now search the information using read_google_search

Call read_google_search with {'query': 'LLaMA 4 model parameters'}
Returned: headers: {'access-control-expose-headers': 'X-Debug-Trace-ID', 'cache-control': 'no-cache, no-store, no-transform, must-revalidate, private, max-age=0', 'content-encoding': 'gzip', 'content-type': 'application/json', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'pragma': 'no-cache', 'vary': 'Origin,Accept-Encoding', 'x-accel-expires': '0', 'x-debug-trace-id': '6198dc641d686b394bbb7a65b5323e6f', 'date': 'Fri, 02 Jan 2026 12:28:49 GMT', 'x-envoy-upstream-service-time': '18', 'server': 'envoy', 'via': '1.1 google', 'alt-svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'transfer-encoding': 'chunked'}, status_code: 429, body: {'id': '56b24b7e-324b-41f4-8539-816b1f6b4e63', 'message': 'Please wait and try again later'}
I am sorry, but I cannot provide you with the

# Need To Find The Solution

In [10]:
from llama_index.tools.google import GoogleSearchToolSpec

tool_spec = GoogleSearchToolSpec(key=GOOGLE_SEARCH_KEY, engine=GOOGLE_SEARCH_ENGINE)

In [13]:
search_results = tool_spec.google_search("LLaMA 4 model details")

search_results

'No search results available'

In [12]:
search_results

'No search results available'