<a href="https://colab.research.google.com/github/m-gian/agents/blob/main/Create_Agent_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U -q google-genai

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.3/53.3 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m713.3/713.3 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m234.9/234.9 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires google-auth==2.43.0, but you have google-auth 2.47.0 which is incompatible.[0m[31m
[0m

In [None]:
import os
import json
from typing import Dict, List, Callable, Any, Literal, Optional, Union
from google import genai
from google.genai import types
from pydantic import BaseModel, Field

In [None]:
from google.colab import userdata
GEMINI_API_KEY = userdata.get('gemini_sosuke')
client = genai.Client(api_key=GEMINI_API_KEY)

In [None]:
class Tool:
    """A named callable bundled with the metadata describing it.

    Instances are callable: ``tool(**kwargs)`` forwards the keyword
    arguments to the wrapped function and returns its result.
    """

    def __init__(
        self,
        name: str,
        description: str,
        input_schema: "type[BaseModel]",
        output_schema: Dict[str, Any],
        func: Callable[..., Any],
    ):
        """Store the tool's metadata and implementation.

        Args:
            name: Identifier the tool is registered and looked up under.
            description: Human-readable summary of what the tool does.
            input_schema: Model class describing the tool's keyword
                arguments. The registry in this file calls
                ``model_json_schema()`` on it, so it must be a pydantic
                model class rather than a plain dict.
            output_schema: Free-form description of the result.
            func: Callable invoked when the tool is called.
        """
        self.name = name
        self.description = description
        self.input_schema = input_schema
        self.output_schema = output_schema
        self.func = func

    def __call__(self, **kwargs):
        """Invoke the wrapped function with ``kwargs`` and return its result."""
        return self.func(**kwargs)

In [None]:
class ToolRegistry:
    """In-memory mapping from tool name to tool, plus schema helpers."""

    def __init__(self):
        # Insertion-ordered: the helpers below list tools in registration order.
        self.tools: Dict[str, "Tool"] = {}

    def register(self, tool: "Tool") -> None:
        """Store ``tool`` under ``tool.name``, replacing any same-named entry."""
        self.tools[tool.name] = tool

    def get(self, name: str) -> "Tool":
        """Return the tool registered under ``name``.

        Raises:
            ValueError: If no tool with that name has been registered.
        """
        try:
            return self.tools[name]
        except KeyError:
            raise ValueError(f"Tool '{name}' not found") from None

    def list_tools(self) -> List[Dict[str, Any]]:
        """Describe every registered tool as a JSON-serialisable dict.

        Each entry holds the tool's name, its description, and the JSON
        schema produced by ``input_schema.model_json_schema()``.
        """
        return [
            {
                "name": tool.name,
                "description": tool.description,
                "input_schema": tool.input_schema.model_json_schema(),
            }
            for tool in self.tools.values()
        ]

    def get_tool_call_args_type(self) -> Any:
        """Return ``typing.Union`` over the input schemas of all tools.

        The result is a typing construct built at runtime (for a single
        tool it collapses to that tool's schema class).

        Raises:
            TypeError: If the registry is empty (``Union`` of no types).
        """
        input_args_models = tuple(
            tool.input_schema for tool in self.tools.values()
        )
        return Union[input_args_models]

    def get_tool_names(self) -> Any:
        """Return ``typing.Literal`` over the registered tool names."""
        # Subscripting with a tuple is equivalent to Literal[*names] but
        # does not require the star-in-subscript syntax.
        return Literal[tuple(self.tools)]


In [None]:
def add(a: int, b: int) -> int:
    """Return the sum of ``a`` and ``b``."""
    total = a + b
    return total

def multiply(a: int, b: int) -> int:
    """Return the product of ``a`` and ``b``."""
    product = a * b
    return product

In [None]:
# Argument models for the two arithmetic tools. They are documented with
# comments rather than docstrings: pydantic copies a model's docstring into
# its JSON schema as "description", which would change what list_tools()
# reports.
class ToolAddArgs(BaseModel):
    a: int
    b: int


class ToolMultiplyArgs(BaseModel):
    a: int
    b: int


# Shared registry holding every tool the agent may call.
registry = ToolRegistry()

add_tool = dict(
    name="add",
    description="Add two numbers",
    input_schema=ToolAddArgs,
    output_schema={"result": "int"},
    func=add,
)
registry.register(Tool(**add_tool))
del add_tool  # keep the module namespace the same as before

multiply_tool = dict(
    name="multiply",
    description="Multiply two numbers",
    input_schema=ToolMultiplyArgs,
    output_schema={"result": "int"},
    func=multiply,
)
registry.register(Tool(**multiply_tool))
del multiply_tool  # keep the module namespace the same as before

In [None]:
registry.list_tools()

[{'name': 'add',
  'description': 'Add two numbers',
  'input_schema': {'properties': {'a': {'title': 'A', 'type': 'integer'},
    'b': {'title': 'B', 'type': 'integer'}},
   'required': ['a', 'b'],
   'title': 'ToolAddArgs',
   'type': 'object'}},
 {'name': 'multiply',
  'description': 'Multiply two numbers',
  'input_schema': {'properties': {'a': {'title': 'A', 'type': 'integer'},
    'b': {'title': 'B', 'type': 'integer'}},
   'required': ['a', 'b'],
   'title': 'ToolMultiplyArgs',
   'type': 'object'}}]

In [None]:
registry.get_tool_call_args_type()

typing.Union[__main__.ToolAddArgs, __main__.ToolMultiplyArgs]

In [None]:
registry.get_tool_names()

typing.Literal['add', 'multiply']

In [None]:
# Types derived from the registry's current contents: a Literal of the
# registered tool names and a Union of their argument models. They are
# computed once here, so tools registered afterwards are not reflected.
ToolNameLiteral = registry.get_tool_names()
ToolArgsUnion = registry.get_tool_call_args_type()

# Shape of a model reply that requests a tool invocation.
# (Comments are used instead of docstrings because pydantic includes a
# model's docstring in its JSON schema as "description".)
class ToolCall(BaseModel):
    action: Literal["tool"]  # discriminates this reply from FinalAnswer
    thought: str  # the model's free-text reasoning for the call
    tool_name: ToolNameLiteral  # must be one of the registered tool names
    args: ToolArgsUnion  # must match one of the registered argument models

# Shape of a model reply that ends the exchange with an answer.
class FinalAnswer(BaseModel):
    action: Literal["final"]  # discriminates this reply from ToolCall
    answer: str  # text returned to the user

# Every reply must be exactly one of the two shapes above; GeminiLLM.generate
# passes this union as the response schema.
LLMResponse = Union[ToolCall, FinalAnswer]

class GeminiLLM:
    """Wrapper that asks a Gemini model for the agent's next JSON action.

    The system instruction is built once, at construction time, from the
    tools registered at that moment.
    """

    def __init__(self, client, tool_registry, model="gemini-2.5-flash"):
        """Keep the client, registry and model name; build the system prompt.

        Args:
            client: Object exposing ``models.generate_content(...)``.
            tool_registry: Object exposing ``list_tools()``.
            model: Model name forwarded to ``generate_content``.
        """
        self.client = client
        self.model = model
        self.tool_registry = tool_registry
        self.system_instruction = self._create_system_instruction()

    def _create_system_instruction(self) -> str:
        """Return the system prompt followed by the JSON list of tools."""
        tools_description = json.dumps(
            self.tool_registry.list_tools(),
            indent=2
        )

        # The tool-call example uses the key "args" so that it agrees with
        # the ToolCall response schema and with what Agent.run reads.
        system_prompt = """
You are a conversational AI agent that can interact with external tools.

CRITICAL RULES (MUST FOLLOW):
- You are NOT allowed to perform operations internally that could be performed by an available tool.
- If a tool exists that can perform any part of the task, you MUST use that tool.
- You MUST NOT skip tools, even for simple or obvious steps.
- You MUST NOT combine multiple operations into a single step unless a tool explicitly supports it.
- You may ONLY produce a final answer when no available tool can further advance the task.

TOOL USAGE RULES:
- Each tool call must perform exactly ONE meaningful operation.
- If the task requires multiple operations, you MUST call tools sequentially.
- If multiple tools could apply, choose the most specific one.

RESPONSE FORMAT (STRICT):
- You MUST respond ONLY in valid JSON.
- Never include explanations outside JSON.
- You must choose exactly one action per response.

Tool call format:
{
  "action": "tool",
  "thought": "...",
  "tool_name": "...",
  "args": { ... }
}

Final answer format:
{
  "action": "final",
  "answer": "..."
}""" + "\n\nAvailable tools with description:\n" + tools_description
        return system_prompt

    def _format_gemini_chat_history(self, history: list[dict]) -> list:
        """Convert the agent's history dicts into ``types.Content`` objects.

        ``user`` and ``assistant`` messages become text parts (with the
        assistant role renamed to ``model``); ``tool`` messages become
        function-response parts. Messages with any other role are skipped.
        """
        text_roles = {"user": "user", "assistant": "model"}
        formatted_history = []
        for message in history:
            role = message["role"]
            if role in text_roles:
                part = types.Part.from_text(text=message["content"])
                formatted_history.append(
                    types.Content(role=text_roles[role], parts=[part])
                )
            elif role == "tool":
                part = types.Part.from_function_response(
                    name=message["tool_name"],
                    response={'result': message["tool_response"]},
                )
                formatted_history.append(
                    types.Content(role="tool", parts=[part])
                )
        return formatted_history

    def generate(self, history: list[dict]) -> str:
        """Send ``history`` to the model and return its JSON reply text.

        The request uses temperature 0, a JSON response constrained to
        ``LLMResponse``, and disables automatic function calling so the
        caller executes tools itself.

        Raises:
            ValueError: If the response carries no text.
        """
        gemini_history_format = self._format_gemini_chat_history(history)
        response = self.client.models.generate_content(
            model=self.model,
            contents=gemini_history_format,
            config=types.GenerateContentConfig(
                temperature=0,
                response_mime_type="application/json",
                response_schema=LLMResponse,
                system_instruction=self.system_instruction,
                automatic_function_calling=types.AutomaticFunctionCallingConfig(disable=True)
            ),
        )
        text = response.text
        if text is None:
            # Fail here with a clear message instead of letting the caller's
            # json.loads(None) raise an unrelated TypeError.
            raise ValueError("Gemini response contained no text")
        return text


In [None]:
class Agent:
    """Loop that alternates LLM replies and tool executions until an answer.

    ``history`` persists across ``run`` calls, so later requests see the
    earlier conversation.
    """

    def __init__(self, llm, tool_registry, max_steps=5):
        """Store collaborators and start with an empty history.

        Args:
            llm: Object exposing ``generate(history)`` returning JSON text.
            tool_registry: Object exposing ``get(name)`` returning a tool.
            max_steps: Maximum number of LLM calls per ``run``.
        """
        self.llm = llm
        self.tool_registry = tool_registry
        self.history = []
        self.max_steps = max_steps

    def run(self, user_input: str):
        """Process one user message and return the model's final answer.

        Each step asks the LLM for an action. A ``"tool"`` action runs the
        named tool with the given ``args`` and records its result in the
        history; a ``"final"`` action ends the run. A reply with any other
        action is ignored and the LLM is asked again.

        Raises:
            RuntimeError: If no final answer arrives within ``max_steps``.
        """
        self.history.append({"role": "user", "content": user_input})

        for _ in range(self.max_steps):
            llm_output = self.llm.generate(self.history)
            action = json.loads(llm_output)
            kind = action["action"]

            if kind == "final":
                self.history.append(
                    {"role": "assistant", "content": llm_output}
                )
                return action["answer"]

            if kind == "tool":
                # Record the request before executing it so the history
                # keeps the request/result order.
                self.history.append(
                    {"role": "assistant", "content": llm_output}
                )
                tool = self.tool_registry.get(action["tool_name"])
                result = tool(**action["args"])
                self.history.append(
                    {"role": "tool", "tool_name": tool.name, "tool_response": result}
                )

        raise RuntimeError("Agent did not terminate within max_steps")


In [None]:
llm = GeminiLLM(client, registry)
agent = Agent(llm, registry)

In [None]:
print(llm.system_instruction)


You are a conversational AI agent that can interact with external tools.

CRITICAL RULES (MUST FOLLOW):
- You are NOT allowed to perform operations internally that could be performed by an available tool.
- If a tool exists that can perform any part of the task, you MUST use that tool.
- You MUST NOT skip tools, even for simple or obvious steps.
- You MUST NOT combine multiple operations into a single step unless a tool explicitly supports it.
- You may ONLY produce a final answer when no available tool can further advance the task.

TOOL USAGE RULES:
- Each tool call must perform exactly ONE meaningful operation.
- If the task requires multiple operations, you MUST call tools sequentially.
- If multiple tools could apply, choose the most specific one.

RESPONSE FORMAT (STRICT):
- You MUST respond ONLY in valid JSON.
- Never include explanations outside JSON.
- You must choose exactly one action per response.

Tool call format:
{
  "action": "tool",
  "thought": "...",
  "tool_name": 

In [None]:
agent.run("Hi! Can you add 10 and 32?")

42

In [None]:
agent.run("Now multiply the result by 2")

In [None]:
def chat_with_agent(agent: "Agent"):
    """Run an interactive console chat with ``agent`` until the user quits.

    Typing ``exit``, ``quit`` or ``q`` (any case, surrounding whitespace
    ignored) ends the session, as does end-of-input or an interrupt at the
    prompt. Blank lines are ignored. Errors raised by ``agent.run`` are
    printed and the session continues.
    """
    print("Welcome! Type 'exit' to quit.\n")

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt: leave without a traceback.
            print("\nGoodbye!")
            break

        if not user_input:
            # Nothing to send; do not spend an LLM call on an empty message.
            continue
        if user_input.lower() in ("exit", "quit", "q"):
            print("Goodbye!")
            break

        try:
            response = agent.run(user_input)
        except RuntimeError as e:
            print(f"Agent error: {e}")
        except Exception as e:
            # Top-level boundary of the chat loop: report and keep going.
            print(f"Unexpected error: {e}")
        else:
            print(f"Agent: {response}")

In [None]:
chat_with_agent(agent)

Welcome! Type 'exit' to quit.

You: can you solve this equation step by step: x/3-2=3
Agent: x = 15
You: q
Goodbye!


In [None]:
agent.history

[{'role': 'user',
  'content': 'can you solve this equation step by step: x/3-2=3'},
 {'role': 'assistant',
  'content': '{\n  "action": "tool",\n  "thought": "The first step to solve the equation x/3 - 2 = 3 is to add 2 to both sides. I will use the \'add\' tool to calculate 3 + 2.",\n  "tool_name": "add",\n  "args": {\n    "a": 3,\n    "b": 2\n  }\n}'},
 {'role': 'tool', 'tool_name': 'add', 'tool_response': 5},
 {'role': 'assistant',
  'content': '{\n  "action": "tool",\n  "thought": "After adding 2 to both sides, the equation becomes x/3 = 5. The next step is to multiply both sides by 3 to solve for x. I will use the \'multiply\' tool to calculate 5 * 3.",\n  "tool_name": "multiply",\n  "args": {\n    "a": 5,\n    "b": 3\n  }\n}'},
 {'role': 'tool', 'tool_name': 'multiply', 'tool_response': 15},
 {'role': 'assistant',
  'content': '{\n  "action": "final",\n  "answer": "x = 15"\n}'}]

In [None]:
r = llm.generate([{'role': 'user', 'content': 'hi, Can you add 10 and 32?'},
 {'role': 'assistant',
  'content': '{\n  "action": "tool",\n  "thought": "The user wants to add two numbers, 10 and 32. I should use the \'add\' tool for this.",\n  "tool_name": "add",\n  "args": {\n    "a": 10,\n    "b": 32\n  }\n}'},
 {'role': 'tool', 'tool_name': 'add', 'tool_response': 42},
 {'role': 'assistant',
  'content': '{\n  "action": "final",\n  "answer": "42"\n}'},
 {'role': 'user', 'content': 'now multiply that by 2'}])

In [None]:
r