In [1]:
# 1) Install
!pip -q install -U langgraph langchain-core transformers accelerate

import json
import ast
import torch
from typing import TypedDict, Annotated, Sequence

from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage, BaseMessage
from langgraph.graph import StateGraph, END
from langgraph.graph.message import add_messages

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/484.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m348.2/484.9 kB[0m [31m10.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m481.3/484.9 kB[0m [31m9.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m484.9/484.9 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# 2) Load a small Hugging Face model
# The tokenizer and model are module-level globals used by `llm`.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"  # small + decent in Colab
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # With a GPU, let accelerate place the weights; otherwise keep the default (CPU) placement.
    device_map="auto" if torch.cuda.is_available() else None,
    # `dtype` replaces the deprecated `torch_dtype` keyword (the library emits a
    # deprecation warning for the old name). Half precision on GPU, full precision on CPU.
    dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

In [3]:
def llm(messages, max_new_tokens=200):
    """Generate one assistant reply for a LangChain message history.

    Args:
        messages: Iterable of LangChain messages. ``HumanMessage`` becomes a
            "user" turn, ``AIMessage`` an "assistant" turn, and ``ToolMessage``
            a "user" turn prefixed with ``"Tool result: "``. Messages whose
            ``type`` attribute is ``"system"`` become a "system" turn. Any
            other message kind is skipped.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The newly generated text only (prompt removed), decoded without
        special tokens and stripped of surrounding whitespace.

    Note:
        Generation samples (``do_sample=True``), so repeated calls with the
        same input may return different text.
    """
    # Convert messages into HF chat format
    hf_msgs = []
    for m in messages:
        if isinstance(m, HumanMessage):
            role, content = "user", m.content
        elif isinstance(m, AIMessage):
            role, content = "assistant", m.content
        elif isinstance(m, ToolMessage):
            # Tool output is fed back as a user turn so any chat template can render it.
            role, content = "user", f"Tool result: {m.content}"
        elif getattr(m, "type", None) == "system":
            # System prompts used to be dropped silently; pass them through.
            role, content = "system", m.content
        else:
            continue
        hf_msgs.append({"role": role, "content": content})

    prompt = tokenizer.apply_chat_template(hf_msgs, tokenize=False, add_generation_prompt=True)

    # The rendered template already contains every special token it needs, so
    # the tokenizer must not add its own (that would duplicate BOS on models
    # whose tokenizer prepends one).
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
    # Moving to the model's device is a no-op on CPU, so no CUDA check is needed.
    inputs = inputs.to(model.device)

    out = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.2,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )

    # `generate` returns prompt + completion; keep only the completion tokens.
    gen = out[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(gen, skip_special_tokens=True).strip()

In [4]:
# 3) A very simple calculator tool (safe eval)
_CALC_MAX_POWER_BITS = 10_000  # largest integer power result (in bits) the tool will compute


def _calc_eval(node):
    """Evaluate an already-validated arithmetic AST node and return its value."""
    if isinstance(node, ast.Expression):
        return _calc_eval(node.body)
    if isinstance(node, ast.Constant):
        return node.value
    if isinstance(node, ast.UnaryOp):
        value = _calc_eval(node.operand)
        return -value if isinstance(node.op, ast.USub) else +value

    # Validation guarantees that only BinOp nodes reach this point.
    left = _calc_eval(node.left)
    right = _calc_eval(node.right)
    op = node.op
    if isinstance(op, ast.Add):
        return left + right
    if isinstance(op, ast.Sub):
        return left - right
    if isinstance(op, ast.Mult):
        return left * right
    if isinstance(op, ast.Div):
        return left / right

    # ast.Pow: refuse integer powers whose result would be enormous
    # (e.g. 9**9**9**9), which would otherwise stall the kernel.
    if isinstance(left, int) and isinstance(right, int) and right > 0:
        if left.bit_length() * right > _CALC_MAX_POWER_BITS:
            raise ValueError("exponent too large")
    return left ** right


def calculator(expr: str) -> str:
    """Evaluate a plain arithmetic expression and return the result as text.

    Supported syntax: int/float literals, parentheses, unary ``+``/``-`` and
    the binary operators ``+ - * / **``. The expression is parsed to an AST
    and evaluated by walking it; ``eval`` is never called.

    Args:
        expr: The expression text, e.g. ``"87*12/3"``. Surrounding whitespace
            is ignored.

    Returns:
        ``str(result)`` on success. ``"Error: invalid math expression"`` when
        the input is not a string, does not parse, or uses anything outside
        the supported syntax. ``"Error: <message>"`` when evaluation itself
        fails (division by zero, overflow, oversized power, ...). The function
        never raises for bad input.
    """
    allowed = (ast.Expression, ast.BinOp, ast.UnaryOp, ast.Add, ast.Sub, ast.Mult, ast.Div, ast.Pow,
               ast.USub, ast.UAdd, ast.Constant)
    invalid = "Error: invalid math expression"

    if not isinstance(expr, str):
        return invalid
    try:
        # Eval-mode parsing rejects leading whitespace, so strip it first.
        tree = ast.parse(expr.strip(), mode="eval")
    except (SyntaxError, ValueError, RecursionError):
        return invalid

    # Validate the whole tree before evaluating any part of it.
    for node in ast.walk(tree):
        if not isinstance(node, allowed):
            return invalid
        if isinstance(node, ast.Constant):
            value = node.value
            # Only real numbers: no strings/bytes/complex/None, and no bools
            # (bool is an int subclass).
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                return invalid

    try:
        # str() stays inside the try: very large ints can fail to convert.
        return str(_calc_eval(tree))
    except Exception as e:
        return f"Error: {e}"

In [5]:
# 4) Tell the model to output ONLY JSON (tool call or final answer)
# This text is prepended to every model call by `agent_node`. The reply is
# parsed with `json.loads`; the code only acts on the "tool" key and, for the
# calculator, on args["expression"]. Any reply whose "tool" is not
# "calculator" (including the "final" form below) ends the run.
SYSTEM = """You must output ONLY one JSON object.

If you need math, use:
{"tool":"calculator","args":{"expression":"2+2"}}

Otherwise finish with:
{"tool":"final","args":{"answer":"..."}}
"""

In [6]:
# 5) LangGraph state
class State(TypedDict):
    """State passed between graph nodes: just the running conversation."""

    # Conversation history. `add_messages` is the reducer attached to this key:
    # nodes return only their new message(s) and those are merged into the
    # existing list instead of replacing it (the recorded run ends with all
    # four messages present).
    messages: Annotated[Sequence[BaseMessage], add_messages]

def agent_node(state: State):
    """Ask the model for its next step.

    The instruction prompt is placed in front of the conversation as a human
    message, the model is queried once, and its raw text reply is returned as
    a single new ``AIMessage`` under the ``"messages"`` key.
    """
    history = list(state["messages"])
    prompt_messages = [HumanMessage(content=SYSTEM), *history]
    reply = llm(prompt_messages)
    return {"messages": [AIMessage(content=reply)]}

def tool_node(state: State):
    """Run the calculator call requested by the most recent message.

    The last message's content is expected to be a JSON object of the form
    ``{"tool": "calculator", "args": {"expression": "<expr>"}}``.

    Returns:
        ``{}`` (no state update) when the content is not JSON, is JSON but not
        an object, or names a tool other than ``"calculator"``. Otherwise a
        one-message update containing a ``ToolMessage`` with the calculator's
        result. A missing, empty or non-string expression, and any exception
        raised by the calculator, are reported back as an ``"Error: ..."``
        tool result instead of aborting the graph.
    """
    last = state["messages"][-1]
    try:
        req = json.loads(last.content)
    except Exception:
        # If model didn't follow JSON, just stop
        return {}

    # Valid JSON need not be an object (e.g. `42` or `[1, 2]`); only a dict
    # can carry a tool call, and calling .get on anything else would raise.
    if not isinstance(req, dict) or req.get("tool") != "calculator":
        return {}

    args = req.get("args")
    expr = args.get("expression", "") if isinstance(args, dict) else ""

    if isinstance(expr, str) and expr.strip():
        try:
            result = calculator(expr)
        except Exception as e:
            # Surface the failure to the model rather than crashing the run.
            result = f"Error: {e}"
    else:
        result = "Error: invalid math expression"

    return {"messages": [ToolMessage(content=result, name="calculator", tool_call_id="1")]}

def router(state: State):
    """Choose the next step after the agent has spoken.

    Returns:
        ``"tools"`` when the last message is a JSON object whose ``"tool"``
        is ``"calculator"``; ``"end"`` in every other case (content that is
        not JSON, JSON that is not an object, or any other tool name).
    """
    last = state["messages"][-1]
    try:
        req = json.loads(last.content)
    except Exception:
        return "end"
    # A bare number, list, string or null is valid JSON but has no .get();
    # treat it like any other reply that is not a calculator request.
    if not isinstance(req, dict):
        return "end"
    return "tools" if req.get("tool") == "calculator" else "end"


In [7]:
# 6) Build graph: agent -> (maybe tools) -> agent -> end
g = StateGraph(State)
# Two nodes: the model call and the calculator executor.
g.add_node("agent", agent_node)
g.add_node("tools", tool_node)
# Every run starts with the model.
g.set_entry_point("agent")
# After the model speaks, `router` returns "tools" or "end"; the mapping
# translates that label into the next node (or END to stop).
g.add_conditional_edges("agent", router, {"tools": "tools", "end": END})
# A tool result always goes back to the model for the next step.
g.add_edge("tools", "agent")
# `app` is the runnable graph used by `run`.
app = g.compile()

In [8]:
# 7) Run it
def run(question: str):
    """Send one question through the compiled graph and print the transcript.

    Every message in the final state is printed as its class name in square
    brackets followed by its content.
    """
    result = app.invoke({"messages": [HumanMessage(content=question)]})
    print("\n--- Messages ---")
    for message in result["messages"]:
        kind = type(message).__name__
        print(f"\n[{kind}]\n{message.content}")

# Demo: the expected flow is question -> calculator request -> tool result ->
# final answer. Generation is sampled, so the exact text can vary between runs.
run("What is (87*12)/3 ?")


--- Messages ---

[HumanMessage]
What is (87*12)/3 ?

[AIMessage]
{"tool":"calculator","args":{"expression":"87*12/3"}}

[ToolMessage]
348.0

[AIMessage]
{"tool":"final","args":{"answer":"348.0"}}
