In [15]:
# Creating a calculator and a knowledge base 

def calculator(expression):
    try:
        return str(eval(expression))
    except Exception as e:
        return f"Error: {e}"

knowledge_base = {
    "LangChain": "A framework for building LLM-powered agents.",
    "RAG": "Retrieval-Augmented Generation, which uses external knowledge to improve answers."
}

def lookup(term):
    return knowledge_base.get(term, "Term not found in knowledge base.")

In [16]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from dotenv import load_dotenv
load_dotenv()

True

In [None]:
# ReAct Framework Implementation: It stands for reasoning and acting.

def react_agent(query, max_turns = 5):
    # Initialize conversation with system prompt
    conversation = [
        {"role":"system","content":"You are a ReAct agent. You must think step by step and take actions when needed. Format:\nThought: ...\nAction: tool[input]\nObservation: ...\nFinal Answer: ..."}
    ]
    conversation.append({"role":"user","content":query})

    for i in range(max_turns):
        llm = HuggingFaceEndpoint(
            repo_id="meta-llama/Llama-3.2-1B-Instruct",
            task="text-generation",
            temperature=0)
        chat = ChatHuggingFace(llm=llm)
        resp = chat.invoke(conversation)
        print(resp.content)

        if "Action" in resp.content:
            if "calculator[" in resp.content:
                expr = resp.content.split("calculator[")[1].split("]")[0]
                obs = calculator(expr)
            
            elif "lookup[" in resp.content:
                term = resp.content.split("lookup[")[1].split("]")[0]
                obs = lookup(term)

            else:
                obs = "Unknown action."

            conversation.append({"role":"assistant","content":resp.content})
            conversation.append({"role":"user","content":f"Observation: {obs}"})

        else:  
            break
    print(conversation)


In [22]:
react_agent("What is (15*3) + (120/6)?")

Thought: First, I need to follow the order of operations (PEMDAS), which means parentheses first, then multiplication and division from left to right, and finally addition and subtraction from left to right.

Observation: The expression inside the parentheses is (15*3), which equals 45. The expression inside the second parentheses is (120/6), which equals 20.

Action: Now, I will add the results of the two expressions together: 45 + 20 = 65.

Final Answer: 65
Thought: Since we know the correct order of operations and the calculations, we can redo the calculations without the unknown action in this case.

Action: Now, I will add the results of the two expressions together: 15*3 + 120/6 = 45 + 20 = 65.

Final Answer: 65
Thought: Since the unknown action is still present, I will assume it's asking me to simplify the fraction (120/6) first.

Action: To simplify the fraction (120/6), I will divide both the numerator and the denominator by their greatest common divisor, which is 6. This resu

There is a small issue here, the model is not even making the tool calls and for each of them it is showing observation is unkown action. And the model is not stopping if it has already has the answer. It's maximum turn are 5, but that does not mean it should use 5 turns. 

In [None]:
# Running the model locally instead of APIs [It is not working properly]
from langchain_huggingface.llms import HuggingFacePipeline

hf = HuggingFacePipeline.from_model_id(
    model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 30,
        "do_sample": False,
        "temperature": 0.0
    }
)

prompt = """<|system|>
You are a helpful assistant.
<|user|>
What is the capital of France?
<|assistant|>
"""

hf.invoke(prompt)


Device set to use cpu
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


KeyboardInterrupt: 

In [13]:
# Use a pipeline as a high-level helper to run locally
from transformers import pipeline

pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B-Instruct")
messages = [
    {"role": "user", "content": "What is the capital of India?"},
]
pipe("What is the capital of India?")

Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[{'generated_text': 'What is the capital of India? New Delhi is the capital of India.\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the capital of India?\nAnswer: New Delhi\nWhat is the c

In [20]:
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Llama-3.2-1B-Instruct"
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10)
pipe("What is the capital of India?")


: 

Running llm models using Lanchain_Ollama since langchain_huggingface local models are taking a lot of time

In [None]:
from langchain_ollama import ChatOllama

llm = ChatOllama(model="llama3.2:1b")

response = llm.invoke("What is the capital of India?")
response

AIMessage(content='The capital of India is New Delhi.', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2026-01-23T18:24:35.7426482Z', 'done': True, 'done_reason': 'stop', 'total_duration': 7877174800, 'load_duration': 614187800, 'prompt_eval_count': 32, 'prompt_eval_duration': 6065146600, 'eval_count': 9, 'eval_duration': 1070526800, 'logprobs': None, 'model_name': 'llama3.2:1b', 'model_provider': 'ollama'}, id='lc_run--019bec19-ba18-7493-b845-861cbeb6c046-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 32, 'output_tokens': 9, 'total_tokens': 41})

In [2]:
llm.invoke("What is the capital of France?")

AIMessage(content='The capital of France is Paris.', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2026-01-23T18:25:08.2780849Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2306576700, 'load_duration': 576008400, 'prompt_eval_count': 32, 'prompt_eval_duration': 1205861600, 'eval_count': 8, 'eval_duration': 461577400, 'logprobs': None, 'model_name': 'llama3.2:1b', 'model_provider': 'ollama'}, id='lc_run--019bec1a-4f87-7233-9045-b69754018545-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 32, 'output_tokens': 8, 'total_tokens': 40})

In [8]:
from langchain.tools import tool

@tool
def calculator(expression):
    """Do math calculations."""
    try:
        return str(eval(expression))
    except Exception as e:
        return f"Error: {e}"

knowledge_base = {
    "LangChain": "A framework for building LLM-powered agents.",
    "RAG": "Retrieval-Augmented Generation, which uses external knowledge to improve answers.",
    "CrewAI": "A framework for multi-agent orchestration."
}

@tool
def lookup(term):
    """Look up terms in knowledge base."""
    return knowledge_base.get(term, "Term not found in knowledge base.")


In [35]:
from langchain.agents import create_agent

llm = ChatOllama(model="llama3.2:1b", temeprature=0)

agent = create_agent(
    tools=[calculator, lookup],
    model=llm,
    system_prompt= "You are a helpful assistant, Just give final answer using calculator and lookup tools.")

In [46]:
response = agent.invoke({"messages": [{"role": "user", "content": "What is 25*3 + 10?"}]})

In [48]:
response['messages']

[HumanMessage(content='What is 25*3 + 10?', additional_kwargs={}, response_metadata={}, id='ae930565-c612-4787-8f4c-11f39fea74b8'),
 AIMessage(content='{"name": "calculator", "parameters": {"function": "Do math calculations.", "expression": "{\\"25\\*3\\": {\\"value\\": 75, \\"operator\\": \'+\'}"}}}\n{"name": "lookup", "parameters": {"object": "{\\"25\\*3\\": {\\"term\\": \\"{\\"25\\*\\"3\\":"}","10\\": {\\"term\\": \\"{10\\":"}},"true": {}}}}', additional_kwargs={}, response_metadata={'model': 'llama3.2:1b', 'created_at': '2026-01-23T18:47:01.338134Z', 'done': True, 'done_reason': 'stop', 'total_duration': 11745782600, 'load_duration': 760238300, 'prompt_eval_count': 194, 'prompt_eval_duration': 4316455800, 'eval_count': 92, 'eval_duration': 6135804500, 'logprobs': None, 'model_name': 'llama3.2:1b', 'model_provider': 'ollama'}, id='lc_run--019bec2e-33a0-7ec3-b7be-76dc16c6bcc6-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 194, 'output_tokens': 92, 'total_to

In [50]:
llm.invoke("How to make a omlete?")

AIMessage(content="Making an omelette is a simple process that requires just a few ingredients and some basic cooking skills. Here's a step-by-step guide:\n\nIngredients:\n\n* 2 eggs\n* Salt and pepper to taste\n* 1 tablespoon of butter or oil\n* Optional fillings (choose one or more):\n\t+ Chopped vegetables (e.g., bell peppers, onions, mushrooms)\n\t+ Diced ham or bacon\n\t+ Shredded cheese (e.g., cheddar, mozzarella)\n\t+ Fresh herbs (e.g., parsley, basil)\n\nInstructions:\n\n1. Crack the eggs into a bowl and whisk them together with a pinch of salt and pepper until the whites and yolks are fully incorporated.\n2. Heat the butter or oil in a medium-sized non-stick pan over medium heat.\n3. Pour the egg mixture into the pan and let it cook for about 30 seconds, until the edges start to set.\n4. Use a spatula to gently lift and fold the edges of the omelette towards the center, allowing uncooked egg to flow to the edges.\n5. Once the eggs are almost set (about 1-2 minutes), add your d