# Tools and Agents

In [1]:
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper

from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_ollama import OllamaEmbeddings, OllamaLLM
from langchain_core.output_parsers import StrOutputParser
from langchain_groq import ChatGroq

import subprocess
# subprocess.run(["ollama",'pull',"gemma3:270m"],check=True)
subprocess.run(["ollama",'pull',"embeddinggemma:300m"],check=True)

import os
from dotenv import load_dotenv

os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")

load_dotenv()

USER_AGENT environment variable not set, consider setting it to identify your requests.


True

In [2]:
wiki_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
wiki = WikipediaQueryRun(api_wrapper=wiki_wrapper)
wiki.name

'wikipedia'

In [3]:
arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv = ArxivQueryRun(api_wrapper=arxiv_wrapper)
arxiv.name

'arxiv'

In [4]:
tools = [wiki, arxiv]
tools

[WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\Users\\rahul\\anaconda3\\envs\\genai\\Lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=200)),
 ArxivQueryRun(api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=200))]

In [5]:
# Create Custom Tool
docs = WebBaseLoader("https://docs.langchain.com/langsmith").load()
splits = RecursiveCharacterTextSplitter(chunk_size = 400, chunk_overlap = 100).split_documents(documents=docs)
embeddings = OllamaEmbeddings(model="embeddinggemma:300m")
faiss = FAISS.from_documents(documents=docs, embedding=embeddings)
retriever = faiss.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000022A67F551C0>, search_kwargs={})

In [6]:
from langchain.tools.retriever import create_retriever_tool
retriever_tool = create_retriever_tool(retriever=retriever, name='langsmith-search',description='Get information about langsmith')
retriever_tool.name

'langsmith-search'

In [7]:
# # Initilaise the model

# llm = OllamaLLM(model='gemma3:270m')
# parser = StrOutputParser()

# chain = llm | parser
# chain.invoke("Explain in simple terms why do we use feedforward networks in transformers")

In [8]:
from langchain import hub
# prompt = hub.pull("hwchase17/openai-functions-agent")
# prompt.messages

from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

# This mirrors the "openai-functions-agent" style prompt
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use the provided tools to answer the user’s questions."),
    MessagesPlaceholder(variable_name="chat_history"),  # keeps track of history
    ("user", "{input}"),                                # new user input
    MessagesPlaceholder(variable_name="agent_scratchpad"),  # for reasoning/tool use
])

prompt.messages


[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful AI assistant. Use the provided tools to answer the user’s questions.'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='chat_history'),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [15]:
## Agents

from langchain.agents import create_openai_tools_agent
llm = ChatGroq(model='gemma2-9b-it', streaming=True)
agent = create_openai_tools_agent(llm, tools, prompt)
agent

RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChun

In [16]:
from langchain.agents import AgentExecutor
agent_executor = AgentExecutor(agent=agent,tools= tools, verbose=True)
agent_executor.invoke({"input":"tell me about langsmith",
                       "chat_history": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `wikipedia` with `{'query': 'LangSmith'}`


[0m[36;1m[1;3mPage: LangChain
Summary: LangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications. As a language model integration framework, LangChain[0m[32;1m[1;3mLangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications. As a language model integration framework, LangChain  provides tools and components that simplify the process of using LLMs in various contexts, such as chatbot development, question answering, text summarization, and more.[0m

[1m> Finished chain.[0m


{'input': 'tell me about langsmith',
 'chat_history': [],
 'output': 'LangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications. As a language model integration framework, LangChain  provides tools and components that simplify the process of using LLMs in various contexts, such as chatbot development, question answering, text summarization, and more.'}

In [19]:
agent_executor.invoke({"input":"explain this arxiv paper 1706.03762",
                       "chat_history": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `arxiv` with `{'query': '1706.03762'}`


[0m[33;1m[1;3mPublished: 2023-08-02
Title: Attention Is All You Need
Authors: Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin
Summary: The do[0m[32;1m[1;3m
Invoking: `arxiv` with `{'query': '1706.03762'}`


[0m[33;1m[1;3mPublished: 2023-08-02
Title: Attention Is All You Need
Authors: Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin
Summary: The do[0m[32;1m[1;3mThis paper introduces the Transformer, a novel neural network architecture based solely on attention mechanisms.  It eliminates the need for recurrent or convolutional layers, which are traditionally used in sequence modeling tasks like machine translation. The Transformer's architecture allows for parallel computation, making it more efficient and scalable than pr

{'input': 'explain this arxiv paper 1706.03762',
 'chat_history': [],
 'output': "This paper introduces the Transformer, a novel neural network architecture based solely on attention mechanisms.  It eliminates the need for recurrent or convolutional layers, which are traditionally used in sequence modeling tasks like machine translation. The Transformer's architecture allows for parallel computation, making it more efficient and scalable than previous models. \n\n\nThe paper demonstrates the Transformer's effectiveness on a variety of machine translation tasks, achieving state-of-the-art results. It also provides insights into the attention mechanism and its role in capturing long-range dependencies in sequences. \n\n\nThe Transformer has since become a foundational building block for many natural language processing (NLP) models, including BERT, GPT, and T5.\n"}

In [20]:
agent_executor.invoke({"input":"what was my first question about?",
                       "chat_history": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `wikipedia` with `{'query': 'my first question'}`


[0m[36;1m[1;3mPage: Question
Summary: A question is an utterance which serves as a request for information. Questions are sometimes distinguished from interrogatives, which are the grammatical forms, typically used[0m[32;1m[1;3mPlease provide me with the full text returned by the tool. 


[0m

[1m> Finished chain.[0m


{'input': 'what was my first question about?',
 'chat_history': [],
 'output': 'Please provide me with the full text returned by the tool. \n\n\n'}

In [22]:
agent_executor.invoke({"input":"in attention is all you need paper explain me all the major concepts","chat_history": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `wikipedia` with `{'query': 'Attention Is All You Need'}`


[0m[36;1m[1;3mPage: Attention Is All You Need
Summary: "Attention Is All You Need" is a 2017 landmark research paper in machine learning authored by eight scientists working at Google. The paper introduced a new de[0m[32;1m[1;3mThe paper introduces a new deep learning architecture called the Transformer, which relies entirely on attention mechanisms.  This means it doesn't use recurrent neural networks (RNNs) or convolutional neural networks (CNNs), which were common in previous language models. 

Here are some key concepts from the paper:

* **Attention:** This is the core mechanism of the Transformer. It allows the model to focus on different parts of the input sequence when processing information. This is like how humans pay attention to certain words in a sentence to understand its meaning.

* **Encoder-Decoder:** The Transformer has two main par

{'input': 'in attention is all you need paper explain me all the major concepts',
 'chat_history': [],
 'output': "The paper introduces a new deep learning architecture called the Transformer, which relies entirely on attention mechanisms.  This means it doesn't use recurrent neural networks (RNNs) or convolutional neural networks (CNNs), which were common in previous language models. \n\nHere are some key concepts from the paper:\n\n* **Attention:** This is the core mechanism of the Transformer. It allows the model to focus on different parts of the input sequence when processing information. This is like how humans pay attention to certain words in a sentence to understand its meaning.\n\n* **Encoder-Decoder:** The Transformer has two main parts: an encoder and a decoder. The encoder processes the input sequence and the decoder generates the output sequence. \n\n* **Multi-Head Attention:** \nThe Transformer uses multiple attention heads in parallel. Each head attends to different asp

In [26]:
agent_executor.invoke({"input":"explain 2005.11401 in 1000 words","chat_history": []})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `arxiv` with `{'query': '2005.11401'}`


[0m[33;1m[1;3mPublished: 2021-04-12
Title: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks
Authors: Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, He[0m[32;1m[1;3m
Invoking: `arxiv` with `{'abstract': True, 'query': '2005.11401'}`


[0m[33;1m[1;3mPublished: 2021-04-12
Title: Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks
Authors: Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, He[0m[32;1m[1;3mThis paper, "Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks",  published in 2021, explores a novel approach to improve the performance of language models on tasks requiring extensive knowledge. 

The core idea behind retrieval-augmented generation (RAG) is to combine the strengths of both text generation models, like GPT-3, and i

{'input': 'explain 2005.11401 in 1000 words',
 'chat_history': [],
 'output': 'This paper, "Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks",  published in 2021, explores a novel approach to improve the performance of language models on tasks requiring extensive knowledge. \n\nThe core idea behind retrieval-augmented generation (RAG) is to combine the strengths of both text generation models, like GPT-3, and information retrieval systems. \n\nTraditionally, language models are trained on massive text datasets and learn to predict the next word in a sequence. While powerful, they often struggle with tasks demanding specific factual knowledge or reasoning. \n\nRAG addresses this by incorporating external knowledge sources.  When presented with a query, the RAG system first retrieves relevant information from a knowledge base or a large corpus of text. This retrieved information is then used to augment the input context for the language model, providing it with the necess