## RAG with Multi Data Sourse using Agents and tools

### 1: RAG as tool

In [21]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter # type: ignore
from langchain_google_genai import  GoogleGenerativeAIEmbeddings # type: ignore
from langchain.vectorstores import FAISS # type: ignore
from langchain.tools.retriever import create_retriever_tool

web = WebBaseLoader("https://www.langchain.com/langsmith")
web_pages = web.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
documents = text_splitter.split_documents(web_pages)

embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

vector = FAISS.from_documents(documents, embedding=embedding)

retriever = vector.as_retriever()

# make the retriver as tool
retriever_tool = create_retriever_tool(
    retriever,
    "langsmith_search",
    "search for information about langsmith"
    )


In [23]:
retriever_tool.name

'langsmith_search'

## 2: wikipedia tool

In [30]:
from langchain.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=250)
wikipedia_tool  = WikipediaQueryRun(api_wrapper=api_wrapper)
wikipedia_tool.name

'wikipedia'

### 3: Arxiv tool

In [29]:
from langchain.tools import ArxivQueryRun
from langchain_community.utilities import ArxivAPIWrapper

arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=250)
arxiv_tool  = ArxivQueryRun(api_wrapper=arxiv_wrapper)
arxiv_tool.name

'arxiv'

###  LLM

In [16]:
from langchain_google_genai import ChatGoogleGenerativeAI # type: ignore
from dotenv import load_dotenv # type: ignore
import os
load_dotenv()

# load google api key
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")


# Initialize an instance of the ChatGoogleGenerativeAI with specific parameters
llm:ChatGoogleGenerativeAI =  ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.2,          
)


In [17]:
# invoking with LLM
response = llm.invoke("what is Generative AI")
response.content

'## Generative AI: Creating New Content from Data\n\nGenerative AI is a type of artificial intelligence that focuses on **creating new content** based on the data it has been trained on. This content can be in various forms, including:\n\n* **Text:** Writing stories, articles, poems, code, scripts, and more.\n* **Images:** Generating realistic images, artwork, and even photorealistic portraits.\n* **Audio:** Creating music, sound effects, and speech.\n* **Video:** Generating short videos, animations, and even entire movies.\n* **Code:** Writing code in different programming languages.\n\n**How it works:**\n\nGenerative AI models are trained on massive datasets of existing content. This training process allows them to learn patterns, relationships, and styles within the data. Once trained, they can then use this knowledge to generate new content that resembles the training data but is unique and original.\n\n**Examples of Generative AI:**\n\n* **ChatGPT:** A language model that can gene

### prompt

In [35]:
from langchain import hub

# get the prompt from hub
prompt = hub.pull("hwchase17/openai-functions-agent")
prompt.messages



[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful assistant'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={}),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

### Agents

In [36]:
tools = [arxiv_tool,wikipedia_tool,retriever_tool]
tools

[ArxivQueryRun(api_wrapper=ArxivAPIWrapper(arxiv_search=<class 'arxiv.Search'>, arxiv_exceptions=(<class 'arxiv.ArxivError'>, <class 'arxiv.UnexpectedEmptyPageError'>, <class 'arxiv.HTTPError'>), top_k_results=1, ARXIV_MAX_QUERY_LENGTH=300, continue_on_failure=False, load_max_docs=100, load_all_available_meta=False, doc_content_chars_max=250)),
 WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\Users\\Nasir\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=250)),
 Tool(name='langsmith_search', description='search for information about langsmith', args_schema=<class 'langchain_core.tools.retriever.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x0000020BC5503E20>, retriever=VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.F

In [41]:
from langchain.agents import create_tool_calling_agent

agent = create_tool_calling_agent(llm,tools,prompt)


Key 'title' is not supported in schema, ignoring
Key 'title' is not supported in schema, ignoring
Key 'title' is not supported in schema, ignoring
Key 'title' is not supported in schema, ignoring
Key 'title' is not supported in schema, ignoring
Key 'title' is not supported in schema, ignoring


In [42]:
# To run agent we need to agent exicuter
from langchain.agents import AgentExecutor
agent_exicuter = AgentExecutor(agent=agent,tools=tools , verbose=True)
agent_exicuter

AgentExecutor(verbose=True, agent=RunnableMultiActionAgent(runnable=RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: message_formatter(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMe

In [43]:
agent_exicuter.invoke({"input":"tell me about langsmith"})



[1m> Entering new AgentExecutor chain...[0m
{'name': 'arxiv', 'description': 'A wrapper around Arxiv.org Useful for when you need to answer questions about Physics, Mathematics, Computer Science, Quantitative Biology, Quantitative Finance, Statistics, Electrical Engineering, and Economics from scientific articles on arxiv.org. Input should be a search query.', 'parameters': {'type_': 6, 'description': 'Input for the Arxiv tool.', 'properties': {'query': {'type_': 1, 'description': 'search query to look up', 'format_': '', 'nullable': False, 'enum': [], 'max_items': '0', 'min_items': '0', 'properties': {}, 'required': []}}, 'required': ['query'], 'format_': '', 'nullable': False, 'enum': [], 'max_items': '0', 'min_items': '0'}}
{'name': 'wikipedia', 'description': 'A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.', 'parameters': {'type_': 6, '

{'input': 'tell me about langsmith',
 'output': "LangSmith is a developer platform for building and managing LLM-powered applications. It provides tools for debugging, collaborating, testing, and monitoring your applications. You can use LangSmith even if you don't use LangChain. LangSmith offers a variety of features, including:\n\n* **Trace logging:** LangSmith can log all the inputs and outputs of each step in your application, providing a detailed view of what's happening.\n* **Dataset creation:** You can create datasets from your traces to train and evaluate your models.\n* **Evaluation:** LangSmith provides tools for evaluating your models and identifying areas for improvement.\n* **Monitoring:** LangSmith can monitor your application's performance and identify potential issues.\n* **Collaboration:** LangSmith allows you to collaborate with other developers on your projects.\n\nLangSmith is a powerful tool for building and managing LLM-powered applications. It can help you to deb

In [47]:
agent_exicuter.invoke({"input":"who won the world cup of 1992"})



[1m> Entering new AgentExecutor chain...[0m
{'name': 'arxiv', 'description': 'A wrapper around Arxiv.org Useful for when you need to answer questions about Physics, Mathematics, Computer Science, Quantitative Biology, Quantitative Finance, Statistics, Electrical Engineering, and Economics from scientific articles on arxiv.org. Input should be a search query.', 'parameters': {'type_': 6, 'description': 'Input for the Arxiv tool.', 'properties': {'query': {'type_': 1, 'description': 'search query to look up', 'format_': '', 'nullable': False, 'enum': [], 'max_items': '0', 'min_items': '0', 'properties': {}, 'required': []}}, 'required': ['query'], 'format_': '', 'nullable': False, 'enum': [], 'max_items': '0', 'min_items': '0'}}
{'name': 'wikipedia', 'description': 'A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.', 'parameters': {'type_': 6, '

{'input': 'who won the world cup of 1992',
 'output': 'The 1992 Cricket World Cup was won by **Pakistan**. \n'}

In [48]:
agent_exicuter.invoke({"input":"what's the paper 1605.008386 about?"})



[1m> Entering new AgentExecutor chain...[0m
{'name': 'arxiv', 'description': 'A wrapper around Arxiv.org Useful for when you need to answer questions about Physics, Mathematics, Computer Science, Quantitative Biology, Quantitative Finance, Statistics, Electrical Engineering, and Economics from scientific articles on arxiv.org. Input should be a search query.', 'parameters': {'type_': 6, 'description': 'Input for the Arxiv tool.', 'properties': {'query': {'type_': 1, 'description': 'search query to look up', 'format_': '', 'nullable': False, 'enum': [], 'max_items': '0', 'min_items': '0', 'properties': {}, 'required': []}}, 'required': ['query'], 'format_': '', 'nullable': False, 'enum': [], 'max_items': '0', 'min_items': '0'}}
{'name': 'wikipedia', 'description': 'A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.', 'parameters': {'type_': 6, '

{'input': "what's the paper 1605.008386 about?",
 'output': 'The paper "Maximum principles for a time-space fractional diffusion equation" was published on May 3, 2016. It focuses on maximum principles of a time-space fractional diffusion equation. \n'}