## RAG with multiple data sources!

### Wikipedia tool

In [1]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

In [2]:
api_wrapper=WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=250)
wiki=WikipediaQueryRun(api_wrapper=api_wrapper)
print(wiki.name)

wikipedia


## VectorDB with OllamaEmbeddings

In [4]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

loader=WebBaseLoader("https://docs.smith.langchain.com")
docs=loader.load()
documents=RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)
vectordb=FAISS.from_documents(documents, OllamaEmbeddings())
retriever=vectordb.as_retriever
retriever

<bound method VectorStore.as_retriever of <langchain_community.vectorstores.faiss.FAISS object at 0x000001A1DBE06270>>

### Creating retriever tool

In [150]:
from langchain.tools.retriever import create_retriever_tool

retriever_tool = create_retriever_tool(
    retriever,
    "langsmith_search",
    "Search for information about LangSmith. For any questions about LangSmith, you must use this tool!",
)

In [151]:
retriever_tool.name

'langsmith_search'

### ARXIV Tool

In [7]:
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun

arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=250)
arxiv=ArxivQueryRun(arxiv_wrapper=arxiv_wrapper)
arxiv.name

'arxiv'

#### combining all the tools in a list

In [152]:
tools = [wiki, arxiv, retriever_tool]

In [33]:
tools

[WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\Users\\Admin\\Desktop\\GUVI\\LangChain\\.venv\\Lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=250)),
 ArxivQueryRun(),
 Tool(name='langsmith_search', description='Search for information about langsmith search', args_schema=<class 'langchain_core.tools.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x000001A194504FE0>, retriever=<bound method VectorStore.as_retriever of <langchain_community.vectorstores.faiss.FAISS object at 0x000001A1DBE06270>>, document_prompt=PromptTemplate(input_variables=['page_content'], template='{page_content}'), document_separator='\n\n'), coroutine=functools.partial(<function _aget_relevant_documents at 0x000001A194505440>, retriever=<bound method VectorStore.as_retriever of <langchain_community.vectorstores.faiss.FAISS object at 0x000001A1DBE06270>>, document

In [72]:
from langchain_community.llms import Ollama
from dotenv import load_dotenv
import os

#Environment variables loaded
load_dotenv()

#LangSmith Tracking 
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")

# Ollama LLAMA2 LLM
llm=Ollama(model="llama2")

In [106]:
# Prompt tempalate

#from langchain import hub

#prompt = hub.pull("hwchase17/openai-tools-agent")
#prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

### Creating prompt & deploying Agents

In [144]:
from langchain.agents import create_react_agent
from langchain.prompts import PromptTemplate

# Assuming wiki, arxiv, and retriever_tool are defined as Tool objects
tools = [wiki, arxiv, retriever_tool]
tool_names = [tool.name for tool in tools]

prompt = PromptTemplate(
    input_variables=["agent_scratchpad", "tool_names", "tools", "user_input"],
    template="You have everything to respond to the user. {agent_scratchpad}.\nYou have access to the following tools:{tools} {tool_names}.\n\nUse them to answer the user's query: {user_input}.\n\nPlease structure your response in the following format:\nAction: [Your action description]\nThought: [Your explanation or reasoning]\nAction Input: [Additional information needed for the action]"
)


agent = create_react_agent(llm=llm, tools=tools, prompt=prompt)


In [141]:
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["agent_scratchpad", "user_input"],
    template="You have everything to respond to the user. {user_input}{agent_scratchpad} "
)


In [146]:
agent

RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_log_to_str(x['intermediate_steps']))
})
| PromptTemplate(input_variables=['agent_scratchpad', 'user_input'], partial_variables={'tools': "wikipedia - A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.\narxiv - A wrapper around Arxiv.org Useful for when you need to answer questions about Physics, Mathematics, Computer Science, Quantitative Biology, Quantitative Finance, Statistics, Electrical Engineering, and Economics from scientific articles on arxiv.org. Input should be a search query.\nlangsmith_search(query: 'str', *, retriever: 'BaseRetriever' = <bound method VectorStore.as_retriever of <langchain_community.vectorstores.faiss.FAISS object at 0x000001A1DBE06270>>, document_prompt: 'BasePromptTemplate' = PromptTemplate(input_variables=['page_content'], template='{page_co

In [147]:
#Agent executer to run agent

from langchain.agents import AgentExecutor
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)


In [153]:
agent_executor.invoke({"user_input": "Open source LLMs are good? ", "agent_scratchpad": ""})




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Action: Conduct a search on Wikipedia and Arxiv to gather information about open source LLMs.

Thought: Open source LLMs are generally considered to be good because they allow for transparency, reproducibility, and collaboration in the development of AI models. By making the code and weights of these models available publicly, researchers can review and build upon each other's work, leading to faster progress and more innovative solutions. Additionally, open source LLMs can help to promote accountability and trustworthiness in AI research by allowing for independent auditing and verification of the models' performance.

Action Input: Search queries for Wikipedia and Arxiv include:

* "Open source LLMs" on Wikipedia
* "LLMs" on Arxiv with filters set to "Language Modeling" and "Open Source"

Expected Output:
The search results from both sources should provide information about the benefits and challenges of open source LLMs, 