In [34]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

In [35]:
api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=100)
tool = WikipediaQueryRun(api_wrapper=api_wrapper)
tool.name

'wikipedia'

### lets fetch data from here "http://web.simmons.edu/~grovesd/comm244/notes/week2/links"

In [36]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.llms import Ollama

In [37]:
loader = WebBaseLoader("http://web.simmons.edu/~grovesd/comm244/notes/week2/links")
docs = loader.load()
docs

[Document(metadata={'source': 'http://web.simmons.edu/~grovesd/comm244/notes/week2/links', 'title': 'All About Links', 'language': 'en'}, page_content='\n\n\nAll About Links\n\n\n\n\n\n\n\n\n\n\n\nBack to Week 2 page »\nAll About Links\nDissecting a URL\n\n\nThe parts of a URL. From Learning Web Design, p. 24\n\n\nProtocol: The first part of a URL is the protocol. On the web, we almost always use Hypertext Transfer Protocol (HTTP) or HTTPS, which is simply a more secure version of HTTP.\nHost Name: The hostname points to a specific web site within a domain. Usually it is www, but this is not necessarily always the case. The www is so ubiquitous that it is oftentimes left off of urls to make them more clear.\nDomain Name: The domain name is the unique identifier of the site.\nAbsolute Path: The absolute path tells us the path to the requested file on the web server. \nDirectory Path: This part of the URL tells us what directories the requested file is contained in.\nDocument: The last p

In [38]:
documents = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 200).split_documents(docs)
documents

[Document(metadata={'source': 'http://web.simmons.edu/~grovesd/comm244/notes/week2/links', 'title': 'All About Links', 'language': 'en'}, page_content='All About Links\n\n\n\n\n\n\n\n\n\n\n\nBack to Week 2 page »\nAll About Links\nDissecting a URL\n\n\nThe parts of a URL. From Learning Web Design, p. 24'),
 Document(metadata={'source': 'http://web.simmons.edu/~grovesd/comm244/notes/week2/links', 'title': 'All About Links', 'language': 'en'}, page_content='Protocol: The first part of a URL is the protocol. On the web, we almost always use Hypertext Transfer Protocol (HTTP) or HTTPS, which is simply a more secure version of HTTP.\nHost Name: The hostname points to a specific web site within a domain. Usually it is www, but this is not necessarily always the case. The www is so ubiquitous that it is oftentimes left off of urls to make them more clear.\nDomain Name: The domain name is the unique identifier of the site.'),
 Document(metadata={'source': 'http://web.simmons.edu/~grovesd/comm2

In [39]:
embedding = OllamaEmbeddings(model="nomic-embed-text")
embedding

OllamaEmbeddings(base_url='http://localhost:11434', model='nomic-embed-text', embed_instruction='passage: ', query_instruction='query: ', mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None, show_progress=False, headers=None, model_kwargs=None)

In [40]:
vectordb = FAISS.from_documents(documents, embedding)
retiever = vectordb.as_retriever()
retiever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001663A603910>)

In [41]:
## creating retriver tool

from langchain.tools.retriever import create_retriever_tool

retrieval_tool = create_retriever_tool(retiever, 
                                       "Relative URLs_search",  ## give the name you want to search
                                       "Search for information about Relative URLs. For any questions about Relative URLs, you must use this tool!")

retrieval_tool


Tool(name='Relative URLs_search', description='Search for information about Relative URLs. For any questions about Relative URLs, you must use this tool!', args_schema=<class 'langchain_core.tools.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x000001666F1E8540>, retriever=VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001663A603910>), document_prompt=PromptTemplate(input_variables=['page_content'], template='{page_content}'), document_separator='\n\n'), coroutine=functools.partial(<function _aget_relevant_documents at 0x000001666F1E8860>, retriever=VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001663A603910>), document_prompt=PromptTemplate(input_variables=['page_content'], template='{page_content}'), document_separator='\n\n'))

#### So we have already two retriver tool 

In [42]:
retrieval_tool.name, tool.name

('Relative URLs_search', 'wikipedia')

###### now creating another tool arxiv for pdf loading 

In [43]:
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun

In [44]:
arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=100)
arxiv_tool = ArxivQueryRun(api_wrapper=arxiv_wrapper)
arxiv_tool.name

'arxiv'

#### combining all the tools

In [45]:
tools = [tool, retrieval_tool, arxiv_tool]
tools

[WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\Users\\YR272YB\\Anaconda3\\envs\\Langchain_Series_2\\Lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=1, lang='en', load_all_available_meta=False, doc_content_chars_max=100)),
 Tool(name='Relative URLs_search', description='Search for information about Relative URLs. For any questions about Relative URLs, you must use this tool!', args_schema=<class 'langchain_core.tools.RetrieverInput'>, func=functools.partial(<function _get_relevant_documents at 0x000001666F1E8540>, retriever=VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001663A603910>), document_prompt=PromptTemplate(input_variables=['page_content'], template='{page_content}'), document_separator='\n\n'), coroutine=functools.partial(<function _aget_relevant_documents at 0x000001666F1E8860>, retriever=VectorStoreRetriever(tags=['FAISS', 'OllamaEmbed

##### query from multiple tools -> for this we will use agents

Agents -> it basically takes sequence to tasks and using llm it tries to determine which task is to execute first

the order in which we will pass it will execute the same way like tool-> retrieval_tool -> arxiv_tool

In [48]:
import requests

session = requests.Session()
session.verify = False

requests.sessions.Session = lambda: session

# obj = hub.pull("wfh/proposal-indexing")

In [54]:
from langchain_experimental.llms.ollama_functions import OllamaFunctions
## calling models

llm = Ollama(model="llama2", temperature=0)


In [55]:
from langchain import hub
# Get the prompt to use - you can modify this!

from langchain import hub
# Get the prompt to use - you can modify this!
prompt = hub.pull("hwchase17/openai-functions-agent")
prompt.messages



[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [57]:
### Agents
from langchain.agents import create_openai_tools_agent
agent=create_openai_tools_agent(llm,tools,prompt)

In [58]:
## Agent Executer
from langchain.agents import AgentExecutor
agent_executor=AgentExecutor(agent=agent,tools=tools,verbose=True)
agent_executor

AgentExecutor(verbose=True, agent=RunnableMultiActionAgent(runnable=RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_to_openai_tool_messages(x['intermediate_steps']))
})
| ChatPromptTemplate(input_variables=['agent_scratchpad', 'input'], optional_variables=['chat_history'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]], 'agent_scratchpad': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, partial_variables={'chat_history': []}, metadata={'lc_hub_owner': 'hwc

In [60]:
agent_executor.invoke({"input":"Tell me about Relative URLs"})



[1m> Entering new AgentExecutor chain...[0m


ValueError: This output parser only works on ChatGeneration output