# Agents - Websites, Google Search, Wiki

In [22]:
# Standard-library and OpenAI imports.
import os
import sys

import openai

# Append the relative entry '../..' to the module search path.
sys.path.append('../..')

from dotenv import find_dotenv, load_dotenv

# Locate a .env file and load it; the return value is discarded via `_`.
_ = load_dotenv(find_dotenv())

# Indexing os.environ raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

In [23]:
#pip install langchain-openai langchain-community wikipedia arxiv langchainhub google-api-python-client faiss-cpu

## Tools 


In [24]:
# Wikipedia tool - a ready-made tool from the LangChain community package
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Configure the wrapper with a single result and a 200-character content limit
# (as the parameter names indicate), then hand it to the query tool.
api_wrapper = WikipediaAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=200,
)
wiki_tool = WikipediaQueryRun(api_wrapper=api_wrapper)

# Display the tool's name as the cell output.
wiki_tool.name

'wikipedia'

LangChain's default tools also include Google Search. Follow the steps in the integration guide to set it up:
https://python.langchain.com/v0.2/docs/integrations/tools/google_search/

In [25]:
import os
from dotenv import load_dotenv

# Take GOOGLE_API_KEY and GOOGLE_CSE_ID from the local .env file (or the shell
# environment) instead of assigning them in the notebook: literal assignments
# here would overwrite the loaded values and put credentials in a shared file.
load_dotenv()

# Stop early with a readable message if either credential is missing.
_missing_google_vars = [
    var for var in ("GOOGLE_API_KEY", "GOOGLE_CSE_ID") if not os.environ.get(var)
]
if _missing_google_vars:
    raise EnvironmentError(
        "Set " + ", ".join(_missing_google_vars) + " in your .env file or environment "
        "before using the Google Search tool."
    )

In [26]:
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_core.tools import Tool

# Wrapper whose `run` method is used as the tool's callable below.
search = GoogleSearchAPIWrapper()

# Expose the wrapper to agents as a tool named "google_search".
search_tool = Tool(name="google_search", description="Search Google for recent results.", func=search.run)

# Display the tool's name as the cell output.
search_tool.name

'google_search'

In [27]:
# Smoke-test the Google search tool directly with a sample query.
search_tool.run("Obama's first name?")

"1 Child's First Name. (Type or print). BARACK. CERTIFICATE OF LIVE BIRTH lb ... OBAMA, II. Day. 4. 6b. Island. Year. 5b. Hour. 1961 7:24 P.M.. Oahu. 6d. Is Place\xa0... As a member of the Democratic Party, he was the first African-American president in United States history. Obama previously served as a U.S. senator\xa0... Apr 2, 2018 ... BARACK : Barkat and Mubarak both are derived from it in Hindi and Urdu. Roughly meaning blessing, abundance etc. Husen or Hussein from which\xa0... Apr 7, 2021 ... No, Obama is secretly his last name, his first name is obviously Joe, though. Joe Obama. Joebama. First Lady Michelle LaVaughn Robinson Obama is a lawyer, writer, and the wife of the 44th President, Barack Obama. She is the first African-American First\xa0... Log in. Open app. barackobama's profile picture. See full profile in the app. Enjoy photos, videos and more from barackobama. Open the Instagram app. Not now. A museum that asks you to believe—not just in President Obama's power to cr

In [28]:
# Web-based retriever: load pages, split them into chunks, embed the chunks
# into a FAISS index, and expose the index as a retriever.

from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Pages to index.
source_urls = [
    "https://python.langchain.com/v0.1/docs",
    "https://attack.mitre.org/groups",
    "https://attack.mitre.org/techniques/enterprise/",
    "https://attack.mitre.org/tactics/enterprise/",
]
loader = WebBaseLoader(source_urls)
docs = loader.load()

# Split with chunk_size=1000 and chunk_overlap=200.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = splitter.split_documents(docs)

# Build the vector store from the chunks using OpenAI embeddings.
vectordb = FAISS.from_documents(documents, OpenAIEmbeddings())
web_retriever = vectordb.as_retriever()
web_retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7feb5d1c3b20>)

In [29]:
# Custom tool: expose the web retriever to the agent under the name "mitre_search".
from langchain.tools.retriever import create_retriever_tool

web_retriever_tool = create_retriever_tool(
    retriever=web_retriever,
    name="mitre_search",
    description="Search for information about a MITRE ATT&CK technique. For any question about mitre-framework techniques, you must use this tool.",
)

In [30]:
# Display the name the retriever tool was registered with.
web_retriever_tool.name

'mitre_search'

In [31]:
# Smoke-test the retriever tool directly with a sample query.
web_retriever_tool.run("what is langchain")

"Introduction | 🦜️🔗 LangChain\n\nSkip to main contentLangChain v0.2 is out! You are currently viewing the old v0.1 docs. View the latest docs here.ComponentsIntegrationsGuidesAPI ReferenceMorePeopleVersioningContributingTemplatesCookbooksTutorialsYouTubev0.1v0.2v0.1🦜️🔗LangSmithLangSmith DocsLangServe GitHubTemplates GitHubTemplates HubLangChain HubJS/TS Docs💬SearchGet startedIntroductionQuickstartInstallationUse casesQ&A with RAGExtracting structured outputChatbotsTool use and agentsQuery analysisQ&A over SQL + CSVMoreExpression LanguageGet startedRunnable interfacePrimitivesAdvantages of LCELStreamingAdd message history (memory)MoreEcosystem🦜🛠️ LangSmith🦜🕸️ LangGraph🦜️🏓 LangServeSecurityGet startedOn this pageIntroductionLangChain is a framework for developing applications powered by large language models (LLMs).LangChain simplifies every stage of the LLM application lifecycle:Development: Build your applications using LangChain's open-source building blocks and components. Hit the gr

In [32]:
# Direct use of the Arxiv API wrapper, without wrapping it in a LangChain tool.
# The wrapper is bound to `arxiv` rather than `arxiv_tool` so that `arxiv_tool`
# always refers to an actual tool object, even when cells are run out of order.
from langchain_community.utilities import ArxivAPIWrapper

arxiv = ArxivAPIWrapper()
# Uncomment to query the wrapper directly:
# docs = arxiv.run("1605.08386")
# docs

In [33]:
## Arxiv tool - a ready-made tool from the LangChain community package
from langchain_community.tools import ArxivQueryRun
from langchain_community.utilities import ArxivAPIWrapper

# Configure the wrapper with a single result and a 1000-character content limit
# (as the parameter names indicate), then hand it to the query tool.
arxiv_wrapper = ArxivAPIWrapper(
    top_k_results=1,
    doc_content_chars_max=1000,
)
arxiv_tool = ArxivQueryRun(api_wrapper=arxiv_wrapper)
arxiv_tool.name  # bare expression mid-cell: evaluated but not displayed

# Run the tool on the query "1605.08386"; the final expression is the cell output.
docs = arxiv_tool.run("1605.08386")
docs

'Published: 2016-05-26\nTitle: Heat-bath random walks with Markov bases\nAuthors: Caprice Stanley, Tobias Windisch\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.'

In [34]:
# Tools handed to the agent. wiki_tool (defined earlier) is not included; append it here if needed.
tools = [web_retriever_tool, arxiv_tool, search_tool]

In [36]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

load_dotenv()

# ChatOpenAI reads OPENAI_API_KEY from the environment. Check for it explicitly:
# re-assigning os.environ from os.getenv() is a no-op when the key exists and
# raises an opaque TypeError when it does not.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; add it to your .env file or environment."
    )

# Use the 'gpt-3.5-turbo' alias: the pinned 'gpt-3.5-turbo-0613' snapshot has
# been retired under OpenAI's deprecation schedule. temperature=0 is kept as before.
llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)

In [37]:
# LangChain Hub - a community hub for exploring and contributing prompts.

from langchain import hub

# Fetch the prompt to use - you can modify this!
prompt_ref = "hwchase17/openai-functions-agent"
prompt = hub.pull(prompt_ref)

# Display the prompt's message templates as the cell output.
prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

#### Agents

'''
The core idea of agents is to use a language model to choose a sequence of actions to take. In chains, a sequence of actions is hardcoded (in code). In agents, a language model is used as a reasoning engine to determine which actions to take and in which order.
'''

In [38]:
# Agents
from langchain.agents import AgentExecutor, create_openai_tools_agent

# Combine the LLM, the tool list, and the hub prompt into an agent.
agent = create_openai_tools_agent(llm, tools, prompt)

# The cells below call `agent_executor.invoke(...)`, so the executor has to be
# created here for the notebook to run from a fresh kernel. verbose=True matches
# the "Entering new AgentExecutor chain..." trace in the recorded outputs.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [40]:
# Alternative question to try instead:
#   "search on google and give me 3 sentence description in a structured format along with the link to the docs in the bottom: langchain tools"
agent_input = {"input" : "List of all the tactics in mitreattack framework withone line description"}
response = agent_executor.invoke(agent_input)

# Only the 'output' entry of the response is printed; use print(response) to see all of it.
print(response['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `mitre_search` with `{'query': 'tactics'}`


[0m[36;1m[1;3mHome
Tactics
Enterprise










 
                                        Enterprise tactics
                                

                                    Tactics represent the "why" of an ATT&CK technique or sub-technique. It is the adversary's tactical goal: the reason for performing an action. For example, an adversary may want to achieve credential access.
                                


Enterprise Tactics: 14








ID
Name
Description





TA0043


Reconnaissance


                                                    The adversary is trying to gather information they can use to plan future operations.

                                        



TA0042


Resource Development


                                                    The adversary is trying to establish resources they can use to support operations.

                         

In [41]:
# Ask the agent a question that explicitly requests a Google search.
agent_input = {"input" : "search on google and give me 3 sentence description in a structured format: langchain tools"}
response = agent_executor.invoke(agent_input)

# Only the 'output' entry of the response is printed; use print(response) to see all of it.
print(response['output'])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `google_search` with `langchain tools`



1. LangChain tools are interfaces that agents, chains, or LLMs can use to interact with the world, such as APIs, functions, databases, and more.
2. With LangChain tools, LLMs can perform tasks like searching the web, doing math, running code, and more.
3. The LangChain library provides a wide range of prebuilt tools, allowing LLMs to access external data sources and perform various operations.[0m

[1m> Finished chain.[0m
Here are three sentence descriptions of LangChain tools:

1. LangChain tools are interfaces that agents, chains, or LLMs can use to interact with the world, such as APIs, functions, databases, and more.
2. With LangChain tools, LLMs can perform tasks like searching the web, doing math, running code, and more.
3. The LangChain library provides a wide range of prebuilt tools, allowing LLMs to access external data sources and perform various operations.
