In [40]:
from langchain.llms import OpenAI
import os


## Using Self-Defined Agent to Query Wikipedia

In [41]:
import wikipediaapi
import requests

In [42]:
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

In [165]:
response = llm.invoke("What are the keywords to search for on Wikipedia given the claim Peking is the capital of China. Please give the answer in a common delimited sentence with no space.")
print(response)

content='Peking, capital, China' response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 40, 'total_tokens': 46}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_c2295e73ad', 'finish_reason': 'stop', 'logprobs': None} id='run-888bd4cb-a65d-408c-b0a0-917e5ac783fb-0'


In [166]:
def cleanup(msg):
    msg_lst = dict(msg)['content'].split(',')
    return msg_lst

cleanup(response)



['Peking', ' capital', ' China']

In [162]:
response = ['Peiking']

In [164]:
url = "https://en.wikipedia.org/w/api.php"
params = {
    "action": "query",
    "format": "json",
    "prop": "extracts",
    "titles": 'peking',
    "explaintext": True,
    "exintro": True,
    "explaintext": True 
}
response = requests.get(url, params=params)
data = response.json()
print(data)
pages = data["query"]["pages"]
counter = 0
info = ""
for page_id in pages:
    counter += 1
    page = pages[page_id]
    info += page['extract']
info

{'batchcomplete': '', 'query': {'normalized': [{'from': 'peking', 'to': 'Peking'}], 'pages': {'8148478': {'pageid': 8148478, 'ns': 0, 'title': 'Peking', 'extract': ''}}}}


''

In [70]:
response = llm.invoke("Answer true or false to the claim Bejing is the captital of China given this information with justification by citing from the information: " + info)

In [71]:
dict(response)['content']

'True. Beijing is the capital of China as mentioned in the information provided: "Beijing, alternatively romanized as Peking, is the capital of China."'

In [154]:
from langchain.agents import tool

@tool
def verify_claim(claim: str) -> str:
    """Returns the yes or now to verify a claim"""
    response = llm.invoke("What are the keywords to search for on Wikipedia to verify claim: " + claim + "Please give the answer in a comma delimited sentence with no space.")
    def cleanup(msg):
        msg_lst = dict(msg)['content'].split(',')
        return msg_lst
    keyword = cleanup(response)[0]
    print("keywords" + keyword)

    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "prop": "extracts",
        "titles": keyword,
        "explaintext": True,
        "exintro": True,
        "explaintext": True 
    }
    response = requests.get(url, params=params)
    data = response.json()
    print(data)
    pages = data["query"]["pages"]
    info = ""
    for page_id in pages:
        page = pages[page_id]
        info += page['extract']

    response = llm.invoke('The input claim is: ' + claim + " Given this wikipedia information: " + info + "Please answer Yes or No with justificaiton.")
    print(dict(response)['content'])
    return dict(response)['content']


In [155]:
system_message = """You are a powerful fact checker that checks the input claim against relevant information extracted from wikipeida. You should always answer 'Yes.' or 'No.', then follow by a short justification by citing from the wiki information. 
                    You will receive two variables: 
    * claim: the input claim to verify
    * information: the relevent information extracted from wikipedia

    You must first determine if the answer to the claim is "Yes" or "No", then provide a short justification.

    Your response must follow this json format:
    {
        "answer": "< put_your_answer >",
        "justification": "< justification >"
    }"""

examples = """claim: Beijing is the capital of china. \n
              information: Beijing, alternatively romanized as Peking, is the capital of China. With about 22 million residents, Beijing is the world's most populous national capital city as well as China's second largest city after Shanghai. It is located in Northern China, and is governed as a municipality under the direct administration of the State Council with 16 urban, suburban, and rural districts. Beijing is mostly surrounded by Hebei Province with the exception of neighboring Tianjin to the southeast; together, the three divisions form the Jingjinji megalopolis and the national capital region of China.Beijing is a global city and one of the world's leading centres for culture, diplomacy, politics, finance, business and economics, education, research, language, tourism, media, sport, science and technology and transportation. It is home to the headquarters of most of China's largest state-owned companies and houses the largest number of Fortune Global 500 companies in the world, as well as the world's four biggest financial institutions by total assets. It is also a major hub for the national highway, expressway, railway, and high-speed rail networks. For a decade before the COVID-19 pandemic, the Beijing Capital International Airport was Asia's busiest airport (2009–2019) \n
              response: {"answer": "Yes", "justification": "Beijing, alternatively known is Peking, is the capital of China. Beijing has a rich history dating back over three millennia and is known for its opulent palaces, temples, parks, and other historical sites. Additionally, Beijing is home to many national monuments, museums, and UNESCO World Heritage Sites."}
"""

In [156]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are a powerful fact checker that checks if the input claim is true or false given relevant information extracted from wikipeida. You should always answer 'Yes.' or 'No.', then follow by a short justification by citing from the wiki information. """       ),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [157]:
from langchain.tools.render import format_tool_to_openai_function

tools = [verify_claim]
llm_with_tools = llm.bind(functions=[format_tool_to_openai_function(t) for t in tools])

In [158]:
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser

agent = (
    {
        "input": lambda x: x["input"],
        "agent_scratchpad": lambda x: format_to_openai_function_messages(x["intermediate_steps"]),
    }
    | prompt
    | llm_with_tools
    | OpenAIFunctionsAgentOutputParser()
)

In [167]:
from langchain.schema.agent import AgentFinish

user_input = "beijing is the capital of china"
intermediate_steps = []

while True:
    output = agent.invoke(
        {
            "input": user_input,
            "intermediate_steps": intermediate_steps,
        }
    )
    if isinstance(output, AgentFinish):
        print(output)
        final_result = output.return_values["output"]
        break
    else:
        print(f"TOOL NAME: {output.tool}")
        print(f"TOOL INPUT: {output.tool_input}")
        tool = {"verify_claim": verify_claim}[output.tool]
        observation = tool.run(output.tool_input)
        intermediate_steps.append((output, observation))
        

print(final_result)

TOOL NAME: verify_claim
TOOL INPUT: {'claim': 'beijing is the capital of china'}
keywordsBeijing
{'batchcomplete': '', 'query': {'pages': {'18603746': {'pageid': 18603746, 'ns': 0, 'title': 'Beijing', 'extract': "Beijing, alternatively romanized as Peking, is the capital of China. With about 22 million residents, Beijing is the world's most populous national capital city as well as China's second largest city after Shanghai. It is located in Northern China, and is governed as a municipality under the direct administration of the State Council with 16 urban, suburban, and rural districts. Beijing is mostly surrounded by Hebei Province with the exception of neighboring Tianjin to the southeast; together, the three divisions form the Jingjinji megalopolis and the national capital region of China.\nBeijing is a global city and one of the world's leading centres for culture, diplomacy, politics, finance, business and economics, education, research, language, tourism, media, sport, science a

In [143]:
print(output)
print(observation)

return_values={'output': 'No. Shanghai is not the capital of China.'} log='No. Shanghai is not the capital of China.'
The input claim that Shanghai is the capital of China is incorrect. Beijing is the capital of China, not Shanghai. Shanghai is one of the four direct-administered municipalities of China and is known for being a major center for finance, business, and culture.


In [20]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

agent_executor.invoke({"input": "Beijing is the capital of China"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `verify_claim` with `{'claim': 'Beijing is the capital of China'}`


[0m[36;1m[1;3mThe claim that Beijing is the capital of China is verified by the information provided.[0m[32;1m[1;3mThe claim that Beijing is the capital of China is verified.[0m

[1m> Finished chain.[0m


{'input': 'Beijing is the capital of China',
 'output': 'The claim that Beijing is the capital of China is verified.'}

## Using Predefined Agent Type: Self-Ask with Search

In [21]:
from langchain import hub
from langchain.agents import AgentExecutor, create_self_ask_with_search_agent
from langchain_community.llms import Fireworks
from langchain_community.tools.tavily_search import TavilyAnswer

In [22]:
tools = [TavilyAnswer(max_results=1, name="Intermediate Answer")]

In [24]:
prompt = hub.pull("hwchase17/self-ask-with-search")

In [26]:
llm = Fireworks()
agent = create_self_ask_with_search_agent(llm, tools, prompt)

In [27]:
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [29]:
response  = agent_executor.invoke(
    {"input": "Is the capital of China Beijing?"}
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Yes.
Follow up: What is the capital of China?[0m[36;1m[1;3mBeijing[0m[32;1m[1;3m
So the final answer is: Yes[0m

[1m> Finished chain.[0m


In [30]:
final_answer = response.get("output", "")
final_answer

'Yes'

## Using Prebuilt Agent: SQL Database Agent

In [168]:
from langchain_community.utilities.sql_database import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///Chinook.db")

In [169]:
from langchain_community.agent_toolkits import create_sql_agent
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
agent_executor = create_sql_agent(llm, db=db, agent_type="openai-tools", verbose=True)

ModuleNotFoundError: No module named 'langchain_openai'

In [25]:
agent_executor.invoke(
    "Verify the claim: Customers in the United States spent the most money"
)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3malbums, artists, customers, employees, genres, invoice_items, invoices, media_types, playlist_track, playlists, tracks[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'customers, invoices, invoice_items'}`


[0m[33;1m[1;3m
CREATE TABLE customers (
	"CustomerId" INTEGER NOT NULL, 
	"FirstName" NVARCHAR(40) NOT NULL, 
	"LastName" NVARCHAR(20) NOT NULL, 
	"Company" NVARCHAR(80), 
	"Address" NVARCHAR(70), 
	"City" NVARCHAR(40), 
	"State" NVARCHAR(40), 
	"Country" NVARCHAR(40), 
	"PostalCode" NVARCHAR(10), 
	"Phone" NVARCHAR(24), 
	"Fax" NVARCHAR(24), 
	"Email" NVARCHAR(60) NOT NULL, 
	"SupportRepId" INTEGER, 
	PRIMARY KEY ("CustomerId"), 
	FOREIGN KEY("SupportRepId") REFERENCES employees ("EmployeeId")
)

/*
3 rows from customers table:
CustomerId	FirstName	LastName	Company	Address	City	State	Country	PostalCode	Phone	Fax	Email	SupportRepId
1

{'input': 'Verify the claim: Customers in the United States spent the most money',
 'output': 'Customers in the United States did not spend the most money. The country that spent the most money is the USA with a total amount spent of $523.06.'}