In [None]:
# !pip install -q openai langchain playwright beautifulsoup4
# !playwright install

In [None]:
from getpass import getpass
import os
# Ask for the key only when the environment does not already supply a
# non-empty one, so re-running this cell neither re-prompts nor overwrites
# an existing value. The explicit prompt replaces getpass's default
# "Password: " text, which does not say which secret is expected.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')

In [2]:
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from bs4 import BeautifulSoup as Soup

import bs4
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

def _html_to_text(raw_html):
    """Parse a page with BeautifulSoup's html.parser and return its .text."""
    return Soup(raw_html, "html.parser").text


# Recursively load pages starting from the CDC home page (max_depth=2).
url = "https://www.cdc.gov"
loader = RecursiveUrlLoader(
    url=url,
    max_depth=2,
    extractor=_html_to_text,
)
docs = loader.load()

# Split the loaded documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Index the chunks in Chroma using OpenAI embeddings, then expose the store
# as a retriever for the cells below.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()

In [8]:
from langchain import hub
from langchain.tools.retriever import create_retriever_tool
from langchain_community.chat_models import ChatOpenAI

# Chat model used by the agent. Alternative kept for reference:
# llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=.2)
llm = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)

# Pull the self-ask-with-search prompt from the hub and print it with
# placeholder values substituted for its two variables.
prompt = hub.pull("hwchase17/self-ask-with-search")
rendered_prompt = prompt.format(agent_scratchpad="AGENTSCRATCHPAD", input="INPUT")
print(rendered_prompt)

# Wrap the retriever as a tool named "Intermediate Answer" and make it the
# only entry in the tool list handed to the agent.
retriever_tool = create_retriever_tool(
    retriever,
    name="Intermediate Answer",
    description="Search for information about outbreaks. For any questions about outbreaks, you must use this tool!",
)
tools = [retriever_tool]


Question: Who lived longer, Muhammad Ali or Alan Turing?
Are follow up questions needed here: Yes.
Follow up: How old was Muhammad Ali when he died?
Intermediate answer: Muhammad Ali was 74 years old when he died.
Follow up: How old was Alan Turing when he died?
Intermediate answer: Alan Turing was 41 years old when he died.
So the final answer is: Muhammad Ali

Question: When was the founder of craigslist born?
Are follow up questions needed here: Yes.
Follow up: Who was the founder of craigslist?
Intermediate answer: Craigslist was founded by Craig Newmark.
Follow up: When was Craig Newmark born?
Intermediate answer: Craig Newmark was born on December 6, 1952.
So the final answer is: December 6, 1952

Question: Who was the maternal grandfather of George Washington?
Are follow up questions needed here: Yes.
Follow up: Who was the mother of George Washington?
Intermediate answer: The mother of George Washington was Mary Ball Washington.
Follow up: Who was the father of Mary Ball Washin

In [9]:
from langchain.agents import AgentExecutor, create_self_ask_with_search_agent

# Build the self-ask-with-search agent from the chat model, the tool list
# and the prompt pulled from the hub.
agent = create_self_ask_with_search_agent(llm=llm, tools=tools, prompt=prompt)

In [10]:
# Wrap the agent and its tools in an executor. verbose=True and
# handle_parsing_errors=True are passed straight through to AgentExecutor.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
)

# The input text is reproduced exactly, including the runs of spaces before
# each "Thoughts go here..." segment.
flavivirus_query = (
    "Are there any current flavivirus related disease outbreaks? "
    "    Thoughts go here... Follow up:  What viruses belong to the flavivirus family? "
    "    Thoughts go here... So the final answer is:"
)

# Left as the final expression so the notebook displays the result dict.
agent_executor.invoke({"input": flavivirus_query})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mYes.
Follow up: What are some examples of diseases caused by flaviviruses?[0m[36;1m[1;3m[Document(page_content='H. Lorenzo Juanes et al.\n View Abstract\n Cite This Article\n\n\n\nCrimean-Congo hemorrhagic fever (CCHF) is a viral infectious disease for which distribution of the main vector, Hyalomma spp. ticks, is expanding. We analyzed all 10 cases of CCHF diagnosed in Spain during 2013–2021; case-patient median age was 56.5 years, and 7 were men. We identified CCHF virus genotypes III and V. Six case-patients acquired the infection in urban areas. Sixty percent of patients were infected in summer and 40% in spring. Two patients met criteria for hemophagocytic syndrome. Seven patients survived. The epidemiologic pattern of CCHF in Spain is based on occasional cases with an elevated mortality rate. Genotype III and, to a less extent also genotype V, CCHF circulates in humans in a common geographic area in Spain. Those data

{'input': 'Are there any current flavivirus related disease outbreaks?     Thoughts go here... Follow up:  What viruses belong to the flavivirus family?     Thoughts go here... So the final answer is:',
 'output': 'Yes, there are current flavivirus-related disease outbreaks, such as Dengue in Africa and the Middle East.'}