## AutoGPT example finding Winning Marathon Times

* Implementation of https://github.com/Significant-Gravitas/Auto-GPT 
* With LangChain primitives (LLMs, PromptTemplates, VectorStores, Embeddings, Tools)

In [7]:
# !pip install bs4
# !pip install nest_asyncio

In [1]:
# General 
import os
import pandas as pd
from langchain.experimental.autonomous_agents.autogpt.agent import AutoGPT
from langchain.chat_models import ChatOpenAI

from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
from langchain.docstore.document import Document
import asyncio
import nest_asyncio


# Needed since Jupyter already runs an async event loop; run_async (defined
# further down) calls run_until_complete from inside that environment.
nest_asyncio.apply()

In [2]:
# Chat model shared by the pandas agent, the webpage QA chain, the demo chat
# agent and the AutoGPT agent defined in later cells.
llm = ChatOpenAI(model_name="gpt-4", temperature=1.0)

### Set up tools

* We'll set up an AutoGPT with a `search` tool, a `write-file` tool, a `read-file` tool, a web browsing tool, and a tool to interact with a CSV file via a Python REPL

Define any other `tools` you want to use below:

In [3]:
# Tools
import os
from contextlib import contextmanager
from typing import Optional
from langchain.agents import tool
from langchain.tools.file_management.read import ReadFileTool
from langchain.tools.file_management.write import WriteFileTool

# Working directory that process_csv switches into before reading CSV files.
ROOT_DIR = "./data/"

@contextmanager
def pushd(new_dir):
    """Temporarily switch the process working directory to ``new_dir``.

    The directory that was current on entry is restored when the ``with``
    block exits, whether it finishes normally or raises.
    """
    original_dir = os.getcwd()
    os.chdir(new_dir)
    try:
        yield
    finally:
        # Always go back to where we started.
        os.chdir(original_dir)

@tool
def process_csv(
    csv_file_path: str, instructions: str, output_path: Optional[str] = None
) -> str:
    """Process a CSV by with pandas in a limited REPL.\
 Only use this after writing data to disk as a csv file.\
 Any figures must be saved to disk to be viewed by the human.\
 Instructions should be written in natural language, not code. Assume the dataframe is already loaded."""
    # NOTE: the docstring above is what the agent sees as this tool's
    # description, so it is kept verbatim.
    #
    # Every failure is reported back as an "Error: ..." string instead of being
    # raised, so the calling agent receives it as an ordinary observation.
    with pushd(ROOT_DIR):
        # Paths are resolved relative to ROOT_DIR while inside this block.
        try:
            frame = pd.read_csv(csv_file_path)
        except Exception as load_error:
            return f"Error: {load_error}"

        csv_agent = create_pandas_dataframe_agent(
            llm, frame, max_iterations=30, verbose=True
        )

        # Optionally ask the pandas agent to persist whatever it produces.
        request = instructions
        if output_path is not None:
            request = f"{instructions} Save output to disk at {output_path}"

        try:
            return csv_agent.run(request)
        except Exception as run_error:
            return f"Error: {run_error}"

**Browse a web page with Playwright**

In [11]:
# !pip install playwright
# !playwright install

In [4]:
async def async_load_playwright(url: str) -> str:
    """Load one URL with headless Chromium via Playwright and return its text.

    The page HTML is parsed with BeautifulSoup, ``<script>`` and ``<style>``
    elements are removed, and the remaining text is collapsed to one non-empty
    chunk per line.

    Args:
        url: Address of the page to open.

    Returns:
        The cleaned page text, or ``"Error: <message>"`` if loading or parsing
        raised an ``Exception``.
    """
    # Imported lazily so the notebook can be loaded without these packages.
    from bs4 import BeautifulSoup
    from playwright.async_api import async_playwright

    results = ""
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url)

            page_source = await page.content()
            soup = BeautifulSoup(page_source, "html.parser")

            # Drop non-visible content before extracting text.
            for script in soup(["script", "style"]):
                script.extract()

            text = soup.get_text()
            lines = (line.strip() for line in text.splitlines())
            # Split on double spaces as well, so runs of text separated by
            # wide gaps end up on their own lines.
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            results = "\n".join(chunk for chunk in chunks if chunk)
        except Exception as e:
            results = f"Error: {e}"
        finally:
            # Close the browser even when something other than Exception
            # (e.g. task cancellation) unwinds through this block.
            await browser.close()
    return results

def run_async(coro):
    """Run the awaitable ``coro`` to completion and return its result.

    Uses the current thread's event loop when one is available. If there is
    none (``asyncio.get_event_loop`` raises ``RuntimeError``) or the current
    loop has been closed, a new loop is created and installed as the current
    one so the call still succeeds.

    Args:
        coro: Coroutine or other awaitable to execute.

    Returns:
        Whatever ``coro`` returns.
    """
    try:
        event_loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current event loop in this thread.
        event_loop = None
    if event_loop is None or event_loop.is_closed():
        event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(event_loop)
    return event_loop.run_until_complete(coro)

@tool
def browse_web_page(url: str) -> str:
    """Verbose way to scrape a whole webpage. Likely to cause issues parsing."""
    # Synchronous front end for the Playwright loader: drive the coroutine to
    # completion and hand back the text it produced.
    page_text = run_async(async_load_playwright(url))
    return page_text

**Q&A Over a webpage**

Help the model ask more directed questions of web pages to avoid cluttering its memory

In [5]:
from langchain.tools import BaseTool, DuckDuckGoSearchRun
from langchain.text_splitter import RecursiveCharacterTextSplitter

from pydantic import Field
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain, BaseCombineDocumentsChain

def _get_text_splitter():
    """Build the default text splitter used by WebpageQATool."""
    # A really small chunk size, just to show.
    splitter_options = {
        "chunk_size": 500,
        "chunk_overlap": 20,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options)


class WebpageQATool(BaseTool):
    # Tool that loads a page, splits it into chunks and answers a question
    # about it with a question-answering chain.
    name = "query_webpage"
    description = "Browse a webpage and retrieve the information relevant to the question."
    # Splitter applied to the page text before it is sent to the QA chain.
    text_splitter: RecursiveCharacterTextSplitter = Field(default_factory=_get_text_splitter)
    # Chain called with {"input_documents": ..., "question": ...}.
    qa_chain: BaseCombineDocumentsChain

    def _run(self, url: str, question: str) -> str:
        """Useful for browsing websites and scraping the text information."""
        # NOTE(review): annotated as returning str, but the value returned is
        # whatever qa_chain(...) yields — confirm what callers expect.
        window_size = 4
        page_text = browse_web_page.run(url)
        page_chunks = self.text_splitter.split_documents(
            [Document(page_content=page_text, metadata={"source": url})]
        )

        # First pass: ask the question of each window of chunks.
        # TODO: Handle this with a MapReduceChain
        window_answers = []
        for start in range(0, len(page_chunks), window_size):
            answer = self.qa_chain(
                {
                    "input_documents": page_chunks[start:start + window_size],
                    "question": question,
                },
                return_only_outputs=True,
            )
            window_answers.append(f"Response from window {start} - {answer}")

        # Second pass: ask the same question of the combined window answers.
        combined = Document(
            page_content="\n".join(window_answers), metadata={"source": url}
        )
        return self.qa_chain(
            {"input_documents": [combined], "question": question},
            return_only_outputs=True,
        )

    async def _arun(self, url: str, question: str) -> str:
        # No asynchronous implementation is provided.
        raise NotImplementedError
      

In [6]:
# Webpage QA tool instance whose chain is a QA-with-sources chain built on the shared llm.
query_website_tool = WebpageQATool(qa_chain=load_qa_with_sources_chain(llm))

**Extra Tools**

In [7]:
from langchain.agents import load_tools

# Load each group of built-in tools by name.
requests_tools = load_tools(["requests_get", "requests_post"])
serpapi_tool = load_tools(["serpapi"])
python_repl_tool = load_tools(["python_repl"])

# One flat list, in the same order as loaded above.
extra_tools = [*requests_tools, *serpapi_tool, *python_repl_tool]

### Set up memory

* The memory here is used for the agent's intermediate steps

In [8]:
# Memory
import faiss
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore
from langchain.embeddings import OpenAIEmbeddings
from langchain.tools.human.tool import HumanInputRun

# Embedding function used to vectorise text stored in the agent's memory.
embeddings_model = OpenAIEmbeddings()
# Width of the FAISS index; presumably the dimensionality of the vectors the
# default OpenAIEmbeddings model returns — TODO confirm if the model changes.
embedding_size = 1536
index = faiss.IndexFlatL2(embedding_size)
# Starts empty: no stored documents and no index-to-docstore-id mappings.
vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})

### Setup model and AutoGPT

`Model set-up`

In [10]:
# !pip install duckduckgo_search
# Search tool included in the AutoGPT tool list below.
web_search = DuckDuckGoSearchRun()
# Display the tool's configuration as the cell output.
web_search

DuckDuckGoSearchRun(name='DuckDuckGo Search', description='A wrapper around DuckDuckGo Search. Useful for when you need to answer questions about current events. Input should be a search query.', args_schema=None, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_wrapper=DuckDuckGoSearchAPIWrapper(k=10, region='wt-wt', safesearch='moderate', time='y', max_results=5))

In [11]:
# Tools handed to the AutoGPT agent. The file tools are rooted at ROOT_DIR, the
# same directory process_csv switches into, so the files the agent writes are
# the ones process_csv reads.
tools = [
    web_search,
    WriteFileTool(root_dir=ROOT_DIR),
    ReadFileTool(root_dir=ROOT_DIR),
    process_csv,
    query_website_tool,
    *extra_tools,
    # HumanInputRun(),  # Uncomment to let the agent ask the human for help
]
# Show the name and description of every registered tool.
print("\n".join(f"{tool.name}: {tool.description}" for tool in tools))

DuckDuckGo Search: A wrapper around DuckDuckGo Search. Useful for when you need to answer questions about current events. Input should be a search query.
write_file: Write file to disk
read_file: Read file from disk
process_csv: process_csv(csv_file_path: str, instructions: str, output_path: Optional[str] = None) -> str - Process a CSV by with pandas in a limited REPL. Only use this after writing data to disk as a csv file. Any figures must be saved to disk to be viewed by the human. Instructions should be written in natural language, not code. Assume the dataframe is already loaded.
query_webpage: Browse a webpage and retrieve the information relevant to the question.
requests_get: A portal to the internet. Use this when you need to get specific content from a website. Input should be a  url (i.e. https://www.google.com). The output will be the text response of the GET request.
requests_post: Use this when you want to POST to a website.
    Input should be a json string with two keys:

In [15]:
from langchain.agents import initialize_agent
from langchain.agents import load_tools

# Tools for the small conversational demo agent only. They get their own name
# so the AutoGPT tool list held in `tools` is not overwritten by this cell.
chat_tools = load_tools(
    ["llm-math"],
    llm=llm,
)
chat_agent = initialize_agent(
    agent="chat-conversational-react-description",  # alternative: "zero-shot-react-description"
    tools=chat_tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
)


In [28]:
import pprint as pp
# Show the prompt used by chat_agent: the list of message templates that
# make up its chat prompt.

chat_messages = chat_agent.agent.llm_chain.prompt.messages
chat_messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], output_parser=None, partial_variables={}, template='Assistant is a large language model trained by OpenAI.\n\nAssistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n\nAssistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanatio

In [49]:
# Dump the agent object's instance attributes to see how it is configured.
chat_agent.agent.__dict__

{'llm_chain': LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, prompt=ChatPromptTemplate(input_variables=['input', 'chat_history', 'agent_scratchpad'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], output_parser=None, partial_variables={}, template='Assistant is a large language model trained by OpenAI.\n\nAssistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n\nAssistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this know

In [42]:
# Pretty-print the attributes of the first message's prompt template.
pp.pprint(chat_messages[0].prompt.__dict__)

{'input_variables': [],
 'output_parser': None,
 'partial_variables': {},
 'template': 'Assistant is a large language model trained by OpenAI.\n'
             '\n'
             'Assistant is designed to be able to assist with a wide range of '
             'tasks, from answering simple questions to providing in-depth '
             'explanations and discussions on a wide range of topics. As a '
             'language model, Assistant is able to generate human-like text '
             'based on the input it receives, allowing it to engage in '
             'natural-sounding conversations and provide responses that are '
             'coherent and relevant to the topic at hand.\n'
             '\n'
             'Assistant is constantly learning and improving, and its '
             'capabilities are constantly evolving. It is able to process and '
             'understand large amounts of text, and can use this knowledge to '
             'provide accurate and informative responses to a 

In [45]:
# Pretty-print the attributes of the third message's prompt template.
pp.pprint(chat_messages[2].prompt.__dict__)

{'input_variables': ['input'],
 'output_parser': None,
 'partial_variables': {},
 'template': 'TOOLS\n'
             '------\n'
             'Assistant can ask the user to use tools to look up information '
             'that may be helpful in answering the users original question. '
             'The tools the human can use are:\n'
             '\n'
             '> Calculator: Useful for when you need to answer questions about '
             'math.\n'
             '\n'
             'RESPONSE FORMAT INSTRUCTIONS\n'
             '----------------------------\n'
             '\n'
             'When responding to me, please output a response in one of two '
             'formats:\n'
             '\n'
             '**Option 1:**\n'
             'Use this if you want the human to use a tool.\n'
             'Markdown code snippet formatted in the following schema:\n'
             '\n'
             '```json\n'
             '{{\n'
             '    "action": string \\ The action to take. Must

In [47]:
# Print the same template as plain text so its line breaks render as written.
print(chat_messages[2].prompt.template)


TOOLS
------
Assistant can ask the user to use tools to look up information that may be helpful in answering the users original question. The tools the human can use are:

> Calculator: Useful for when you need to answer questions about math.

RESPONSE FORMAT INSTRUCTIONS
----------------------------

When responding to me, please output a response in one of two formats:

**Option 1:**
Use this if you want the human to use a tool.
Markdown code snippet formatted in the following schema:

```json
{{
    "action": string \ The action to take. Must be one of Calculator
    "action_input": string \ The input to the action
}}
```

**Option #2:**
Use this if you want to respond directly to the human. Markdown code snippet formatted in the following schema:

```json
{{
    "action": "Final Answer",
    "action_input": string \ You should put what you want to return to use here
}}
```

USER'S INPUT
--------------------
Here is the user's input (remember to respond with a markdown code snippet 

In [27]:
# Build the AutoGPT agent from the shared llm, the current `tools` list and a
# retriever over the FAISS vector store that returns 8 results per query.
agent = AutoGPT.from_llm_and_tools(
    ai_name="Tom",
    ai_role="Assistant",
    tools=tools,
    llm=llm,
    memory=vectorstore.as_retriever(search_kwargs={"k": 8}),
    # human_in_the_loop=True, # Set to True if you want to add feedback at each step.
)
# Turn on verbose output for the agent's chain.
agent.chain.verbose = True

In [13]:
# Display the prompt object attached to the AutoGPT agent's chain.
agent.chain.prompt

All Rights Reserved.

Copyright (c) 2000 BeOpen.com.
All Rights Reserved.

Copyright (c) 1995-2001 Corporation for National Research Initiatives.
All Rights Reserved.

Copyright (c) 1991-1995 Stichting Mathematisch Centrum, Amsterdam.
All Rights Reserved., 'credits':     Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands
    for supporting Python development.  See www.python.org for more information., 'license': Type license() to see the full license text, 'help': Type help() for interactive help, or help(object) for help about object., 'execfile': <function execfile at 0x7f3a6ad9b380>, 'runfile': <function runfile at 0x7f3a6ac97100>, '__IPYTHON__': True, 'display': <function display at 0x7f3a6c121800>, 'get_ipython': <bound method InteractiveShell.get_ipython of <ipykernel.zmqshell.ZMQInteractiveShell object at 0x7f3a6a76cd90>>}, 'ast': <module 'ast' from '/home/pmmcb/conda/envs/openai/lib/python3.11/ast.py'>, 'sys': <module 'sys' (built-in)>, 'StringIO': <class

### AutoGPT for Querying the Web
 
  
I've spent a lot of time over the years crawling data sources and cleaning data. Let's see if AutoGPT can help with this!

Here are the goals given to the agent: build a ChatGPT-style chatbot with internet access, long-term memory, and a Gradio web interface, and write the resulting code to the `./data` folder.

In [41]:
# Goals: create a chatbot using the OpenAI API for ChatGPT with:
# 1. Internet access for searches and information gathering.
# 2. Long-term memory management.
# 3. GPT-3.5 powered agents for delegation of simple tasks.
# 4. File output.
# Further ideas: crawling domains and scraping webpages for analysis,
# analyzing GitHub repos, and a web interface using Gradio.
goals = [
    "Create a OpenAI API ChatGPT chatbot clone that has access to the internet for searches and can download information for long term memory storage.",
    "Output final python code to a file or as a set of project files in the './data' folder.",
    "Users will interact with the chatbot through a web-based user interface that allows the user to upload documents to the chatbot's long term memory, as well as input web pages or GitHub repos that the user wants the chatbot to research and store in memory.",
    "The web user interface will be implemented using Python with Gradio that allows for stream outputs that are quickly displayed on the screen to the user.",
    "The chatbot can search and browse the internet and file system for information and store data in long term memory if it is important.",
    "Long term memory management can be done through chunking and embedding the data using OpenAI's embeddings API, followed by storage in a FAISS index. The chatbot will be able to retrieve information from long term memory using the FAISS index.",
    "Look up how to do any of these things if unsure.",
]
    
# Hand the whole list of goals to the AutoGPT agent and start it.
agent.run(goals)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are Tom, Assistant
Your decisions must always be made independently without seeking user assistance.
Play to your strengths as an LLM and pursue simple strategies with no legal complications.
If you have completed all your tasks, make sure to use the "finish" command.

GOALS:

1. Create a OpenAI API ChatGPT chatbot clone that has access to the internet for searches and can download information for long term memory storage.
2. Output final python code to a file or as a set of project files in the './data' folder.
3. Users will interact with the chatbot through a web-based user interface that allows the user to upload documents to the chatbot's long term memory, as well as input web pages or GitHub repos that the user wants the chatbot to research and store in memory.
4. The web user interface will be implemented using Python with Gradio that allows for stream outputs that are quickly displayed o



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are Tom, Assistant
Your decisions must always be made independently without seeking user assistance.
Play to your strengths as an LLM and pursue simple strategies with no legal complications.
If you have completed all your tasks, make sure to use the "finish" command.

GOALS:

1. Create a OpenAI API ChatGPT chatbot clone that has access to the internet for searches and can download information for long term memory storage.
2. Output final python code to a file or as a set of project files in the './data' folder.
3. Users will interact with the chatbot through a web-based user interface that allows the user to upload documents to the chatbot's long term memory, as well as input web pages or GitHub repos that the user wants the chatbot to research and store in memory.
4. The web user interface will be implemented using Python with Gradio that allows for stream outputs that are quickly displayed o