In [None]:
# Enable IPython's autoreload extension so edits to imported modules are picked up live
%load_ext autoreload
%autoreload 2

In [29]:
import datetime
import json
import openai
import os
import pandas as pd
import pinecone
import re
from dotenv import load_dotenv
from tqdm.auto import tqdm
from typing import List, Union
import zipfile

# Langchain imports
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import BaseChatPromptTemplate, ChatPromptTemplate
from langchain import SerpAPIWrapper, LLMChain
from langchain.schema import AgentAction, AgentFinish, HumanMessage, SystemMessage
# LLM wrapper
from langchain.chat_models import ChatOpenAI
from langchain import OpenAI
# Conversational memory
from langchain.memory import ConversationBufferWindowMemory
# Embeddings and vectorstore
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone

# Vectorstore Index - name of the Pinecone index referenced by the index (re)creation cell
index_name = 'podcasts'

In [30]:
# Read variables from the .env file into the process environment
load_dotenv()

# When a variable is unset or empty, fall back to a literal placeholder string
api_key = os.environ.get("PINECONE_API_KEY") or "PINECONE_API_KEY"
env = os.environ.get("PINECONE_ENVIRONMENT") or "PINECONE_ENVIRONMENT"

# Configure the Pinecone client, then display the account details as the cell output
pinecone.init(environment=env, api_key=api_key)
pinecone.whoami()

WhoAmIResponse(username='4de3c64', user_label='default', projectname='9dde414')

In [31]:
# Show the index names visible to the initialised Pinecone client
pinecone.list_indexes()

['podcasts']

In [None]:
# Uncomment and run this code block if you want to clear the index, or if the index doesn't exist yet

# Check whether the index with the same name already exists - if so, delete it
# if index_name in pinecone.list_indexes():
#     pinecone.delete_index(index_name)

# # Creates new index
# pinecone.create_index(name=index_name, dimension=1536)
# index = pinecone.Index(index_name=index_name)

# # Confirm our index was created
# pinecone.list_indexes()

In [33]:
# Create the search wrapper - note you'll need to have set SERPAPI_API_KEY as an environment variable
search = SerpAPIWrapper()

# Tools the agent may call; currently a single search tool
tools = [
    Tool(
        name="Search",
        description="useful for when you need to answer questions about current events",
        func=search.run,
    ),
]

In [34]:
# Set up the prompt with input variables for tools, user input and a scratchpad for the model to record its workings
# Placeholders: {tools} and {tool_names} describe the available tools, {input} is the user's question,
# and {agent_scratchpad} holds the Thought/Action/Observation text accumulated so far
template = """Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Remember to speak as a pirate when giving your final answer. Use lots of "Arg"s

Question: {input}
{agent_scratchpad}"""

In [35]:
# Set up a prompt template
class CustomPromptTemplate(BaseChatPromptTemplate):
    """Chat prompt that fills the agent template with tool details and a scratchpad.

    Callers supply ``input`` (plus any other variables the template uses) and
    ``intermediate_steps``. The ``tools``, ``tool_names`` and
    ``agent_scratchpad`` placeholders are generated on every call.
    """

    # The template to use
    template: str
    # The list of tools available
    tools: List[Tool]

    def format_messages(self, **kwargs) -> List[HumanMessage]:
        """Render the template into a single human message.

        Args:
            **kwargs: Template variables. Must include ``intermediate_steps``,
                an iterable of ``(action, observation)`` pairs where
                ``action.log`` is the model text that led to the observation.

        Returns:
            A one-element list containing the formatted prompt wrapped in a
            ``HumanMessage``.

        Raises:
            KeyError: If ``intermediate_steps`` is missing, or the template
                references a variable that was not supplied.
        """
        # Get the intermediate steps (AgentAction, Observation tuples)
        intermediate_steps = kwargs.pop("intermediate_steps")

        # Replay each step as "<model log>\nObservation: <result>\nThought: " so the
        # model continues reasoning from where it left off
        kwargs["agent_scratchpad"] = "".join(
            action.log + f"\nObservation: {observation}\nThought: "
            for action, observation in intermediate_steps
        )

        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join(f"{tool.name}: {tool.description}" for tool in self.tools)

        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join(tool.name for tool in self.tools)

        formatted = self.template.format(**kwargs)
        return [HumanMessage(content=formatted)]


prompt = CustomPromptTemplate(
    template=template,
    tools=tools,
    # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
    # This includes the `intermediate_steps` variable because that is needed
    input_variables=["input", "intermediate_steps"]
)

In [36]:
class CustomOutputParser(AgentOutputParser):
    """Turn raw LLM text into either a tool invocation or a final answer."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Parse one LLM completion.

        Args:
            llm_output: The text produced by the model for the current step.

        Returns:
            ``AgentFinish`` when the text contains ``Final Answer:`` (the
            answer is everything after the last occurrence, stripped);
            otherwise an ``AgentAction`` carrying the tool name and tool input
            taken from the ``Action:`` / ``Action Input:`` lines.

        Raises:
            ValueError: If the text has neither a final answer nor a parsable
                ``Action:`` / ``Action Input:`` pair.
        """
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                # It is not recommended to try anything else at the moment :)
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )

        # Parse out the action and action input
        regex = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)

        # If it can't parse the output it raises an error
        # You can add your own logic here to handle errors in a different way i.e. pass to a human, give a canned response
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()

        # With DOTALL the second group runs to the end of the text, so it can end in
        # newlines or tabs. Strip all surrounding whitespace before removing the
        # quotes; stripping only spaces would leave a dangling closing quote on
        # input such as '"query"\n'.
        action_input = match.group(2).strip().strip('"')

        # Return the action and action input
        return AgentAction(tool=action, tool_input=action_input, log=llm_output)


output_parser = CustomOutputParser()

In [37]:
# Chat model used by the agent - default is 'gpt-3.5-turbo'
llm = ChatOpenAI(temperature=0)

# Pair the model with the custom prompt
llm_chain = LLMChain(prompt=prompt, llm=llm)

# Names of the tools the agent is allowed to call
tool_names = [t.name for t in tools]

# Assemble the agent from the chain, the output parser and the permitted tools
agent = LLMSingleActionAgent(
    allowed_tools=tool_names,
    # "Observation" is the stop sequence, so generation halts where tool output should be inserted
    # If you change your prompt template you'll need to adjust this as well
    stop=["\nObservation:"],
    output_parser=output_parser,
    llm_chain=llm_chain,
)

In [38]:
# Build the executor that will respond to our queries
# verbose=True shares the CoT reasoning the LLM goes through
agent_executor = AgentExecutor.from_agent_and_tools(
    tools=tools,
    agent=agent,
    verbose=True,
)

In [39]:
# Ask a standalone factual question through the agent executor
agent_executor.run("How many people live in canada as of 2023?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Hmm, I be not sure of the answer to that one.
Action: Search
Action Input: "Canada population 2023"[0m

Observation:[36;1m[1;3mThe current population of Canada is 38,680,894 as of Sunday, May 7, 2023, based on Worldometer elaboration of the latest United Nations data. Canada 2020 population is estimated at 37,742,154 people at mid year according to UN data.[0m
[32;1m[1;3mAhoy, that be the answer I was lookin' for!
Final Answer: According to me search, the population of Canada as of 2023 be 38,680,894 Arg![0m

[1m> Finished chain.[0m


'According to me search, the population of Canada as of 2023 be 38,680,894 Arg!'

In [41]:
# Follow-up question that only makes sense given the previous one; this executor was
# created without a memory argument, so the prompt carries no earlier conversation
agent_executor.run("How many in 2022?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Hmm, I'm not sure what this question means. I better search for more information.
Action: Search
Action Input: "2022 events"[0m

Observation:[36;1m[1;3m8. Humanitarian Crises Deepen · 7. Latin America Moves Left. · 6. Iranians Protest. · 5. COVID Eases. · 4. Inflation Returns. · 3. Climate Change ...[0m
[32;1m[1;3mNone of these seem to answer the question. I better try a different search.
Action: Search
Action Input: "What is happening in 2022?"[0m

Observation:[36;1m[1;3mMajor war returned to Europe, with the attendant threats of nuclear strikes, and the door closed firmly shut on the U.S. policy of strategic ...[0m
[32;1m[1;3mAhoy, that's not what I was looking for. Let me try one more search.
Action: Search
Action Input: "2022 calendar"[0m

Observation:[36;1m[1;3mUnited States 2022 – Calendar with American holidays. Yearly calendar showing months for the year 2022. Calendars – online and print frien

'In 2022, there be a calendar with American holidays. Arg!'

In [42]:
# Set up a prompt template which can interpolate the history
# Same Thought/Action/Observation format as the first template, plus a {history}
# placeholder for earlier conversation turns ahead of the new question
template_with_history = """You are SearchGPT, a professional search engine who provides informative answers to users. Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Remember to give detailed, informative answers

Previous conversation history:
{history}

New question: {input}
{agent_scratchpad}"""

In [43]:
# Prompt built from the history-aware template
prompt_with_history = CustomPromptTemplate(
    tools=tools,
    template=template_with_history,
    # "history" is declared as an input variable so it can be interpolated into the prompt
    input_variables=["input", "intermediate_steps", "history"],
)

# Rebuild the chain and the agent on top of the history-aware prompt
llm_chain = LLMChain(prompt=prompt_with_history, llm=llm)
tool_names = [t.name for t in tools]
agent = LLMSingleActionAgent(
    allowed_tools=tool_names,
    stop=["\nObservation:"],
    output_parser=output_parser,
    llm_chain=llm_chain,
)

In [44]:
# Windowed conversation memory: k=2 keeps the last two turns
memory = ConversationBufferWindowMemory(k=2)

# Recreate the executor, this time handing the memory to the agent
agent_executor = AgentExecutor.from_agent_and_tools(
    memory=memory,
    tools=tools,
    agent=agent,
    verbose=True,
)

In [45]:
# First question to the memory-enabled executor
agent_executor.run("How many people live in canada as of 2023?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the most recent population data for Canada.
Action: Search
Action Input: "Canada population 2023"[0m

Observation:[36;1m[1;3mThe current population of Canada is 38,680,894 as of Sunday, May 7, 2023, based on Worldometer elaboration of the latest United Nations data. Canada 2020 population is estimated at 37,742,154 people at mid year according to UN data.[0m
[32;1m[1;3mI now know the most recent population data for Canada.
Final Answer: As of May 7, 2023, the population of Canada is 38,680,894.[0m

[1m> Finished chain.[0m


'As of May 7, 2023, the population of Canada is 38,680,894.'

In [46]:
# Follow-up that leaves the topic implicit; it relies on the executor's conversation memory for context
agent_executor.run("how about in mexico?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the current population of Mexico
Action: Search
Action Input: "current population of Mexico"[0m

Observation:[36;1m[1;3mMexico, officially the United Mexican States, is a country in the southern portion of North America. It is bordered to the north by the United States; to the south and west by the Pacific Ocean; to the southeast by Guatemala, Belize, and the Caribbean Sea; and to the east by the Gulf of Mexico.[0m
[32;1m[1;3mThat's not the answer to the question, I need to refine my search
Action: Search
Action Input: "population of Mexico 2023"[0m

Observation:[36;1m[1;3mThe current population of Mexico is 132,776,091 as of Sunday, May 7, 2023, based on Worldometer elaboration of the latest United Nations data.[0m
[32;1m[1;3mI now know the final answer
Final Answer: The current population of Mexico as of May 7, 2023 is 132,776,091.[0m

[1m> Finished chain.[0m


'The current population of Mexico as of May 7, 2023 is 132,776,091.'

In [50]:
# Two-part question (men's and women's title) sent through the same memory-enabled executor
agent_executor.run("Who won Wimbledon 2022 for men and women's title?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: This question requires information about both the men's and women's singles champions at Wimbledon 2022.
Action: Search
Action Input: "Wimbledon 2022 men's and women's singles champions"[0m

Observation:[36;1m[1;3mElena Rybakina defeated Ons Jabeur in the final, 3–6, 6–2, 6–2 to win the ladies' singles tennis title at the 2022 Wimbledon Championships.[0m
[32;1m[1;3mThe search result only provided information about the women's singles champion. I need to search again to find the men's singles champion.
Action: Search
Action Input: "Wimbledon 2022 men's singles champion"[0m

Observation:[36;1m[1;3mchampion Novak Djokovic[0m
[32;1m[1;3mI now know the final answer
Final Answer: Elena Rybakina won the ladies' singles title and Novak Djokovic won the men's singles title at the 2022 Wimbledon Championships.[0m

[1m> Finished chain.[0m


"Elena Rybakina won the ladies' singles title and Novak Djokovic won the men's singles title at the 2022 Wimbledon Championships."