In [23]:
from langchain_community.vectorstores import Pinecone
from langchain_openai import OpenAIEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.chains import RetrievalQAWithSourcesChain
from langchain import prompts

# import SystemMessagePromptTemplate
from langchain.prompts import SystemMessagePromptTemplate

import pinecone
import os
import datetime
import json
import openai
import os
import pandas as pd
import pinecone
import re
from tqdm.auto import tqdm
from typing import List, Union
import zipfile

# Langchain imports
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import BaseChatPromptTemplate, ChatPromptTemplate
from langchain import SerpAPIWrapper, LLMChain
from langchain.schema import AgentAction, AgentFinish, HumanMessage, SystemMessage
# LLM wrapper
from langchain.chat_models import ChatOpenAI
from langchain import OpenAI
# Conversational memory
from langchain.memory import ConversationBufferWindowMemory

In [24]:
# get the api keys
open_ai_key = os.getenv('OPENAI_API_KEY') or 'OPENAI_API_KEY'

# init embeddings model
embed = OpenAIEmbeddings(
    model='text-embedding-ada-002',
)

# init vector store
PINECONE_API_KEY = os.getenv('PINECONE_RAGDEMO_API_KEY') or 'PINECONE_RAGDEMO_API_KEY'
PINECONE_ENVIRONMENT = os.getenv('PINECONE_RAGDEMO_ENV') or 'PINECONE_RAGDEMO_ENV'

pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_ENVIRONMENT
)

text_field = "text"
index_name = 'rag-demo'

# create index if it does not exist
if index_name not in pinecone.list_indexes():
    # we create a new index
    pinecone.create_index(
        name=index_name,
        metric='cosine',
        dimension=1536  # 1536 dim of text-embedding-ada-002
    )

# switch back to normal index for langchain
index = pinecone.Index(index_name)


vectorstore = Pinecone(
    index, embed, text_field
)

vector_db_index = pinecone.GRPCIndex(index_name)

# completion llm
llm_model = ChatOpenAI(
    # model_name='gpt-3.5-turbo',
    temperature=0.0
)

#qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
qa_chain = RetrievalQA.from_chain_type(
    llm=llm_model,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)

In [25]:
retiver_tool = Tool(
    name = 'Knowledge Base',
    func=qa_chain.run,
    description="Useful for general questions about how to do things and for details on interesting topics. Input should be a fully formed question."
)
tools = [retiver_tool]

In [26]:
class CustomPromptTemplate(BaseChatPromptTemplate):
    # The template to use
    template: str
    # The list of tools available
    tools: List[Tool]
    
    def format_messages(self, **kwargs) -> str:
        # Get the intermediate steps (AgentAction, Observation tuples)
        
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f"\nObservation: {observation}\nThought: "
            
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
        
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
        formatted = self.template.format(**kwargs)
        return [HumanMessage(content=formatted)]

In [27]:
template_with_history = """You know everything about Astrophysics. Only answer qustions by consulting your knowledge base. If you don't know the answer, say 'I don't know'.

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Remember to give detailed, informative answers

Previous conversation history:
{history}

New question: {input}
{agent_scratchpad}"""

prompt_with_history = CustomPromptTemplate(
    template=template_with_history,
    tools=tools,
    # The history template includes "history" as an input variable so we can interpolate it into the prompt
    input_variables=["input", "intermediate_steps", "history"]
)

In [28]:
class CustomOutputParser(AgentOutputParser):
    
    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                # It is not recommended to try anything else at the moment :)
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )
        
        # Parse out the action and action input
        regex = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
        print(llm_output)
        match = re.search(regex, llm_output, re.DOTALL)
        
        # If it can't parse the output it raises an error
        # You can add your own logic here to handle errors in a different way i.e. pass to a human, give a canned response
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        action_input = match.group(2)
        
        # Return the action and action input
        return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
    
output_parser = CustomOutputParser()

In [29]:
prompt_with_history = CustomPromptTemplate(
    template=template_with_history,
    tools=tools,
    input_variables=["input", "intermediate_steps", "history"]
)

llm_chain = LLMChain(llm=llm_model, prompt=prompt_with_history)
multi_tool_names = [tool.name for tool in tools]
multi_tool_agent = LLMSingleActionAgent(
    llm_chain=llm_chain, 
    output_parser=output_parser,
    stop=["\nObservation:"], 
    allowed_tools=multi_tool_names
)

In [30]:
# Initiate our LLM - default is 'gpt-3.5-turbo'
llm = ChatOpenAI(temperature=0)

# LLM chain consisting of the LLM and a prompt
llm_chain = LLMChain(llm=llm, prompt=prompt_with_history)

# Using tools, the LLM chain and output_parser to make an agent
tool_names = [tool.name for tool in tools]

agent = LLMSingleActionAgent(
    llm_chain=llm_chain, 
    output_parser=output_parser,
    # We use "Observation" as our stop sequence so it will stop when it receives Tool output
    # If you change your prompt template you'll need to adjust this as well
    stop=["\nObservation:"], 
    allowed_tools=tool_names
)

In [31]:
multi_tool_memory = ConversationBufferWindowMemory(k=2)
multi_tool_executor = AgentExecutor.from_agent_and_tools(agent=multi_tool_agent, tools=tools, verbose=True, memory=multi_tool_memory)

In [32]:
multi_tool_executor.run("Tell me something about astrophysics")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I should provide a general overview of astrophysics.
Action: Knowledge Base
Action Input: "What is astrophysics?"[0m

Observation:[36;1m[1;3mAstrophysics is a branch of physics that deals with the study of celestial objects and phenomena. It involves the application of physics principles and theories to understand the behavior, composition, and evolution of stars, galaxies, planets, and other astronomical objects. Astrophysicists use observations, mathematical models, and computer simulations to study phenomena such as the formation of stars, the structure of galaxies, the behavior of black holes, and the nature of dark matter and dark energy.[0m
[32;1m[1;3mI have provided a general overview of astrophysics.
Final Answer: Astrophysics is a branch of physics that deals with the study of celestial objects and phenomena, applying physics principles and theories to understand their behavior, composition, and evolut

'Astrophysics is a branch of physics that deals with the study of celestial objects and phenomena, applying physics principles and theories to understand their behavior, composition, and evolution.'

In [33]:
multi_tool_executor.run("What is the most important aspect of these?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The most important aspect of astrophysics is understanding the behavior, composition, and evolution of celestial objects and phenomena.
Action: Knowledge Base
Action Input: Importance of astrophysics[0m

Observation:[36;1m[1;3mAstrophysics is important for several reasons:

1. Understanding the Universe: Astrophysics helps us understand the fundamental processes and phenomena that occur in the universe. It allows us to study the formation and evolution of galaxies, stars, and planets, as well as the origin of the universe itself.

2. Exploring the Unknown: Astrophysics pushes the boundaries of our knowledge and helps us explore the unknown. It allows us to study phenomena such as black holes, dark matter, and dark energy, which are still not fully understood.

3. Technological Advancements: Many technological advancements have been made as a result of astrophysics research. For example, the development of telescop

'The most important aspect of astrophysics is understanding the behavior, composition, and evolution of celestial objects and phenomena. Additionally, astrophysics is important for understanding the universe, exploring the unknown, driving technological advancements, inspiring the next generation, and leading to practical applications in other fields.'

In [34]:
multi_tool_executor.run("Tell me something about unicorns")



[1m> Entering new AgentExecutor chain...[0m


ValueError: Could not parse LLM output: `I'm sorry, but I am an AI trained in astrophysics and do not have knowledge about unicorns.`