### LangChain_ReAct_Demo
A Q&A system that uses logical reasoning and tools to handle complex inquiries — for example, synthesizing information from policy documents or calling custom tools — to produce a final response.

In [17]:
!pip install langchain langchain_community
!pip install openai
!pip install chromadb
!pip install tiktoken



In [18]:
import os
import openai

# Do not hard-code the key in the notebook. Reuse OPENAI_API_KEY when it is already
# set in the environment (the previous code overwrote it with an empty string) and
# otherwise prompt for it without echoing the value.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

In [19]:
import os
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.retrievers import ParentDocumentRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.storage._lc_store import create_kv_docstore
from langchain.storage import LocalFileStore
from langchain.agents import Tool
from langchain.document_loaders import TextLoader
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import AgentExecutor, create_react_agent
from langchain import hub
from langchain.pydantic_v1 import BaseModel, Field
from langchain.agents import create_openai_tools_agent
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.pydantic_v1 import BaseModel, Field
from langchain.agents import tool
import numpy as np

In [7]:
import langchain

# NOTE: the author expected langchain 0.1.0 here and saw errors with the OpenAI tools
# agent on the latest release; the recorded output of this cell is 0.2.10.
langchain.__version__

'0.2.10'

In [8]:
import openai

openai.__version__

'1.36.1'

### Setting up LLM

In [20]:
# Azure OpenAI settings for the chat model.
# The endpoint contains a "<???>" placeholder that must be replaced before running.
OPENAI_DEPLOYMENT_NAME = "gpt-35-turbo-16k"
OPENAI_DEPLOYMENT_ENDPOINT = "https://<???>.openai.azure.com/"
OPENAI_DEPLOYMENT_VERSION = "2023-12-01-preview"
OPENAI_MODEL_NAME = "gpt-35-turbo-16k"

# Azure OpenAI settings for the embedding model.
OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = "text-embedding-ada"
OPENAI_ADA_EMBEDDING_MODEL_NAME = "text-embedding-ada-002"

# Chat model used by the QA chain and the agents below.
# It reuses the key stored in openai.api_key by the earlier setup cell.
llm = AzureChatOpenAI(
    deployment_name=OPENAI_DEPLOYMENT_NAME,
    model_name=OPENAI_MODEL_NAME,
    openai_api_base=OPENAI_DEPLOYMENT_ENDPOINT,
    openai_api_version=OPENAI_DEPLOYMENT_VERSION,
    openai_api_key=openai.api_key,
    openai_api_type="azure",
    temperature=0.1,
)

# Embedding model handed to the Chroma vector store below; it shares the endpoint,
# API version and key with the chat model.
# NOTE(review): chunk_size=1 presumably limits each embedding request to a single
# text — confirm against the OpenAIEmbeddings documentation.
embeddings = OpenAIEmbeddings(
    deployment=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME,
    model=OPENAI_ADA_EMBEDDING_MODEL_NAME,
    openai_api_base=OPENAI_DEPLOYMENT_ENDPOINT,
    openai_api_type="azure",
    chunk_size=1,
    openai_api_key=openai.api_key,
    openai_api_version=OPENAI_DEPLOYMENT_VERSION,
)



### Loading documents

In [24]:
!pip install langchain_google_community
from langchain_google_community import GoogleDriveLoader



In [29]:
# Load the files of type "document" from the given Google Drive folder.
# NOTE(review): the recorded output of this cell is a RefreshError raised while
# contacting the Google Compute Engine metadata service, i.e. no usable Google
# credentials were found when it ran. Credentials presumably have to be configured
# for GoogleDriveLoader before this cell can succeed — confirm against its docs.
loader = GoogleDriveLoader(
    folder_id="1_qnUoHCuJ4L7xD5jK5dI8-OJb7kqVfkV",
    file_types=["document"]
    )

# `documents` is what the (commented-out) indexing cell further down adds to the retriever.
documents = loader.load()

RefreshError: ("Failed to retrieve http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/?recursive=true from the Google Compute Engine metadata service. Status: 404 Response:\nb''", <google_auth_httplib2._Response object at 0x7b1b22f21150>)

In [None]:
# Storage locations; the two directory names are joined under <PROJECT_ROOT>/data
# when the vector store and the document store are created below.
persist_directory = "local_vectorstore"  # directory name for the Chroma vector store
local_store = "local_docstore"  # directory name for the parent-document file store
collection_name = "hrpolicy"  # name of the Chroma collection
PROJECT_ROOT = "...."  # insert your project root directory name here

### Creating Retriever

In [None]:
# Two splitters: the parent splitter produces the larger chunks kept in the document
# store, the child splitter produces the small chunks that are embedded and indexed.
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
child_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
# The vectorstore to use to index the child chunks
vectorstore = Chroma(
    persist_directory=os.path.join(PROJECT_ROOT, "data", persist_directory),
    collection_name=collection_name,
    embedding_function=embeddings,
)
# The storage layer for the parent documents.
# The file store gets its own name: rebinding `local_store` (the directory-name
# string) to the LocalFileStore object made this cell fail in os.path.join when it
# was run a second time.
file_store = LocalFileStore(os.path.join(PROJECT_ROOT, "data", local_store))
store = create_kv_docstore(file_store)
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)

In [None]:
# run only once
# vectorstore.persist()
# retriever.add_documents(documents, ids=None)

### QnA System

#### correct responses

In [None]:
# Plain retrieval-QA baseline (no agent, no tools) built on the same LLM and retriever.
# return_source_documents=True asks the chain to include the retrieved documents in
# its result alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
# Run the chain through .invoke(): calling the chain object directly is deprecated,
# and .invoke() is what the agent executors later in this notebook use.
response = qa.invoke({"query": "What is the allocated budget for communication initiatives?"})
response

In [None]:
qa.invoke({"query": "How many maternity leaves are offered?"})

In [None]:
qa.invoke({"query": "What is the probationary period?"})

In [None]:
qa.invoke({"query": "Is the work hours in Germany different than United States?"})

In [None]:
qa.invoke({"query": "What is the probationary period for new employees in USA?"})

In [None]:
qa.invoke(
    {
        "query": "What is the difference in the number of work hours in Germany vs. United States?"
    }
)

In [None]:
qa.invoke({"query": "Can I reimburse travel expenses?"})

In [None]:
qa.invoke(
    {
        "query": "I started with the company on 1 December 2023.\
    Is my probationary period over if the date today is 26 Jan 2024?"
    }
)

#### incorrect responses

In [None]:
# incorrect as the conversion used is wrong. We need to fix this!
qa.invoke(
    {
        "query": "What is the percentage difference in the annual budget for Japan and US?"
    }
)

In [None]:
# Results are still slightly off
qa.invoke(
    {
        "query": "What is the percentage difference in the annual budget for Japan and US if 1 USD = 147.72 JPY?"
    }
)

In [None]:
# incorrect as technically US has higher budget after conversion
qa.invoke({"query": "Which country has the highest budget?"})

### Setting up ReAct Agent

#### Common Tools

In [None]:
# Wrap the ParentDocumentRetriever (PDR) as an agent tool.
# The second and third arguments are the tool's name and its description.
tool_search = create_retriever_tool(
    retriever,
    "search_hr_policy",
    "Searches and returns excerpts from the HR policy.",
)

In [None]:
# under the hood it will call the get_relevant_documents() function and return the list of parent chunks
tool_search.func

In [None]:
# useful to check the schema to verify the expected parameters for the function
tool_search.args_schema.schema()

In [None]:
# Magnitude words understood by value_to_float, mapped to their multipliers.
_MAGNITUDE_MULTIPLIERS = {
    "THOUSAND": 1e3,
    "MILLION": 1e6,
    "BILLION": 1e9,
    "TRILLION": 1e12,
}

# Default exchange rate for yen-to-dollar conversion (JPY per 1 USD).
JPY_PER_USD = 147.72


def value_to_float(x):
    """Parse a number, or a textual amount such as "5 million", into a float.

    Args:
        x: An int/float, or a string holding a number that may contain thousands
            separators (",") and one magnitude word (thousand, million, billion,
            trillion; case-insensitive).

    Returns:
        The numeric value as a float. A bare magnitude word ("million") counts as
        one unit of that magnitude. Text that cannot be parsed yields 0.0, which
        keeps the original best-effort fallback.
    """
    # Numbers pass straight through (no int() truncation of floats).
    if isinstance(x, (int, float)):
        return float(x)

    text = str(x).upper().replace(",", "").strip()

    multiplier = None
    for word, factor in _MAGNITUDE_MULTIPLIERS.items():
        if word in text:
            multiplier = factor
            text = text.replace(word, "").strip()
            break

    if not text:
        # Either a bare magnitude word or an empty string.
        return multiplier if multiplier is not None else 0.0

    try:
        number = float(text)
    except ValueError:
        return 0.0
    return number * (multiplier or 1.0)


def convert_currency_to_usd(amount: str, jpy_per_usd: float = JPY_PER_USD):
    """Convert a yen or dollar amount given as text into a USD float.

    Args:
        amount: Text such as "¥5 million", "1,000 JPY", "$250" or "100 USD".
            Currency markers are matched case-insensitively.
        jpy_per_usd: Exchange rate used for yen amounts (JPY per 1 USD).

    Returns:
        The amount in USD as a float, or None when the text carries no recognised
        currency marker (the original fell off the end of the function in that case).
    """
    text = str(amount).upper()

    yen_markers = ("¥", "JPY")
    usd_markers = ("US$", "USD", "$")

    def _numeric_part(markers):
        # Drop every marker of the detected currency so that inputs like
        # "$100 USD" are left with just the number.
        cleaned = text
        for marker in markers:
            cleaned = cleaned.replace(marker, "")
        return value_to_float(cleaned)

    # Same precedence as the original checks: "¥", "$", "JPY", "USD".
    if "¥" in text:
        return _numeric_part(yen_markers) / jpy_per_usd
    if "$" in text:
        return _numeric_part(usd_markers)
    if "JPY" in text:
        return _numeric_part(yen_markers) / jpy_per_usd
    if "USD" in text:
        # Previously the input string was returned unchanged here.
        return _numeric_part(usd_markers)
    return None


# Single-input tools can be declared directly with Tool(): the function receives one
# value (the amount) and the description tells the agent what to pass in.
currency_conversion = Tool(
    name="Currency_conversion",
    func=convert_currency_to_usd,
    description="useful for converting currency into USD. Input should be an amount.",
)

In [None]:
# For a multi-input tool it is useful to define a schema class: Metrics declares the
# two float inputs of the perc_diff tool and is passed to it as args_schema.
class Metrics(BaseModel):
    # First of the two numbers to compare.
    num1: float = Field(description="Value 1")
    # Second of the two numbers to compare.
    num2: float = Field(description="Value 2")


@tool(args_schema=Metrics)
def perc_diff(num1: float, num2: float) -> float:
    """Calculates the percentage difference between two numbers"""
    # The docstring above is kept verbatim because the @tool decorator presumably
    # uses it as the tool description given to the LLM — verify before rewording.
    #
    # Symmetric percentage difference: |num1 - num2| relative to the magnitude of
    # the mean of the two values. Taking the magnitude keeps the result non-negative
    # when the mean is negative, and a plain Python float is returned instead of a
    # numpy scalar.
    mean_magnitude = abs(num1 + num2) / 2
    if mean_magnitude == 0:
        # Zero mean: identical values differ by 0 %, otherwise the ratio is unbounded
        # (the previous code produced NaN/inf with a numpy runtime warning).
        return 0.0 if num1 == num2 else float("inf")
    return float(abs(num1 - num2) / mean_magnitude * 100)

#### Common Agent prompts

In [None]:
# Pull the prompt published as "hwchase17/react" from the LangChain Hub and print its
# template; this is the prompt handed to create_react_agent below.
prompt_react = hub.pull("hwchase17/react")
print(prompt_react.template)

#### Running the ReAct agent

In [None]:
# List of tools to be used
tools = [tool_search]

In [None]:
# Build the agent from the LLM, the tool list and the ReAct prompt.
# This only creates the logical steps for us; running them is the executor's job.
react_agent = create_react_agent(llm, tools=tools, prompt=prompt_react)

In [None]:
# The executor runs the logical steps produced by the agent, using the same tool list.
# verbose=True and handle_parsing_errors=True are passed through to AgentExecutor
# (presumably step-by-step logging and tolerance of malformed LLM output — confirm).
react_agent_executor = AgentExecutor(
    agent=react_agent, tools=tools, verbose=True, handle_parsing_errors=True
)

In [None]:
query = "As per the HR policy, Which country has the highest budget?"
react_agent_executor.invoke({"input": query})

In [None]:
# Fixed the country name in the prompt ("Unites States" -> "United States").
query = "Which of the two countries has the highest budget - Japan or United States?"
react_agent_executor.invoke({"input": query})

In [None]:
query = "How much is the budget for Japan different than United States?"
react_agent_executor.invoke({"input": query})

In [None]:
# Add the currency-conversion tool so the agent can compare budgets in a single currency.
tools = [tool_search, currency_conversion]

# The agent and executor are rebuilt because both were created with the previous tool list.
react_agent = create_react_agent(llm, tools=tools, prompt=prompt_react)
react_agent_executor = AgentExecutor(
    agent=react_agent, tools=tools, verbose=True, handle_parsing_errors=True
)

query = "Is the budget for Japan different than United States?"
react_agent_executor.invoke({"input": query})

In [None]:
# Gives close enough response but can be improved with a calculator
query = "Calculate the difference in company budget for Japan and United States?"
react_agent_executor.invoke({"input": query})

In [None]:
# This will result in an error as ReAct agents cannot handle multi input tools like perc_diff.
# The list now names the tool actually defined in this notebook (`perc_diff`); the
# previous `percentage_calculator` was undefined, so the cell stopped with a NameError
# before the multi-input limitation could show up.
tools = [tool_search, currency_conversion, perc_diff]

react_agent = create_react_agent(llm, tools=tools, prompt=prompt_react)
react_agent_executor = AgentExecutor(
    agent=react_agent, tools=tools, verbose=True, handle_parsing_errors=True
)

#### Running the OpenAI Tools Agent

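P.S. From this point onwards, I switched to GPT-4 for better results. (The `llm` defined above still points at the `gpt-35-turbo-16k` deployment, so update the deployment and model names there to reproduce this.)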

In [None]:
# Defining a minimal custom prompt for the OpenAI tools agent.
# It is an alternative to the hub prompt; the agent built in the next cell uses the
# hub prompt, so this one only takes effect if it is passed in instead.

system_message = """
You are very helpful and try your best to answer the questions.
"""

# Message order: system instructions, the user's question ({input}), then a
# placeholder named "agent_scratchpad" (presumably filled with the agent's
# intermediate tool calls and results — confirm).
prompt_oai_tools_simple = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

In [None]:
# All three tools, including the two-argument perc_diff tool.
tools = [tool_search, currency_conversion, perc_diff]
# Prompt published as "hwchase17/openai-tools-agent" on the LangChain Hub.
prompt_oai_tools = hub.pull("hwchase17/openai-tools-agent")
oaitools_agent = create_openai_tools_agent(llm, tools, prompt_oai_tools)
oaitools_agent_executor = AgentExecutor(
    agent=oaitools_agent, tools=tools, verbose=True, handle_parsing_errors=True
)

query = "As per the HR policy, compare the budgets for Japan and US."
oaitools_agent_executor.invoke({"input": query})

If the results are not satisfactory, try modifying `prompt_oai_tools_simple` and passing it to `create_openai_tools_agent` in place of `prompt_oai_tools`.