In [139]:
import os
from glob import glob
from tqdm import tqdm
from langchain import hub
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import AgentExecutor, create_react_agent
from langchain_community.document_loaders import PDFMinerLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate

In [2]:
# Read the project-level .env file before any API client is constructed.
load_dotenv("../.env")

# --- Model identifiers ---
GPT_4 = "gpt-4-1106-preview"
GPT_3 = "gpt-3.5-turbo-0125"

# --- Source document folders, one per audience ---
MANAGEMENT_DIR = "../data/management"
USER_DIR = "../data/user"

# --- Text chunking settings ---
CHUNK_SIZE = 400
CHUNK_OVERLAP = 0

# --- Session / caller settings ---
SESSION_ID = "test"
USER_ROLE = "management"

# --- Mutable containers, initially empty ---
TOOLS = list()
STORE = dict()

# Chat model shared by the agent; built last so the environment is already loaded.
LLM = ChatOpenAI(model=GPT_4, temperature=0)

In [130]:
# NOTE(review): draft prompt. `system_prompt` is reassigned by the ReAct-format
# template in a later cell, so when the notebook is run top to bottom this value
# is not the one handed to the agent.
system_prompt = """
You are a LEAF chatbot agent, you are required to provide answer to the question based on the embedded knowledge. Provide a short, concise and precise answers. Do not give a general answer or trying to fabricate an answer when you don't know the answer or when the question is out of this company context.
"""

In [153]:
# ReAct-style template: a company-specific preamble followed by the
# Question / Thought / Action / Action Input / Observation scaffold.
# It exposes four placeholders: {tools}, {tool_names}, {input}, {agent_scratchpad}.
system_prompt = """You are Leaf's company chatbot. Answer the following questions with facts, use the right tool based on the user access level. Do not give a general answer, be specific to the question in context of this company:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question without addressing the user role

Begin!

Question: {input}
Thought:{agent_scratchpad}"""

# Wrap the template, declaring the same four variables it references.
prompt = PromptTemplate(
    template=system_prompt,
    input_variables=["agent_scratchpad", "input", "tool_names", "tools"],
)

In [135]:
# Pull the "hwchase17/react" prompt from the LangChain hub for comparison.
# The agent built further down is given `prompt`, not `hub_prompt`.
hub_prompt = hub.pull("hwchase17/react")

In [136]:
hub_prompt

PromptTemplate(input_variables=['agent_scratchpad', 'input', 'tool_names', 'tools'], metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'react', 'lc_hub_commit_hash': 'd15fe3c426f1c4b3f37c9198853e4a86e20c425ca7f4752ec0c9b0e97ca7ea4d'}, template='Answer the following questions as best you can. You have access to the following tools:\n\n{tools}\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: {input}\nThought:{agent_scratchpad}')

In [114]:
# Sample question; it is embedded in the agent input when the executor is invoked.
query = "steps to set up a home alarm system?"

In [16]:
def get_filepaths(dirs, ext):
    """Recursively collect every file whose name ends in ``ext`` under ``dirs``.

    Args:
        dirs: Iterable of directory paths to search (subdirectories included).
        ext: Filename suffix to match, including the dot (e.g. ".pdf").

    Returns:
        A flat list of matching paths, grouped in the order of ``dirs`` and
        sorted within each directory. Empty when ``dirs`` is empty or nothing
        matches.
    """
    all_files = []
    # `directory` rather than `dir`, which would shadow the builtin.
    for directory in dirs:
        pattern = os.path.join(directory, "**", f"*{ext}")
        # glob yields matches in filesystem order; sort so that the resulting
        # document order (and anything built from it) is reproducible.
        matches = sorted(glob(pattern, recursive=True))
        # tqdm only reports the enumeration; it is drained by extend().
        all_files.extend(
            tqdm(matches, desc=f"Loading {ext} files for {directory}")
        )
    return all_files

In [95]:
def load_files(user):
    """Load and chunk the PDFs that ``user`` is allowed to see.

    "management" reads the PDFs under both MANAGEMENT_DIR and USER_DIR; any
    other value reads only those under USER_DIR.

    Args:
        user: Role name selecting which directories are read.

    Returns:
        A flat list of the chunks produced by the splitter's
        ``split_documents``, in the order the files were listed.
    """
    if user == "management":
        source_dirs = [MANAGEMENT_DIR, USER_DIR]
    else:
        source_dirs = [USER_DIR]

    # One splitter serves every file. Sizes come from the config-cell constants
    # so that changing CHUNK_SIZE / CHUNK_OVERLAP actually takes effect here.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
        separators=["\n\n"],
    )

    texts = []
    for file in get_filepaths(source_dirs, ".pdf"):
        texts.extend(text_splitter.split_documents(PDFMinerLoader(file).load()))
    return texts

In [96]:
# Chunked corpora per role: "management" covers both folders, "user" only the user folder.
admin_documents, user_documents = map(load_files, ("management", "user"))


Loading .pdf files for ../data/management: 100%|██████████| 3/3 [00:00<?, ?it/s]
Loading .pdf files for ../data/user: 100%|██████████| 4/4 [00:00<?, ?it/s]
Loading .pdf files for ../data/user: 100%|██████████| 4/4 [00:00<?, ?it/s]


In [None]:
# for doc in user_documents:
#     try:
#         print(doc.metadata, "\n", doc.page_content[:100], "\n", "====================")
#     except:
#         print("fail", doc)

In [110]:
# Retrieval settings shared by both corpora.
RETRIEVER_TOP_K = 2       # documents requested from each underlying retriever
SCORE_THRESHOLD = 0.3     # minimum relevance score for the vector retriever


def build_retrievers(documents, k=RETRIEVER_TOP_K, score_threshold=SCORE_THRESHOLD):
    """Build the FAISS index, BM25 retriever and equal-weight ensemble for a corpus.

    Args:
        documents: Non-empty list of chunked documents to index.
        k: Number of results each underlying retriever is asked for.
        score_threshold: Threshold passed to the FAISS retriever's
            ``similarity_score_threshold`` search.

    Returns:
        A ``(faiss_index, bm25_retriever, ensemble_retriever)`` tuple.

    Raises:
        ValueError: If ``documents`` is empty, since neither index can be built
            from nothing.
    """
    if not documents:
        raise ValueError("build_retrievers() needs at least one document")

    faiss_index = FAISS.from_documents(documents, OpenAIEmbeddings())
    # `k` is forwarded to the retriever's constructor, so no separate
    # attribute assignment is needed afterwards.
    bm25_retriever = BM25Retriever.from_documents(documents=documents, k=k)

    vector_retriever = faiss_index.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": score_threshold, "k": k},
    )
    # Weights are spelled out for both corpora so the two ensembles are
    # configured identically (keyword and vector results count equally).
    ensemble_retriever = EnsembleRetriever(
        retrievers=[bm25_retriever, vector_retriever],
        weights=[0.5, 0.5],
    )
    return faiss_index, bm25_retriever, ensemble_retriever


admin_faiss_index, admin_bm25_retriever, admin_ensemble_retriever = build_retrievers(
    admin_documents
)
user_faiss_index, user_bm25_retriever, user_ensemble_retriever = build_retrievers(
    user_documents
)

In [None]:
# admin_faiss_index.similarity_search_with_score(query, k=4, score_threshold=0.4)

In [None]:
# admin_ensemble_retriever.invoke(query)

In [None]:
# user_ensemble_retriever.invoke(query)

In [None]:
# retriever = admin_faiss_index.as_retriever(
#     search_type="similarity_score_threshold",
#     search_kwargs={
#         "score_threshold": .3,
#         "k": 2}
# )
# retriever.invoke(query)

In [142]:
# Wrap each ensemble retriever as an agent tool. The descriptions are what the
# agent sees (via the prompt's {tools} placeholder) when choosing a tool.
admin_ensemble_retriever_tool = create_retriever_tool(
    retriever=admin_ensemble_retriever,
    name="admin_retriever_tool",
    description="useful for retrieving the admin vector store when querying for result based on context"
)

user_ensemble_retriever_tool = create_retriever_tool(
    retriever=user_ensemble_retriever,
    name="user_retriever_tool",
    description="useful for retrieving the user vector store when querying for result based on context"
)

# Access control must not depend on the LLM obeying the prompt: a caller could
# simply claim a different role in the question. Only expose the admin
# retriever when the configured role is "management"; every role gets the user
# retriever. For USER_ROLE == "management" the list is the same as before.
tools = [user_ensemble_retriever_tool]
if USER_ROLE == "management":
    tools.insert(0, admin_ensemble_retriever_tool)

In [154]:
# ReAct agent: chat model + retriever tools + the custom ReAct prompt.
agent = create_react_agent(LLM, tools, prompt)

In [155]:
# Runs the agent loop, executing whichever tool the agent selects.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,  # print the intermediate Thought/Action/Observation trace
    # A completion that does not follow the ReAct format would otherwise raise
    # and abort the run; with this enabled the parsing error is sent back to
    # the model as an observation so it can retry.
    handle_parsing_errors=True,
)

In [156]:
# Ask the sample question, prefixed with the configured role.
agent_executor.invoke({"input": f"as a {USER_ROLE}, {query}"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To provide steps for setting up a home alarm system, I need to access the company's knowledge base. Since the question is from management, I should use the admin tool to retrieve the information.

Action: admin_retriever_tool
Action Input: Steps to set up a home alarm system for management.[0m[36;1m[1;3mTest the alarm system periodically.
Ensure the alarm sounds clearly and for enough time . Make sure there are
Attach and test a backup battery to the alarm to ensure it works during a
power outage. Test and calibrate the alarm's sensors, such as the fire and
smoke detection sensors.
Test and calibrate alarm sensors periodically.

Roof Gutter

MAINTENANCE

These maintenance tips are intended to highlight 
common areas which homeowners should maintain to 
preserve the look and function of their new home. 
Several important preventive procedures are required 
at periodic intervals. Use these tips to discover and 
corr

{'input': 'as a management, steps to set up a home alarm system?',