In [None]:
############# Load product catalog data into a FAISS vector store and save it to directory ############

import os

import pandas as pd
from langchain.docstore.document import Document
# The embedding engine that will convert our text to vectors
from langchain.embeddings.openai import OpenAIEmbeddings
# The vectorstore we'll be using
from langchain.vectorstores import FAISS

# Specify the path to your CSV file
csv_path = "data/t-2.csv"

# Load the CSV file into a DataFrame
df = pd.read_csv(csv_path)

# Columns whose text is concatenated into the searchable page content
text_columns = ['category-links', 'subcategory-links', 'name']

# Generate one Document per catalog row. The full row is kept as metadata so
# that search hits carry every catalog field (price, links, description, ...).
documents = []
data_list = df.to_dict(orient='records')
for item in data_list:
    # str() keeps the join from raising TypeError when a cell is not a string
    # (e.g. an empty CSV field that pandas loads as NaN).
    page_content = ' '.join(str(item[column]) for column in text_columns)
    document = Document(page_content=page_content, metadata=item)
    documents.append(document)

# Get your embeddings engine ready. The key is read from the OPENAI_API_KEY
# environment variable (KeyError if unset) so no secret lives in the notebook.
# NOTE(review): the key that used to be hardcoded here should be treated as leaked and revoked.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])
# Embed your documents and combine with the raw text in a pseudo db. Note: This will make an API call to OpenAI
docsearch = FAISS.from_documents(documents, embeddings)
# Quick sanity check of the freshly built index
found_docs = docsearch.similarity_search('Asus laptop')
print(found_docs)
# Persist the index so later sessions can reload it without re-embedding the catalog
docsearch.save_local('t2-embeddings-store')

In [None]:
############# Reload the FAISS vector store from previously saved directory ############

import os

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Get your embeddings engine ready. The key is read from the OPENAI_API_KEY
# environment variable (KeyError if unset) so no secret lives in the notebook.
# NOTE(review): the key that used to be hardcoded here should be treated as leaked and revoked.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])
# Load the index saved under 't2-embeddings-store' instead of re-embedding the
# catalog; the embeddings engine is passed so the store can embed search queries.
# NOTE(review): LangChain's FAISS.load_local unpickles the stored docstore, so
# only load directories this notebook created itself.
docsearch = FAISS.load_local(folder_path='t2-embeddings-store', embeddings=embeddings)

In [None]:
import json
def findProds(query) -> str:
    """Search the catalog vector store and return the hits as a JSON string.

    Runs a similarity search for ``query`` on the module-level ``docsearch``
    store, keeps only the known catalog fields from each hit's metadata,
    renames them to their customer-facing names, and serializes the
    resulting list of dicts with ``json.dumps``.
    """
    # Raw catalog column name -> field name exposed to the agent.
    # Metadata keys that are not listed here are dropped.
    renamed_fields = {
        'category-links': 'category',
        'subcategory-links' : 'subcategory',
        'subcategory-links-href' : 'subcategory-link',
        'product-links-href' : 'product-link',
        'name' : 'name',
        'price' : 'price',
        'description' : 'description'
    }

    products = []
    for hit in docsearch.similarity_search(query):
        product = {}
        # Field order follows the hit's metadata, not the mapping above.
        for field, value in hit.metadata.items():
            if field in renamed_fields:
                product[renamed_fields[field]] = value
        products.append(product)

    return json.dumps(products)


# Smoke test: print the JSON returned for a sample query
# (needs `docsearch` to have been created by one of the cells above).
print(findProds('Asus laptop'))

In [None]:
###########################  define my own tools #################################################
from typing import Optional, Type
from langchain.tools.base import BaseTool
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun, CallbackManagerForToolRun

class ProdSearchTool(BaseTool):
    """Agent tool that looks products up in the catalog through ``findProds``."""

    # `name` and `description` are the text shown to the LLM in the prompt's tool list.
    name = "prod_search"
    description = """Search product catalog. 
    Input: product name, category and description, etc. 
    Output: a list of products in JSon. Each product has the following fields: 
        category, subcategory, subcategory-link, product-link, name, price, description.
        Compose a snippet with product-link as a href for each product to show to customer.
    """

    def _run(self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:
        """Return the catalog hits for ``query`` as JSON, behind a fixed prefix.

        The prefix is the marker the prompt template tells the model to look
        for when deciding to answer directly from this observation.
        """
        return f"Tool's final answer: {findProds(query)}"

    async def _arun(self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None) -> str:
        """Async execution is not implemented for this tool; always raises."""
        raise NotImplementedError("ProdSearchTool does not support async")
        
        
class CustServiceTool(BaseTool):
    """Agent tool for every request that is not a catalog search.

    It does no processing of the request: it always answers with the URL of
    the customer-service page so the customer can be redirected there.
    """

    # `name` and `description` are the text shown to the LLM in the prompt's tool list.
    name = "customer_service"
    description = """General customer service that handles all other than searching product catalog. 
    Input: customer request.
    Output: A web link for redirect customer."""

    def _run(self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:
        """Return the customer-service URL; ``query`` is accepted but not used."""
        return "http://www.my-example.com/customer_service/"

    async def _arun(self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None) -> str:
        """Use the tool asynchronously.

        Raises:
            NotImplementedError: always; this tool has no async implementation.
        """
        # The message previously named ProdSearchTool (copy-paste slip), which
        # pointed anyone debugging an async call at the wrong tool.
        raise NotImplementedError("CustServiceTool does not support async")
        
        
# Tool instances shared by the prompt template and the agent executor.
tools = [
    ProdSearchTool(),
    CustServiceTool(),
]


In [None]:
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import StringPromptTemplate
from langchain import OpenAI, SerpAPIWrapper, LLMChain
from typing import List, Union
from langchain.schema import AgentAction, AgentFinish
import re

In [None]:
# Base prompt for the agent, written as a Thought / Action / Action Input /
# Observation loop. Placeholders:
#   {tools}, {tool_names}, {agent_scratchpad} - filled in by CustomPromptTemplate.format
#   {input}                                   - the customer's question
# The note about the "Tool's final answer: " prefix matches the prefix that
# ProdSearchTool._run puts in front of its JSON result.
template = """You are a chatbot of a Web store to answer customer questions. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Note that you should give a final answer by formating the json in the Observation if the Observation has a prefix "Tool's final answer: ".

Begin!

Question: {input}
{agent_scratchpad}"""

In [None]:
# Set up a prompt template
class CustomPromptTemplate(StringPromptTemplate):
    # The template to use
    template: str
    # The list of tools available
    tools: List[BaseTool]
    
    def format(self, **kwargs) -> str:
        # Get the intermediate steps (AgentAction, Observation tuples)
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f"\nObservation: {observation}\nThought: "
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
        return self.template.format(**kwargs)

In [None]:
# Only `input` and `intermediate_steps` are declared as input variables:
# `agent_scratchpad`, `tools` and `tool_names` are generated dynamically by
# the prompt class when it formats the template.
prompt = CustomPromptTemplate(input_variables=["input", "intermediate_steps"], tools=tools, template=template)
print(prompt)

In [None]:
class CustomOutputParser(AgentOutputParser):
    """Turn raw LLM text into either a tool invocation or the agent's final answer."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Parse one LLM completion.

        Returns an ``AgentFinish`` when the text contains "Final Answer:",
        otherwise an ``AgentAction`` built from the "Action:" and
        "Action Input:" lines.

        Raises:
            ValueError: if neither a final answer nor an action can be found.
        """
        final_marker = "Final Answer:"
        if final_marker in llm_output:
            # The answer is whatever follows the last occurrence of the marker.
            answer = llm_output.rpartition(final_marker)[2].strip()
            # Return values are a dictionary with a single `output` key.
            return AgentFinish(return_values={"output": answer}, log=llm_output)

        # Group 1 is the tool name, group 2 is everything after "Action Input:".
        action_pattern = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        found = re.search(action_pattern, llm_output, re.DOTALL)
        if found is None:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")

        tool_name, raw_input = found.group(1), found.group(2)
        return AgentAction(
            tool=tool_name.strip(),
            # Drop surrounding spaces, then any double quotes wrapping the input.
            tool_input=raw_input.strip(" ").strip('"'),
            log=llm_output,
        )

In [None]:
# Parser instance that the agent uses to interpret each LLM completion
output_parser = CustomOutputParser()

In [None]:
# Names the agent is allowed to emit on an "Action:" line
tool_names = [tool.name for tool in tools]

# Completion model, sampled at temperature 0
llm = OpenAI(temperature=0)
# LLM chain consisting of the LLM and a prompt
llm_chain = LLMChain(prompt=prompt, llm=llm)

agent = LLMSingleActionAgent(
    allowed_tools=tool_names,
    # Cut generation off at "\nObservation:" so the observation text comes
    # from the tool result rather than from the model.
    stop=["\nObservation:"],
    output_parser=output_parser,
    llm_chain=llm_chain,
)
agent_executor = AgentExecutor.from_agent_and_tools(tools=tools, agent=agent, verbose=True)

In [None]:
# Example request that is not a catalog search; presumably the agent routes it
# to the customer_service tool - confirm in the verbose trace.
agent_executor.run("How do I change my account password?")

In [None]:
# Example product question; presumably the agent answers it through the
# prod_search tool - confirm in the verbose trace.
agent_executor.run("Do you have any asus laptop?")