In [1]:
import os
from dotenv import load_dotenv

In [2]:
load_dotenv()

True

In [3]:
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
OPENAI_API_KEY=os.getenv('OPENAI_API_KEY')

In [4]:
# loader
# use the data from documents to create vector store
from langchain.document_loaders import DirectoryLoader, TextLoader

loader = DirectoryLoader('./data', glob="**/*.txt", loader_cls=TextLoader, show_progress=True)

data = loader.load()

100%|██████████| 9/9 [00:00<00:00, 8996.36it/s]


In [5]:
# chunking the data to store in the database
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100
)

documents = text_splitter.split_documents(data)

In [8]:
# embeddings
# from langchain.embeddings import HuggingFaceInstructEmbeddings
# embeddings = HuggingFaceInstructEmbeddings(model_name='hkunlp/instructor-large')

.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

2_Dense/config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/3.15M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/66.3k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.41k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

load INSTRUCTOR_Transformer
max_seq_length  512


In [6]:
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [12]:
# loading vectors into VectorDB
from langchain.vectorstores.faiss import FAISS

vectorstore = FAISS.from_documents(documents, embeddings)
vectorstore.save_local("vectorstore")

In [13]:
# load the vectorstore
vectorstore = FAISS.load_local("vectorstore", embeddings)

In [20]:
# Prompt to answer question
from langchain.prompts import PromptTemplate

prompt_template = """You are a helpful assistant for our restaurant that answers the queries of the customer

{context}
Question: {question}
Answer here: """

PROMPT = PromptTemplate(
    template = prompt_template, input_variables=["context","question"]
)

In [32]:
# memory to store chat history
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, output_key="answer")

In [33]:
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain

chain_type = {"prompt": PROMPT}
llm = OpenAI(openai_api_key=OPENAI_API_KEY)

qa_chain = ConversationalRetrievalChain.from_llm(
    llm,
    memory=memory,
    retriever=vectorstore.as_retriever(),
    combine_docs_chain_kwargs=chain_type
)

In [44]:
query="How do I complain about the staff?"


In [45]:
qa_chain({"question": query})

{'question': 'How do I complain about the staff?',
 'chat_history': [HumanMessage(content='Can you give me information about the restaurant?'),
  AIMessage(content='\nSure! Our restaurant is called Gourmet Bistro and we are located at 123 Main Street in Downtown City. We offer a cozy and inviting ambiance, with warm lighting, comfortable seating, and tasteful artwork. Our menu includes a variety of starters, main courses, desserts, and beverages. We also have daily specials and a happy hour from 4 PM to 6 PM. We offer both takeout and delivery services, and we have a parking lot available for customers. We also have an outdoor seating area, and service animals are always welcome. We prioritize providing a great dining experience for our customers and we look forward to serving you.'),
  HumanMessage(content='Do you have parking or not?'),
  AIMessage(content="\nYes, we have a parking lot available for customers. Additionally, there's street parking nearby."),
  HumanMessage(content='An