In [1]:
# Third-party packages this notebook relies on. They are left commented out so
# a normal run does not reinstall anything; uncomment and run once if needed.
# !pip install langchain
# !pip install openai
# !pip install python-dotenv
# !pip install faiss-cpu

In [2]:
import os
import warnings

# Silence every warning category for the remainder of the notebook.
warnings.simplefilter("ignore")

# The key lives in a local, untracked `secret_key.py` module.
from secret_key import openapi_key

# Export the key through the process environment as OPENAI_API_KEY.
os.environ["OPENAI_API_KEY"] = openapi_key

# Alternative (disabled): read the key from a .env file via python-dotenv.
# load_dotenv()
# API_KEY = os.environ.get("API_KEY")

## Loaders  
Before an LLM can work with your data, the documents must be loaded into a vector database. 
The first step is to read them into memory with a loader.

In [3]:
from langchain.document_loaders import DirectoryLoader, TextLoader

# Collect every file matching **/*.txt beneath ./FAQ and read each one with
# TextLoader; show_progress renders the progress bar seen in the cell output.
loader = DirectoryLoader(
    path="./FAQ",
    glob="**/*.txt",
    loader_cls=TextLoader,
    show_progress=True,
)

# Read all matched files into memory as a list of documents.
docs = loader.load()

100%|██████████| 3/3 [00:00<00:00, 2522.64it/s]


## Text splitter
Texts are not loaded into the database as a whole, but in smaller pieces, so-called "chunks". You can configure the chunk size and the overlap between consecutive chunks.

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured for a chunk size of 500 with an overlap of 100 between
# consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=500)

# Break the loaded documents into chunks.
documents = text_splitter.split_documents(docs)

# Show the first chunk as a quick sanity check of content and metadata.
print(documents[0])

page_content='Q: What are the hours of operation for your restaurant?\nA: Our restaurant is open from 11 a.m. to 10 p.m. from Monday to Saturday. On Sundays, we open at 12 p.m. and close at 9 p.m.\n\nQ: What type of cuisine does your restaurant serve?\nA: Our restaurant specializes in contemporary American cuisine with an emphasis on local and sustainable ingredients.' metadata={'source': 'FAQ\\General.txt'}


## Embeddings
Texts are not stored as text in the database, but as vector representations.
Embeddings are a type of word representation that represents the semantic meaning of words in a vector space.

In [8]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding client used to turn text chunks into vectors; the API key is
# handed over explicitly.
embeddings = OpenAIEmbeddings(
    openai_api_key=openapi_key,
)

  warn_deprecated(


## Loading Vectors into VectorDB (FAISS)
The vectors created by OpenAIEmbeddings can now be stored in the database. The database itself can be saved to disk as a .pkl file.

In [9]:
import pickle

from langchain.vectorstores.faiss import FAISS

# Build the FAISS vector store from the chunks using the embedding client.
vectorstore = FAISS.from_documents(documents=documents, embedding=embeddings)

# Persist the whole store to disk so it can be reloaded later.
with open("vectorstore.pkl", mode="wb") as pkl_file:
    pkl_file.write(pickle.dumps(vectorstore))

## Loading the database
Before using the database, it must of course be loaded again.

In [10]:
# Restore the vector store from the pickle file.
# NOTE: unpickling can execute arbitrary code -- only load files you created.
with open("vectorstore.pkl", mode="rb") as pkl_file:
    vectorstore = pickle.load(pkl_file)

## Prompts
With an LLM you can give the model an identity before a conversation starts, or define what questions and answers should look like.

In [11]:
from langchain.prompts import PromptTemplate

# Template text: a role statement, then the {context} placeholder, then the
# user's {question}, ending with an answer cue.
prompt_template = (
    "You are a helpful assistant for our restaurant.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Answer here:"
)

# Both placeholders are declared as the template's input variables.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

## Chains
Chain classes let you control how the LLM is used, for example by combining it with a retriever and a custom prompt.

In [13]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# LLM client, with the API key passed explicitly.
llm = OpenAI(openai_api_key=openapi_key)

# Hand the custom restaurant prompt to the underlying "stuff" chain.
chain_type_kwargs = dict(prompt=PROMPT)

# Retrieval QA chain: the vector store acts as the retriever for the LLM.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
)

# Ask a single, stand-alone question; the answer is displayed as cell output.
query = "When does the restaurant open?"
qa.run(query)

  warn_deprecated(
  warn_deprecated(


' Our restaurant opens at 11 a.m. from Monday to Saturday, and at 12 p.m. on Sundays.'

## Memory
In the example just shown, each request stands alone. A great strength of an LLM, however, is that it can take the entire chat history into account when responding. For this, however, a chat history must be built up from the different questions and answers. With different memory classes this is very easy in Langchain.

In [14]:
from langchain.memory import ConversationBufferMemory

# Conversation buffer: history is exposed under "chat_history" as message
# objects, and the chain output stored per turn is the "answer" key.
memory = ConversationBufferMemory(
    output_key="answer",
    return_messages=True,
    memory_key="chat_history",
)

## Use Memory in Chains
The memory class can now easily be used in a chain. You can see it working when a follow-up question refers to "it": the bot understands that "it" means the vegan food from the previous question.

In [18]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI

# gpt-3.5-turbo is a chat model, so it is driven through ChatOpenAI. Passing
# it to the completion-style `OpenAI` wrapper only works through a deprecated
# compatibility shim. The API key is passed explicitly, like in the other
# cells of this notebook.
chat_llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.7,
    openai_api_key=openapi_key,
)

# Conversational retrieval chain: `memory` carries the chat history between
# calls, the vector store serves as retriever, and PROMPT is the prompt used
# when the retrieved documents are combined into an answer.
qa = ConversationalRetrievalChain.from_llm(
    llm=chat_llm,
    memory=memory,
    retriever=vectorstore.as_retriever(),
    combine_docs_chain_kwargs={"prompt": PROMPT},
)

# First turn establishes the topic; the follow-up refers back to it with "it".
query = "Do you offer vegan food?"
qa({"question": query})
qa({"question": "How much does it cost?"})

Retrying langchain_community.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo in organization org-GBzolOmzUHWBjRNhBNkzVVS1 on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing..


{'question': 'How much does it cost?',
 'chat_history': [HumanMessage(content='Do you offer vegan food?'),
  AIMessage(content='Yes, we offer a variety of vegan options on our menu. Our staff can help guide you to the vegan dishes we have available.'),
  HumanMessage(content='How much does it cost?'),
  AIMessage(content="I'm sorry, but I would need more specific information about the vegan dishes on our menu to provide pricing. Please refer to our menu or ask our staff for more details on the pricing of our vegan options.")],
 'answer': "I'm sorry, but I would need more specific information about the vegan dishes on our menu to provide pricing. Please refer to our menu or ask our staff for more details on the pricing of our vegan options."}