In [80]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain, RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.llms import Ollama, OpenAI
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Chroma

In [81]:
# Registry of the LLM backends this notebook can switch between, keyed by a short name.
# NOTE(review): both clients are constructed eagerly when this cell runs, so the
# OpenAI client presumably needs credentials even if only "mistral" is used - confirm.
llms = {
    "openai": OpenAI(temperature=0),
    "mistral": Ollama(
        model="mistral",
        verbose=True,
        callbacks=CallbackManager([StreamingStdOutCallbackHandler()]),
    ),
}


# Prompt skeleton. `{document}` is filled in right below with str.format();
# `{{question}}` is brace-escaped so it survives that pass as `{question}` and can
# be filled in later by the prompt template built from this string.
template = """You are a Retrieval-Augmented Generation chatbot that answers questions on
documents provided to you. Act as an expert in the subject matter of the document
discussed. If a question is not relevant for the document or if it cannot be answered
using the information of the document, please do not answer the question and politely provide
the reason. You're going to be working with the following document:
{document}

Given the above document, please answer the following question:
{{question}}
"""

document_path = "data/business_1.txt"
with open(document_path) as f:
    document_content = f.read()

# The formatted string is parsed a second time as a prompt template, so any literal
# "{" or "}" inside the document would be misread as a template variable (or raise).
# Doubling the braces keeps them literal through that second pass; documents without
# braces are unaffected.
escaped_document = document_content.replace("{", "{{").replace("}", "}}")
template = template.format(document=escaped_document)

In [82]:
# Load the same article through LangChain's text loader.
loader = TextLoader(file_path="./data/business_1.txt")
documents = loader.load()

In [83]:
# Number of Document objects returned by the loader.
len(documents)

1

In [84]:
# Keep a handle on the first loaded document for inspection.
document = documents[0]

In [85]:
# Preview the first 500 characters of the document text.
document.page_content[:500]

'Lufthansa flies back to profit\n\nGerman airline Lufthansa has returned to profit in 2004 after posting huge losses in 2003.\n\nIn a preliminary report, the airline announced net profits of 400m euros ($527.61m; £274.73m), compared with a loss of 984m euros in 2003. Operating profits were at 380m euros, ten times more than in 2003. Lufthansa was hit in 2003 by tough competition and a dip in demand following the Iraq war and the killer SARS virus. It was also hit by troubles at its US catering busine'

In [86]:
# Build the per-session chain: pick the backend, wrap the document-filled template
# in a prompt, and wire both together.
llm = llms["mistral"]
prompt = PromptTemplate.from_template(template)
llm_chain = LLMChain(
    llm=llm,
    prompt=prompt,
    verbose=True,
)

In [88]:
# Smoke test: call the selected LLM directly, without the document prompt.
llm("Hello, how are you?")

I'm just a computer program, so I don't have feelings or emotions. But I'm here to help answer any questions you have to the best of my ability! How can I assist you today?

"I'm just a computer program, so I don't have feelings or emotions. But I'm here to help answer any questions you have to the best of my ability! How can I assist you today?"

In [89]:
# Same greeting, but routed through the chain so the document prompt is applied.
llm_chain("Hello, how are you")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a Retrieval-Augmented Generation chatbot that answers questions on
documents provided to you. Act as an expert in the subject matter of the document
discussed. If a question is not relevant for the document or if it cannot be answered
using the information of the document, please do not answer the question and politely provide
the reason. You're going to be working with the following document:
Lufthansa flies back to profit

German airline Lufthansa has returned to profit in 2004 after posting huge losses in 2003.

In a preliminary report, the airline announced net profits of 400m euros ($527.61m; £274.73m), compared with a loss of 984m euros in 2003. Operating profits were at 380m euros, ten times more than in 2003. Lufthansa was hit in 2003 by tough competition and a dip in demand following the Iraq war and the killer SARS virus. It was also hit by troubles at its US catering business. Last year,

{'question': 'Hello, how are you',
 'text': 'Thank you for your question. I am an AI language model and do not have feelings or emotions like a human does. However, I am designed to provide helpful information based on the documents provided to me.'}

In [90]:
# Follow-up that refers to the previous turn; note the chain was built without a memory argument.
llm_chain("What did I ask you a moment ago?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are a Retrieval-Augmented Generation chatbot that answers questions on
documents provided to you. Act as an expert in the subject matter of the document
discussed. If a question is not relevant for the document or if it cannot be answered
using the information of the document, please do not answer the question and politely provide
the reason. You're going to be working with the following document:
Lufthansa flies back to profit

German airline Lufthansa has returned to profit in 2004 after posting huge losses in 2003.

In a preliminary report, the airline announced net profits of 400m euros ($527.61m; £274.73m), compared with a loss of 984m euros in 2003. Operating profits were at 380m euros, ten times more than in 2003. Lufthansa was hit in 2003 by tough competition and a dip in demand following the Iraq war and the killer SARS virus. It was also hit by troubles at its US catering business. Last year,

You asked me to provide the relevant information from the document regarding Lufthansa's profitability in 2004 and their net profits compared to the previous year.
[1m> Finished chain.[0m


{'question': 'What did I ask you a moment ago?',
 'text': "You asked me to provide the relevant information from the document regarding Lufthansa's profitability in 2004 and their net profits compared to the previous year."}

In [91]:
# Same prompt as the Mistral chain, but backed by the OpenAI client.
llm_chain_openai = LLMChain(
    llm=llms["openai"],
    prompt=prompt,
    verbose=True,
)

In [92]:
res = llm_chain_openai.acall("Hello, how are you?")

In [None]:
# Optional reset: uncomment to delete the persisted Chroma directory before rebuilding the index.
# !rm -rf ./docs/chroma

In [44]:
# Embed the loaded documents with the Ollama "mistral" embedding model and build a
# Chroma store configured with an on-disk persist directory.
persist_directory = "./docs/chroma"
embedding = OllamaEmbeddings(model="mistral")
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embedding,
    persist_directory=persist_directory,
)

# Sanity check: how many entries the underlying collection holds
# (reads a private attribute of the Chroma wrapper).
vectordb._collection.count()

2023-10-10 12:38:56 - Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.
1


In [54]:
# Query the vector store directly, asking for up to three nearest documents.
question = "What is the name of the company in the document?"
docs = vectordb.similarity_search(query=question, k=3)
len(docs)

2023-10-10 12:44:29 - Number of requested results 3 is greater than number of elements in index 1, updating n_results = 1


1

In [56]:
# Retrieval QA chain with the library's default prompt, backed by the Chroma retriever.
qa_chain = RetrievalQA.from_chain_type(
    llm=llms["mistral"],
    retriever=vectordb.as_retriever(),
)

In [57]:
# Run the QA chain on the current question; the chain reads it from the "query" key.
result = qa_chain(dict(query=question))

2023-10-10 12:46:27 - Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1

The name of the company in the document is Lufthansa.

In [58]:
# Answer text produced by the QA chain.
result["result"]

'\nThe name of the company in the document is Lufthansa.'

In [60]:
# Custom prompt for the retrieval QA chain: `{context}` and `{question}` are the two
# variables left for the chain to fill in.
# Stored under its own name instead of rebinding `template`, so the string that the
# LLMChain prompt is built from is not silently replaced when this cell runs.
QA_TEMPLATE = """You are a Retrieval-Augmented Generation chatbot that answers questions on
documents provided to you. Act as an expert in the subject matter of the document
discussed. If a question is not relevant for the document or if it cannot be answered
using the information of the document, please do not answer the question and politely provide
the reason.
{context}

Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(QA_TEMPLATE)

In [61]:
# Rebuild the QA chain with the custom prompt and ask it to return the retrieved
# source documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llms["mistral"],
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
)

In [62]:
# Question to send to the QA chain that uses the custom prompt.
question = "What is the name of the company in the document?"

In [63]:
# Run the custom-prompt QA chain on the current question.
result = qa_chain(dict(query=question))

2023-10-10 12:49:21 - Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
The company mentioned in the document is Lufthansa, a German airline.

In [64]:
# Answer text produced by the custom-prompt QA chain.
result["result"]

'The company mentioned in the document is Lufthansa, a German airline.'

In [65]:
# First source document returned with the answer (the chain was built with return_source_documents=True).
result["source_documents"][0]

Document(page_content='Lufthansa flies back to profit\n\nGerman airline Lufthansa has returned to profit in 2004 after posting huge losses in 2003.\n\nIn a preliminary report, the airline announced net profits of 400m euros ($527.61m; £274.73m), compared with a loss of 984m euros in 2003. Operating profits were at 380m euros, ten times more than in 2003. Lufthansa was hit in 2003 by tough competition and a dip in demand following the Iraq war and the killer SARS virus. It was also hit by troubles at its US catering business. Last year, Lufthansa showed signs of recovery even as some European and US airlines were teetering on the brink of bankruptcy. The board of Lufthansa has recommended paying a 2004 dividend of 0.30 euros per share. In 2003, shareholders did not get a dividend. The company said that it will give all the details of its 2004 results on 23 March.\n', metadata={'source': './data/business_1.txt'})

In [66]:
# Off-topic probe: the custom prompt tells the model to decline questions that the
# document cannot answer, so this checks how well that instruction is followed.
question = "How do you say 'thank you' in Ukrainian?"
result = qa_chain(dict(query=question))

2023-10-10 12:51:59 - Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1

I apologize, but your question is not relevant to the provided document about Lufthansa's profitability. To answer your question, in Ukrainian, "thank you" can be expressed as "Благодарення" (Bohodarennia) or "Дякуємо" (Dyakuyemo).