## Load documents

In [1]:
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import PyPDFLoader

# Gather every PDF below ./docs (the glob pattern is recursive) and parse
# each file with PyPDFLoader, showing a progress bar while loading.
loader = DirectoryLoader(
    './docs',
    glob="**/*.pdf",
    show_progress=True,
    loader_cls=PyPDFLoader,
)
docs = loader.load()

# Sanity check: how many documents came back and what the first one looks like.
docs_length = len(docs)
print(docs_length)
print(docs[0])

100%|██████████| 1/1 [00:00<00:00,  1.30it/s]

13
page_content=' \n \nTHIS TOOL AND HUNDREDS MORE AVAILABLE IN THE HR TOOLBOX AT http://hrinsider.ca/.  \nTemplates and tools from HR Insider are provided for members of our service. Members may use this document as is or as a \nstarting point to customize their own documents. HR Insider assumes no responsibility for the effectiveness or legality of an y of \nits online templates or tools. Always consult your legal c ounsel and management before implementing any new policies or \nprocedures.  CAR ALLOWANCE  POLICY  \nPolicy Title  Car Allowance Policy  \nPolicy Owner  Human Resources  \nPolicy \nApprover(s)  Vice President of Human Resources  \nRelated Policies  Name other related enterprise policies both within or external to \nthis policy.  \nRelated \nProcedures  Name other related enterprise procedures both within or external \nto this policy.  \nStorage \nLocation  Describe physical or digital location of copies of this policy.  \nEffective Da te List the date that this policy we




## Chunking

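Splitting the documents into chunks ensures that each piece of content we embed carries as little noise as possible while still being semantically meaningful. Overlapping consecutive chunks preserves context that would otherwise be lost at the chunk boundaries.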

In [2]:
from langchain.text_splitter import CharacterTextSplitter

# Splitter settings: break on newlines, target chunk size 1000 with an
# overlap of 100 between neighbouring chunks.
splitter_options = {
    "separator": "\n",
    "chunk_size": 1000,
    "chunk_overlap": 100,
}
text_splitter = CharacterTextSplitter(**splitter_options)

# Chunk the documents loaded into `docs` by the previous cell.
document_chunks = text_splitter.split_documents(docs)

# Report the before/after counts and show the first chunk.
print(f"Number documents {len(docs)}")
print(f"Number chunks {len(document_chunks)}")
print(document_chunks[0])


Number documents 13
Number chunks 34
page_content='THIS TOOL AND HUNDREDS MORE AVAILABLE IN THE HR TOOLBOX AT http://hrinsider.ca/.  \nTemplates and tools from HR Insider are provided for members of our service. Members may use this document as is or as a \nstarting point to customize their own documents. HR Insider assumes no responsibility for the effectiveness or legality of an y of \nits online templates or tools. Always consult your legal c ounsel and management before implementing any new policies or \nprocedures.  CAR ALLOWANCE  POLICY  \nPolicy Title  Car Allowance Policy  \nPolicy Owner  Human Resources  \nPolicy \nApprover(s)  Vice President of Human Resources  \nRelated Policies  Name other related enterprise policies both within or external to \nthis policy.  \nRelated \nProcedures  Name other related enterprise procedures both within or external \nto this policy.  \nStorage \nLocation  Describe physical or digital location of copies of this policy.  \nEffective Da te List 

## Creating Embeddings
Embeddings transform your text into a vector (a series of numbers that holds the semantic 'meaning' of your text). Vectors are often used to compare two pieces of text: the closer the vectors, the more similar the meaning.

A vectorstore stores these embeddings and provides fast ways to look up relevant documents by their embeddings.

 Example

|Embedding|Metadata|
|---|---|
|[-0.00015641732898075134, -0.003165106289088726, ...] |{'date' : '1/2/23'}|
|[-0.00035465431654651654, 1.4654131651654516546, ...] |{'date' : '1/3/23'}|


* Each input must not exceed the max input tokens for the model (Example: 8191 tokens for text-embedding-ada-002)

* As a rule of thumb, 1 token ≈ 4 characters

In [18]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.embeddings import HuggingFaceEmbeddings
import os
# Never hardcode the OpenAI key in the notebook: use the value already present
# in the environment and only prompt (without echo) when it is missing, so a
# correctly configured environment is not overwritten.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Build a FAISS index from the chunks using OpenAI embeddings and persist it
# to ./faiss_index so a later cell can reload it.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(document_chunks, embeddings)
db.save_local("./faiss_index")


## Loading the index

In [2]:
import os
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# Never hardcode the OpenAI key in the notebook: use the value already present
# in the environment and only prompt (without echo) when it is missing.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

embeddings = OpenAIEmbeddings()
# NOTE(review): per the LangChain docs the saved FAISS index includes a pickle
# file — only load an index directory that you created yourself.
new_db = FAISS.load_local("./faiss_index", embeddings)

query = "what happens when an essential car user leaves his/her job role?"
# Use a dedicated name for the search hits so the PDF pages stored in `docs`
# by the loading cell are not overwritten (re-running the chunking cell would
# otherwise chunk these search results instead of the source documents).
similar_docs = new_db.similarity_search(query)

# Last expression of the cell: display the text of the best match.
similar_docs[0].page_content

'Failure by the employee to notify the [organization]  of any motoring offences or \ndisqualification from driving may result in further action being taken against the \nemployee.  \nCHANGE OF JOB ROLE  \nWhen an essential car use r leaves his/her job role, the requirement for that role to be \ndesignated for an essential car user will be reviewed.  \nEmployees who are essential car users who change job and whose new role requires \nan essential car user will be eligible for a [organizatio n] car or monthly car allowance.  \nEmployees who are essential car users and who are promoted to a job which still \nrequires an essential car user and which carries a higher allowance, will continue with'

## Using Conversational Retrieval chain

Chains allow us to combine multiple components into a single, coherent application. For example, we can create a chain that takes user input, formats it with a PromptTemplate, and then returns the response from the LLM.

In [11]:
from langchain.chains import ConversationalRetrievalChain
from langchain import OpenAI, PromptTemplate



# Conversation memory shared by llm_answer / populateHistory:
# a list of (question, answer) tuples.
chat_history = []

llm = OpenAI(temperature=0)

# Template with two placeholders: the running chat history and the new question.
prompt_template = """Answer the question from information in index only. Dont make up if you don't know the answer, just say I don't know. Also consider the history of chat and make the user query more meaning full, give sources where possible and construct a detailed response wherever possible, don't just give a one liner.

{chat_history}
Question: {question}
"""
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["chat_history", "question"],
)

def llm_answer(query):
    """Send ``query`` plus the accumulated ``chat_history`` through the ``qa`` chain.

    The question and the answer found under the result's ``"answer"`` key are
    recorded via ``populateHistory`` before the result is handed back.

    Args:
        query: The user's question.

    Returns:
        The object returned by ``qa`` for this question, unchanged.
    """
    chain_input = {"question": query, "chat_history": chat_history}
    result = qa(chain_input)
    populateHistory(result["answer"], query)
    return result


def populateHistory(answer, question, max_turns=10):
    """Record one finished turn in the shared ``chat_history`` and cap its length.

    Args:
        answer: Answer text produced for this turn.
        question: Question the user asked.
        max_turns: Maximum number of ``(question, answer)`` tuples to keep;
            the oldest turns are dropped first. Defaults to 10.
    """
    chat_history.append((question, answer))
    # Every turn is stored as a single tuple, so remove exactly the surplus
    # entries (the previous code dropped two turns per overflow). The list is
    # trimmed in place so it remains the same module-level object.
    overflow = len(chat_history) - max_turns
    if overflow > 0:
        del chat_history[:overflow]

# NOTE(review): in LangChain, `condense_question_prompt` is the prompt used to
# rewrite a follow-up question plus chat history into a standalone question
# before retrieval. The template bound to `prompt` reads like answering
# instructions — confirm this is intended; answering instructions normally
# belong to the combine-documents prompt of the chain.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    new_db.as_retriever(),
    condense_question_prompt=prompt,
    return_source_documents=True,
)


def ask_and_print(query):
    """Ask one question via ``llm_answer`` and print the answer with its top source.

    Args:
        query: The question to ask.

    Returns:
        The result returned by ``llm_answer`` for this question.
    """
    print("Query: " + query)
    q_answer = llm_answer(query)
    print("ANSWER: " + str(q_answer["answer"]))

    sources = q_answer["source_documents"]
    if not sources:
        # Nothing was retrieved: report it instead of failing on sources[0].
        print("(no source documents returned)")
        return q_answer

    top_metadata = sources[0].metadata
    # Strip the loader's directory prefix for both Windows and POSIX separators.
    file_name = top_metadata["source"].replace("docs\\", "").replace("docs/", "")
    print(file_name + " - Page: " + str(top_metadata["page"]))
    return q_answer


queries = [
    "Breiefly summarize the car allowance policy?",
    "What are offences that are not covered in it and employees are responsible for those offences?",
    "What is the policy of employee who is absent from work?",
]

# Questions are asked in order so each one sees the history of the previous
# ones; a separator line is printed between consecutive questions. `query` and
# `q_answer` are left bound to the last question and its result.
for position, query in enumerate(queries):
    if position > 0:
        print("================================================================================================================================")
    q_answer = ask_and_print(query)




Query: Breiefly summarize the car allowance policy?
ANSWER:  The car allowance policy provides two options for essential car users: a fully maintained company car supplied on contract hire or a monthly car allowance paid into salary. The monthly car allowance is equivalent in value to the provision of a company car with an additional payment of [x] towards the cost of the employee's car insurance premium. The allowance is non-superannuable and is not subject to any annual pay award.
car-allowance-policy-hr-insider.pdf - Page: 0
Query: What are offences that are not covered in it and employees are responsible for those offences?
ANSWER: 
Yes, the car allowance policy does not cover offences that employees may commit while driving. Employees are responsible for any fines or penalties incurred by breaking the law and the company will not reimburse any costs incurred. Additionally, the company reserves the right to take disciplinary action against an employee if they are convicted of a dri

In [13]:
from langchain.chains import ConversationalRetrievalChain
from langchain import OpenAI, PromptTemplate



# Conversation memory shared by llm_answer / populateHistory:
# a list of (question, answer) tuples.
chat_history = []

llm = OpenAI(temperature=0)

# Template with two placeholders: the running chat history and the new question.
prompt_template = """Answer the question from information in index only. Dont make up if you don't know the answer, just say I don't know. Also consider the history of chat and make the user query more meaning full, give sources where possible and construct a detailed response wherever possible, don't just give a one liner.

{chat_history}
Question: {question}
"""
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["chat_history", "question"],
)

def llm_answer(query):
    """Send ``query`` plus the accumulated ``chat_history`` through the ``qa`` chain.

    The question and the answer found under the result's ``"answer"`` key are
    recorded via ``populateHistory`` before the result is handed back.

    Args:
        query: The user's question.

    Returns:
        The object returned by ``qa`` for this question, unchanged.
    """
    chain_input = {"question": query, "chat_history": chat_history}
    result = qa(chain_input)
    populateHistory(result["answer"], query)
    return result


def populateHistory(answer, question, max_turns=10):
    """Record one finished turn in the shared ``chat_history`` and cap its length.

    Args:
        answer: Answer text produced for this turn.
        question: Question the user asked.
        max_turns: Maximum number of ``(question, answer)`` tuples to keep;
            the oldest turns are dropped first. Defaults to 10.
    """
    chat_history.append((question, answer))
    # Every turn is stored as a single tuple, so remove exactly the surplus
    # entries (the previous code dropped two turns per overflow). The list is
    # trimmed in place so it remains the same module-level object.
    overflow = len(chat_history) - max_turns
    if overflow > 0:
        del chat_history[:overflow]

# NOTE(review): in LangChain, `condense_question_prompt` is the prompt used to
# rewrite a follow-up question plus chat history into a standalone question
# before retrieval. The template bound to `prompt` reads like answering
# instructions — confirm this is intended; answering instructions normally
# belong to the combine-documents prompt of the chain.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    new_db.as_retriever(),
    condense_question_prompt=prompt,
    return_source_documents=True,
)


def ask_and_print(query):
    """Ask one question via ``llm_answer`` and print the answer with its top source.

    Args:
        query: The question to ask.

    Returns:
        The result returned by ``llm_answer`` for this question.
    """
    print("Query: " + query)
    q_answer = llm_answer(query)
    print("ANSWER: " + str(q_answer["answer"]))

    sources = q_answer["source_documents"]
    if not sources:
        # Nothing was retrieved: report it instead of failing on sources[0].
        print("(no source documents returned)")
        return q_answer

    top_metadata = sources[0].metadata
    # Strip the loader's directory prefix for both Windows and POSIX separators.
    file_name = top_metadata["source"].replace("docs\\", "").replace("docs/", "")
    print(file_name + " - Page: " + str(top_metadata["page"]))
    return q_answer


queries = [
    "Breiefly summarize the car allowance policy?",
    "What are offences that are not covered in it and employees are responsible for those offences?",
    "What is the policy of employee who is absent from work?",
]

# Questions are asked in order so each one sees the history of the previous
# ones; a separator line is printed between consecutive questions. `query` and
# `q_answer` are left bound to the last question and its result.
for position, query in enumerate(queries):
    if position > 0:
        print("================================================================================================================================")
    q_answer = ask_and_print(query)




Query: Breiefly summarize the car allowance policy?
ANSWER:  The car allowance policy provides two options for essential car users: a fully maintained company car supplied on contract hire or a monthly car allowance paid into salary. The monthly car allowance is equivalent in value to the provision of a company car, plus an additional payment of [x] as a contribution towards the cost of the employee's car insurance premium. The allowance is non-superannuable and not subject to any annual pay award. It will be subject to review at intervals and amended accordingly.
car-allowance-policy-hr-insider.pdf - Page: 0
Query: What are offences that are not covered in it and employees are responsible for those offences?
ANSWER:  Yes, offences that are not covered in the car allowance policy and for which employees are responsible include driving without a valid license, driving without insurance, driving under the influence of drugs or alcohol, and any other offence that is considered to be a cri