## Load documents

In [3]:
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import PyPDFLoader

# Walk ./docs recursively and load every PDF found there with PyPDFLoader.
# show_progress renders a progress bar while the files are read.
loader = DirectoryLoader(
    './docs',
    glob="**/*.pdf",
    loader_cls=PyPDFLoader,
    show_progress=True,
)
docs = loader.load()

# Quick sanity check: how many documents came back, and what the first one looks like.
docs_length = len(docs)
print(docs_length)
print(docs[0])

100%|██████████| 1/1 [00:00<00:00,  2.00it/s]

13
page_content=' \n \nTHIS TOOL AND HUNDREDS MORE AVAILABLE IN THE HR TOOLBOX AT http://hrinsider.ca/.  \nTemplates and tools from HR Insider are provided for members of our service. Members may use this document as is or as a \nstarting point to customize their own documents. HR Insider assumes no responsibility for the effectiveness or legality of an y of \nits online templates or tools. Always consult your legal c ounsel and management before implementing any new policies or \nprocedures.  CAR ALLOWANCE  POLICY  \nPolicy Title  Car Allowance Policy  \nPolicy Owner  Human Resources  \nPolicy \nApprover(s)  Vice President of Human Resources  \nRelated Policies  Name other related enterprise policies both within or external to \nthis policy.  \nRelated \nProcedures  Name other related enterprise procedures both within or external \nto this policy.  \nStorage \nLocation  Describe physical or digital location of copies of this policy.  \nEffective Da te List the date that this policy we




## Chunking

In [5]:
from langchain.text_splitter import CharacterTextSplitter

# Split on newlines into chunks of at most ~1000 characters, with 100 characters
# of overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100)
document_chunks = text_splitter.split_documents(docs)

# Report how many chunks the loaded documents were split into and show the first one.
print(f"Number documents {len(docs)}")
print(f"Number chunks {len(document_chunks)}")
print(document_chunks[0])


Number documents 13
Number chunks 34
page_content='THIS TOOL AND HUNDREDS MORE AVAILABLE IN THE HR TOOLBOX AT http://hrinsider.ca/.  \nTemplates and tools from HR Insider are provided for members of our service. Members may use this document as is or as a \nstarting point to customize their own documents. HR Insider assumes no responsibility for the effectiveness or legality of an y of \nits online templates or tools. Always consult your legal c ounsel and management before implementing any new policies or \nprocedures.  CAR ALLOWANCE  POLICY  \nPolicy Title  Car Allowance Policy  \nPolicy Owner  Human Resources  \nPolicy \nApprover(s)  Vice President of Human Resources  \nRelated Policies  Name other related enterprise policies both within or external to \nthis policy.  \nRelated \nProcedures  Name other related enterprise procedures both within or external \nto this policy.  \nStorage \nLocation  Describe physical or digital location of copies of this policy.  \nEffective Da te List 

## Embedding

In [20]:
import time
from langchain.embeddings import VertexAIEmbeddings
import pandas as pd
import json

print('start')
def handle_quota_errors(func, *args, retry_delay=5, backoff_factor=2, max_retries=3, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying with exponential backoff on failure.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        retry_delay: Base delay in seconds used to compute each wait.
        backoff_factor: Multiplier applied per retry; the n-th wait is
            ``retry_delay * backoff_factor ** n`` (n starts at 1).
        max_retries: How many times to retry after the first failed call.
            Negative values are treated as 0.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns, or ``None`` if every attempt raised.
        Callers are expected to check for ``None``.
    """
    total_attempts = max(max_retries, 0) + 1

    for attempt in range(1, total_attempts + 1):
        try:
            return func(*args, **kwargs)
        # Deliberately broad: the concrete quota exception type depends on the
        # client library behind ``func`` and is not known here.
        except Exception as e:
            print(f"error: {e}")
            if attempt == total_attempts:
                break
            wait = retry_delay * (backoff_factor ** attempt)
            # Announce the wait before sleeping so the reader knows why the cell is idle.
            print(f"wait for {wait} seconds")
            time.sleep(wait)

    return None


import os

print('Creating embeddings')
embeddings = VertexAIEmbeddings()
text = [doc.page_content for doc in document_chunks]
print(len(text))

# One JSON line per chunk: {"id": "<n>.txt", "embedding": [...]}.
index_embeddings = []

# The per-chunk text files are written into ./documents; create the folder up
# front so a fresh checkout does not fail with FileNotFoundError on the first write.
os.makedirs("./documents", exist_ok=True)

print('Iterating...')
for index, doc in enumerate(text, start=1):
    print(doc)
    print(f"Get embedding and write document for document {index} of {len(text)}")
    embedding = handle_quota_errors(embeddings.embed_query, doc)

    # handle_quota_errors returns None when the call failed; say so instead of
    # silently dropping the chunk from both the embeddings file and ./documents.
    if embedding is None:
        print(f"Skipping document {index}: no embedding returned")
        continue

    doc_id = f"{index}.txt"
    embedding_dict = {
        "id": doc_id,
        "embedding": [str(value) for value in embedding],
    }
    index_embeddings.append(json.dumps(embedding_dict) + "\n")

    # Explicit UTF-8 so the extracted PDF text is written the same way on every platform.
    with open(f"./documents/{doc_id}", "w", encoding="utf-8") as document:
        document.write(doc)


with open("embeddings.json", "w") as f:
    f.writelines(index_embeddings)

start
Creating embeddings
34
Iterating...
THIS TOOL AND HUNDREDS MORE AVAILABLE IN THE HR TOOLBOX AT http://hrinsider.ca/.  
Templates and tools from HR Insider are provided for members of our service. Members may use this document as is or as a 
starting point to customize their own documents. HR Insider assumes no responsibility for the effectiveness or legality of an y of 
its online templates or tools. Always consult your legal c ounsel and management before implementing any new policies or 
procedures.  CAR ALLOWANCE  POLICY  
Policy Title  Car Allowance Policy  
Policy Owner  Human Resources  
Policy 
Approver(s)  Vice President of Human Resources  
Related Policies  Name other related enterprise policies both within or external to 
this policy.  
Related 
Procedures  Name other related enterprise procedures both within or external 
to this policy.  
Storage 
Location  Describe physical or digital location of copies of this policy.  
Effective Da te List the date that this policy

## Creating Index

In [21]:
from langchain.vectorstores import FAISS

embeddings_vtx = VertexAIEmbeddings()
# Build a FAISS vector store from the chunks, embedding them with Vertex AI.
db = FAISS.from_documents(document_chunks, embeddings_vtx)

query = "What is the entitlement to drive?"
# Keep the hits under their own name: `docs` holds the loaded PDF pages, and
# overwriting it would make a re-run of the chunking cell split search hits
# instead of the source documents.
search_results = db.similarity_search(query)

# Show the text of the best-matching chunk.
print(search_results[0].page_content)

from pay, and for any road traffic offences where an employee is s ubsequently 
charged and convicted.     
If an essential car user receives fixed penalty points or endorsements to his/her 
license, the employee must present the amended license to the [organization]  
immediately and a copy will be held on the employee’s pers onal file.  
In the event of an employee being disqualified from driving, a meeting will take place 
between the employee, their line manager/Head of Department and Human 
Resources in order to discuss the potential impact of this on the employee’s ability to 
carry out their job role.  In the case of disqualification, Heads of Department/line 
managers may discuss whether the allowance or [organization]  car may be 
withdrawn or continued.  The longer the disqualification the more likely it is that the 
allowance o r car may be withdrawn immediately.  
Failure by the employee to notify the [organization]  of any motoring offences or


## Saving the index

In [26]:
# Persist the index to disk, then load it back to check the round trip works.
db.save_local("./index/faiss_index")
embeddings_vtx_load = VertexAIEmbeddings()
# NOTE(review): LangChain's FAISS.load_local unpickles the stored docstore, so
# only point it at index folders you created yourself.
new_db = FAISS.load_local("./index/faiss_index", embeddings_vtx_load)

query = "what happens when an essential car user leaves his/her job role?"
# Use a dedicated name so the loaded PDF pages in `docs` are not overwritten
# by search hits (the chunking cell reads `docs`).
reloaded_results = new_db.similarity_search(query)

reloaded_results[0].page_content

'Failure by the employee to notify the [organization]  of any motoring offences or \ndisqualification from driving may result in further action being taken against the \nemployee.  \nCHANGE OF JOB ROLE  \nWhen an essential car use r leaves his/her job role, the requirement for that role to be \ndesignated for an essential car user will be reviewed.  \nEmployees who are essential car users who change job and whose new role requires \nan essential car user will be eligible for a [organizatio n] car or monthly car allowance.  \nEmployees who are essential car users and who are promoted to a job which still \nrequires an essential car user and which carries a higher allowance, will continue with'

## Using Conversational Retrieval chain

In [64]:
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import VertexAI
from langchain import PromptTemplate


# Vertex AI text model with the library's default settings.
llm = VertexAI()

# Instruction template with three placeholders: the retrieved context, the
# running chat history, and the user's question.
prompt_template = """Use the following pieces of context to answer the question at the end. Dont make up if you don't know the answer, just say I don't know. Also consider the history of chat and make the user query more meaning full, give sources where possible and construct a detailed response wherever possible, don't just give a one liner.

{context}
{chat_history}
Question: {question}
"""

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["chat_history", "context", "question"],
)

def llm_answer(query):
    """Send ``query`` plus the module-level ``chat_history`` to the ``qa`` chain.

    Returns the chain's result object unchanged.
    """
    chain_inputs = {"question": query, "chat_history": chat_history}
    return qa(chain_inputs)


def populateHistory(answer, question):
    """Record one (question, answer) turn in the module-level ``chat_history``.

    Once the history holds more than 10 turns, the two oldest turns are
    dropped. The resulting history is printed.
    """
    turn = (question, answer)
    chat_history.append(turn)

    history_too_long = len(chat_history) > 10
    if history_too_long:
        # Drop the two oldest entries from the front.
        for _ in range(2):
            chat_history.pop(0)

    print(chat_history)

# Conversation turns as (question, answer) tuples. llm_answer reads this list
# and populateHistory appends to it; it is initialised here so the cell runs on
# a fresh kernel instead of raising NameError.
chat_history = []

# The custom prompt contains {context}, so it has to drive the answer-generation
# (combine-documents) step. condense_question_prompt only rewrites a follow-up
# question and is given just chat_history and question, so a prompt with
# {context} there is never used for answering and fails with a missing-input
# error as soon as the history is non-empty.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    db.as_retriever(),
    combine_docs_chain_kwargs={"prompt": prompt},
    return_source_documents=True,
)


def _print_answer(result):
    """Print the chain's answer, then the file and page of its first source document."""
    print(f"ANSWER: "+str(result["answer"]))
    sources = result['source_documents']
    # Guard against an empty retrieval result instead of failing on sources[0].
    if sources:
        top_metadata = sources[0].metadata
        print(top_metadata["source"].replace("docs\\","")+" - Page: "+str(top_metadata["page"]))


# NOTE: the turns below are not recorded in chat_history; call
# populateHistory(answer, question) after a query to make later ones history-aware.
query = "What are offences that are not covered in insurance policy and employees are responsible for those offences? Give all offences list."
print("Query: " + query)
q_answer = llm_answer(query)
_print_answer(q_answer)

print("================================================================================================================================")

query = "What is the car policy of employee who is absent from work?"
q_answer = llm_answer(query)
_print_answer(q_answer)




RetryError: Deadline of 120.0s exceeded while calling target function, last exception: 503 Getting metadata from plugin failed with error: ('invalid_grant: Token has been expired or revoked.', {'error': 'invalid_grant', 'error_description': 'Token has been expired or revoked.'})