# Install Packages

In [1]:
#!pip3 install langchain

# Import packages

In [6]:
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import ElasticsearchStore
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.runnable import RunnableLambda
from langchain.schema import HumanMessage
from urllib.request import urlopen
import os, json
 
load_dotenv()
 
openai_api_key=os.getenv('OPENAI_API_KEY')
elastic_user=os.getenv('ES_USER')
elastic_password=os.getenv('ES_PASSWORD')
elastic_endpoint=os.getenv("ES_ENDPOINT")
elastic_index_name='langchain-rag'

# Add documents and split them into passages

In [7]:
with open('workplace-docs.json') as f:
   workplace_docs = json.load(f)
 
print(f"Successfully loaded {len(workplace_docs)} documents")

Successfully loaded 15 documents


In [8]:
metadata = []
content = []
 
for doc in workplace_docs:
  content.append(doc["content"])
  metadata.append({
      "name": doc["name"],
      "summary": doc["summary"],
      "rolePermissions":doc["rolePermissions"]
  })
 
text_splitter = CharacterTextSplitter(chunk_size=50, chunk_overlap=0)
docs = text_splitter.create_documents(content, metadatas=metadata)

Created a chunk of size 245, which is longer than the specified 50
Created a chunk of size 288, which is longer than the specified 50
Created a chunk of size 204, which is longer than the specified 50
Created a chunk of size 281, which is longer than the specified 50
Created a chunk of size 249, which is longer than the specified 50
Created a chunk of size 285, which is longer than the specified 50
Created a chunk of size 298, which is longer than the specified 50
Created a chunk of size 270, which is longer than the specified 50
Created a chunk of size 224, which is longer than the specified 50
Created a chunk of size 288, which is longer than the specified 50
Created a chunk of size 260, which is longer than the specified 50
Created a chunk of size 199, which is longer than the specified 50
Created a chunk of size 290, which is longer than the specified 50
Created a chunk of size 251, which is longer than the specified 50
Created a chunk of size 195, which is longer than the specifie

# Index Documents using ELSER - SparseVectorRetrievalStrategy()

In [12]:
from elasticsearch import Elasticsearch

url = f"https://{elastic_user}:{elastic_password}@{elastic_endpoint}:9200"
connection = Elasticsearch(url, ca_certs = "./http_ca.crt", verify_certs = True)

es = ElasticsearchStore.from_documents(
    docs,
    es_url = url,
    es_connection = connection,
    es_user=elastic_user,
    es_password=elastic_password,
    index_name=elastic_index_name,
    strategy=ElasticsearchStore.SparseVectorRetrievalStrategy()
)

In [11]:
es

<langchain.vectorstores.elasticsearch.ElasticsearchStore at 0x107ce5b50>

# Show Result

In [14]:
def showResults(output):
  print("Total results: ", len(output))
  for index in range(len(output)):
    print(output[index])

# Search

In [15]:
r = es.similarity_search("work from home policy")
showResults(r)

Total results:  4
page_content='The purpose of this full-time work-from-home policy is to provide guidelines and support for employees to conduct their work remotely, ensuring the continuity and productivity of business operations during the COVID-19 pandemic and beyond.\nScope' metadata={'summary': 'This policy outlines the guidelines for full-time remote work, including eligibility, equipment and resources, workspace requirements, communication expectations, performance expectations, time tracking and overtime, confidentiality and data security, health and well-being, and policy reviews and updates. Employees are encouraged to direct any questions or concerns', 'rolePermissions': ['demo', 'manager'], 'name': 'Work From Home Policy'}
page_content='This work-from-home policy will be reviewed periodically and updated as necessary, taking into account changes in public health guidance, business needs, and employee feedback.\nQuestions and Concerns' metadata={'summary': 'This policy outli

# RAG with Elasticsearch - Method 1 (Using Retriever)

In [17]:
retriever = es.as_retriever(search_kwargs={"k": 4})

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
    {"context": retriever, "question": RunnablePassthrough()} 
    | prompt 
    | ChatOpenAI() 
    | StrOutputParser()
)

chain.invoke("vacation policy")

'The vacation policy outlines the guidelines and procedures for requesting and taking time off from work for personal and leisure purposes. Full-time employees accrue vacation time at a rate of [X hours] per month, equivalent to [Y days] per year. Vacation requests must be submitted to supervisors at least [A weeks] in advance. The policy applies to all full-time and part-time employees who have completed their probationary period.'

# RAG with Elasticsearch - Method 2 (Without Retriever)

## Add Context

In [19]:
def add_context(question: str):
    r = es.similarity_search(question)
    
    context = "\n".join(x.page_content for x in r)
    
    return context

## Chain

In [20]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

chain = (
    {"context": RunnableLambda(add_context), "question": RunnablePassthrough()}
    | prompt
    | ChatOpenAI()
    | StrOutputParser()
)

chain.invoke("canada employees guidelines")

'Employees in Canada must follow all health and safety guidelines and procedures to maintain a safe and healthy work environment. This includes reporting any hazards or unsafe conditions to the appropriate personnel. Additionally, starting May 2022, the company will be implementing a two-day in-office work requirement per week for all eligible employees. Employees are advised to coordinate with their supervisor and HR department to schedule their in-office workdays while continuing to follow all safety protocols.'

# Compare with RAG and without RAG

In [22]:
q = input("Ask Question: ")

## Question to OpenAI

chat = ChatOpenAI()

messages = [
    HumanMessage(
        content=q
    )
]

gpt_res = chat(messages)

# Question with RAG

gpt_rag_res = chain.invoke(q)


# Responses

s = f"""
ChatGPT Response:

{gpt_res}

ChatGPT with RAG Response:

{gpt_rag_res}
"""

print(s)

Ask Question:  detailed steps for new employee onboarding



ChatGPT Response:

content="Onboarding a new employee is a crucial step in their integration into your organization. Here are some detailed steps for a comprehensive new employee onboarding process:\n\n1. Pre-boarding:\n   a. Send a welcome email: Before the employee's first day, send them a welcome email that includes information about their start date, time, location, dress code, and any documents they need to bring.\n   b. Prepare their workspace: Make sure the employee's workspace is clean, organized, and equipped with the necessary tools and technology they will need.\n\n2. First day orientation:\n   a. Meet and greet: Have someone from the HR department or the employee's supervisor greet them on their first day, introduce themselves, and give them a tour of the office.\n   b. Provide necessary paperwork: Present the employee with any paperwork they need to complete, such as tax forms, employment contracts, and company policies.\n   c. Explain policies and procedures: Go over imp