In [2]:
# following tutorial from https://python.langchain.com/docs/use_cases/chatbots/retrieval

In [21]:
from langchain.agents import Tool
from langchain.chains import RetrievalQA
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch, Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pydantic.v1 import BaseModel, Field
from PyPDF2 import PdfReader
from typing import Dict

# Credentials
import dotenv
from pathlib import Path
import os

### ingest pdf file

In [2]:
pdf_path = '../data/Employment Act 1968.pdf'

reader = PdfReader(pdf_path)
number_of_pages = len(reader.pages)

text = ''
for page_number in range(number_of_pages):
    page = reader.pages[page_number]
    text += page.extract_text()

print(text[:1000])

THE STATUTES OF THE REPUBLIC OF SINGAPORE
EMPLOYMENT ACT 1968
2020 REVISED EDITION
This revised edition incorporates all amendments up to and
including 1 December 2021 and comes into operation on 31 December 2021.
Prepared and Published by
THE LAW REVISION COMMISSION
UNDER THE AUTHORITY OF
THE REVISED EDITION OF THE LAWS ACT 1983
Informal Consolidation –version in force from 29/5/2022Employment Act 1968
ARRANGEMENT OF SECTIONS
PART 1
PRELIMINARY
Section
1. Short title
2. Interpretation3. Appointment of officers4. Rules and orders5. Minister may restrict application
6. Existing law not affected
7. [ Repealed ]
PART 2
CONTRACTS OF SERVICE
8. Illegal terms of contract of service
9. Termination of contract
10. Notice of termination of contract
11. Termination of contract without notice
12. Contractual age13. When contract deemed to be broken by employer and employee14. Dismissal15. Termination by employee threatened by danger16. Liability on breach of contract17. Contract of service not to

In [3]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
all_splits = text_splitter.create_documents([text])

#### create subset of only 5 documents from arbitrary split above

In [4]:
five_docs = all_splits[:5]
five_docs

[Document(page_content='THE STATUTES OF THE REPUBLIC OF SINGAPORE\nEMPLOYMENT ACT 1968\n2020 REVISED EDITION\nThis revised edition incorporates all amendments up to and\nincluding 1 December 2021 and comes into operation on 31 December 2021.\nPrepared and Published by\nTHE LAW REVISION COMMISSION\nUNDER THE AUTHORITY OF\nTHE REVISED EDITION OF THE LAWS ACT 1983\nInformal Consolidation –version in force from 29/5/2022Employment Act 1968\nARRANGEMENT OF SECTIONS\nPART 1\nPRELIMINARY\nSection\n1. Short title'),
 Document(page_content='PART 1\nPRELIMINARY\nSection\n1. Short title\n2. Interpretation3. Appointment of officers4. Rules and orders5. Minister may restrict application\n6. Existing law not affected\n7. [ Repealed ]\nPART 2\nCONTRACTS OF SERVICE\n8. Illegal terms of contract of service\n9. Termination of contract\n10. Notice of termination of contract\n11. Termination of contract without notice'),
 Document(page_content='11. Termination of contract without notice\n12. Contractual a

#### Use HuggingFace Embedding model to create embeddings

In [5]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


#### create vector store using ingested documents and Huggingface embedding model

In [7]:
vectorstore = Chroma.from_documents(documents=five_docs, embedding=embeddings)

#### set the vector store as a retriever for RAG

In [8]:
# k is the number of chunks to retrieve
retriever = vectorstore.as_retriever(k=2)

In [9]:
docs = retriever.invoke("can you tell me about the employment act?")
docs

[Document(page_content='THE STATUTES OF THE REPUBLIC OF SINGAPORE\nEMPLOYMENT ACT 1968\n2020 REVISED EDITION\nThis revised edition incorporates all amendments up to and\nincluding 1 December 2021 and comes into operation on 31 December 2021.\nPrepared and Published by\nTHE LAW REVISION COMMISSION\nUNDER THE AUTHORITY OF\nTHE REVISED EDITION OF THE LAWS ACT 1983\nInformal Consolidation –version in force from 29/5/2022Employment Act 1968\nARRANGEMENT OF SECTIONS\nPART 1\nPRELIMINARY\nSection\n1. Short title'),
 Document(page_content='11. Termination of contract without notice\n12. Contractual age13. When contract deemed to be broken by employer and employee14. Dismissal15. Termination by employee threatened by danger16. Liability on breach of contract17. Contract of service not to restrict rights of employees to join,\nparticipate in or organise trade unions\n18. Change of employer18A. Transfer of employment19. Offence\nPART 3\nPAYMENT OF SALARY\n20. Fixation of salary period'),
 Documen

#### set up system template for document chain (set document store as context)

In [11]:
SYSTEM_TEMPLATE = """
Answer the user's questions based on the below context. 
If the context doesn't contain any relevant information to the question, don't make something up and just say "I don't know":

<context>
{context}
</context>
"""

#### set up llm model

In [16]:
config = dotenv.dotenv_values("../.env") # located at root
llm = ChatOpenAI(
    openai_api_key = config.get("openai_api_key"), 
    temperature=0.1, 
    max_tokens=100
)

#### test out Q&A promopt using llm 

In [17]:
question_answering_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            SYSTEM_TEMPLATE
        ),
        MessagesPlaceholder(variable_name="messages")
    ]
)
document_chain = create_stuff_documents_chain(llm, question_answering_prompt)

#### combine document chain with retriever for retriever-augmented generation by llm

In [19]:
document_chain.invoke(
    {
        "context": docs, 
        "messages":[
            HumanMessage(content="tell me about the employment laws in singapore")
        ],
    }
)

'The context provided outlines the Employment Act 1968 of Singapore, which covers various aspects of employment such as termination of contracts, payment of salary, deductions, rest days, hours of work, and other conditions of service. The Act also addresses illegal terms of contracts, termination procedures, and restrictions on applications. It is important to note that this is a specific piece of legislation and there may be other laws and regulations in Singapore that govern different aspects of employment.'

##### checking document chain WITHOUT context from our vector store (i.e. generic chatGPT response)

In [20]:
document_chain.invoke(
    {
        "context": [], 
        "messages":[
            HumanMessage(content="tell me about the employment laws in singapore")
        ],
    }
)

'Employment laws in Singapore are governed by the Ministry of Manpower (MOM) and cover various aspects such as employment contracts, working hours, leave entitlements, and workplace safety. Some key regulations include the Employment Act, which sets out the basic terms and conditions of employment, and the Work Injury Compensation Act, which provides for compensation in the event of work-related injuries or illnesses. Employers are also required to contribute to the Central Provident Fund (CPF) for their employees. It is important'

#### Combine document chain with retriever 

In [22]:
def parse_retriever_input(params: Dict):
    return params["messages"][-1].content

retrieval_chain = RunnablePassthrough.assign(
    context=parse_retriever_input | retriever,
).assign(
    answer=document_chain
)

In [23]:
retrieval_chain.invoke(
    {
        "messages": [
            HumanMessage(content="which year was the singapore employment act passed?")
        ]
    }
)

{'messages': [HumanMessage(content='which year was the singapore employment act passed?')],
 'context': [Document(page_content='THE STATUTES OF THE REPUBLIC OF SINGAPORE\nEMPLOYMENT ACT 1968\n2020 REVISED EDITION\nThis revised edition incorporates all amendments up to and\nincluding 1 December 2021 and comes into operation on 31 December 2021.\nPrepared and Published by\nTHE LAW REVISION COMMISSION\nUNDER THE AUTHORITY OF\nTHE REVISED EDITION OF THE LAWS ACT 1983\nInformal Consolidation –version in force from 29/5/2022Employment Act 1968\nARRANGEMENT OF SECTIONS\nPART 1\nPRELIMINARY\nSection\n1. Short title'),
  Document(page_content='11. Termination of contract without notice\n12. Contractual age13. When contract deemed to be broken by employer and employee14. Dismissal15. Termination by employee threatened by danger16. Liability on breach of contract17. Contract of service not to restrict rights of employees to join,\nparticipate in or organise trade unions\n18. Change of employer18A