In [1]:
!pip install docx==0.2.4
!pip install langchain==0.1.16
!pip install langchain_community==0.0.32
!pip install langchain_core==0.1.42
!pip install langchain_openai==0.1.3
!pip install numpy==1.23.5
!pip install pandas==2.2.2



In [2]:
from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chat_models import ChatOpenAI
import os
import re
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma, FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate

  from tqdm.autonotebook import tqdm


In [3]:
# Read the OpenAI key from the environment; a missing variable raises KeyError here.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] 

In [4]:
# data loading
def doc_loader(file_path):
  """Load a single DOCX or PDF file through a DirectoryLoader.

  The file's directory is used as the loader root and its base name as the
  glob pattern, so only that one file is picked up.

  Args:
    file_path: Path to the file; the extension (case-insensitive) selects
      the loader class.

  Returns:
    The list produced by the loader's ``load()`` call, or an empty list when
    the extension is neither ``.docx`` nor ``.pdf``.
  """
  _, file_ext = os.path.splitext(file_path)
  file_ext = file_ext.lower()

  # os.path.splitext keeps the leading dot, so the comparison must include it.
  if file_ext == '.docx':
    loader_cls, label = Docx2txtLoader, "DOCX"
  elif file_ext == '.pdf':
    loader_cls, label = PyPDFLoader, "PDF"
  else:
    print("Unsupported file format")
    # Return an empty list (not None) so callers can still iterate the result.
    return []

  loader = DirectoryLoader(os.path.dirname(file_path), glob=os.path.basename(file_path), loader_cls=loader_cls)
  documents = loader.load()
  print(f"Successfully extracted {label} file")
  return documents

# Load the contract PDF; the relative path is resolved against the current working directory.
documents = doc_loader("Part B - Form and Conditions of Contract_29032017.pdf")

# cleaning
def extract_and_combine_text(documents):
  """Return the ``page_content`` of every document, joined by single spaces."""
  page_texts = [document.page_content for document in documents]
  separator = ' '
  return separator.join(page_texts)

def clean_text(combined_text):
  """Collapse every run of whitespace in ``combined_text`` into one space."""
  whitespace_run = re.compile(r'\s+')
  return whitespace_run.sub(' ', combined_text)

# Flatten the loaded documents into one string, then normalise its whitespace.
# NOTE(review): in the visible cells `cleaned_text` is only inspected here; the
# text splitter further down operates on `documents`, not on this string.
combined_text = extract_and_combine_text(documents)
cleaned_text = clean_text(combined_text)
type(cleaned_text)

Successfully extracted PDF file


str

In [5]:
# Retrieval settings for the FAISS-backed pipeline
chunk_size = 1000     # chunk_size handed to the text splitter
chunk_overlap = 200   # chunk_overlap handed to the text splitter
k = 3                 # number of chunks the retriever returns per query

# Fail fast when the key is missing. The environment is deliberately left
# untouched: writing an empty OPENAI_API_KEY before raising would make a later
# `os.environ["OPENAI_API_KEY"]` lookup succeed with a useless empty value.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise EnvironmentError("OpenAI API key not found in environment variables.")

# Chunk the loaded documents using the size/overlap settings above.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
split = text_splitter.split_documents(documents)

# Embed the chunks with OpenAI embeddings and index them in FAISS.
vectorstore = FAISS.from_documents(
    documents=split,
    embedding=OpenAIEmbeddings(api_key=api_key),
)

# Expose the index as a retriever that returns the top-k chunks.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=k))

# Prompt template with two placeholders: {context} and {question}.
# The text is sent to the model verbatim, so its wording is part of the
# pipeline's behaviour and is left unchanged here.
template = """
You are a well-informed assistant, tasked to answer the question below using the provided context. Include sources only if they are specific and verifiable within the provided context. If unsure about any part of the question, state explicitly that you don't know.
---------
Context:
{context}
---------
Question: {question}
---------
Guidelines:
- Answer format: 
  1. Answer in English
  2. Sources:
- If there's no resources, respond with "I don't know."
- This is important! Be concise and directly address the question.

Your have to inform response with sources:
"""
# Build the chat prompt object from the template string.
prompt = ChatPromptTemplate.from_template(template=template)


In [6]:
# llm chain, response
def get_response(question, model_name="gpt-3.5-turbo", temperature=0):
    """Answer ``question`` with the retrieval-augmented chain and print the exchange.

    Args:
        question: Text sent through the chain; it is used both as the
            retriever input and as the ``{question}`` prompt variable.
        model_name: Chat model passed to ``ChatOpenAI``. Defaults to
            "gpt-3.5-turbo", the value that used to be hard-coded.
        temperature: Temperature passed to ``ChatOpenAI``. Defaults to 0.

    Returns:
        The value produced by invoking the chain, so callers can store or
        compare answers instead of only reading the printed transcript.
    """
    llm = ChatOpenAI(model_name=model_name, temperature=temperature)

    def format_docs(docs):
        # Merge the retrieved chunks into a single context string.
        return "\n\n".join(doc.page_content for doc in docs)

    # The leading dict supplies both prompt variables from the one input:
    # "context" goes through the retriever and formatter, "question" is passed through.
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    response = rag_chain.invoke(question)

    print(f"[HUMAN]\n{question}\n")
    print(f"[AI]\n{response}\n")
    print("===" * 20)

    return response

In [7]:
# Evaluation questions about the contract document.
questions = ["What is the 'NOTICE OF AWARD' as defined in the document?",
             "What is included in the 'SCOPE OF WORK' for the contractor?",
             "Define 'MECHANICAL COMPLETION' as per the document.",
             "What does the 'CERTIFICATE OF FINAL ACCEPTANCE' signify?",
             "What is included in the 'SCOPE OF WORK' for the contractor?",
             "What does 'PERFORMANCE SECURITY' refer to in the contract?",
             "What are the responsibilities of the contractor regarding taxes and duties as mentioned in the document?",
             "What does 'BOG' stand for as mentioned in the document?",
             "What is meant by 'CERTIFICATE OF INITIAL ACCEPTANCE'?",
             "Describe the 'PERFORMANCE GUARANTEES' as outlined in the document."]

# The list repeats the 'SCOPE OF WORK' question; dict.fromkeys drops repeats
# while keeping first-seen order, so each question costs only one model call.
for question in dict.fromkeys(questions):
  get_response(question)


[HUMAN]
What is the 'NOTICE OF AWARD' as defined in the document?

[AI]
The 'NOTICE OF AWARD' as defined in the document is the notice by PTTLNG to the successful Bidder evidencing that he has been appointed by PTTLNG as CONTRACTOR (as per the provided context). (Source: PTTLNG Nong Fab LNG Receiving Terminal Project Invitation to Bid EPC C Services, Part B, Form and Conditions of Contract, Rev. 0, Mar. 2017)

[HUMAN]
What is included in the 'SCOPE OF WORK' for the contractor?

[AI]
The 'SCOPE OF WORK' for the contractor includes the contractor's scope of work as per Article 2 of the contract, which is based on the process design definitively specified and agreed upon between PTTLNG and the contractor at the award of the contract. Any modifications or improvements proposed by the contractor after the award of the contract must follow the procedure for changes in the scope of work according to Article 5. Additionally, the contractor is responsible for checking and verifying all work and

In [None]:
# [HUMAN]
# According to the contract, what is the main responsibility of the contractor?

# gpt3.5 w/ hub.pull("rlm/rag-prompt")
# [AI]
# The main responsibility of the contractor is to carry out the construction of the TERMINAL in accordance with all relevant codes, specifications, drawings, and time schedules as stipulated in the PROJECT RECORD and the CONTRACT. This includes supervision, labor, equipment, materials, and installation of temporary facilities. The contractor is also responsible for undertaking the installation, pre-commissioning, commissioning, and testing of OFE items.

# gpt3.5 w/ engineered prompt
# [AI]
# According to the contract, the main responsibility of the contractor is to carry out the construction of the TERMINAL in accordance with all relevant codes, specifications, drawings, time schedules, etc. as stipulated in the PROJECT RECORD and the CONTRACT. This includes providing supervision, labor, equipment, materials, energy, utilities, and installation of temporary facilities on the site. (Source: Contract excerpt provided)

# gpt4 w/ hub.pull("rlm/rag-prompt")
# [AI]
# The main responsibility of the contractor, according to the contract, is to carry out the construction of the TERMINAL in accordance with all relevant codes, specifications, drawings, time schedules, etc., as stipulated in the PROJECT RECORD and the CONTRACT. This includes supervision, labor, equipment, materials, and the installation of site temporary facilities. Additionally, the contractor is responsible for the installation, pre-commissioning, commissioning, and testing of OFE items, closely cooperating with the VENDORS' engineers.

# gpt4 w/ engineered prompt
# [AI]
# According to the contract, the main responsibility of the contractor is to carry out the construction of the TERMINAL in accordance with all relevant codes, specifications, drawings, time schedules, etc., as stipulated in the PROJECT RECORD and the CONTRACT. This includes supervision, labor, erection equipment and tools, consumable materials, energy and utilities, and the installation of SITE temporary facilities. Additionally, the contractor is responsible for the installation, pre-commissioning, commissioning, and testing of the WORK.