Load API Keys

In [None]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment so that
# later cells can read the API keys with os.getenv.
load_dotenv()

Load Contents from HLB Fixed Deposit Pages

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# HLB pages that make up the fixed-deposit knowledge base: the product
# overview, one page per fixed-deposit product, and the deposit fees page.
hlb_fixed_deposit_urls = [
  "https://www.hlb.com.my/en/personal-banking/fixed-deposit.html?icp=hlb-en-all-footer-txt-fd",
  "https://www.hlb.com.my/en/personal-banking/fixed-deposit/fixed-deposit-account/fixed-deposit-account.html",
  "https://www.hlb.com.my/en/personal-banking/fixed-deposit/fixed-deposit-account/e-fixed-deposit.html",
  "https://www.hlb.com.my/en/personal-banking/fixed-deposit/fixed-deposit-account/flexi-fd.html",
  "https://www.hlb.com.my/en/personal-banking/fixed-deposit/fixed-deposit-account/senior-savers-flexi-fd.html",
  "https://www.hlb.com.my/en/personal-banking/fixed-deposit/fixed-deposit-account/junior-fixed-deposit.html",
  "https://www.hlb.com.my/en/personal-banking/fixed-deposit/fixed-deposit-account/foreign-fixed-deposit-account.html",
  "https://www.hlb.com.my/en/personal-banking/help-support/fees-and-charges/deposits.html",
]

# Fetch every page and load it as LangChain documents.
loader = WebBaseLoader(hlb_fixed_deposit_urls)
data = loader.load()

# Show the loaded documents as the cell output.
data

Split the Loaded Data into Chunks

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded pages into chunks of at most 1000 characters, with a
# 20-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
  chunk_size=1000,
  chunk_overlap=20,
)
texts = text_splitter.split_documents(data)

# Show the resulting chunks as the cell output.
texts

Initialize Pinecone

In [None]:
import os
import pinecone

# Connect the Pinecone client using credentials read from the environment.
pinecone.init(
  api_key=os.getenv("PINECONE_API_KEY"),
  environment=os.getenv("PINECONE_ENVIRONMENT"),
)

index_name = "main-index"
namespace = "hlb-fixed-deposit-with-conversation-chain-gpt-3.5"

# Create the index only when it does not exist yet. The dimension of 1536
# matches the output size of the text-embedding-ada-002 model used below.
existing_indexes = pinecone.list_indexes()
if index_name not in existing_indexes:
  pinecone.create_index(name=index_name, metric="cosine", dimension=1536)

# Empty this notebook's namespace so re-running the notebook does not
# accumulate duplicate vectors.
index = pinecone.Index(index_name)
index.delete(delete_all=True, namespace=namespace)

Create Embeddings and Vectors to Insert into the Pinecone Database

In [None]:
from langchain_community.vectorstores.pinecone import Pinecone
from langchain_openai import OpenAIEmbeddings

# Embedding model shared by the vector store and the compression filters.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Embed every chunk and store the vectors in the notebook's namespace.
vector = Pinecone.from_documents(
  texts,
  embeddings,
  index_name=index_name,
  namespace=namespace,
)

Create LLM

In [None]:
from langchain_openai import ChatOpenAI

# Chat model used for answering; temperature 0 keeps the sampling as
# deterministic as the API allows.
llm = ChatOpenAI(
  model="gpt-3.5-turbo-1106",
  temperature=0,
)

Creating Prompt Template

In [None]:
from langchain.prompts import PromptTemplate

# Answer-generation prompt. It exposes two placeholders: {question} (the
# user's question) and {context} (the text the answer must be based on).
# The instructions restrict answers to HLB, forbid falling back on general
# knowledge, and ask for final values only when a calculation is needed.
# NOTE(review): the template text contains typos ("enought", "the input
# question you to do"); they are left untouched here because the string is
# sent to the model verbatim and editing it changes runtime behaviour.
prompt_template = """
  You are a helpful assistant who should reply to inputs related only to the HongLeong Bank aka HLB.
  You answer to the question: {question}
  By only basing on this context: {context}
  If the question does not specify the fixed deposit type, answer the question assuming it is about fixed deposit account.
  If you don't have enought information from the document, don't answer with your general knowledge, just say you don't have that information.
  Your answer should be detailed but if you have to do calculation, don't show your work or equations, double check your answer, just provide the final value, and you don't need to show reference.
  If the bank's name is not provided in the question, answer with the assumption that the question is about HLB.
  If the bank's name is provided and it's not HLB, don't answer that question.
  You don't need to answer if the input question you to do something creative such as writing a song or making a joke.
"""
# Build the PromptTemplate object from the template string above.
prompt = PromptTemplate.from_template(prompt_template)

Create Retriever for Vector Search

In [None]:
# Base retriever over the Pinecone store; asks for 15 matches per query.
retriever = vector.as_retriever(
  search_kwargs={"k": 15},
)

Create MultiQuery retriever

In [None]:
from langchain.retrievers.multi_query import MultiQueryRetriever

# Wrap the base retriever so the LLM generates several variants of each
# query and the results retrieved for all of them are combined.
multi_query_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=llm)

Logging Multi Query Retriever

In [None]:
import logging

# Set up default logging, then raise the multi-query retriever's logger to
# INFO so its log messages are emitted.
logging.basicConfig()
multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
multi_query_logger.setLevel(logging.INFO)

Compressor Pipeline

In [None]:
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import DocumentCompressorPipeline

# Post-retrieval pipeline: re-split the retrieved documents into smaller
# pieces, drop near-duplicate pieces, then keep the pieces most relevant to
# the query.

# `separators` must be a list of strings. A bare string such as ". " is
# iterated character by character, i.e. treated as [".", " "], which lets
# the splitter cut at any "." (including the decimal point in an interest
# rate). Split on sentence ends first and fall back to spaces, then to
# single characters, only for sentences longer than chunk_size.
splitter = RecursiveCharacterTextSplitter(
  chunk_size=500,
  chunk_overlap=0,
  separators=[". ", " ", ""],
)

# Keep the 25 pieces whose embeddings are most similar to the query.
embedding_filter = EmbeddingsFilter(embeddings=embeddings, k=25)

# Remove pieces whose embeddings are nearly identical to another piece.
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)

# The transformers run in list order: split, de-duplicate, then filter.
pipeline_compressor = DocumentCompressorPipeline(
  transformers=[splitter, redundant_filter, embedding_filter]
)

Contextual Compression

In [None]:
from langchain.retrievers import ContextualCompressionRetriever

# Retrieve with the multi-query retriever, then pass the results through
# the compressor pipeline before they are returned.
compression_retriever = ContextualCompressionRetriever(
  base_retriever=multi_query_retriever,
  base_compressor=pipeline_compressor,
)

Creating Memory

In [None]:
from langchain.memory import ConversationBufferWindowMemory

# Sliding-window chat memory that keeps the last k=2 exchanges.
# - memory_key="chat_history" is the variable name the chain reads history from.
# - output_key="answer" tells the memory which chain output to store; the
#   chain also returns source documents.
# - return_messages=True hands the history back as message objects.
# The window memory has no `llm` field (only summarising memories use one),
# so that argument is not passed here.
memory = ConversationBufferWindowMemory(
  memory_key="chat_history",
  output_key="answer",
  k=2,
  return_messages=True,
)

Creating Chain

In [None]:
from langchain.chains import ConversationalRetrievalChain
from langchain_community.callbacks.manager import get_openai_callback
 
# Conversational RAG chain.
# The retriever is `compression_retriever` (multi-query retrieval followed
# by the compressor pipeline) so the retrieval stages built in the earlier
# cells are actually used; wiring in the plain `retriever` would leave them
# as dead code.
chain = ConversationalRetrievalChain.from_llm(
  llm=llm,
  chain_type="stuff",  # put all retrieved text into a single prompt
  retriever=compression_retriever,
  memory=memory,
  combine_docs_chain_kwargs={"prompt": prompt},  # custom answer prompt
  verbose=True,
  return_source_documents=True,  # exposed as result["source_documents"]
  max_tokens_limit=3000  # cap on the retrieved text placed in the prompt
)

def ask_question(question):
  """Run one question through the conversational chain and print the outcome.

  Prints, in order: every source document returned by the chain, the number
  of source documents, the generated answer, and the OpenAI callback object
  that was active while the chain ran.

  Args:
    question: The user's question as a string.

  Returns:
    None. All results are printed.
  """
  with get_openai_callback() as cb:
    # Chain.__call__ is deprecated in LangChain 0.1+; invoke() is the
    # supported entry point. The chat history is supplied by the memory.
    result = chain.invoke({"question": question})
    source_documents = result["source_documents"]
    for source_document in source_documents:
      print(f'\n{source_document}')
    print(f'\nDocument: {len(source_documents)}')
    print(f'\nAnswer: {result["answer"]}')
    # Printed inside the `with` block, after the chain call has finished.
    print(f'{cb}\n')

Running the Chain

In [None]:
# General definitional question that names neither a bank nor a product type.
ask_question("What is fixed deposit?")

In [None]:
# Asks for the range of fixed deposit products, naming the bank explicitly.
ask_question("How many types of fixed deposit does HongLeong Bank provide?")

In [None]:
# Comparison question across two of the loaded product pages.
ask_question("What is the difference between Fixed Deposit and eFixed Deposit?")

In [None]:
# Rate lookup question; no product variant is specified.
ask_question("What are the interest rates for Fixed Deposit?")

In [None]:
# Calculation question: the prompt asks the model to give only the final value.
ask_question("Lets say I want to invest RM 50,000 in Fixed Deposit for 12 months. Please calculate the total amount that I can withdraw  at the end of the term.")