<a href="https://colab.research.google.com/github/joshiayush/cardmatch/blob/main/notebooks/card_seek_chain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
!pip install langchain >/dev/null
!pip install langchain-community >/dev/null
!pip install langchain-openai >/dev/null
!pip install pypdf >/dev/null
!pip install chromadb >/dev/null

In [1]:
import os
import getpass

In [2]:
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LANGCHAIN_API_KEY:")
os.environ["OPENAI_API_KEY"] = getpass.getpass("OPENAI_API_KEY:")

LANGCHAIN_API_KEY:··········
OPENAI_API_KEY:··········


In [16]:
import pathlib
import re
from typing import Dict, Any, Optional

import numpy as np
import pandas as pd
from langchain.schema import Document, BaseRetriever
from langchain.document_loaders import (
    PyPDFLoader,
    TextLoader,
    UnstructuredWordDocumentLoader,
    UnstructuredEPubLoader,
    WebBaseLoader,
)
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.base import Chain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers.document_compressors.embeddings_filter import EmbeddingsFilter

In [4]:
_cc_sheets = ["sbi", "axis", "hdfc"]


def load_cc_info() -> Dict:
    cc_info = dict()
    for sheet in _cc_sheets:
        df = pd.read_excel("cc_urls.xlsx", sheet_name=sheet)
        cc_info.update(df.set_index("card_name").T.to_dict())
    return cc_info

In [5]:
credit_cards_info = load_cc_info()

  cc_info.update(df.set_index("card_name").T.to_dict())
  cc_info.update(df.set_index("card_name").T.to_dict())


In [6]:
class DocumentLoaderException(Exception):
    pass

In [11]:
class EPubReader(UnstructuredEPubLoader):
    def __init__(self, file_path: str | list[str], **kwargs: Any):
        super().__init__(file_path, **kwargs, mode="elements", strategy="fast")

In [12]:
class DocumentLoader(object):
    """Loads in a document with a supported extension."""

    supported_extensions = {
        ".pdf": PyPDFLoader,
        ".txt": TextLoader,
        ".epub": EPubReader,
        ".docx": UnstructuredWordDocumentLoader,
        ".doc": UnstructuredWordDocumentLoader,
    }

In [13]:
def load_document(filepath: str) -> list[Document]:
  """Load a file and return it as a list of Document."""
  ext = pathlib.Path(filepath).suffix

  loader = DocumentLoader.supported_extensions.get(ext)
  if not loader:
    raise DocumentLoaderException(f"Unsupported file extension: {ext}")

  return loader(filepath).load()

In [25]:
def configure_retriever(
    docs: list[Document], use_compression: Optional[bool] = False
) -> BaseRetriever:
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800, chunk_overlap=200, separators=["\n\n## ", "\n\n", "\n", ". "]
    )
    splits = text_splitter.split_documents(docs)

    for i, split in enumerate(splits):
        split.metadata["section_id"] = i // 10

    embeddings = OpenAIEmbeddings()
    vectordb = Chroma.from_documents(splits, embeddings)

    base_retriever = vectordb.as_retriever(
        search_type="mmr", search_kwargs={"k": 6, "fetch_k": 20, "alpha": 0.6}
    )
    retriever = MultiQueryRetriever.from_llm(
        llm=ChatOpenAI(temperature=0), retriever=base_retriever
    )
    if use_compression:
        embeddings_filter = EmbeddingsFilter(
            embeddings=embeddings, similarity_threshold=0.76
        )
        return ContextualCompressionRetriever(
            base_compressor=embeddings_filter, base_retriever=retriever
        )
    return retriever

In [18]:
def configure_chain(retriever: BaseRetriever) -> Chain:
    """Configure chain with a retriever."""
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
    return ConversationalRetrievalChain.from_llm(
        llm, retriever=retriever, memory=memory, verbose=True,
    )

In [22]:
def configure_qa_chain(cc_info: Dict) -> Chain:
    """Read documents, configure retriever, and the chain."""
    docs = []
    for key, info in cc_info.items():
        if info["card_link"] is np.nan:
            continue

        card_doc = WebBaseLoader(info["card_link"]).load()
        docs.extend(card_doc)

        tnc_doc = load_document(info["tnc"])
        docs.extend(tnc_doc)
        break

    retriever = configure_retriever(docs, use_compression=True)
    return configure_chain(retriever)

In [26]:
qa_chain = configure_qa_chain(credit_cards_info)

In [27]:
qa_chain.run("What is the full name of the credit card?")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
GET FAMILIAR WITH YOUR SBI CARD
Inside Book 9.5x9.75
6
VALID
THRU
MONTH / YEAR
ELITE Master Card Aw_Rose Gold_85x54mm_Front
ON THE FACE OF YOUR CARD, YOU WILL FIND
A. Your Name
Please check that your name appears correctly. In case it does not, please call the SBI Card 
Helpline to make the necessary corrections.
B. Your individual 16 digit card number
C. Valid Thru
This is the date after which your card needs to be renewed. Your card is valid from the day you 
receive it up to and including the last day of the month indicated on your card.
D. The VISA/MasterCard logo
The hologram and the logo ensure that all establishments throughout the world
displaying the VISA/MasterC

'The full name of the credit card is "ELITE Master Card Aw_Rose Gold."'

In [28]:
qa_chain.run("What is the joining fee of this credit card?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the full name of the credit card?
Assistant: The full name of the credit card is "ELITE Master Card Aw_Rose Gold."
Follow Up Input: What is the joining fee of this credit card?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Back
Milestone Privileges   
Earn 15,000 bonus Reward Points on achieving annual spends of Rs. 5 lakhs and 8 lakhs
Spend Based Reversal of Annual Fee on spends of Rs. 10 Lakhs






ELITE Rewar

'The joining fee for the SBI Card ELITE is Rs. 4,999 plus taxes.'

In [29]:
qa_chain.run("What benefits do I get in movie tickets every month?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the full name of the credit card?
Assistant: The full name of the credit card is "ELITE Master Card Aw_Rose Gold."
Human: What is the joining fee of this credit card?
Assistant: The joining fee for the SBI Card ELITE is Rs. 4,999 plus taxes.
Follow Up Input: What benefits do I get in movie tickets every month?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
d) Customer is required to log on to www.bookmyshow.com we

'With the SBI Card ELITE, you can enjoy complimentary movie tickets worth Rs. 6,000 every year. The transaction is valid for at least 2 tickets per booking per month, with a maximum discount of Rs. 250 per ticket for 2 tickets only. However, please note that the convenience fee would still be chargeable.'