<a href="https://colab.research.google.com/github/joshiayush/cardmatch/blob/main/notebooks/card_seek_chain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [90]:
!pip install langchain >/dev/null
!pip install langchain-community >/dev/null
!pip install langchain-openai >/dev/null
!pip install pypdf >/dev/null
!pip install chromadb >/dev/null

[0m

In [60]:
import os
import getpass

In [61]:
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LANGCHAIN_API_KEY:")
os.environ["OPENAI_API_KEY"] = getpass.getpass("OPENAI_API_KEY:")

LANGCHAIN_API_KEY:··········
OPENAI_API_KEY:··········


In [91]:
import pathlib
import re
from typing import Dict, Any

import numpy as np
import pandas as pd
from langchain.schema import Document, BaseRetriever
from langchain.document_loaders import (
    PyPDFLoader,
    TextLoader,
    UnstructuredWordDocumentLoader,
    UnstructuredEPubLoader,
    WebBaseLoader,
)
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.base import Chain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.retrievers.multi_query import MultiQueryRetriever

In [63]:
_cc_sheets = ["sbi", "axis", "hdfc"]


def load_cc_info() -> Dict:
    cc_info = dict()
    for sheet in _cc_sheets:
        df = pd.read_excel("cc_urls.xlsx", sheet_name=sheet)
        cc_info.update(df.set_index("card_name").T.to_dict())
    return cc_info

In [64]:
credit_cards_info = load_cc_info()

  cc_info.update(df.set_index("card_name").T.to_dict())
  cc_info.update(df.set_index("card_name").T.to_dict())


In [65]:
class DocumentLoaderException(Exception):
    pass

In [66]:
class EPubReader(UnstructuredEPubLoader):
    def __init__(self, file_path: str | list[str], **kwargs: Any):
        super().__init__(file_path, **kwargs, mode="elements", strategy="fast")

In [67]:
class DocumentLoader(object):
    """Loads in a document with a supported extension."""

    supported_extensions = {
        ".pdf": PyPDFLoader,
        ".txt": TextLoader,
        ".epub": EPubReader,
        ".docx": UnstructuredWordDocumentLoader,
        ".doc": UnstructuredWordDocumentLoader,
    }

In [68]:
def load_document(filepath: str) -> list[Document]:
  """Load a file and return it as a list of Document."""
  ext = pathlib.Path(filepath).suffix

  loader = DocumentLoader.supported_extensions.get(ext)
  if not loader:
    raise DocumentLoaderException(f"Unsupported file extension: {ext}")

  return loader(filepath).load()

In [92]:
def configure_retriever(docs: list[Document]) -> BaseRetriever:
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800, chunk_overlap=150, separators=["\n\n## ", "\n\n", "\n", ". "]
    )
    splits = text_splitter.split_documents(docs)

    for i, split in enumerate(splits):
        split.metadata["section_id"] = i // 10

    embeddings = OpenAIEmbeddings()
    vectordb = Chroma.from_documents(splits, embeddings)

    base_retriever = vectordb.as_retriever(
        search_type="mmr", search_kwargs={"k": 6, "fetch_k": 20, "alpha": 0.6}
    )

    return MultiQueryRetriever.from_llm(
        llm=ChatOpenAI(temperature=0), retriever=base_retriever
    )

In [70]:
def configure_chain(retriever: BaseRetriever) -> Chain:
    """Configure chain with a retriever."""
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
    return ConversationalRetrievalChain.from_llm(
        llm, retriever=retriever, memory=memory, verbose=True,
    )

In [76]:
def configure_qa_chain(cc_info: Dict) -> Chain:
    """Read documents, configure retriever, and the chain."""
    docs = []
    for key, info in cc_info.items():
        if info["card_link"] is np.nan:
            continue

        card_doc = WebBaseLoader(info["card_link"]).load()
        docs.extend(card_doc)

        tnc_doc = load_document(info["tnc"])
        docs.extend(tnc_doc)
        break

    retriever = configure_retriever(docs)
    return configure_chain(retriever)

In [93]:
qa_chain = configure_qa_chain(credit_cards_info)

In [94]:
qa_chain.run("What is the full name of the credit card?")



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
(d)  You require Internet connection, compatible telecommunications equipment and mobile phone service plan (if applicable) in order to 
register, store and use your Mobile Credit Card. 
 4.  Card Account and credit limit 
  (a) SBI Credit Card in its plastic card form and in its digital version (i.e. Mobile Credit Card) constitute one and the same Card and share the 
same Card Account. 
  (b)  All Mobile Credit Card Transactions will be billed to card statement of the SBI Credit Card. There will not be a separate statement for your 
Mobile Credit Card. 
  (c)  Your SBI Credit Card in plastic card form and your Mobile Credit Card share the credit limit of the same Card Ac

'The full name of the credit card is "SBI Card ELITE."'

In [95]:
qa_chain.run("What is the joining fee of this credit card?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the full name of the credit card?
Assistant: The full name of the credit card is "SBI Card ELITE."
Follow Up Input: What is the joining fee of this credit card?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Inside Book 9.5x9.75
40
SBI Card ELITE 4,999 4,999 (Waived off on annual spends of Rs.10 Lakh or 
more in the preceding year)
SBI Card ELITE Advantage 4,999 4,999 (Waived off on annual spends of Rs. 10 Lakh 
o

'The joining fee for the SBI Card ELITE credit card is Rs. 4,999 plus taxes.'

In [96]:
qa_chain.run("What benefits do I get in movie tickets every month?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What is the full name of the credit card?
Assistant: The full name of the credit card is "SBI Card ELITE."
Human: What is the joining fee of this credit card?
Assistant: The joining fee for the SBI Card ELITE credit card is Rs. 4,999 plus taxes.
Follow Up Input: What benefits do I get in movie tickets every month?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Process for availing the Free Movie Tickets:
a) This offering 

'With the SBI Card ELITE credit card, you are entitled to complimentary movie tickets worth Rs. 6,000 every year. Each month, you can avail a maximum discount of Rs. 250 per ticket for two tickets, provided you book at least two tickets per transaction. The total amount payable for the tickets will be reduced by Rs. 500 or the cost of two tickets, whichever is less. Note that the convenience fee is chargeable, and the offer can only be redeemed on the BookMyShow website.'