In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
#from get_embedding_function import get_embedding_function
from langchain.vectorstores.chroma import Chroma


# Script starts here

In [2]:
# Directory used as the Chroma `persist_directory` by the indexing and query code below.
CHROMA_PATH = "chroma"
# CSV file that is passed to load_documents() when the index is built.
DATA_PATH='card_reviews.csv'
import warnings
# Silence all warnings from here on (note: this also hides deprecation notices).
warnings.filterwarnings("ignore")


In [3]:
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.embeddings.bedrock import BedrockEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings


def get_embedding_function():
    """Return the embedding model used for both indexing and querying.

    A default-configured ``HuggingFaceEmbeddings`` instance is built on every call.
    Alternatives that were tried and are currently disabled:
    ``OllamaEmbeddings(model="nomic-embed-text")`` and
    ``BedrockEmbeddings(credentials_profile_name="default", region_name="us-east-1")``.
    """
    return HuggingFaceEmbeddings()

In [4]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
def load_documents(FILE_PATH):
    """Read a semicolon-delimited, UTF-8 encoded CSV file through ``CSVLoader``.

    Args:
        FILE_PATH: Path of the CSV file to load.

    Returns:
        The result of ``CSVLoader.load()`` for that file.
    """
    csv_options = {"delimiter": ";"}
    csv_loader = CSVLoader(
        file_path=FILE_PATH,
        encoding="utf-8",
        csv_args=csv_options,
    )
    return csv_loader.load()

In [5]:
from langchain.schema.document import Document

def split_documents(documents: list[Document]):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    The splitter is configured with ``chunk_size=800`` and ``chunk_overlap=80``,
    lengths are measured with ``len``, and separators are treated as plain
    strings (``is_separator_regex=False``).

    Args:
        documents: Documents to split.

    Returns:
        The result of ``split_documents`` on the configured splitter.
    """
    splitter_options = {
        "chunk_size": 800,
        "chunk_overlap": 80,
        "length_function": len,
        "is_separator_regex": False,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(documents)



In [6]:

def calculate_chunk_ids(chunks):
    """Assign every chunk an ID of the form ``"<source>:<row>:<index>"``.

    ``source`` and ``row`` are read from ``chunk.metadata`` (missing keys render
    as ``None``). ``index`` is the number of chunks already seen for the same
    source/row pair, starting at 0. The ID is stored in ``chunk.metadata["id"]``.

    Counts are kept per row instead of comparing only with the previous chunk,
    so chunks of the same row that are not adjacent in ``chunks`` still receive
    distinct IDs. For input where each row's chunks are contiguous the result
    is the same as a simple "same as previous row" counter.

    Args:
        chunks: Iterable of objects exposing a mutable ``metadata`` dict.

    Returns:
        The same ``chunks`` object, with each chunk's metadata updated in place.
    """
    # Maps "<source>:<row>" -> number of chunks already numbered for that row.
    seen_per_row = {}

    for chunk in chunks:
        source = chunk.metadata.get("source")
        row = chunk.metadata.get("row")
        row_id = f"{source}:{row}"

        chunk_index = seen_per_row.get(row_id, 0)
        seen_per_row[row_id] = chunk_index + 1

        # Add it to the chunk meta-data.
        chunk.metadata["id"] = f"{row_id}:{chunk_index}"

    return chunks


def clear_database():
    """Remove the ``CHROMA_PATH`` directory tree if that path exists."""
    if not os.path.exists(CHROMA_PATH):
        return
    shutil.rmtree(CHROMA_PATH)


In [7]:
def split_into_batches(documents, batch_size):
    """Lazily yield consecutive slices of ``documents`` of length ``batch_size``.

    The final slice may be shorter when ``len(documents)`` is not a multiple of
    ``batch_size``.

    Args:
        documents: A sequence supporting ``len()`` and slicing.
        batch_size: Step between slice starts and maximum slice length.

    Yields:
        ``documents[start:start + batch_size]`` for each start offset.
    """
    total = len(documents)
    yield from (
        documents[start:start + batch_size]
        for start in range(0, total, batch_size)
    )


In [8]:

def add_to_chroma(chunks: list[Document]):
    """Store chunks whose IDs are not yet present in the persisted Chroma DB.

    Opens the Chroma store at ``CHROMA_PATH``, assigns IDs to ``chunks`` via
    ``calculate_chunk_ids``, filters out chunks whose ID is already stored, and
    adds the remainder in batches of 160, calling ``persist()`` after each batch.
    Progress is reported with ``print``.

    Args:
        chunks: Chunks to index; their ``metadata`` gains an ``"id"`` entry.

    Returns:
        None.
    """
    vector_store = Chroma(
        persist_directory=CHROMA_PATH, embedding_function=get_embedding_function()
    )

    # Every chunk needs an ID before it can be compared against the store.
    chunks_with_ids = calculate_chunk_ids(chunks)

    # An empty `include` asks for no payload fields; only the "ids" entry is read.
    stored = vector_store.get(include=[])
    existing_ids = set(stored["ids"])
    print(f"Number of existing documents in DB: {len(existing_ids)}")

    # Keep only the chunks the store does not already hold.
    new_chunks = [
        candidate
        for candidate in chunks_with_ids
        if candidate.metadata["id"] not in existing_ids
    ]

    if not new_chunks:
        print(" No new documents to add")
        return

    print(f" Adding new documents: {len(new_chunks)}")
    max_batch_size = 160
    for batch in split_into_batches(new_chunks, max_batch_size):
        batch_ids = [member.metadata["id"] for member in batch]
        vector_store.add_documents(batch, ids=batch_ids)
        vector_store.persist()

In [9]:

# Build the index: load the CSV rows, split them into chunks, then add any
# chunks that are not yet stored to the Chroma DB.
documents = load_documents(DATA_PATH)
chunks = split_documents(documents)
add_to_chroma(chunks)

Number of existing documents in DB: 0
👉 Adding new documents: 99


In [10]:
from langchain.vectorstores.chroma import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms.ollama import Ollama


# NOTE(review): CHROMA_PATH is already assigned this same value in cell In [2];
# this re-assignment is redundant.
CHROMA_PATH = "chroma"

# Prompt handed to the LLM by query_rag(): {context} is filled with the retrieved
# review texts and {question} with the user's query.
PROMPT_TEMPLATE = """
Here is the similar customers from different companies provided card reviews. YOu are card advisor expert. Suggest cards based only on the customer reviews:

{context}

---

There is a new customer that want to get new card. here is what customer expects from her/his card: {question}
"""

def query_rag(query_text: str):
    """Answer ``query_text`` with an LLM grounded on reviews retrieved from Chroma.

    Retrieves the 5 most similar stored chunks, joins their text into the
    ``{context}`` slot of ``PROMPT_TEMPLATE``, sends the formatted prompt to the
    Ollama ``llama3`` model, and prints the response together with the
    ``"id"`` metadata of the retrieved chunks.

    Args:
        query_text: The customer's request, used both for retrieval and as the
            ``{question}`` in the prompt.

    Returns:
        None; the formatted response is printed.
    """
    # Open the persisted vector store with the same embedding model used for indexing.
    embedder = get_embedding_function()
    store = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedder)

    # Each result is a (document, score) pair; the score is not used.
    results = store.similarity_search_with_score(query_text, k=5)
    retrieved_docs = [doc for doc, _score in results]

    context_text = "\n\n---\n\n".join(doc.page_content for doc in retrieved_docs)
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
        context=context_text, question=query_text
    )

    llm = Ollama(model="llama3")
    response_text = llm.invoke(prompt)

    sources = [doc.metadata.get("id", None) for doc in retrieved_docs]
    print(f"Response: {response_text}\nSources: {sources}")

In [11]:

# Example query: low interest rate, high cashback and travel rewards.
query_rag('I want a card with low interest rate and high cashback. Also I travel a lot so I want a card with travel rewards.')

Response: A new customer looking for a new card! Based on the reviews provided, here are some card recommendations that fit their expectations:

1. **Low Interest Rate:** SBI ELITE (Foreign Exchange Markup @ 1.99% is a big advantage) seems to have a competitive interest rate, which would be suitable for this customer.

2. **High Cashback:** ICICI RUBYX MASTERCARD has been praised for its good credit limit and offers not applicable in real-time. Although the review doesn't specifically mention cashback, it's possible that the card may offer some form of rewards or incentives.

3. **Travel Rewards:** HDFC INFINIA was considered by a customer looking for a premium feel with travel acceptance (Master/Visa). This card might be suitable for someone who travels frequently and wants a card that can keep up with their needs.

Considering these factors, I would recommend the following cards:

* SBI ELITE: For its low interest rate and potential international usage.
* HDFC INFINIA: For its premiu

In [12]:
# Example query: restaurant offers and discounts.
query_rag('I am totally foodie. I would like to have a credit card that has offers or discounts for restaurants.')

Response: As a card advisor expert, I'll analyze the reviews and suggest cards based on the customer's expectations.

From the reviews provided:

* HDFC JET PRIVILEGE is not known for its dining offers (only instant loans and good credit limit).
* Kotak Pvr Gold has great dining and entertainment category rewards, but limited to only 10% of value spent, up to a maximum of ₹600 per month.
* AXIS MILES AND MORE cards have varying opinions on the credit limit, interest rate, and cashback offers. However, none specifically mention restaurant-related benefits.

Considering the customer's expectation: "I am totally foodie. I would like to have a credit card that has offers or discounts for restaurants."

Based on this, I would recommend Kotak Pvr Gold as it provides 10% of value spent on Dining and Entertainment categories, which aligns with the customer's interest.

Please note that these recommendations are based solely on the provided reviews and may not include all aspects of a credit 