# load documents

In [1]:
import json
from langchain_core.documents import Document

In [2]:
with open("../data/scrappedFQA.json","r") as f:
    data = json.load(f)

In [3]:
data["data"][0]

{'query': 'Why Re-KYC is required?',
 'solution': 'KYC (Know Your Customer) details are updated while opening the Account.\nBased on RBI guidelines, Bank may ask for Re-KYC at specific intervals to keep the records of the Bank updated. If any of your personal or contact information has changed, they will updated in Bank Records through Re-KYC process.',
 'url': 'https://www.kotak.com/en/help-center/bank-account/kyc-and-re-kyc/what-is-re-kyc-and--why-is-it-needed-.html'}

In [4]:
documents = []

In [5]:
for doc in data["data"]:
    document =Document(
        page_content=f"{doc['query']}/n/n{doc['solution']}",
        metadata={"source":doc["url"]}
                     )
    documents.append(document)

In [6]:
len(documents)

1327

# load embedding model

In [7]:
! ollama list

NAME                       ID              SIZE      MODIFIED    
sqlcoder:latest            77ac14348387    4.1 GB    7 days ago     
nomic-embed-text:latest    0a109f422b47    274 MB    7 days ago     
llama3.2:3b                a80c4f17acd5    2.0 GB    5 weeks ago    


In [8]:
import ollama
embedd = ollama.embeddings(model='nomic-embed-text', prompt='The sky is blue because of rayleigh scattering')

In [9]:
#embedd.keys()

AttributeError: 'EmbeddingsResponse' object has no attribute 'keys'

In [10]:
len(embedd["embedding"])

768

# initialize  vector database

In [11]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model="nomic-embed-text:latest",
)

In [12]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [13]:
MONGODB_ATLAS_CLUSTER_URI = os.getenv("MONGODB_ATLAS_CLUSTER_URI")

In [14]:
MONGODB_ATLAS_CLUSTER_URI

'mongodb+srv://spynom:QXyWRGovSLDdelpz@cluster0.ng1p2.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0'

In [16]:
from langchain_mongodb import MongoDBAtlasVectorSearch
from pymongo import MongoClient

# initialize MongoDB python client
client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)

DB_NAME = "langchain_db"
COLLECTION_NAME = "langchain_vectorstores"
ATLAS_VECTOR_SEARCH_INDEX_NAME = "langchain-index-vectorstores"

MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]

vector_store = MongoDBAtlasVectorSearch(
    collection=MONGODB_COLLECTION,
    embedding=embeddings,
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    relevance_score_fn="cosine",
)

# Create vector search index on the collection
# Since we are using the default OpenAI embedding model (ada-v2) we need to specify the dimensions as 1536
#vector_store.create_vector_search_index(dimensions=768)

In [16]:
#vector_store.add_documents(documents=documents)

['675fc587703b34d883338a93',
 '675fc587703b34d883338a94',
 '675fc587703b34d883338a95',
 '675fc587703b34d883338a96',
 '675fc587703b34d883338a97',
 '675fc587703b34d883338a98',
 '675fc587703b34d883338a99',
 '675fc587703b34d883338a9a',
 '675fc587703b34d883338a9b',
 '675fc587703b34d883338a9c',
 '675fc587703b34d883338a9d',
 '675fc587703b34d883338a9e',
 '675fc587703b34d883338a9f',
 '675fc587703b34d883338aa0',
 '675fc587703b34d883338aa1',
 '675fc587703b34d883338aa2',
 '675fc587703b34d883338aa3',
 '675fc587703b34d883338aa4',
 '675fc587703b34d883338aa5',
 '675fc587703b34d883338aa6',
 '675fc587703b34d883338aa7',
 '675fc587703b34d883338aa8',
 '675fc587703b34d883338aa9',
 '675fc587703b34d883338aaa',
 '675fc587703b34d883338aab',
 '675fc587703b34d883338aac',
 '675fc587703b34d883338aad',
 '675fc587703b34d883338aae',
 '675fc587703b34d883338aaf',
 '675fc587703b34d883338ab0',
 '675fc587703b34d883338ab1',
 '675fc587703b34d883338ab2',
 '675fc587703b34d883338ab3',
 '675fc587703b34d883338ab4',
 '675fc587703b

In [17]:
vectorstore_retriever = vector_store.as_retriever(search_kwargs={"k": 3})

In [18]:
vectorstore_retriever.invoke("what is video kyc?")

[Document(metadata={'_id': '675fc587703b34d883338bee', 'source': 'https://www.kotak.com/en/help-center/811-account/video-kyc/video-kyc-features-and-eligibity/q1.html'}, page_content='What is video KYC?/n/nVideo KYC is a start-to-end journey that can be opted for to complete Full KYC for your Kotak811 account through online video verification. You will get a full-fledged Kotak811 account after your Video KYC is done.\nHere are some advantages of choosing Video KYC and getting your Full KYC Account:\n\xa0\nGet the full-fledged award-winning Kotak811 experience\nEnjoy unlimited banking with zero paperwork\nUnlimited account validity\nUnrestricted deposits & spends'),
 Document(metadata={'_id': '675fc587703b34d883338bf5', 'source': 'https://www.kotak.com/en/help-center/811-account/video-kyc/video-kyc-features-and-eligibity/q3.html'}, page_content='What are the advantages of video KYC?/n/nVideo KYC process can be carried out from the comfort of your home. All you need is a good internet con

In [19]:
from langchain_ollama import ChatOllama

llm = ChatOllama(
    model="llama3.2:3b",
    temperature=0,
    # other params...
)

In [20]:
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

In [21]:
contextualize_q_system_prompt = """
Given a chat history and the latest user question
which might reference context in the chat history,
formulate a standalone question which can be understood
without the chat history. Do NOT answer the question,
just reformulate it if needed and otherwise return it as is.
"""

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

history_aware_retriever = create_history_aware_retriever(
    llm, vectorstore_retriever, contextualize_q_prompt
)

In [22]:



system_prompt = (
    "You are a bank customer care bot who provide "
    "who provide solution to customer input"
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use five sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [23]:
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [25]:
response = rag_chain.invoke({"input": "what is video kyc?","chat_history":[],"context":history_aware_retriever})

In [26]:
response["answer"]

'Video KYC (Know Your Customer) is a digital verification process that allows you to complete the full Know Your Customer requirements for your Kotak811 account through online video verification, from the comfort of your home with good internet connection and camera quality.'

In [27]:
from langchain_core.messages import HumanMessage, AIMessage
chat_history =[
    HumanMessage(content="what is video kyc?"),
    AIMessage(content=f"{response['answer']}"),
]

In [28]:
response = rag_chain.invoke({"input": "how to do it?","chat_history":chat_history,"context":history_aware_retriever})

In [29]:
response["answer"]

"To do Video KYC, follow these steps:\n\n1. Fill up your details online.\n2. The Kotak811 officer will carry out basic checks through video interaction.\n3. Keep your original PAN card, a blank paper, and a blue/black pen handy.\n\nAlternatively, if you're unable to complete Video KYC due to slow internet or poor video quality, you can:\n\n* Book an appointment with a Kotak811 officer using the mobile banking app\n* Visit your nearest Kotak Mahindra branch for face-to-face verification."