In [None]:
!pip install langchain langchain-community faiss-cpu sentence-transformers




In [None]:
import json
from langchain_core.documents import Document

# Read the exported records; the code below indexes each record by its
# "content" and "metadata" keys.
with open(r"/content/documents_for_embedding.json", "r", encoding="utf-8") as source_file:
    data = json.load(source_file)

# Wrap every record in a LangChain Document (metadata is passed through as-is;
# the record "id" is not copied here).
documents = []
for record in data:
    documents.append(
        Document(page_content=record["content"], metadata=record["metadata"])
    )

# Last expression of the cell: shows how many documents were built.
len(documents)

117

In [None]:
from langchain_core.documents import Document

# Rebuild `documents` from the already-loaded `data`, this time copying each
# record's "id" into the Document metadata so it can be printed after retrieval.
documents = [
    Document(
        page_content=record["content"],
        metadata={**record["metadata"], "id": record["id"]},
    )
    for record in data
]

print(len(documents))

117


In [None]:
import os
from getpass import getpass

# Never write the key into the notebook. Reuse a key that is already present in
# the environment; otherwise ask for it interactively (getpass does not echo the
# input, and the value is not stored in the saved notebook).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

In [None]:
!pip install langchain-openai
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small"
)



In [None]:
from langchain_community.vectorstores import FAISS

# Embed every Document with `embeddings` and collect the vectors in a FAISS
# vector store.
vectorstore = FAISS.from_documents(documents=documents, embedding=embeddings)

In [None]:
# Persist the index to a local folder so later cells can reload it.
vectorstore.save_local(folder_path="compliance_faiss_index")


In [None]:
!pip install langchain langchain-openai faiss-cpu




In [None]:
import json
from langchain_core.documents import Document

# Load the records from the working directory (relative path).
with open("documents_for_embedding.json", "r", encoding="utf-8") as json_file:
    raw_docs = json.load(json_file)

# One Document per record; the record "id" is merged into the metadata.
documents = []
for entry in raw_docs:
    documents.append(
        Document(
            page_content=entry["content"],
            metadata={**entry["metadata"], "id": entry["id"]},
        )
    )

# Last expression of the cell: number of documents built.
len(documents)

117

In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# No model argument is given, so the library's default embedding model is used.
# NOTE(review): an earlier cell built the index with "text-embedding-3-small";
# confirm which model the saved index is meant to use.
embeddings = OpenAIEmbeddings()

# Embed all documents and build the FAISS vector store.
vectorstore = FAISS.from_documents(documents=documents, embedding=embeddings)


In [None]:
# Persist the index; this writes to the same folder name used by the earlier
# save cell.
vectorstore.save_local(folder_path="compliance_faiss_index")


In [None]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Embedding client used to embed queries against the reloaded index.
embeddings = OpenAIEmbeddings()

# allow_dangerous_deserialization=True opts in to unpickling data from disk.
# Only do this for an index folder you created yourself (as this notebook does);
# never for files from an untrusted source.
vectorstore = FAISS.load_local(
    folder_path="compliance_faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)


In [None]:
# Quick retrieval check: fetch the 3 nearest clauses for a sample question.
docs = vectorstore.similarity_search(query="Can a broker accept unredacted Aadhaar?", k=3)

# Show the text of each hit.
for match in docs:
    print(match.page_content)


Where Aadhaar authentication is not required, registered intermediaries shall not accept Aadhaar documents unless the Aadhaar number is appropriately redacted or blacked out by the client.
Where Aadhaar authentication is not required, an All India Financial Institution shall ensure that the customer redacts or blacks out the Aadhaar number before submission.
Registered intermediaries shall not retain or process Aadhaar numbers during digital KYC where Aadhaar authentication is not required and shall ensure masking in accordance with PML Rules.


In [None]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS

# Query-side embedding client (library default model, no arguments given).
embeddings = OpenAIEmbeddings()

# Reload the saved index. The deserialization flag opts in to unpickling, which
# is only safe for an index folder produced by a trusted source.
vectorstore = FAISS.load_local(
    folder_path="compliance_faiss_index",
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)


In [None]:
# Retriever view over the index: plain similarity search returning 6 documents
# per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs=dict(k=6))


In [None]:
# Chat model used to answer questions; temperature 0 is requested for the
# least-random output.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")


In [None]:
from langchain_core.prompts import PromptTemplate

# Prompt with two placeholders, {context} and {question}; it instructs the model
# to answer only from the supplied context and to give a fixed reply otherwise.
prompt = PromptTemplate.from_template(
    template="""
You are a regulatory compliance assistant.
Answer ONLY using the context provided.
If the answer is not found, say: "Not found in regulations."

Context:
{context}

Question:
{question}
"""
)

In [None]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter


def _format_docs(retrieved_docs):
    """Join the page_content of the retrieved documents into one text block.

    Args:
        retrieved_docs: iterable of objects exposing a ``page_content`` string
            (the Documents returned by the retriever).

    Returns:
        A single string with one document's text per line; empty if nothing
        was retrieved.
    """
    return "\n".join(doc.page_content for doc in retrieved_docs)


# RAG pipeline: expects {"input": <question>}.
#   - "context": the question is sent to the retriever and the hits are
#     flattened to plain text. Without the formatting step the prompt would
#     receive the string form of a list of Document objects (metadata and repr
#     noise included) instead of the clause text.
#   - "question": the question is forwarded unchanged.
# The filled prompt goes to the chat model and the reply is reduced to a string.
rag_chain = (
    {
        "context": itemgetter("input") | retriever | _format_docs,
        "question": itemgetter("input"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Run the retriever on its own to confirm it returns documents.
docs = retriever.invoke(input="Is Aadhaar mandatory for KYC?")
# Last expression of the cell: number of documents retrieved.
len(docs)

6

In [None]:
# Print the text of each retrieved document, one per line.
for retrieved in docs:
    print(retrieved.page_content)


Aadhaar number shall not be mandatory for KYC purposes except where benefits under Section 7 of the Aadhaar Act are sought.
Registered intermediaries shall not mandate the use of Aadhaar for KYC, as its usage is purely voluntary for the client.
Registered intermediaries shall not insist on In-Person Verification where KYC has been completed using Aadhaar authentication or verified DigiLocker documents.
Where Aadhaar authentication is not required, an All India Financial Institution shall ensure that the customer redacts or blacks out the Aadhaar number before submission.
Registered intermediaries shall not accept or rely upon a PAN that has become inoperative due to non-linkage with Aadhaar for KYC compliance.
An All India Financial Institution shall not allow OTP-based e-KYC accounts to operate beyond one year without full identification.


In [None]:
!pip install langchain-openai
import os
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS

# Set the OpenAI API key (replace with your actual API key if running this cell in isolation)
os.environ["OPENAI_API_KEY"] = "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"

embeddings = OpenAIEmbeddings()

vectorstore = FAISS.load_local(
    "compliance_faiss_index",
    embeddings,
    allow_dangerous_deserialization=True
)

retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6}
)

docs = retriever.invoke(
    "Is Aadhaar mandatory for KYC?"
)

for d in docs:
    print(d.metadata["id"])
    print(d.page_content[:300])
    print("-"*50)


RBI_AIFI_KYC_2025_C23_EXP4
Aadhaar number shall not be mandatory for KYC purposes except where benefits under Section 7 of the Aadhaar Act are sought.
--------------------------------------------------
SEBI_KYC_MC_2023_C42_AADHAAR_VOLUNTARY
Registered intermediaries shall not mandate the use of Aadhaar for KYC, as its usage is purely voluntary for the client.
--------------------------------------------------
SEBI_KYC_MC_2023_C61_IPV_EXEMPTION
Registered intermediaries shall not insist on In-Person Verification where KYC has been completed using Aadhaar authentication or verified DigiLocker documents.
--------------------------------------------------
RBI_AIFI_KYC_2025_C23_EXP1
Where Aadhaar authentication is not required, an All India Financial Institution shall ensure that the customer redacts or blacks out the Aadhaar number before submission.
--------------------------------------------------
SEBI_KYC_MC_2023_C9_INOPERATIVE_PAN
Registered intermediaries shall not accept or rely upo

In [None]:
# Ask the RAG chain a question; the chain reads the question from the "input" key.
response = rag_chain.invoke(dict(input="Is Aadhaar mandatory for KYC under Indian regulations?"))

print(response)

Not found in regulations.


In [None]:
from langchain_core.prompts import PromptTemplate

# Second version of the prompt: same two placeholders ({context}, {question}),
# reworded instructions and an explicit "Answer:" cue at the end.
# NOTE(review): `rag_chain` was assembled in an earlier cell; rebinding `prompt`
# here does not by itself rebuild that chain.
prompt = PromptTemplate(
    template="""
You are a regulatory compliance assistant.

Answer the question using ONLY the regulatory clauses below.
If the answer is not present, say: "Not found in regulations."

Regulatory clauses:
{context}

Question:
{question}

Answer:
""",
    input_variables=["context", "question"],
)


In [None]:
# Retrieve the clauses for the sample question.
docs = retriever.invoke("Is Aadhaar mandatory for KYC?")

# Flatten the hits into plain text, one clause per line.
context = "\n".join(doc.page_content for doc in docs)

# Print a marker line followed by the context that will be shown to the model.
print("DEBUG CONTEXT ↓↓↓", context, sep="\n")


DEBUG CONTEXT ↓↓↓
Aadhaar number shall not be mandatory for KYC purposes except where benefits under Section 7 of the Aadhaar Act are sought.
Registered intermediaries shall not mandate the use of Aadhaar for KYC, as its usage is purely voluntary for the client.
Registered intermediaries shall not insist on In-Person Verification where KYC has been completed using Aadhaar authentication or verified DigiLocker documents.
Where Aadhaar authentication is not required, an All India Financial Institution shall ensure that the customer redacts or blacks out the Aadhaar number before submission.
Registered intermediaries shall not accept or rely upon a PAN that has become inoperative due to non-linkage with Aadhaar for KYC compliance.
An All India Financial Institution shall not allow OTP-based e-KYC accounts to operate beyond one year without full identification.


In [None]:
# Build the final prompt text by hand: fixed instructions plus the retrieved
# context. Note that `prompt` is a plain string from here on, not a
# PromptTemplate.
prompt = """
Based on the provided regulations, answer the question: "Is Aadhaar mandatory for KYC?"

- Use the provided context ONLY.
- If the context provides a conditional answer, explain those conditions.
- If the information is truly missing, say: Not found in regulations.

Context:
{context}
""".format(context=context)


In [None]:
# Send the hand-built prompt straight to the chat model (bypassing rag_chain)
# and print the text of the reply.
response = llm.invoke(input=prompt)
print(response.content)


Aadhaar is not mandatory for KYC purposes except in cases where benefits under Section 7 of the Aadhaar Act are sought. This means that while Aadhaar can be used for KYC, it is not a requirement for all clients; its use is voluntary unless specific benefits tied to the Aadhaar Act are being requested.
