In [12]:
import os
import warnings
from dotenv import load_dotenv

os.environ['KMP_DUPLICATE_LIB_OK']="True"
warnings.filterwarnings('ignore')
load_dotenv()

True

### Document Loading

In [3]:
from langchain_community.document_loaders import PyMuPDFLoader


pdf_list = []

for root,dir,files in os.walk("data"):
    for file in files:
        if file.endswith("pdf"):
            pdf_list.append(os.path.join(root,file))

In [4]:
loader = PyMuPDFLoader(pdf_list[0])
pdf = loader.load()
pdf[0].page_content

'Risk Management Systems in Banks\nIntroduction\nBanks in the process of financial intermediation are confronted with various kinds of financial\nand non-financial risks viz., credit, interest rate, foreign exchange rate, liquidity, equity price,\ncommodity price, legal, regulatory, reputational, operational, etc. These risks are highly\ninterdependent and events that affect one area of risk can have ramifications for a range of other\nrisk categories. Thus, top management of banks should attach considerable importance to\nimprove the ability to identify, measure, monitor and control the overall level of risks\nundertaken.\nThe broad parameters of risk management function should encompass:\ni) \norganisational structure;\nii) \ncomprehensive risk measurement approach;\niii) \nrisk management policies approved by the Board which should be consistent with the\nbroader business strategies, capital strength, management expertise and overall\nwillingness to assume risk;\niv) \nguidelines an

In [5]:
docs = []

for pdf in pdf_list:
    pdf_loader = PyMuPDFLoader(pdf)
    temp = pdf_loader.load()
    docs.extend(temp)

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

chunks = text_splitter.split_documents(docs)


In [8]:
import tiktoken

encoding = tiktoken.encoding_for_model("gpt-4o-mini")

len(encoding.encode(chunks[0].page_content)),len(encoding.encode(docs[0].page_content))

(186, 654)

### Document vector embeddings

In [9]:
from langchain_ollama import OllamaEmbeddings

import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [15]:
base_url= "127.0.0.1:11434"

embeddings = OllamaEmbeddings(model='nomic-embed-text', base_url=base_url)

In [16]:
vector = embeddings.embed_query("Hello world")

index = faiss.IndexFlatL2(len(vector))
index.ntotal, index.d

(0, 768)

In [18]:
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)

In [19]:
ids = vector_store.add_documents(documents=chunks)

In [20]:
ids = vector_store.add_documents(documents=chunks)

### Retreival

In [21]:
question = "What are the typical financial risks?"

docs = vector_store.search(query=question, k=5, search_type='similarity')

In [22]:
docs

[Document(id='05701300-a540-477c-ac29-80c364e0eead', metadata={'producer': 'Acrobat PDFWriter 3.02 for Windows', 'creator': 'Microsoft Word', 'creationdate': 'Wednesday, October 20, 1999 2:28:51 PM', 'source': 'data\\financial risk.pdf', 'file_path': 'data\\financial risk.pdf', 'total_pages': 25, 'format': 'PDF 1.2', 'title': 'guide-rms1.PDF', 'author': 'MAGUS', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'modDate': '', 'creationDate': 'Wednesday, October 20, 1999 2:28:51 PM', 'page': 1}, page_content='3. Credit Risk\n3.1 General\n3.1.1  Lending involves a number of risks. In addition to the risks related to creditworthiness of\nthe counterparty, the banks are also exposed to interest rate, forex and country risks.\n3.1.2  Credit risk or default risk involves inability or unwillingness of a customer or counterparty\nto meet commitments in relation to lending, trading, hedging, settlement and other financial'),
 Document(id='80b0d64a-ffb8-4ff2-848c-f4239fe00039', metada

In [23]:
db_name = "financial_operations_risk_guidelines"

vector_store.save_local(db_name)

In [24]:
import os
import warnings
warnings.filterwarnings("ignore")

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

load_dotenv()

True

In [25]:
from langchain_ollama import OllamaEmbeddings

import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [41]:
base_url= "127.0.0.1:11434"
embeddings = OllamaEmbeddings(model='nomic-embed-text', base_url=base_url)

db_name = r"financial_operations_risk_guidelines"

vector_store = FAISS.load_local(db_name, embeddings=embeddings, allow_dangerous_deserialization=True)

In [27]:
docs = vector_store.search(query=question, k=5, search_type='similarity')

In [28]:
question = "how to avoid risk in operations?"

retriever = vector_store.as_retriever(search_type = "similarity",
                                      search_kwargs ={'k':3})

retriever.invoke(question)

[Document(id='58c4fd06-2259-4403-aac4-e5a5fafbc127', metadata={'producer': 'Acrobat PDFWriter 3.02 for Windows', 'creator': 'Microsoft Word', 'creationdate': 'Wednesday, October 20, 1999 2:28:51 PM', 'source': 'data\\financial risk.pdf', 'file_path': 'data\\financial risk.pdf', 'total_pages': 25, 'format': 'PDF 1.2', 'title': 'guide-rms1.PDF', 'author': 'MAGUS', 'subject': '', 'keywords': '', 'moddate': '', 'trapped': '', 'modDate': '', 'creationDate': 'Wednesday, October 20, 1999 2:28:51 PM', 'page': 22}, page_content='23\n12.   Operational Risk\n12.1 Managing operational risk is becoming an important feature of sound risk management\npractices in modern financial markets in the wake of phenomenal increase in the volume of\ntransactions, high degree of structural changes and complex support systems.  The most\nimportant type of operational risk involves breakdowns in internal controls and corporate\ngovernance. Such breakdowns can lead to financial loss through error, fraud, or failur

### RAG with LLAMA and OLLAMA

In [29]:
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

from langchain import hub


In [32]:
# prompt = """"
# You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
# Question: {question} 
# Context: {context} 
# Answer:"""

prompt = """"
You are an assistant for question-answering tasks on Reserve Bank of India Finanacial and Operations Risk Guidelines. Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Answer in bullet points. Make sure your answer is relevant to the question and it is answered from context only.
Question: {question} 
Context: {context} 
Answer:"""
prompt = ChatPromptTemplate.from_template(prompt)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='"\nYou are an assistant for question-answering tasks on Reserve Bank of India Finanacial and Operations Risk Guidelines. Use the following pieces of retrieved context to answer the question.\nIf you don\'t know the answer, just say that you don\'t know.\nAnswer in bullet points. Make sure your answer is relevant to the question and it is answered from context only.\nQuestion: {question} \nContext: {context} \nAnswer:'), additional_kwargs={})])

In [31]:
llm = ChatOllama(model="llama3.2:latest", base_url=base_url)

llm.invoke("Hi")

AIMessage(content='How can I assist you today?', additional_kwargs={}, response_metadata={'model': 'llama3.2:latest', 'created_at': '2025-02-09T06:03:32.5835001Z', 'done': True, 'done_reason': 'stop', 'total_duration': 7548741100, 'load_duration': 6977200400, 'prompt_eval_count': 26, 'prompt_eval_duration': 369000000, 'eval_count': 8, 'eval_duration': 200000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-d1cad937-4fe6-40f7-a0f3-cd4bd5ee44ce-0', usage_metadata={'input_tokens': 26, 'output_tokens': 8, 'total_tokens': 34})

In [35]:
def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

context = format_docs(docs)
# print(context)

In [36]:
rag_chain = (
    {'context': retriever|format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [39]:
question = "how to avoid risk in operations?"
response = rag_chain.invoke(question)

print(response)

Here are some key points on how to avoid risk in operations from the context:

• Implementing effective internal controls and corporate governance can help prevent breakdowns that lead to financial loss through error, fraud, or failure to perform.
• Developing and regularly updating policies and procedures can mitigate operational risk.
• Ensuring special attention is paid to internal control activities when engaging in new activities, developing new products, entering unfamiliar markets, or engaging in businesses that are geographically distant from the head office.
• Being transparent about material operational risks and ensuring they align with the bank's overall business strategy and appetite for risk can help avoid retaining or self-insuring such risks.


In [40]:
question = "What are some of the pitfalls we should look for in financial risk?"
response = rag_chain.invoke(question)

print(response)

Here are some pitfalls related to financial risk that can be identified:

* Transaction risk or default risk in transactions
* Portfolio risk, including intrinsic and concentration risk
* Counterparty risk from non-transactional risks (e.g. credit risk arising from non-traditional relationships)
* Deficiencies in:
	+ Loan policies/administration
	+ Prudential credit concentration limits
	+ Lending limits for Loan Officers/Credit Committees
	+ Appraisal of borrowers' financial position
	+ Excessive dependence on collaterals
	+ Inadequate risk pricing
	+ Absence of loan review mechanism and post sanction surveillance


In [46]:
vector_db = FAISS.from_documents(docs, embeddings)

In [47]:
from langchain.chains import ConversationalRetrievalChain

qa = ConversationalRetrievalChain.from_llm(llm=llm, 
                                    chain_type="stuff",\
                                    retriever=vector_db.as_retriever())

In [49]:
qa.invoke({'question':question, 'chat_history':[]})

{'question': 'What are some of the pitfalls we should look for in financial risk?',
 'chat_history': [],
 'answer': "Based on the provided context, here are some potential pitfalls to watch out for in financial risk:\n\n1. **Lack of clear risk policies**: Insufficient or unclear risk management policies can lead to inadequate controls and a lack of transparency.\n2. **Inadequate credit assessment**: Failing to properly assess borrowers' financial positions can result in lending decisions that become unprofitable or even lead to defaults.\n3. **Excessive dependence on collaterals**: Over-reliance on collateral as a means of securing loans can lead to high risks if the collateral is insufficient or becomes illiquid during market downturns.\n4. **Inadequate risk pricing**: Failing to price risks accurately can result in inadequate premiums for credit-risky transactions, leading to excessive exposure to potential losses.\n5. **Lack of loan review mechanism and post-sanction surveillance**: