In [None]:
#%pip install langchain langchain-openai langchain-community chromadb sentence-transformers beautifulsoup4 requests faiss-cpu euri python-dotenv

In [None]:
# Imports (install the packages from the pip cell above first if any are missing)

import os
import requests
from bs4 import BeautifulSoup
from pathlib import Path

from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Set your Euri API key (EURI_API_KEY / EURI_BASE_URL, e.g. in a local .env file)
load_dotenv()

# These lookups used to be evaluated and thrown away, so the Euri credentials
# never reached the client. Keep the values and hand them over explicitly.
EURI_API_KEY = os.getenv("EURI_API_KEY")
EURI_BASE_URL = os.getenv("EURI_BASE_URL")

# Pass only what is actually set; with neither variable present the client is
# constructed exactly as before and falls back to its own default configuration.
llm_kwargs = {}
if EURI_API_KEY:
    llm_kwargs["api_key"] = EURI_API_KEY
if EURI_BASE_URL:
    llm_kwargs["base_url"] = EURI_BASE_URL

llm = ChatOpenAI(model="gpt-4.1-nano", temperature=0, **llm_kwargs)


In [None]:
def scrape_bom_loan_pages(start_urls, timeout=10):
    """Download each URL and return the visible text of every page that loaded.

    Args:
        start_urls: Iterable of page URLs to fetch.
        timeout: Seconds to wait on each request before giving up on that URL.

    Returns:
        A list of strings, one per page that answered with HTTP 200, in the
        same order as ``start_urls``. URLs that fail are reported on stdout
        and skipped.
    """
    documents = []
    for url in start_urls:
        try:
            # Without a timeout a single unresponsive host blocks the cell forever.
            res = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            # One unreachable page must not abort the rest of the scrape.
            print(f"❌ Failed to fetch {url}: {exc}")
            continue

        if res.status_code != 200:
            print(f"❌ Failed to fetch {url}")
            continue

        soup = BeautifulSoup(res.text, "html.parser")
        # One text fragment per line, each fragment stripped of surrounding whitespace.
        documents.append(soup.get_text(separator="\n", strip=True))
    return documents


# Bank of Maharashtra loan-product pages to pull text from (order preserved).
start_urls = [
    "https://bankofmaharashtra.in/mahabank-personalloan-scheme-for-all",
    "https://bankofmaharashtra.in/personal-banking/loans/personal-loan",
    "https://bankofmaharashtra.in/personal-banking/loans/home-loan",
    "https://bankofmaharashtra.in/msme-large-credit",
    "https://bankofmaharashtra.in/retail-loans",
    "https://bankofmaharashtra.in/model-education-loan-scheme",
    "https://bankofmaharashtra.in/collateral-free-term-loan-facility",
    "https://bankofmaharashtra.in/maha-super-housing-loan-scheme-for-construction-acquiring",
    "https://bankofmaharashtra.in/maha-super-flexi-housing-loan-scheme",
    "https://bankofmaharashtra.in/maha-super-housing-loan-scheme-for-purchase-plot-construction-thereon",
    "https://bankofmaharashtra.in/pradhan-mantri-mudra-yojana",
    "https://bankofmaharashtra.in/personal-loan-for-salaried-customers",
    "https://bankofmaharashtra.in/loan-scheme-for-corporates",
    "https://bankofmaharashtra.in/mutual-credit-guarantee-scheme",
]

# Fetch every page; only the pages that loaded end up in raw_docs.
raw_docs = scrape_bom_loan_pages(start_urls)
print(f"✅ Scraped {len(raw_docs)} pages")


✅ Scraped 14 pages


In [25]:
import re
from pathlib import Path
from langchain_community.document_loaders import TextLoader

# Footer / navigation boilerplate to drop. Each pattern swallows the rest of its
# line, because '.' does not match a newline.
BOILERPLATE_PATTERNS = [
    re.compile(pat, flags=re.IGNORECASE)
    for pat in (
        r"©.*Bank of Maharashtra.*",
        r"Follow us on.*",
        r"Back to Top.*",
    )
]

# Typographic quotes and dashes -> plain ASCII equivalents.
PUNCTUATION_MAP = str.maketrans({"“": '"', "”": '"', "’": "'", "–": "-", "—": "-"})


def _normalize_whitespace(text):
    """Collapse blank-line runs to one blank line, space/tab runs to one space, then strip."""
    text = re.sub(r'\n\s*\n+', '\n\n', text)
    text = re.sub(r'[ \t]+', ' ', text)
    return text.strip()


def clean_page_text(text):
    """Return ``text`` with normalised whitespace/punctuation and boilerplate lines removed.

    Whitespace is normalised twice on purpose: once before the boilerplate
    patterns run (so they see single-spaced text, as before) and once after,
    because deleting a boilerplate line leaves an empty line or trailing
    whitespace behind. Without the second pass a page consisting only of
    boilerplate would come out as whitespace and still count as a document.
    """
    text = _normalize_whitespace(text).translate(PUNCTUATION_MAP)
    for pattern in BOILERPLATE_PATTERNS:
        text = pattern.sub("", text)
    return _normalize_whitespace(text)


# Keep only pages that still have content after cleaning.
cleaned_docs = [cleaned for cleaned in map(clean_page_text, raw_docs) if cleaned]

print(f"✅ Cleaned {len(cleaned_docs)} documents (from {len(raw_docs)})")

# The following cell reads `raw_docs`, so the cleaned pages are published under that name.
raw_docs = cleaned_docs


✅ Cleaned 14 documents (from 14)


In [26]:
import pickle
import numpy as np

# Write every page in raw_docs to bom_data/page_<n>.txt and read it back through
# TextLoader, gathering whatever the loader returns into all_docs.
data_dir = Path("bom_data")
data_dir.mkdir(exist_ok=True)

all_docs = []
for idx, page_text in enumerate(raw_docs):
    page_file = data_dir / f"page_{idx}.txt"
    page_file.write_text(page_text, encoding="utf-8")
    all_docs.extend(TextLoader(str(page_file), encoding="utf-8").load())

print(f"✅ Loaded {len(all_docs)} docs")

✅ Loaded 14 docs


In [28]:
# FAISS lives in langchain_community; the old `langchain.vectorstores` path is a
# deprecated re-export. The notebook already depends on langchain_community.
from langchain_community.vectorstores import FAISS

# Chunking: pieces of up to 300 characters with 50 characters of overlap, split
# on paragraph breaks first, then line breaks, then sentence ends.
# NOTE(review): there is no " " / "" fallback separator, so a stretch of text
# longer than chunk_size that contains none of these separators is presumably
# kept as one oversized chunk - confirm that this is intended.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
    separators=["\n\n", "\n", ". "]
)
splits = text_splitter.split_documents(all_docs)
print(f"✅ Created {len(splits)} chunks")

# Embeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# FAISS vectorstore built from the embedded chunks
vectorstore = FAISS.from_documents(
    splits,
    embedding_model
)
print("✅ FAISS index created")

# Save the FAISS index to the local "faiss_index_bom" directory
vectorstore.save_local("faiss_index_bom")
print("✅ FAISS index saved locally")

✅ Created 715 chunks
✅ FAISS index created
✅ FAISS index saved locally


In [30]:
# Reuse the `llm` client created in the configuration cell instead of building a
# second ChatOpenAI with the same model/temperature here: a duplicate silently
# shadows whatever that cell configured and has to be kept in sync by hand.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    # k=4: number of chunks requested from the vector store per question.
    retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
)


In [31]:
# Sample questions used to smoke-test the retrieval chain.
questions = [
    "What are the interest rates for a Bank of Maharashtra home loan?",
    "What is the maximum tenure for a personal loan if my salary account is with the bank?",
    "Tell me about the Maha Super Flexi Housing Loan Scheme.",
    "Are there any processing fee concessions for women or defence personnel on home loans?"
]

for q in questions:
    print("Q:", q)
    # `Chain.run` is deprecated; `invoke` takes the question under the "query"
    # key and returns a dict whose "result" entry holds the answer text.
    response = qa_chain.invoke({"query": q})
    print("A:", response["result"])
    print("-" * 50)


Q: What are the interest rates for a Bank of Maharashtra home loan?
A: The interest rate for a Bank of Maharashtra home loan is 7.35% per annum.
--------------------------------------------------
Q: What is the maximum tenure for a personal loan if my salary account is with the bank?
A: The maximum tenure for a personal loan if your salary account is with the bank is up to 30 years or until the borrower reaches the age of 75 years, whichever is earlier.
--------------------------------------------------
Q: Tell me about the Maha Super Flexi Housing Loan Scheme.
A: The Maha Super Flexi Housing Loan Scheme is a term loan linked with a savings account offered by the Bank of Maharashtra. It provides liquidity and interest relief for individuals looking to finance the purchase or construction of a new or existing house/flat, as well as for the extension of an existing house/flat. The scheme is suitable for various purposes including the purchase of a plot and construction thereon or for con