In [6]:
from langchain_community.vectorstores import FAISS
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_experimental.text_splitter import SemanticChunker

# Hugging Face model id of the sentence-transformers encoder; the resulting
# embedding object is handed to both the semantic chunker and the FAISS index.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [7]:
## Load the UTF-8 text files matching '*.txt' under 'data' ##
dir_loader = DirectoryLoader(
    path='data',
    glob='*.txt',
    loader_cls=TextLoader,
    loader_kwargs={'encoding':'utf-8'}
)
docs = dir_loader.load()

# Fail early with a readable message: the preview below indexes docs[0], which
# would otherwise surface as a bare IndexError when nothing was loaded.
if not docs:
    raise FileNotFoundError(
        "No documents matching '*.txt' were loaded from 'data'; "
        "check the working directory and the folder contents."
    )
print(f"Number of docs loaded:{len(docs)}")
print(f"Doc Page content: {docs[0].page_content[0:100]}")

## Split the documents with the embedding-based semantic chunker ##
sem_chunker = SemanticChunker(embedding_model)
doc_chunks = sem_chunker.split_documents(documents=docs)

# Same guard for the chunk preview; downstream cells also build indexes from
# doc_chunks, so an empty list is reported here rather than further along.
if not doc_chunks:
    raise ValueError(
        "SemanticChunker returned no chunks; are the loaded documents empty?"
    )
print(f"Number of chunks loaded: {len(doc_chunks)}")
print(f"Chunks Page content: {doc_chunks[0].page_content[0:100]}")


Number of docs loaded:2
Doc Page content: LangChain is a framework for building applications with LLMs.
Langchain provides modular abstraction
Number of chunks loaded: 4
Chunks Page content: LangChain is a framework for building applications with LLMs. Langchain provides modular abstraction


In [15]:
## Dense side: FAISS vector store built from the semantic chunks ##
TOP_K = 3  # number of hits requested from each underlying retriever
vector_store = FAISS.from_documents(doc_chunks, embedding_model)
dense_search_kwargs = {'k': TOP_K}
dense_retriever = vector_store.as_retriever(search_kwargs=dense_search_kwargs)

### Sparse side: BM25 retriever over the same chunks ###
sparse_retreiver = BM25Retriever.from_documents(doc_chunks)
sparse_retreiver.k = TOP_K

## Weighted ensemble of both retrievers ##
# Weights are listed in the same order as the retrievers:
# 0.80 for the dense (FAISS) retriever, 0.20 for BM25.
ensemble_members = [dense_retriever, sparse_retreiver]
ensemble_weights = [0.80, 0.20]
combined_retreiver = EnsembleRetriever(
    retrievers=ensemble_members,
    weights=ensemble_weights,
)



In [16]:
# Sample question sent through the weighted ensemble retriever.
query = "Is Mumbai a good city to live in?"
results = combined_retreiver.invoke(query)

# Bare trailing expression so the notebook displays the retrieved documents.
results

[Document(id='e282c493-36e1-4c3f-ae97-3333e9308c32', metadata={'source': 'data\\transformer_india_mumbai.txt'}, page_content='With its massive population of young professionals and engineers, India has become a hub for innovation and digital transformation. Government programs like Digital India and Make in India are accelerating technological adoption across industries. The growth of AI, fintech, and data analytics sectors reflects India’s ambition to lead in the digital era. Mumbai, India’s financial capital, is a city that never sleeps. Mumbai hosts major financial institutions, startups, and the country’s entertainment industry. Despite challenges like overcrowding and high living costs, Mumbai remains a land of opportunities and resilience. Its diverse culture, fast-paced lifestyle, and entrepreneurial spirit make Mumbai the beating heart of modern India. '),
 Document(id='504431ff-ac61-4913-9971-2ca3a849c8e1', metadata={'source': 'data\\langchain_intro.txt'}, page_content='The Ei