In [46]:
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WikipediaLoader,TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate,SystemMessagePromptTemplate,HumanMessagePromptTemplate
from langchain_core.messages import HumanMessage,SystemMessage,AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableMap,RunnableLambda
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from pydantic import BaseModel,Field
from dotenv import load_dotenv
load_dotenv()

llm_model = ChatGoogleGenerativeAI(model = 'gemini-2.0-flash',max_retries=2)
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2') ##embedding model

In [8]:
## Load document and textsplitter ##
loader = WikipediaLoader(
    query='Mumbai',
    lang='en',
    load_max_docs=10
)
docs = loader.load()
print(f"Number of docs loaded:{len(docs)}")
print(f"Document content preview:\n{docs[0].page_content[0:200]}")

Number of docs loaded:10
Document content preview:
Mumbai ( muum-BY; Marathi: Mumbaī, pronounced [ˈmumbəi] ), also known as Bombay ( bom-BAY; its official name until 1995), is the capital city of the Indian state of Maharashtra. Mumbai is the financia


In [19]:
## text splitter / chunking ##
chunker = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50,
    separators=["\n"," "]
)
doc_chunks = chunker.split_documents(docs)
print(f"Number of chunks loaded: {len(doc_chunks)}")
print(f"Chunk page content:\n",doc_chunks[10].metadata)
print(f"Chunk page content:\n",doc_chunks[10].page_content)

Number of chunks loaded: 120
Chunk page content:
 {'title': 'Mumbai', 'summary': "Mumbai ( muum-BY; Marathi: Mumbaī, pronounced [ˈmumbəi] ), also known as Bombay ( bom-BAY; its official name until 1995), is the capital city of the Indian state of Maharashtra. Mumbai is the financial capital and the most populous city proper of India with an estimated population of 12.5 million (1.25 crore). Mumbai is the centre of the Mumbai Metropolitan Region, which is among the most populous metropolitan areas in the world with a population of over 23 million (2.3 crore). Mumbai lies on the Konkan coast on the west coast of India and has a deep natural harbour. In 2008, Mumbai was named an alpha world city. Mumbai has the highest number of billionaires out of any city in Asia.\nThe seven islands that constitute Mumbai were earlier home to communities of Marathi language-speaking Koli people. For centuries, the seven islands of Bombay were under the control of successive indigenous rulers before bein

In [44]:
### prompt templates ##
system_prompt_template = SystemMessagePromptTemplate.from_template(
    '''You are an intelligent assistant, which understands the human query nuances.
 Answer the human query in about 200 words, make it as descriptive as possible.'''
)

human_prompt_template = HumanMessagePromptTemplate.from_template(
    '''This is the human query : {query}'''
)
chat_prompt = ChatPromptTemplate.from_messages(
    [system_prompt_template,human_prompt_template]
)

In [28]:
## chromaDB ##
persist_dir = './chromadb'
vector_store = Chroma.from_documents(
    documents=doc_chunks,
    embedding=embedding_model,
    collection_name='hyde',
    persist_directory=persist_dir
)
print(f"Vector store created with {vector_store._collection.count()} vectors")

## vectore store as retriever ##
retriever = vector_store.as_retriever(search_type='mmr',
                                      search_kwargs={'k':3})

Vector store created with 240 vectors


In [51]:
## pydantic for grounding ##
class Response_structure(BaseModel):
    response:str =  Field(...,description="Answer the human query in about 200 words")

query = 'Why is Mumbai the Financial capital?'
format_message = chat_prompt.format_messages(query=query)
hyde_chain = chat_prompt | llm_model.with_structured_output(Response_structure)
hyde_chain.invoke(query).response

"Mumbai is considered the financial capital of India due to a confluence of historical, economic, and infrastructural factors. Historically, Mumbai's prominence as a major port city during the British Raj facilitated trade and commerce, attracting businesses and financial institutions. This legacy continues to shape its economic landscape. Economically, Mumbai boasts the headquarters of major Indian and multinational banks, financial institutions, and insurance companies. The presence of the Bombay Stock Exchange (BSE), one of Asia's oldest and largest stock exchanges, further solidifies its financial standing. Moreover, Mumbai's robust infrastructure, including its international airport, railway network, and port facilities, supports the seamless flow of capital and goods. The city's large and skilled workforce, coupled with its entrepreneurial spirit, fosters a dynamic business environment. Additionally, Mumbai's concentration of regulatory bodies, such as the Reserve Bank of India (

In [58]:
##### RAG PIPELINE #####
rag_instruction_prompt = ChatPromptTemplate.from_template(
'''
You are an intelligent assistant, based on following  retrieved context documents,
answer the question in a concise and streamlined manner in about 4-5 sentences.

Context : {context},
question : {query}
'''
)

rag_pipeline = (
    RunnableMap({
        "query": lambda x : x['input'],
        'context': lambda x : retriever.invoke(hyde_chain.invoke(x['input']).response)
    }) | rag_instruction_prompt | llm_model | StrOutputParser()
)
query = {'input':'Why is Mumbai the Financial capital?'}
rag_pipeline.invoke(query)

"Mumbai is the financial capital of India due to its significant contributions to the country's economy. It generates 6.16% of India's GDP and accounts for 25% of the nation's industrial output. The city handles 70% of maritime trade in India and 70% of capital transactions to India's economy. Mumbai houses important financial institutions, corporate headquarters, and the Bombay Stock Exchange, solidifying its status as a major center for commerce and finance."