In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Chat model used for text generation.
# NOTE(review): confirm this model ID is still available for the API key in use.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.0-pro",
)

# Embedding model used to vectorize document chunks and queries.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

In [3]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain.schema.document import Document

In [4]:
def load_documents(directory: str = "../data2", glob: str = "./*.txt"):
    """Load every text file matching ``glob`` under ``directory``.

    Args:
        directory: Folder handed to ``DirectoryLoader``. Defaults to
            ``"../data2"`` (the location this notebook originally hard-coded).
        glob: Pattern selecting which files to load. Defaults to
            ``"./*.txt"``.

    Returns:
        The result of ``DirectoryLoader.load()``; each matched file is read
        with ``TextLoader``.
    """
    document_loader = DirectoryLoader(directory, glob=glob, loader_cls=TextLoader)
    return document_loader.load()

def split_documents(documents: list[Document]):
    """Split documents into overlapping chunks.

    Chunks are at most 800 units long with an 80-unit overlap, where length
    is measured with the built-in ``len``. Separators are treated as literal
    strings, not regular expressions.

    Args:
        documents: The documents to split.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter_options = {
        "chunk_size": 800,
        "chunk_overlap": 80,
        "length_function": len,
        "is_separator_regex": False,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

def add_to_chroma(chunks: list[Document]):
    """Embed ``chunks`` and store them in the persistent "Chroma" directory.

    Each chunk is given a deterministic ID of the form ``"<source>:<n>"``,
    where ``<source>`` is the chunk's ``metadata["source"]`` and ``<n>`` is
    its position among the chunks sharing that source. Without explicit IDs
    the store assigns fresh random ones, so every re-run of the notebook
    would append another copy of the same chunks to the persisted collection.

    Args:
        chunks: Document chunks to index.

    Returns:
        The ``Chroma`` vector store built from the chunks.
    """
    # Running per-source counter keeps IDs unique within a single batch.
    next_index: dict[str, int] = {}
    chunk_ids = []
    for chunk in chunks:
        source = str(chunk.metadata.get("source", "unknown"))
        index = next_index.get(source, 0)
        next_index[source] = index + 1
        chunk_ids.append(f"{source}:{index}")

    # NOTE(review): if a source later yields fewer chunks, the surplus
    # higher-numbered entries from an earlier run remain in the store.
    db = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        ids=chunk_ids,
        persist_directory="Chroma",
    )
    return db

if __name__ == "__main__":
    # Indexing pipeline: load the raw files, ...
    documents = load_documents()
    # ... cut them into chunks, ...
    chunks = split_documents(documents)
    # ... and build the vector store that later cells query through `db`.
    db = add_to_chroma(chunks)

In [5]:
# Expose the vector store through the retriever interface. No search options
# are passed here, so the library's default retrieval settings apply.
retriever = db.as_retriever()

In [7]:
query = "what is BSE?"
# `invoke` is the supported retriever entry point; `get_relevant_documents`
# is deprecated in current langchain-core releases.
docs = retriever.invoke(query)

# Show only the top hit, and avoid an IndexError when nothing is retrieved
# (e.g. the store was built from an empty directory).
if docs:
    print(docs[0].metadata)
    print(docs[0].page_content)
else:
    print(f"No documents retrieved for query: {query!r}")

{'source': '..\\data2\\indian_economy.txt'}
This expanded dataset provides deeper insights into the industrial revival, key growth sectors, government actions, and broader economic trends shaping India’s economy.
