In [1]:
## imports ##
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader,DirectoryLoader
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain_community.vectorstores import Chroma
from typing import List
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
## document loaders ##
loader = DirectoryLoader(
    path='data',
    glob="*.txt",
    loader_cls=TextLoader,
    loader_kwargs={'encoding':'utf-8'}
)
documents = loader.load()
print(f"Number of documents loaded : {len(documents)}")
for index,doc in enumerate(documents):
    print(f"Metadata of the document : {doc.metadata}")
    print(f"Document content:\n{doc.page_content[0:100]}")
    print("-------------------------------")

Number of documents loaded : 3
Metadata of the document : {'source': 'data\\chatgpt_conversational_ai.txt'}
Document content:
ChatGPT and Conversational AI

ChatGPT is one of the most widely known conversational AI models deve
-------------------------------
Metadata of the document : {'source': 'data\\google_gemini_multimodal_ai.txt'}
Document content:
Google Gemini and the Rise of Multimodal AI

Google Gemini represents a new generation of large-scal
-------------------------------
Metadata of the document : {'source': 'data\\machine_learning_fundamentals.txt'}
Document content:
Machine Learning Fundamentals

Machine Learning (ML) is a branch of artificial intelligence focused 
-------------------------------


In [4]:
## document splitters ##
text_splitter = RecursiveCharacterTextSplitter(

    chunk_size = 500, ##max size of one chunk
    chunk_overlap = 50, ##chunk overlap character count
    length_function=len,
    separators=[" "]
)
chunks = text_splitter.split_documents(documents)
print(f"Number of chunks created : {len(chunks)}")
print(f"Content in chunk : {chunks[6].page_content[0:100]}")
print(f"Metadata in a chunk:{chunks[6].metadata}")


Number of chunks created : 12
Content in chunk : architecture but extends it with multimodal embeddings. These embeddings allow the model to represen
Metadata in a chunk:{'source': 'data\\google_gemini_multimodal_ai.txt'}


In [5]:
### chromadb vector store ###
persist_dir = "./chroma_db"
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2') ##embedding model
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    persist_directory=persist_dir,
    collection_name='rag_collection'
)
print(f"Vector store created with {vector_store._collection.count()} vectors")


Vector store created with 24 vectors


In [7]:
## testing the similarity search##

query = 'What capabilities does Google gemini have?'
sim_chunks = vector_store.similarity_search(query,k=3)
for index,chnk in enumerate(sim_chunks):
    print(f"chunk number-{index+1}")
    print(f"chunk metadata:\n{chnk.metadata}")
    print(f"Data in chunk:\n{chnk.page_content}")
    print("----------------------------")

chunk number-1
chunk metadata:
{'source': 'data\\google_gemini_multimodal_ai.txt'}
Data in chunk:
to systems capable of general intelligence. Gemini’s release positions Google as a key competitor to OpenAI’s GPT-4 and Anthropic’s Claude models. As multimodal AI continues to mature, we can expect breakthroughs in robotics, education, and accessibility—where machines can truly “see,” “hear,” and “understand” the world like humans do.
----------------------------
chunk number-2
chunk metadata:
{'source': 'data\\google_gemini_multimodal_ai.txt'}
Data in chunk:
to systems capable of general intelligence. Gemini’s release positions Google as a key competitor to OpenAI’s GPT-4 and Anthropic’s Claude models. As multimodal AI continues to mature, we can expect breakthroughs in robotics, education, and accessibility—where machines can truly “see,” “hear,” and “understand” the world like humans do.
----------------------------
chunk number-3
chunk metadata:
{'source': 'data\\google_gemini_multimo

### RAG PIPELINE 

In [18]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
llm_model = ChatGoogleGenerativeAI(model = 'gemini-2.0-flash',max_retries=2)

In [None]:
## create vector store retreiver ##
retriever = vector_store.as_retriever(
    search_kwargs = {'k':3} ##top3 similar chunks will be retrieved
)
## retriever acts as the frontend of the vector store or a link for the vector store ##

## system prompt ##
system_prompt = '''You are an intelligent assisstant, for question-answering tasks,
Use the retrieved context given below to answer the question in a more streamlined way.
Answer in only 3 sentences

Context : {context}
'''
prompt = ChatPromptTemplate.from_messages(
    [
        ("system",system_prompt),
        ("human","{input}")
    ]
)

In [25]:
document_chain = create_stuff_documents_chain(llm_model,prompt)
rag_pipeline = create_retrieval_chain(retriever,document_chain) ##linking the retreiver to the augmentation part##
rag_pipeline

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000027F38C14830>, search_kwargs={'k': 3}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='You are an intelligent assisstant, for question-answering tasks,\nUse the retrieved context given below to answer the questio

In [27]:
query = 'What are the capabilities of Chatgpt?'
response = rag_pipeline.invoke({'input':query})
print(f"Input Question: {query}")
print(f"Number of chunks retrieved: {len(response['context'])}")
print(f"Response:{response['answer']}")

Input Question: What are the capabilities of Chatgpt?
Number of chunks retrieved: 3
Response:ChatGPT is a conversational AI system designed to mimic human dialogue. It can answer questions, draft emails, and write code. Its strength lies in its ability to generalize from patterns in data, enabling it to handle a wide variety of topics without task-specific training.


### USING LCEL

Query -> Embeddings -> VectorStore -> Context ->Augmentation ->LLM Generation -> Response

In [30]:
def format_docs(docs):
    '''Creating a context from chunks'''
    return "\n\n".join(doc.page_content for doc in docs)


prompt = ChatPromptTemplate.from_template(
"""
You are an intelligent assisstant, for question-answering tasks,
Use the retrieved context given below to answer the question in a more streamlined way.
Answer in only 3 sentences

Context : {context}
Question : {question}

"""
)

In [33]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
rag_chain = (
    {"context":retriever | format_docs, "question": RunnablePassthrough()}
    | prompt 
    | llm_model
    | StrOutputParser()
)

## When using RunnablePassThrough - no need of using key value pair ##
rag_chain.invoke("What are ML fundamentals?")

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource exhausted. Please try again later. Please refer to https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429 for more details..


'Machine Learning (ML) is a field of artificial intelligence that focuses on creating systems capable of learning from data without explicit programming. It enables computers to identify patterns and make predictions based on experience. ML algorithms are categorized into supervised, unsupervised, and reinforcement learning.'

In [35]:
retriever.get_relevant_documents("What are ML fundamentals?")

[Document(metadata={'source': 'data\\machine_learning_fundamentals.txt'}, page_content='Machine Learning Fundamentals\n\nMachine Learning (ML) is a branch of artificial intelligence focused on building systems that learn from data rather than being explicitly programmed. The core idea is to enable computers to identify patterns and make predictions based on experience. ML algorithms can be broadly divided into supervised, unsupervised, and reinforcement learning.\n\nIn supervised learning, models are trained on labeled data, meaning that each training example has a known output.'),
 Document(metadata={'source': 'data\\machine_learning_fundamentals.txt'}, page_content='Machine Learning Fundamentals\n\nMachine Learning (ML) is a branch of artificial intelligence focused on building systems that learn from data rather than being explicitly programmed. The core idea is to enable computers to identify patterns and make predictions based on experience. ML algorithms can be broadly divided in

### Adding new files to Vectore store

In [39]:
## text loaders##
text_loader = TextLoader('data//convolutional_neural_networks.txt',encoding='utf-8')
data = text_loader.load()
print(f"Data Loaded : {data[0].page_content[0:100]}")
print(f"Metadata: {data[0].metadata}")

Data Loaded : Convolutional Neural Networks (CNNs)

Convolutional Neural Networks, or CNNs, are a class of deep le
Metadata: {'source': 'data//convolutional_neural_networks.txt'}


In [42]:
### chunking ##
new_doc_chunks = text_splitter.split_documents(data)
print(f"New chunks created : {len(new_doc_chunks)}")
print(f"Chunk example:\n{new_doc_chunks[0].page_content}")

New chunks created : 5
Chunk example:
Convolutional Neural Networks (CNNs)

Convolutional Neural Networks, or CNNs, are a class of deep learning models specifically designed for processing structured grid-like data such as images and videos. They are the backbone of modern computer vision systems, powering tasks like image classification, object detection, facial recognition, and medical image analysis.

The core idea behind CNNs is the use of convolutional layers that apply filters (also called kernels) over the input data to


In [43]:
vector_store.add_documents(new_doc_chunks)

['d7ad86fa-98a5-401e-8775-62fefc38cf35',
 '311b7ff4-7ad9-4445-a80b-21642f38b8be',
 '15abc462-1195-460b-ad6f-f3e11b8d1a93',
 '75fe8943-f5ed-41a8-a10c-66a4de7bb8e3',
 '920e9a84-3bac-41fe-875b-81c7af9fe3dd']

In [46]:
query = "What is the difference between Convolutional Neural Network and Transformer architecture?"
print("Response:",rag_chain.invoke(query))
print("----------------------")
print("Relevant chunks retrieved:",retriever.get_relevant_documents(query))

Response: CNNs excel at spatial understanding but struggle with sequential or contextual relationships, while Transformers are designed to handle such relationships. CNNs use convolutional layers to extract spatial features, whereas Transformers employ self-attention mechanisms to capture dependencies between elements in a sequence. This difference in architecture makes CNNs suitable for vision tasks and Transformers for natural language processing.
----------------------
Relevant chunks retrieved: [Document(metadata={'source': 'data//convolutional_neural_networks.txt'}, page_content='(also called kernels) over the input data to extract spatial features. Each filter captures specific patterns, such as edges, textures, or shapes. As data passes through multiple convolutional layers, CNNs learn hierarchical representations — from low-level details to high-level semantic features.\n\nA typical CNN architecture includes several key components: convolutional layers for feature extraction, p

### Conversational memory + RAG ##

In [50]:
from langchain.chains import create_history_aware_retriever
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import MessagesPlaceholder

In [59]:
instruction1 = """Given a chat history and the latest user question \
        which might reference context in the chat history, formulate a standalone question \
        which can be understood without the chat history. Do NOT answer the question, \
        just reformulate it if needed and otherwise return it as is."""

instruction_prompt = ChatPromptTemplate.from_messages([
        ("system", instruction1),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")])
### create history aware retriever ##
history_aware_retr = create_history_aware_retriever(
    llm_model, retriever, instruction_prompt
)

'''
Working :
-It replaces the input and chat_history placeholders in the prompt with specified values, creating a new ready-to-use prompt that essentially says "Take this chat history and this last input, and rephrase the last input in a way that anyone can understand it without seeing the chat history".
-It sends the new prompt to the LLM and receives a rephrased input.
-It then sends the rephrased input to the vector store retriever and receives a list of documents relevant to this rephrased input.
'''
history_aware_retr

RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000027F38C14830>, search_kwargs={'k': 3}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, T

In [61]:
system_prompt = '''You are an intelligent assisstant, for question-answering tasks,
Use the retrieved context given below to answer the question in a more streamlined way.
Answer in only 3 sentences

Context : {context}
'''
prompt = ChatPromptTemplate.from_messages(
    [
        ("system",system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human","{input}")
    ]
)

questn_answer_chain = create_stuff_documents_chain(
    llm_model,prompt
)
conversational_ragChain = create_retrieval_chain(

    history_aware_retr,questn_answer_chain
)

In [62]:
questn_answer_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['chat_history', 'context', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[l

In [63]:
conversational_ragChain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x0000027F38C14830>, search_kwargs={'k': 3}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='to

In [65]:
chat_history = []
response1 = conversational_ragChain.invoke(
    {"chat_history":chat_history,
     "input":"What is a CNN architecture?"}
)
response1

{'chat_history': [],
 'input': 'What is a CNN architecture?',
 'context': [Document(metadata={'source': 'data//convolutional_neural_networks.txt'}, page_content='(also called kernels) over the input data to extract spatial features. Each filter captures specific patterns, such as edges, textures, or shapes. As data passes through multiple convolutional layers, CNNs learn hierarchical representations — from low-level details to high-level semantic features.\n\nA typical CNN architecture includes several key components: convolutional layers for feature extraction, pooling layers for dimensionality reduction, activation functions (like ReLU) for'),
  Document(metadata={'source': 'data//convolutional_neural_networks.txt'}, page_content='reduction, activation functions (like ReLU) for non-linearity, and fully connected layers for decision-making. Pooling operations, such as max pooling, reduce computation and help achieve translation invariance. Batch normalization and dropout are often use

In [67]:
chat_history.extend([HumanMessage(content=response1['input']),
                     AIMessage(content=response1['answer'])])
response2 = conversational_ragChain.invoke(
    {"chat_history":chat_history,
     "input":"Where can it be used?"}
)
response2

{'chat_history': [HumanMessage(content='What is a CNN architecture?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='CNN architecture uses convolutional layers to extract features, pooling layers for dimensionality reduction, and activation functions for non-linearity. It also uses fully connected layers for decision-making. Architectures like ResNet have increased depth and complexity while improving performance.', additional_kwargs={}, response_metadata={})],
 'input': 'Where can it be used?',
 'context': [Document(metadata={'source': 'data//convolutional_neural_networks.txt'}, page_content='(also called kernels) over the input data to extract spatial features. Each filter captures specific patterns, such as edges, textures, or shapes. As data passes through multiple convolutional layers, CNNs learn hierarchical representations — from low-level details to high-level semantic features.\n\nA typical CNN architecture includes several key components: convolutional laye