## Setup and Import Libraries

In [1]:
import os
from langchain.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

import warnings
warnings.filterwarnings("ignore")

In [2]:
load_dotenv()

True

In [3]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
os.environ["HUGGINGFACE_API_KEY"] = os.getenv("HUGGINGFACE_API_KEY")

In [4]:
huggingface_embeddings = HuggingFaceEmbeddings(model="all-MiniLM-L6-v2")
openai_embeddings=OpenAIEmbeddings()
llm = init_chat_model(model="groq:gemma2-9b-it")

## Document Loading and Text Splitter

In [5]:
loader = TextLoader(file_path="sample.txt")
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents=documents)
docs

[Document(metadata={'source': 'sample.txt'}, page_content='LangChain is a flexible framework designed for developing applications powered by large language models (LLMs). It provides tools and abstractions to work with LLMs more effectively and includes components for prompt management, chains, memory, and agents.'),
 Document(metadata={'source': 'sample.txt'}, page_content='LangChain integrates with many third-party services such as OpenAI, Hugging Face, and Cohere. This enables developers to experiment with different models and optimize performance for specific use cases like summarization, question answering, or translation.'),
 Document(metadata={'source': 'sample.txt'}, page_content='Retrieval-Augmented Generation (RAG) is a powerful technique where external knowledge is retrieved and passed into the prompt to ground LLM responses. LangChain makes it easy to implement RAG using vector databases like FAISS, Chroma, and Pinecone.\nBM25 is a traditional sparse retrieval method that s

## Build a Retriever

In [7]:
huggingface_vector_store = FAISS.from_documents(
    documents=docs,
    embedding=huggingface_embeddings
)

huggingface_retriever = huggingface_vector_store.as_retriever(
    search_kwargs={"k":6}
)

In [8]:
openai_vector_store = FAISS.from_documents(
    documents=docs,
    embedding=openai_embeddings
)

openai_retriever = openai_vector_store.as_retriever(
    search_kwargs={"k":6}
)

In [9]:
huggingface_retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001C3BAE95100>, search_kwargs={'k': 6})

In [10]:
openai_retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001C3B9DF50A0>, search_kwargs={'k': 6})

## Creating Prompt Template and Chain

In [11]:
# Prompt Template
prompt = PromptTemplate.from_template("""
You are a helpful assistant. Your task is to rank the following documents from most to least relevant to the user's question.

User Question: "{question}"

Documents:
{documents}

Instructions:
- Think about the relevance of each document to the user's question.
- Return a list of document indices in ranked order, starting from the most relevant.

Output format: comma-separated document indices (e.g., 2,1,3,0,...)
""")

In [12]:
query="How can i use langchain to build an application with memory and tools?"

retrieved_docs = huggingface_retriever.invoke(input=query)
retrieved_docs

[Document(id='07b6ec3c-1602-44da-8d3c-82c83855049d', metadata={'source': 'sample.txt'}, page_content='LangChain supports tool integration including web search, calculators, and APIs, allowing LLMs to interact with external systems and respond more accurately to dynamic queries.\nMemory in LangChain enables context retention across multiple steps in a conversation or task, making the application more coherent and stateful.'),
 Document(id='d65dd18f-5b20-4c49-b541-5ab8ad3d0fc4', metadata={'source': 'sample.txt'}, page_content='LangChain is a flexible framework designed for developing applications powered by large language models (LLMs). It provides tools and abstractions to work with LLMs more effectively and includes components for prompt management, chains, memory, and agents.'),
 Document(id='082afbd9-ec48-4ffe-8ad5-9c1442b49e3c', metadata={'source': 'sample.txt'}, page_content='LangChain integrates with many third-party services such as OpenAI, Hugging Face, and Cohere. This enables 

In [13]:
chain = prompt| llm | StrOutputParser()

chain

PromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, template='\nYou are a helpful assistant. Your task is to rank the following documents from most to least relevant to the user\'s question.\n\nUser Question: "{question}"\n\nDocuments:\n{documents}\n\nInstructions:\n- Think about the relevance of each document to the user\'s question.\n- Return a list of document indices in ranked order, starting from the most relevant.\n\nOutput format: comma-separated document indices (e.g., 2,1,3,0,...)\n')
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001C3BADCAC30>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001C3BACDFE90>, model_name='gemma2-9b-it', model_kwargs={}, groq_api_key=SecretStr('**********'))
| StrOutputParser()

In [14]:
doc_lines = [f"{i+1}. {doc.page_content}" for i, doc in enumerate(retrieved_docs)]

formatted_docs = "\n".join(doc_lines)

In [15]:
doc_lines

['1. LangChain supports tool integration including web search, calculators, and APIs, allowing LLMs to interact with external systems and respond more accurately to dynamic queries.\nMemory in LangChain enables context retention across multiple steps in a conversation or task, making the application more coherent and stateful.',
 '2. LangChain is a flexible framework designed for developing applications powered by large language models (LLMs). It provides tools and abstractions to work with LLMs more effectively and includes components for prompt management, chains, memory, and agents.',
 '3. LangChain integrates with many third-party services such as OpenAI, Hugging Face, and Cohere. This enables developers to experiment with different models and optimize performance for specific use cases like summarization, question answering, or translation.',
 '4. FAISS is a popular library used for fast approximate nearest neighbor search in high-dimensional spaces. It supports both flat and comp

In [16]:
formatted_docs

'1. LangChain supports tool integration including web search, calculators, and APIs, allowing LLMs to interact with external systems and respond more accurately to dynamic queries.\nMemory in LangChain enables context retention across multiple steps in a conversation or task, making the application more coherent and stateful.\n2. LangChain is a flexible framework designed for developing applications powered by large language models (LLMs). It provides tools and abstractions to work with LLMs more effectively and includes components for prompt management, chains, memory, and agents.\n3. LangChain integrates with many third-party services such as OpenAI, Hugging Face, and Cohere. This enables developers to experiment with different models and optimize performance for specific use cases like summarization, question answering, or translation.\n4. FAISS is a popular library used for fast approximate nearest neighbor search in high-dimensional spaces. It supports both flat and compressed ind

In [None]:
response = chain.invoke(
    {
        "question":query,
        "documents":formatted_docs,
    },
)

response

"2,1,4, \t \n\n\nHere's why:\n\n* **Document 2** directly addresses the user's need to build an application with memory and tools by highlighting LangChain's components for those functionalities.\n* **Document 1**  focuses on  memory and tools, explaining how LangChain  implements them.\n* **Document 4** discusses agents, which are chains using LLMs to decide tool usage, directly relevant to the user's question.\n\n\n\n\nLet me know if you have any other questions!\n"

## Parsing and Rerank

In [24]:
indices = [int(x.strip()) - 1 for x in response.split(",") if x.strip().isdigit()]

indices

[1, 0, 3]

In [25]:
retrieved_docs

[Document(id='07b6ec3c-1602-44da-8d3c-82c83855049d', metadata={'source': 'sample.txt'}, page_content='LangChain supports tool integration including web search, calculators, and APIs, allowing LLMs to interact with external systems and respond more accurately to dynamic queries.\nMemory in LangChain enables context retention across multiple steps in a conversation or task, making the application more coherent and stateful.'),
 Document(id='d65dd18f-5b20-4c49-b541-5ab8ad3d0fc4', metadata={'source': 'sample.txt'}, page_content='LangChain is a flexible framework designed for developing applications powered by large language models (LLMs). It provides tools and abstractions to work with LLMs more effectively and includes components for prompt management, chains, memory, and agents.'),
 Document(id='082afbd9-ec48-4ffe-8ad5-9c1442b49e3c', metadata={'source': 'sample.txt'}, page_content='LangChain integrates with many third-party services such as OpenAI, Hugging Face, and Cohere. This enables 

In [26]:
reranked_docs = [retrieved_docs[index] for index in indices if 0 <= index < len(retrieved_docs)]

reranked_docs

[Document(id='d65dd18f-5b20-4c49-b541-5ab8ad3d0fc4', metadata={'source': 'sample.txt'}, page_content='LangChain is a flexible framework designed for developing applications powered by large language models (LLMs). It provides tools and abstractions to work with LLMs more effectively and includes components for prompt management, chains, memory, and agents.'),
 Document(id='07b6ec3c-1602-44da-8d3c-82c83855049d', metadata={'source': 'sample.txt'}, page_content='LangChain supports tool integration including web search, calculators, and APIs, allowing LLMs to interact with external systems and respond more accurately to dynamic queries.\nMemory in LangChain enables context retention across multiple steps in a conversation or task, making the application more coherent and stateful.'),
 Document(id='9a023184-d012-42cc-8fa2-121bed836c82', metadata={'source': 'sample.txt'}, page_content='FAISS is a popular library used for fast approximate nearest neighbor search in high-dimensional spaces. It

In [27]:
print("\nðŸ“Š Final Reranked Results:")

for i, document in enumerate(reranked_docs, 1):
    print(f"\nRank {i}:\n{document.page_content}")


ðŸ“Š Final Reranked Results:

Rank 1:
LangChain is a flexible framework designed for developing applications powered by large language models (LLMs). It provides tools and abstractions to work with LLMs more effectively and includes components for prompt management, chains, memory, and agents.

Rank 2:
LangChain supports tool integration including web search, calculators, and APIs, allowing LLMs to interact with external systems and respond more accurately to dynamic queries.
Memory in LangChain enables context retention across multiple steps in a conversation or task, making the application more coherent and stateful.

Rank 3:
FAISS is a popular library used for fast approximate nearest neighbor search in high-dimensional spaces. It supports both flat and compressed indexes, which makes it scalable for large document stores.
Agents in LangChain are chains that use LLMs to decide which tools to use and in what order. This makes them suitable for multi-step tasks like question answerin