## Maximal Marignal Retriever(MMR)

In [2]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings


In [35]:
# Document load
loader=TextLoader("mmr-rag-dataset.txt",encoding="utf-8")
docs=loader.load()
print(len(docs))
# Split documents
splitter=RecursiveCharacterTextSplitter(chunk_size=200,chunk_overlap=20)
chunks=splitter.split_documents(docs)
print(len(chunks))
#Embeding model
embedding_model=OpenAIEmbeddings()

# vectorstore Faiss
vectorstore=FAISS.from_documents(chunks,embedding_model)


1
19


In [45]:
# MMR Retriever

mmr_retriever=vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k":5}
)
mmr_retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000202D651F250>, search_type='mmr', search_kwargs={'k': 5})

In [46]:
# Prompt
from langchain_core.prompts import PromptTemplate
prompt=PromptTemplate.from_template(
    """
    Answer the given uery using only the given context:{context}
    if you have not sufficient information then just say i don't know.
    question:{input} 
        """
)
prompt

PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template="\n    Answer the given uery using only the given context:{context}\n    if you have not sufficient information then just say i don't know.\n    question:{input} \n        ")

In [47]:
# initialize llm
from langchain.chat_models import init_chat_model
llm=init_chat_model("openai:gpt-3.5-turbo")
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000202D66BA190>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000202D66BA3F0>, root_client=<openai.OpenAI object at 0x00000202D66CA5D0>, root_async_client=<openai.AsyncOpenAI object at 0x00000202D66CB020>, model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True)

In [48]:
# create document chain and retrieval chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
doc_chain=create_stuff_documents_chain(llm,prompt)
rag_chain=create_retrieval_chain(mmr_retriever,doc_chain)

In [49]:
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000202D651F250>, search_type='mmr', search_kwargs={'k': 5}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template="\n    Answer the given uery using only the given context:{context}\n    if you have not sufficient information then just say i don't know.\n    question:{input} \n        ")
            | ChatOpenAI(client=<openai.resources.chat.completions.comp

In [50]:
query = {"input": "How does LangChain support agents and memory?"}
response=rag_chain.invoke(query)
response

{'input': 'How does LangChain support agents and memory?',
 'context': [Document(id='b6fafd5e-5f6e-4077-8052-35b326e54b90', metadata={'source': 'mmr-rag-dataset.txt'}, page_content='LangChain supports conversational memory using ConversationBufferMemory and summarization memory with ConversationSummaryMemory.'),
  Document(id='2c45a5ac-9477-463f-acd0-adaf0d1ffe9d', metadata={'source': 'mmr-rag-dataset.txt'}, page_content='Agents in LangChain can use tools like calculators, search APIs, or custom functions based on the instructions they receive.'),
  Document(id='bd35d2d3-dc27-44ed-bc00-037288f9a3bc', metadata={'source': 'mmr-rag-dataset.txt'}, page_content='MMR (Maximal Marginal Relevance) retrieval in LangChain improves diversity by balancing relevance and redundancy.'),
  Document(id='7ac85a28-b003-41b2-aeec-e8ede3f25451', metadata={'source': 'mmr-rag-dataset.txt'}, page_content='LangChain agents can interact with external APIs and databases, enhancing the capabilities of LLM-powered

In [51]:
print(f"\n Answer:{response['answer']}\n")
print(f"Source documents are:\n")
for i,doc in enumerate(response['context']):
    print(f"\nDocument{i+1}:\n{doc.page_content}")


 Answer:LangChain supports agents with conversational memory using ConversationBufferMemory and summarization memory with ConversationSummaryMemory. Agents can also interact with external APIs and databases, use tools like calculators, search APIs, or custom functions, and utilize MMR retrieval for improved diversity. Additionally, Chroma is often used for embedding-based document storage and retrieval in LangChain.

Source documents are:


Document1:
LangChain supports conversational memory using ConversationBufferMemory and summarization memory with ConversationSummaryMemory.

Document2:
Agents in LangChain can use tools like calculators, search APIs, or custom functions based on the instructions they receive.

Document3:
MMR (Maximal Marginal Relevance) retrieval in LangChain improves diversity by balancing relevance and redundancy.

Document4:
LangChain agents can interact with external APIs and databases, enhancing the capabilities of LLM-powered applications.

Document5:
Chroma 

In [44]:
rag_chain.invoke({"input":"what is Machine learning"})

{'input': 'what is Machine learning',
 'context': [Document(id='2c5cedb6-ae8a-4a24-baa2-2482d52d571f', metadata={'source': 'mmr-rag-dataset.txt'}, page_content='LangChain is an open-source framework designed to simplify the development of applications using large language models (LLMs).'),
  Document(id='624e1bbc-aabd-43c8-a63a-0817fa4589ee', metadata={'source': 'mmr-rag-dataset.txt'}, page_content="The 'map-reduce' chain breaks up large documents, processes them separately, and then aggregates the outputs."),
  Document(id='81e63bb9-514a-4003-9134-c1fcb20445f0', metadata={'source': 'mmr-rag-dataset.txt'}, page_content='The framework supports integration with various vector databases like FAISS and Chroma for semantic retrieval.')],
 'answer': "I don't know."}