### Maximal Marginal Relevance (MMR): how can we pick results that are not only relevant to the query but also different from each other (i.e., remove redundancy)?

In [7]:
from langchain_community.vectorstores import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_core.documents import Document

In [8]:
!pip install faiss-cpu



In [9]:
# Small demo corpus: each sentence is wrapped in a Document so it can be
# embedded and indexed by the vector store.
documents = [
    Document(page_content=text)
    for text in (
        "LangChain helps developers build LLM applications easily.",
        "Chroma is a vector database optimized for LLM-based search.",
        "Embeddings convert text into high-dimensional vectors.",
        "OpenAI provides powerful embedding models.",
        "MMR helps you to get diverse results when doing similarity search",
    )
]


In [10]:
from langchain_community.vectorstores import FAISS

In [11]:
# Embedding model used to vectorize both the documents and the query.
# NOTE(review): presumably requires a reachable Ollama instance with the
# 'nomic-embed-text:latest' model already pulled — confirm before running.
embedding_model = OllamaEmbeddings(model='nomic-embed-text:latest')

In [12]:
# Embed every document with the embedding model and index the resulting
# vectors in a FAISS store.
vectorstore = FAISS.from_documents(documents, embedding_model)

In [13]:
# Expose the store as an MMR (maximal marginal relevance) retriever.
#   k           -> number of documents returned
#   lambda_mult -> relevance/diversity trade-off in [0, 1]; per the LangChain
#                  docs, 0 favours maximum diversity and 1 minimum diversity
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs=dict(k=3, lambda_mult=0.5),
)

In [14]:
# Run the query through the MMR retriever; `results` holds the documents it
# selected (at most k=3, as configured on the retriever).
query = 'what is langchain?'
results = retriever.invoke(query)

In [15]:
# Print each retrieved document on its own line; a Document renders as
# page_content='...'.
for doc in results:
    print(doc)

page_content='LangChain helps developers build LLM applications easily.'
page_content='Embeddings convert text into high-dimensional vectors.'
page_content='Chroma is a vector database optimized for LLM-based search.'
